AIL-framework/bin/torcrawler/tor_crawler.py

34 lines
1010 B
Python
Executable File

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import configparser
from TorSplashCrawler import TorSplashCrawler
if __name__ == '__main__':
if len(sys.argv) != 4:
print('usage:', 'tor_crawler.py', 'url', 'paste', 'super_father')
exit(1)
configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
if not os.path.exists(configfile):
raise Exception('Unable to find the configuration file. \
Did you set environment variables? \
Or activate the virtualenv.')
cfg = configparser.ConfigParser()
cfg.read(configfile)
splash_url = cfg.get("Crawler", "splash_url")
http_proxy = cfg.get("Crawler", "http_proxy")
crawler_depth_limit = cfg.getint("Crawler", "crawler_depth_limit")
url = sys.argv[1]
paste = sys.argv[2]
super_father = sys.argv[3]
crawler = TorSplashCrawler(splash_url, http_proxy, crawler_depth_limit)
crawler.crawl(url, paste, super_father)