diff --git a/bin/Crawler.py b/bin/Crawler.py
index 86668973..50dbd097 100755
--- a/bin/Crawler.py
+++ b/bin/Crawler.py
@@ -287,9 +287,11 @@ if __name__ == '__main__':
 
     splash_name = crawlers.get_splash_name_by_url(splash_url)
     proxy_name = crawlers.get_splash_proxy(splash_name)
-    print(f'SPLASH Name: {splash_name}')
-    print(f'Proxy Name: {proxy_name}')
+    print(f'SPLASH Name: {splash_name}')
+    print(f'Proxy Name: {proxy_name}')
+    print(f'Crawler Type: {crawlers.get_splash_crawler_type(splash_name)}')
+
     #time.sleep(10)
     #sys.exit(0)
 
     #rotation_mode = deque(['onion', 'regular'])
@@ -330,7 +332,7 @@ if __name__ == '__main__':
                                  db=p.config.getint("ARDB_Onion", "db"),
                                  decode_responses=True)
 
-    faup = Faup()
+    faup = crawlers.get_faup()
 
     # get HAR files
     default_crawler_har = p.config.getboolean("Crawler", "default_crawler_har")
diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index 7d62287b..cb57ccd4 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -67,6 +67,10 @@ def is_valid_onion_domain(domain):
             return True
     return False
 
+# TEMP FIX
+def get_faup():
+    return faup
+
 ################################################################################
 
 # # TODO: handle prefix cookies
@@ -894,7 +898,7 @@ def get_splash_manager_session_uuid():
                 return res['session_uuid']
             else:
                 print(req.json())
-    except requests.exceptions.ConnectionError:
+    except (requests.exceptions.ConnectionError, requests.exceptions.MissingSchema):
         # splash manager unreachable
         update_splash_manager_connection_status(False)
 