From 759ec73f8445ffe1ddc0dc548e4efa8a0b211c87 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 15 Jun 2021 17:25:51 +0200 Subject: [PATCH 1/3] fix: [Splash_Manager errors] catch invalid response --- bin/lib/crawlers.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index 69cce642..b207e548 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -892,12 +892,17 @@ def ping_splash_manager(): update_splash_manager_connection_status(True) return True else: - res = req.json() - if 'reason' in res: - req_error = {'status_code': req.status_code, 'error': res['reason']} - else: - print(req.json()) - req_error = {'status_code': req.status_code, 'error': json.dumps(req.json())} + try: + res = req.json() + if 'reason' in res: + req_error = {'status_code': req.status_code, 'error': res['reason']} + else: + print(req.json()) + req_error = {'status_code': req.status_code, 'error': json.dumps(req.json())} + except json.decoder.JSONDecodeError: + print(req.status_code) + print(req.headers) + req_error = {'status_code': req.status_code, 'error': 'Invalid response'} update_splash_manager_connection_status(False, req_error=req_error) return False except requests.exceptions.ConnectionError: From ec727338e643a2ae5b665f068efa27985375a1e8 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 16 Jun 2021 10:06:04 +0200 Subject: [PATCH 2/3] fix: [crawlers] get_all_splash return type --- bin/lib/crawlers.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index b207e548..712ad28f 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -516,7 +516,7 @@ def api_set_nb_crawlers_to_launch(dict_splash_name): # TODO: check if is dict dict_crawlers_to_launch = {} all_splash = get_all_splash() - crawlers_to_launch = list(all_splash & set(dict_splash_name.keys())) + crawlers_to_launch = list(set(all_splash) & set(dict_splash_name.keys())) for splash_name in crawlers_to_launch: try: nb_to_launch = int(dict_splash_name.get(splash_name, 0)) @@ -984,26 +984,24 @@ def api_save_splash_manager_url_api(data): ## SPLASH ## def get_all_splash(r_list=False): res = r_serv_onion.smembers('all_splash') - if res: - if r_list: - return list(res) - else: - return res + if not res: + res = set() + if r_list: + return list(res) else: - return [] + return res def get_splash_proxy(splash_name): return r_serv_onion.hget('splash:metadata:{}'.format(splash_name), 'proxy') def get_splash_all_url(splash_name, r_list=False): res = r_serv_onion.smembers('splash:url:{}'.format(splash_name)) - if res: - if r_list: - return list(res) - else: - return res + if not res: + res = set() + if r_list: + return list(res) else: - return [] + return res def get_splash_name_by_url(splash_url): return r_serv_onion.get('splash:map:url:name:{}'.format(splash_url)) From 0e0a77a86df5f680fe0e1fdd61e1f5fdf8e1a117 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Wed, 16 Jun 2021 11:23:34 +0200 Subject: [PATCH 3/3] chg: [requirements] minimal version + remove old packages --- requirements.txt | 131 +++++++++++++++++++++++------------------------ 1 file changed, 64 insertions(+), 67 deletions(-) diff --git a/requirements.txt b/requirements.txt index 57f1a42e..8dde88c7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,90 +1,87 @@ pyail -pymisp -d4-pyclient +pymisp>=2.4.144 +d4-pyclient>=0.1.6 thehive4py +# Core redis==2.10.6 -#filemagic conflict with magic -crcmod -mmh3 -ssdeep - -pubsublogger -zmq -langid - -#Essential -pyzmq -dnspython -logbook -pubsublogger -textblob - -#Tokeniser -nltk - -html2text -yara-python - -#Crawler -scrapy -scrapy-splash -pycld3 - -#Graph -numpy -matplotlib -networkx -terminaltables -colorama -asciimatics +python-magic>0.4.15 +yara-python>4.0.2 # Hashlib crcmod -mmh3 -ssdeep -python-Levenshtein +mmh3>2.5 +ssdeep>3.3 -#Others -python-magic -pybloomfiltermmap -psutil -phonenumbers +# ZMQ +zmq +pyzmq>19.0.0 -ipython -texttable +# Logging +logbook +pubsublogger -flask -flask-login -bcrypt +# Tokeniser +nltk>3.4.5 +textblob>=0.15.3 -#DomainClassifier +# HTML +html2text>=2020.1.16 +beautifulsoup4>4.8.2 + +#Crawler +scrapy>2.0.0 +scrapy-splash>=0.7.2 + +# Languages +pycld3>0.20 + +#Graph +numpy>1.18.1 +matplotlib>3.2.1 +networkx>2.4 + +# ModuleInformation +asciimatics>=1.11.0 +psutil>=5.7.0 + +colorama>=0.4.4 +python-Levenshtein>=0.12.2 + +# DomainClassifier git+https://github.com/D4-project/BGP-Ranking.git/#egg=pybgpranking&subdirectory=client DomainClassifier -#Indexer requirements -whoosh -beautifulsoup4 +# Indexer +whoosh>=2.7.4 -ipaddress -pycountry==18.12.8 +# LibInjection bindings +pylibinjection>=0.2.4 -# To fetch Onion urls -PySocks +#Others +phonenumbers>8.12.1 -# decompress files -sflock +# Web +flask>1.1.2,<=1.1.4 +flask-login +bcrypt>3.1.6 -#ASN lookup requirements -#https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/adns-python/adns-python-1.2.1.tar.gz -https://github.com/trolldbois/python3-adns/archive/master.zip -https://github.com/trolldbois/python-cymru-services/archive/master.zip +# Tests +nose>=1.3.7 +coverage>=5.5 + +# # # # +PySocks>=1.7.1 +pycountry>=20.7.3 https://github.com/saffsd/langid.py/archive/master.zip -#LibInjection bindings -pylibinjection -# Graph -matplotlib + +##### Old packages +# texttable +#ASN lookup requirements +#https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/adns-python/adns-python-1.2.1.tar.gz +#https://github.com/trolldbois/python3-adns/archive/master.zip +#https://github.com/trolldbois/python-cymru-services/archive/master.zip