From 7a4989ce10737b59dd232818fd438465c4ec3677 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Tue, 12 Feb 2019 15:45:58 +0100
Subject: [PATCH] fix: [Global Crawler] max filename size

---
Review notes (below the separator, so not part of the commit message):
 * bin/Global.py: a paste file name longer than 255 characters is replaced
   by its first 215 characters followed by a uuid4 and ".gz"; rreplace()
   swaps only the last occurrence of the name inside the paste path.
 * bin/Onion.py: the "Detected N .onion(s)" message is sent with
   publisher.info instead of publisher.warning when activate_crawler is set.
 * bin/torcrawler/TorSplashCrawler.py: the guard is now
   len(self.domains[0]) > 215 (it compared the string itself with 225,
   a TypeError on Python 3, and disagreed with the 215-character slice),
   and the `else` branch has its missing colon.
 * Line breaks and indentation were reconstructed from a
   whitespace-collapsed copy of this patch; check the context lines against
   the target tree before applying. The post-image blob id on the
   TorSplashCrawler.py index line predates the two fixes above.

 bin/Global.py                      | 20 ++++++++++----------
 bin/Onion.py                       |  8 ++++++--
 bin/torcrawler/TorSplashCrawler.py |  6 +++++-
 3 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/bin/Global.py b/bin/Global.py
index 32a3656b..2e4595eb 100755
--- a/bin/Global.py
+++ b/bin/Global.py
@@ -23,23 +23,17 @@ Requirements
 import base64
 import os
 import time
+import uuid
 from pubsublogger import publisher
 
 from Helper import Process
 
 import magic
-import io
-#import gzip
 
-'''
-def gunzip_bytes_obj(bytes_obj):
-    in_ = io.BytesIO()
-    in_.write(bytes_obj)
-    in_.seek(0)
-    with gzip.GzipFile(fileobj=in_, mode='rb') as fo:
-        gunzipped_bytes_obj = fo.read()
+def rreplace(s, old, new, occurrence):
+    li = s.rsplit(old, occurrence)
+    return new.join(li)
 
-    return gunzipped_bytes_obj.decode()'''
 
 if __name__ == '__main__':
     publisher.port = 6380
@@ -77,6 +71,12 @@ if __name__ == '__main__':
                 processed_paste = 0
             time.sleep(1)
             continue
+
+        file_name_paste = paste.split('/')[-1]
+        if len(file_name_paste)>255:
+            new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
+            paste = rreplace(paste, file_name_paste, new_file_name_paste, 1)
+
         # Creating the full filepath
         filename = os.path.join(os.environ['AIL_HOME'],
                                 p.config.get("Directories", "pastes"), paste)
diff --git a/bin/Onion.py b/bin/Onion.py
index 801118d5..d15875e4 100755
--- a/bin/Onion.py
+++ b/bin/Onion.py
@@ -198,8 +198,12 @@ if __name__ == "__main__":
                 print(len(domains_list))
                 if len(domains_list) > 0:
 
-                    publisher.warning('{}Detected {} .onion(s);{}'.format(
-                        to_print, len(domains_list),PST.p_path))
+                    if not activate_crawler:
+                        publisher.warning('{}Detected {} .onion(s);{}'.format(
+                            to_print, len(domains_list),PST.p_path))
+                    else:
+                        publisher.info('{}Detected {} .onion(s);{}'.format(
+                            to_print, len(domains_list),PST.p_path))
                     now = datetime.datetime.now()
                     path = os.path.join('onions', str(now.year).zfill(4),
                                         str(now.month).zfill(2),
diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py
index 99a4f3b3..dbe6bbd6 100644
--- a/bin/torcrawler/TorSplashCrawler.py
+++ b/bin/torcrawler/TorSplashCrawler.py
@@ -126,7 +126,11 @@ class TorSplashCrawler():
                     print('Connection to proxy refused')
             else:
 
-                UUID = self.domains[0]+str(uuid.uuid4())
+                #avoid filename too big
+                if len(self.domains[0]) > 215:
+                    UUID = self.domains[0][-215:]+str(uuid.uuid4())
+                else:
+                    UUID = self.domains[0]+str(uuid.uuid4())
                 filename_paste = os.path.join(self.crawled_paste_filemame, UUID)
                 relative_filename_paste = os.path.join(self.crawler_path, UUID)
                 filename_screenshot = os.path.join(self.crawled_screenshot, UUID +'.png')