diff --git a/bin/ApiKey.py b/bin/ApiKey.py index faf4b2d9..bab2745c 100755 --- a/bin/ApiKey.py +++ b/bin/ApiKey.py @@ -40,7 +40,7 @@ def search_api_key(message): print('found google api key') print(to_print) publisher.warning('{}Checked {} found Google API Key;{}'.format( - to_print, len(google_api_key), paste.p_path)) + to_print, len(google_api_key), paste.p_rel_path)) msg = 'infoleak:automatic-detection="google-api-key";{}'.format(filename) p.populate_set_out(msg, 'Tags') @@ -49,7 +49,7 @@ def search_api_key(message): print(to_print) total = len(aws_access_key) + len(aws_secret_key) publisher.warning('{}Checked {} found AWS Key;{}'.format( - to_print, total, paste.p_path)) + to_print, total, paste.p_rel_path)) msg = 'infoleak:automatic-detection="aws-key";{}'.format(filename) p.populate_set_out(msg, 'Tags') @@ -86,7 +86,7 @@ if __name__ == "__main__": if message is not None: - search_api_key(message) + search_api_key(message) else: publisher.debug("Script ApiKey is Idling 10s") diff --git a/bin/Attributes.py b/bin/Attributes.py index a29f34b3..74357065 100755 --- a/bin/Attributes.py +++ b/bin/Attributes.py @@ -43,8 +43,8 @@ if __name__ == "__main__": # FIXME why not all saving everything there. PST.save_all_attributes_redis() # FIXME Not used. - PST.store.sadd("Pastes_Objects", PST.p_path) + PST.store.sadd("Pastes_Objects", PST.p_rel_path) except IOError: - print("CRC Checksum Failed on :", PST.p_path) + print("CRC Checksum Failed on :", PST.p_rel_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/BankAccount.py b/bin/BankAccount.py index 06e86d06..cd58e3c3 100755 --- a/bin/BankAccount.py +++ b/bin/BankAccount.py @@ -67,7 +67,7 @@ def check_all_iban(l_iban, paste, filename): if(nb_valid_iban > 0): to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) publisher.warning('{}Checked found {} IBAN;{}'.format( - to_print, nb_valid_iban, paste.p_path)) + to_print, nb_valid_iban, paste.p_rel_path)) msg = 'infoleak:automatic-detection="iban";{}'.format(filename) p.populate_set_out(msg, 'Tags') @@ -113,7 +113,7 @@ if __name__ == "__main__": try: l_iban = iban_regex.findall(content) except TimeoutException: - print ("{0} processing timeout".format(paste.p_path)) + print ("{0} processing timeout".format(paste.p_rel_path)) continue else: signal.alarm(0) diff --git a/bin/Bitcoin.py b/bin/Bitcoin.py index 1b7694b7..da1fc22a 100755 --- a/bin/Bitcoin.py +++ b/bin/Bitcoin.py @@ -32,7 +32,7 @@ def decode_base58(bc, length): for char in bc: n = n * 58 + digits58.index(char) return n.to_bytes(length, 'big') - + def check_bc(bc): try: bcbytes = decode_base58(bc, 25) @@ -75,7 +75,7 @@ def search_key(content, message, paste): to_print = 'Bitcoin;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) publisher.warning('{}Detected {} Bitcoin private key;{}'.format( - to_print, len(bitcoin_private_key),paste.p_path)) + to_print, len(bitcoin_private_key),paste.p_rel_path)) if __name__ == "__main__": publisher.port = 6380 diff --git a/bin/Categ.py b/bin/Categ.py index cf78f90f..3ebc42ea 100755 --- a/bin/Categ.py +++ b/bin/Categ.py @@ -89,16 +89,10 @@ if __name__ == "__main__": paste = Paste.Paste(filename) content = paste.get_p_content() - #print('-----------------------------------------------------') - #print(filename) - #print(content) - #print('-----------------------------------------------------') - for categ, pattern in tmp_dict.items(): found = set(re.findall(pattern, content)) if len(found) >= 
matchingThreshold: - msg = '{} {}'.format(paste.p_path, len(found)) - #msg = " ".join( [paste.p_path, bytes(len(found))] ) + msg = '{} {}'.format(paste.p_rel_path, len(found)) print(msg, categ) p.populate_set_out(msg, categ) @@ -106,4 +100,4 @@ if __name__ == "__main__": publisher.info( 'Categ;{};{};{};Detected {} as {};{}'.format( paste.p_source, paste.p_date, paste.p_name, - len(found), categ, paste.p_path)) + len(found), categ, paste.p_rel_path)) diff --git a/bin/Credential.py b/bin/Credential.py index 7f665227..417b30eb 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -97,7 +97,7 @@ if __name__ == "__main__": if sites_set: message += ' Related websites: {}'.format( (', '.join(sites_set)) ) - to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_path) + to_print = 'Credential;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, message, paste.p_rel_path) print('\n '.join(creds)) diff --git a/bin/CreditCards.py b/bin/CreditCards.py index 260d1345..a7921a6e 100755 --- a/bin/CreditCards.py +++ b/bin/CreditCards.py @@ -77,9 +77,9 @@ if __name__ == "__main__": paste.p_source, paste.p_date, paste.p_name) if (len(creditcard_set) > 0): publisher.warning('{}Checked {} valid number(s);{}'.format( - to_print, len(creditcard_set), paste.p_path)) + to_print, len(creditcard_set), paste.p_rel_path)) print('{}Checked {} valid number(s);{}'.format( - to_print, len(creditcard_set), paste.p_path)) + to_print, len(creditcard_set), paste.p_rel_path)) #Send to duplicate p.populate_set_out(filename, 'Duplicate') #send to Browse_warning_paste @@ -89,7 +89,7 @@ if __name__ == "__main__": msg = 'infoleak:automatic-detection="credit-card";{}'.format(filename) p.populate_set_out(msg, 'Tags') else: - publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_path)) + publisher.info('{}CreditCard related;{}'.format(to_print, paste.p_rel_path)) else: publisher.debug("Script creditcard is idling 1m") time.sleep(10) diff --git a/bin/Decoder.py b/bin/Decoder.py index abbf760b..fa18e5e6 100755 --- a/bin/Decoder.py +++ b/bin/Decoder.py @@ -229,7 +229,7 @@ if __name__ == '__main__': except TimeoutException: encoded_list = [] p.incr_module_timeout_statistic() # add encoder type - print ("{0} processing timeout".format(paste.p_path)) + print ("{0} processing timeout".format(paste.p_rel_path)) continue else: signal.alarm(0) diff --git a/bin/DomClassifier.py b/bin/DomClassifier.py index aed87a55..1ae5ba13 100755 --- a/bin/DomClassifier.py +++ b/bin/DomClassifier.py @@ -54,14 +54,14 @@ def main(): if localizeddomains: print(localizeddomains) publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( - PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path)) + PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_rel_path)) localizeddomains = c.localizedomain(cc=cc) if localizeddomains: print(localizeddomains) publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( - PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path)) + PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_rel_path)) except IOError: - print("CRC Checksum Failed on :", PST.p_path) + print("CRC Checksum Failed on :", PST.p_rel_path) publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( PST.p_source, PST.p_date, PST.p_name)) diff --git a/bin/Duplicates.py b/bin/Duplicates.py index 0c24bec1..611368a1 100755 --- a/bin/Duplicates.py +++ b/bin/Duplicates.py @@ -142,17 +142,17 @@ 
if __name__ == "__main__": paste_date = paste_date paste_date = paste_date if paste_date != None else "No date available" if paste_path != None: - if paste_path != PST.p_path: + if paste_path != PST.p_rel_path: hash_dico[dico_hash] = (hash_type, paste_path, percent, paste_date) - print('['+hash_type+'] '+'comparing: ' + str(PST.p_path[44:]) + ' and ' + str(paste_path[44:]) + ' percentage: ' + str(percent)) + print('['+hash_type+'] '+'comparing: ' + str(PST.p_rel_path) + ' and ' + str(paste_path) + ' percentage: ' + str(percent)) except Exception: print('hash not comparable, bad hash: '+dico_hash+' , current_hash: '+paste_hash) # Add paste in DB after checking to prevent its analysis twice # hash_type_i -> index_i AND index_i -> PST.PATH - r_serv1.set(index, PST.p_path) + r_serv1.set(index, PST.p_rel_path) r_serv1.set(index+'_date', PST._get_p_date()) r_serv1.sadd("INDEX", index) # Adding hashes in Redis @@ -180,7 +180,7 @@ if __name__ == "__main__": PST.__setattr__("p_duplicate", dupl) PST.save_attribute_duplicate(dupl) PST.save_others_pastes_attribute_duplicate(dupl) - publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path)) + publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_rel_path)) print('{}Detected {}'.format(to_print, len(dupl))) print('') @@ -191,5 +191,5 @@ if __name__ == "__main__": except IOError: to_print = 'Duplicate;{};{};{};'.format( PST.p_source, PST.p_date, PST.p_name) - print("CRC Checksum Failed on :", PST.p_path) + print("CRC Checksum Failed on :", PST.p_rel_path) publisher.error('{}CRC Checksum Failed'.format(to_print)) diff --git a/bin/Global.py b/bin/Global.py index 32a3656b..22b4c4e7 100755 --- a/bin/Global.py +++ b/bin/Global.py @@ -51,6 +51,9 @@ if __name__ == '__main__': p = Process(config_section) + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) + print(PASTES_FOLDER) + # LOGGING # publisher.info("Feed Script started to receive & publish.") @@ -78,8 +81,9 @@ if __name__ == '__main__': time.sleep(1) continue # Creating the full filepath - filename = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "pastes"), paste) + filename = os.path.join(PASTES_FOLDER, paste) + print(filename) + print(paste) dirname = os.path.dirname(filename) if not os.path.exists(dirname): @@ -102,6 +106,7 @@ if __name__ == '__main__': print(filename) print(type) print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------') - ''' - p.populate_set_out(filename) + ''' + + p.populate_set_out(paste) processed_paste+=1 diff --git a/bin/LibInjection.py b/bin/LibInjection.py index 283bba00..5088d9c5 100755 --- a/bin/LibInjection.py +++ b/bin/LibInjection.py @@ -47,7 +47,7 @@ def analyse(url, path): paste = Paste.Paste(path) print("Detected (libinjection) SQL in URL: ") print(urllib.request.unquote(url)) - to_print = 'LibInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path) + to_print = 'LibInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_rel_path) publisher.warning(to_print) #Send to duplicate p.populate_set_out(path, 'Duplicate') diff --git a/bin/Lines.py b/bin/Lines.py index 8c9f6827..e4187dc7 100755 --- a/bin/Lines.py +++ b/bin/Lines.py @@ -75,10 +75,11 @@ if __name__ == '__main__': PST.save_attribute_redis("p_max_length_line", lines_infos[1]) # FIXME Not used. 
- PST.store.sadd("Pastes_Objects", PST.p_path) + PST.store.sadd("Pastes_Objects", PST.p_rel_path) + print(PST.p_rel_path) if lines_infos[1] < args.max: - p.populate_set_out( PST.p_path , 'LinesShort') + p.populate_set_out( PST.p_rel_path , 'LinesShort') else: - p.populate_set_out( PST.p_path , 'LinesLong') + p.populate_set_out( PST.p_rel_path , 'LinesLong') except IOError: - print("CRC Checksum Error on : ", PST.p_path) + print("CRC Checksum Error on : ", PST.p_rel_path) diff --git a/bin/MISP_The_Hive_feeder.py b/bin/MISP_The_Hive_feeder.py index 0a8f1791..c1ef414d 100755 --- a/bin/MISP_The_Hive_feeder.py +++ b/bin/MISP_The_Hive_feeder.py @@ -180,7 +180,7 @@ if __name__ == "__main__": if flag_the_hive or flag_misp: tag, path = message.split(';') paste = Paste.Paste(path) - source = '/'.join(paste.p_path.split('/')[-6:]) + source = '/'.join(paste.p_rel_path.split('/')[-6:]) full_path = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"), path) diff --git a/bin/Mail.py b/bin/Mail.py index 1f682661..33d8de43 100755 --- a/bin/Mail.py +++ b/bin/Mail.py @@ -78,7 +78,7 @@ if __name__ == "__main__": to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\ format(PST.p_source, PST.p_date, PST.p_name, - MX_values[0], PST.p_path) + MX_values[0], PST.p_rel_path) if MX_values[0] > is_critical: publisher.warning(to_print) #Send to duplicate diff --git a/bin/Mixer.py b/bin/Mixer.py index e1656b8e..e41e8e0d 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -81,6 +81,8 @@ if __name__ == '__main__': operation_mode = cfg.getint("Module_Mixer", "operation_mode") ttl_key = cfg.getint("Module_Mixer", "ttl_duplicate") + PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes")) + # STATS # processed_paste = 0 processed_paste_per_feeder = {} @@ -103,11 +105,12 @@ if __name__ == '__main__': feeder_name.replace(" ","") if 'import_dir' in feeder_name: feeder_name = feeder_name.split('/')[1] - paste_name = complete_paste except ValueError as e: feeder_name = "unnamed_feeder" - paste_name = complete_paste + + # remove absolute path + paste_name = complete_paste.replace(PASTES_FOLDER, '', 1) # Processed paste processed_paste += 1 @@ -118,6 +121,7 @@ if __name__ == '__main__': processed_paste_per_feeder[feeder_name] = 1 duplicated_paste_per_feeder[feeder_name] = 0 + relay_message = "{0} {1}".format(paste_name, gzip64encoded) #relay_message = b" ".join( [paste_name, gzip64encoded] ) diff --git a/bin/Onion.py b/bin/Onion.py index 1f233fcf..e38f363a 100755 --- a/bin/Onion.py +++ b/bin/Onion.py @@ -164,7 +164,7 @@ if __name__ == "__main__": r_onion.sadd('i2p_domain', domain) r_onion.sadd('i2p_link', url) r_onion.sadd('i2p_domain_crawler_queue', domain) - msg = '{};{}'.format(url,PST.p_path) + msg = '{};{}'.format(url,PST.p_rel_path) r_onion.sadd('i2p_crawler_queue', msg) ''' @@ -178,7 +178,7 @@ if __name__ == "__main__": if len(domains_list) > 0: publisher.warning('{}Detected {} .onion(s);{}'.format( - to_print, len(domains_list),PST.p_path)) + to_print, len(domains_list),PST.p_rel_path)) now = datetime.datetime.now() path = os.path.join('onions', str(now.year).zfill(4), str(now.month).zfill(2), @@ -203,19 +203,19 @@ if __name__ == "__main__": if not r_onion.sismember('onion_domain_crawler_queue', domain): print('send to onion crawler') r_onion.sadd('onion_domain_crawler_queue', domain) - msg = '{};{}'.format(url,PST.p_path) + msg = '{};{}'.format(url,PST.p_rel_path) r_onion.sadd('onion_crawler_queue', msg) #p.populate_set_out(msg, 'Crawler') else: for url in fetch(p, 
r_cache, urls, domains_list, path): - publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_path)) - p.populate_set_out('onion;{}'.format(PST.p_path), 'alertHandler') + publisher.info('{}Checked {};{}'.format(to_print, url, PST.p_rel_path)) + p.populate_set_out('onion;{}'.format(PST.p_rel_path), 'alertHandler') - msg = 'infoleak:automatic-detection="onion";{}'.format(PST.p_path) + msg = 'infoleak:automatic-detection="onion";{}'.format(PST.p_rel_path) p.populate_set_out(msg, 'Tags') else: - publisher.info('{}Onion related;{}'.format(to_print, PST.p_path)) + publisher.info('{}Onion related;{}'.format(to_print, PST.p_rel_path)) prec_filename = filename else: diff --git a/bin/RegexForTermsFrequency.py b/bin/RegexForTermsFrequency.py index fae7a03a..4e98edcc 100755 --- a/bin/RegexForTermsFrequency.py +++ b/bin/RegexForTermsFrequency.py @@ -106,7 +106,7 @@ if __name__ == "__main__": try: matched = compiled_regex.search(content) except TimeoutException: - print ("{0} processing timeout".format(paste.p_path)) + print ("{0} processing timeout".format(paste.p_rel_path)) continue else: signal.alarm(0) diff --git a/bin/Release.py b/bin/Release.py index 43c84b04..d2f18441 100755 --- a/bin/Release.py +++ b/bin/Release.py @@ -54,7 +54,7 @@ if __name__ == "__main__": if len(releases) == 0: continue - to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_path) + to_print = 'Release;{};{};{};{} releases;{}'.format(paste.p_source, paste.p_date, paste.p_name, len(releases), paste.p_rel_path) print(to_print) if len(releases) > 30: publisher.warning(to_print) @@ -63,7 +63,7 @@ if __name__ == "__main__": except TimeoutException: p.incr_module_timeout_statistic() - print ("{0} processing timeout".format(paste.p_path)) + print ("{0} processing timeout".format(paste.p_rel_path)) continue else: signal.alarm(0) diff --git a/bin/SQLInjectionDetection.py b/bin/SQLInjectionDetection.py index f03d7555..9464fd8a 100755 --- a/bin/SQLInjectionDetection.py +++ b/bin/SQLInjectionDetection.py @@ -78,7 +78,7 @@ def analyse(url, path): if (result_path > 1) or (result_query > 1): print("Detected SQL in URL: ") print(urllib.request.unquote(url)) - to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_path) + to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Detected SQL in URL", paste.p_rel_path) publisher.warning(to_print) #Send to duplicate p.populate_set_out(path, 'Duplicate') @@ -97,7 +97,7 @@ def analyse(url, path): else: print("Potential SQL injection:") print(urllib.request.unquote(url)) - to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_path) + to_print = 'SQLInjection;{};{};{};{};{}'.format(paste.p_source, paste.p_date, paste.p_name, "Potential SQL injection", paste.p_rel_path) publisher.info(to_print) diff --git a/bin/Tokenize.py b/bin/Tokenize.py index 698b4fbc..4e13b9ff 100755 --- a/bin/Tokenize.py +++ b/bin/Tokenize.py @@ -57,11 +57,11 @@ if __name__ == "__main__": try: for word, score in paste._get_top_words().items(): if len(word) >= 4: - msg = '{} {} {}'.format(paste.p_path, word, score) + msg = '{} {} {}'.format(paste.p_rel_path, word, score) p.populate_set_out(msg) except TimeoutException: p.incr_module_timeout_statistic() - print ("{0} processing timeout".format(paste.p_path)) + print ("{0} processing timeout".format(paste.p_rel_path)) continue else: 
            signal.alarm(0)
diff --git a/bin/Web.py b/bin/Web.py
index 3d53e306..7cc96822 100755
--- a/bin/Web.py
+++ b/bin/Web.py
@@ -153,7 +153,7 @@ if __name__ == "__main__":
                     pprint.pprint(A_values)
                     publisher.info('Url;{};{};{};Checked {} URL;{}'.format(
-                        PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_path))
+                        PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))
             prec_filename = filename
         else:
diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py
index d02a92f5..c5dcc0a6 100755
--- a/bin/packages/Paste.py
+++ b/bin/packages/Paste.py
@@ -101,7 +101,7 @@ class Paste(object):
         var = self.p_path.split('/')
         self.p_date = Date(var[-4], var[-3], var[-2])
-        self.p_rel_path = os.path.join(var[-4], var[-3], var[-2], self.p_name)
+        self.p_date_path = os.path.join(var[-4], var[-3], var[-2], self.p_name)
         self.p_source = var[-5]
         self.supposed_url = 'https://{}/{}'.format(self.p_source.replace('_pro', ''), var[-1].split('.gz')[0])
@@ -304,6 +304,9 @@ class Paste(object):
     def get_p_rel_path(self):
         return self.p_rel_path

+    def get_p_date_path(self):
+        return self.p_date_path
+
     def save_all_attributes_redis(self, key=None):
         """
         Saving all the attributes in a "Redis-like" Database (Redis, LevelDB)
diff --git a/var/www/modules/showpaste/Flask_showpaste.py b/var/www/modules/showpaste/Flask_showpaste.py
index 188af759..c24e3335 100644
--- a/var/www/modules/showpaste/Flask_showpaste.py
+++ b/var/www/modules/showpaste/Flask_showpaste.py
@@ -41,12 +41,11 @@ showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templa
 # ============ FUNCTIONS ============
 def showpaste(content_range, requested_path):
-    relative_path = None
-    if PASTES_FOLDER not in requested_path:
-        relative_path = requested_path
-        requested_path = os.path.join(PASTES_FOLDER, requested_path)
-        # remove old full path
-        #requested_path = requested_path.replace(PASTES_FOLDER, '')
+    # keep the original full-path key: legacy tags may still be stored under it
+    relative_path = requested_path if PASTES_FOLDER in requested_path else None
+    if relative_path is not None:
+        # remove full path
+        requested_path = requested_path.replace(PASTES_FOLDER, '', 1)
     # escape directory transversal
     if os.path.commonprefix((os.path.realpath(requested_path),PASTES_FOLDER)) != PASTES_FOLDER:
         return 'path transversal detected'
@@ -124,8 +123,9 @@
     active_taxonomies = r_serv_tags.smembers('active_taxonomies')
     l_tags = r_serv_metadata.smembers('tag:'+requested_path)
     if relative_path is not None:
-        l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) )
+        # set.union() returns a new set: re-assign the result
+        l_tags = l_tags.union( r_serv_metadata.smembers('tag:'+relative_path) )
     #active galaxies
     active_galaxies = r_serv_tags.smembers('active_galaxies')
@@ -189,7 +189,7 @@
             crawler_metadata['domain'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'domain')
             crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
             crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
-            crawler_metadata['screenshot'] = paste.get_p_rel_path()
+            crawler_metadata['screenshot'] = paste.get_p_date_path()
         else:
             crawler_metadata['get_metadata'] = False
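
Taken together, the Mixer.py and Global.py hunks define the new path contract: Mixer strips the configured pastes root from incoming names, and Global joins it back only when touching the filesystem, publishing the relative name downstream. A minimal sketch of that round trip, with a hypothetical root (the real value is built from AIL_HOME and the "Directories/pastes" config key):

    import os

    # Hypothetical root. The trailing slash matters here: without it,
    # replace() would leave a leading '/' and os.path.join() would then
    # discard the root entirely when re-anchoring.
    PASTES_FOLDER = '/opt/ail/PASTES/'

    def mixer_normalize(complete_paste):
        # Mixer.py: strip the absolute prefix once, if a feeder sent a full path.
        return complete_paste.replace(PASTES_FOLDER, '', 1)

    def global_filename(paste):
        # Global.py: write to disk under the root, forward the relative name.
        return os.path.join(PASTES_FOLDER, paste)

    rel = mixer_normalize('/opt/ail/PASTES/archive/2018/12/01/a.gz')
    assert rel == 'archive/2018/12/01/a.gz'
    assert global_filename(rel) == '/opt/ail/PASTES/archive/2018/12/01/a.gz'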
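Paste.py renames the year/month/day/name form to `p_date_path` (with `get_p_date_path()` now feeding the screenshot path in Flask_showpaste.py), freeing the `p_rel_path` name for the path relative to the pastes root. Only the `p_date_path` construction appears in the hunk; the `p_rel_path` derivation below is an assumption, mirroring the prefix-stripping used in Mixer.py:

    import os

    def split_paths(p_path, pastes_folder='/opt/ail/PASTES/'):  # hypothetical root
        # p_rel_path: path relative to the pastes root (new meaning of the name).
        p_rel_path = p_path.replace(pastes_folder, '', 1)
        # p_date_path: source-independent year/month/day/name form (the rename).
        var = p_path.split('/')
        p_date_path = os.path.join(var[-4], var[-3], var[-2], var[-1])
        return p_rel_path, p_date_path

    rel, datep = split_paths('/opt/ail/PASTES/archive/2018/12/01/a.gz')
    assert rel == 'archive/2018/12/01/a.gz'
    assert datep == '2018/12/01/a.gz'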
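The `l_tags` fix in Flask_showpaste.py hinges on `set.union()` returning a new set rather than mutating its receiver, so the unassigned call silently dropped the legacy tags. A self-contained illustration:

    l_tags = {'infoleak:automatic-detection="onion"'}
    legacy = {'infoleak:automatic-detection="iban"'}

    l_tags.union(legacy)            # returns a new set; result discarded
    assert len(l_tags) == 1         # l_tags is unchanged

    l_tags = l_tags.union(legacy)   # re-assign, as in the patched code
    assert len(l_tags) == 2
    # Equivalent in-place form: l_tags |= legacy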
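The directory-traversal guard kept in `showpaste()` compares `os.path.realpath(requested_path)` against `PASTES_FOLDER` with `os.path.commonprefix`. Once `requested_path` is relative, `realpath` resolves it against the current working directory, so re-anchoring it on the pastes root before the check is one plausible arrangement; this is an assumption on my part, not something the hunk shows:

    import os

    PASTES_FOLDER = '/opt/ail/PASTES/'  # hypothetical root

    def is_safe(requested_path):
        # Re-anchor the relative name, resolve symlinks and '..' segments,
        # then require the result to stay under the root.
        full = os.path.realpath(os.path.join(PASTES_FOLDER, requested_path))
        # commonprefix() is character-based, as in the original code;
        # os.path.commonpath() would be the stricter, component-based check.
        return os.path.commonprefix((full, PASTES_FOLDER)) == PASTES_FOLDER

    assert is_safe('archive/2018/12/01/a.gz')
    assert not is_safe('../../../etc/passwd')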
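Bitcoin.py's hunk is whitespace-only, but it sits inside `decode_base58`/`check_bc`, the validity check gating the private-key warning. The usual Base58Check rule, which this code appears to follow, is that the last four of the 25 decoded bytes must equal the first four bytes of a double SHA-256 over the rest; a sketch under that assumption:

    import hashlib

    digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'

    def decode_base58(bc, length):
        # Base58 digits to a big-endian integer, then to fixed-width bytes.
        n = 0
        for char in bc:
            n = n * 58 + digits58.index(char)
        return n.to_bytes(length, 'big')

    def check_bc(bc):
        try:
            bcbytes = decode_base58(bc, 25)
            checksum = hashlib.sha256(hashlib.sha256(bcbytes[:-4]).digest()).digest()[:4]
            return bcbytes[-4:] == checksum
        except Exception:
            # Bad characters or an overlong payload: not a valid address.
            return False

    assert check_bc('1A1zP1eP5QGefi2DMPTfTL5SLmv7DivfNa')    # genesis-block address
    assert not check_bc('1A1zP1eP5QGefi2DMPTfTL5SLmv7Divfff')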