mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			fix: [module Webstats + BankAccount-Decoder] fix faup return type + remove old Paste library from BankAccount-Decoder #465
							parent
							
								
									d8fbd72863
								
							
						
					
					
						commit
						f9856a1589
					
				|  | @ -5,7 +5,7 @@ | |||
| The BankAccount Module | ||||
| ====================== | ||||
| 
 | ||||
| It apply IBAN regexes on paste content and warn if above a threshold. | ||||
| It applies IBAN regexes on item content and warns if above a threshold. | ||||
| 
 | ||||
| """ | ||||
| 
 | ||||
|  | @ -17,7 +17,7 @@ import re | |||
| import string | ||||
| from itertools import chain | ||||
| 
 | ||||
| from packages import Paste | ||||
| from packages import Item | ||||
| from pubsublogger import publisher | ||||
| 
 | ||||
| from Helper import Process | ||||
|  | @ -49,7 +49,7 @@ def is_valid_iban(iban): | |||
|         return True | ||||
|     return False | ||||
| 
 | ||||
| def check_all_iban(l_iban, paste, filename): | ||||
| def check_all_iban(l_iban, obj_id): | ||||
|     nb_valid_iban = 0 | ||||
|     for iban in l_iban: | ||||
|         iban = iban[0]+iban[1]+iban[2] | ||||
|  | @ -65,14 +65,14 @@ def check_all_iban(l_iban, paste, filename): | |||
|                 server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1) | ||||
| 
 | ||||
|     if(nb_valid_iban > 0): | ||||
|         to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) | ||||
|         to_print = 'Iban;{};{};{};'.format(Item.get_source(obj_id), Item.get_item_date(obj_id), Item.get_basename(obj_id)) | ||||
|         publisher.warning('{}Checked found {} IBAN;{}'.format( | ||||
|             to_print, nb_valid_iban, paste.p_rel_path)) | ||||
|         msg = 'infoleak:automatic-detection="iban";{}'.format(filename) | ||||
|             to_print, nb_valid_iban, obj_id)) | ||||
|         msg = 'infoleak:automatic-detection="iban";{}'.format(obj_id) | ||||
|         p.populate_set_out(msg, 'Tags') | ||||
| 
 | ||||
|         #Send to duplicate | ||||
|         p.populate_set_out(filename, 'Duplicate') | ||||
|         p.populate_set_out(obj_id, 'Duplicate') | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     publisher.port = 6380 | ||||
|  | @ -103,21 +103,21 @@ if __name__ == "__main__": | |||
| 
 | ||||
|         if message is not None: | ||||
| 
 | ||||
|             filename = message | ||||
|             paste = Paste.Paste(filename) | ||||
|             content = paste.get_p_content() | ||||
|             obj_id = Item.get_item_id(message) | ||||
| 
 | ||||
|             content = Item.get_item_content(obj_id) | ||||
| 
 | ||||
|             signal.alarm(max_execution_time) | ||||
|             try: | ||||
|                 l_iban = iban_regex.findall(content) | ||||
|             except TimeoutException: | ||||
|                  print ("{0} processing timeout".format(paste.p_rel_path)) | ||||
|                  print ("{0} processing timeout".format(obj_id)) | ||||
|                  continue | ||||
|             else: | ||||
|                 signal.alarm(0) | ||||
| 
 | ||||
|             if(len(l_iban) > 0): | ||||
|                 check_all_iban(l_iban, paste, filename) | ||||
|                 check_all_iban(l_iban, obj_id) | ||||
| 
 | ||||
|         else: | ||||
|             publisher.debug("Script BankAccount is Idling 10s") | ||||
|  |  | |||
|  | @ -17,7 +17,6 @@ import datetime | |||
| from pubsublogger import publisher | ||||
| 
 | ||||
| from Helper import Process | ||||
| from packages import Paste | ||||
| from packages import Item | ||||
| 
 | ||||
| import re | ||||
|  | @ -50,11 +49,11 @@ def decode_string(content, message, date, encoded_list, decoder_name, encoded_mi | |||
| 
 | ||||
|             save_hash(decoder_name, message, date, decode) | ||||
| 
 | ||||
|             #remove encoded from paste content | ||||
|             #remove encoded from item content | ||||
|             content = content.replace(encoded, '', 1) | ||||
| 
 | ||||
|     if(find): | ||||
|         set_out_paste(decoder_name, message) | ||||
|         set_out_item(decoder_name, message) | ||||
| 
 | ||||
|     return content | ||||
| 
 | ||||
|  | @ -72,8 +71,8 @@ def save_hash(decoder_name, message, date, decoded): | |||
|     data['estimated type'] = type | ||||
|     json_data = json.dumps(data) | ||||
| 
 | ||||
|     date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8]) | ||||
|     date_key = date[0:4] + date[4:6] + date[6:8] | ||||
|     date_item = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8]) | ||||
|     date_key = date | ||||
| 
 | ||||
|     serv_metadata.incrby(decoder_name+'_decoded:'+date_key, 1) | ||||
|     serv_metadata.zincrby('hash_date:'+date_key, hash, 1) | ||||
|  | @ -81,24 +80,24 @@ def save_hash(decoder_name, message, date, decoded): | |||
| 
 | ||||
|     # first time we see this hash | ||||
|     if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'): | ||||
|         serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste) | ||||
|         serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) | ||||
|         serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_item) | ||||
|         serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_item) | ||||
|     else: | ||||
|         serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste) | ||||
|         serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_item) | ||||
| 
 | ||||
|     # first time we see this hash (all encoding) on this paste | ||||
|     # first time we see this hash (all encoding) on this item | ||||
|     if serv_metadata.zscore('nb_seen_hash:'+hash, message) is None: | ||||
|         serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1) | ||||
|         serv_metadata.sadd('hash_paste:'+message, hash) # paste - hash map | ||||
|         serv_metadata.sadd('hash_paste:'+message, hash) # item - hash map | ||||
|         # create hash metadata | ||||
|         serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type) | ||||
|         serv_metadata.sadd('hash_all_type', type) | ||||
| 
 | ||||
|     # first time we see this hash encoding on this paste | ||||
|     # first time we see this hash encoding on this item | ||||
|     if serv_metadata.zscore(decoder_name+'_hash:'+hash, message) is None: | ||||
|         print('first '+decoder_name) | ||||
| 
 | ||||
|         serv_metadata.sadd(decoder_name+'_paste:'+message, hash) # paste - hash map | ||||
|         serv_metadata.sadd(decoder_name+'_paste:'+message, hash) # item - hash map | ||||
| 
 | ||||
|         # create hash metadata | ||||
|         serv_metadata.sadd('hash_'+ decoder_name +'_all_type', type) | ||||
|  | @ -118,8 +117,8 @@ def save_hash(decoder_name, message, date, decoded): | |||
| 
 | ||||
|     serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1) | ||||
| 
 | ||||
|     serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map | ||||
|     serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste | ||||
|     serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - item map | ||||
|     serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this item | ||||
| 
 | ||||
|     # Domain Object | ||||
|     if Item.is_crawled(message): | ||||
|  | @ -150,7 +149,7 @@ def save_hash_on_disk(decode, type, hash, json_data): | |||
|     with open(filename_json, 'w') as f: | ||||
|         f.write(json_data) | ||||
| 
 | ||||
| def set_out_paste(decoder_name, message): | ||||
| def set_out_item(decoder_name, message): | ||||
|     publisher.warning(decoder_name+' decoded') | ||||
|     #Send to duplicate | ||||
|     p.populate_set_out(message, 'Duplicate') | ||||
|  | @ -217,12 +216,11 @@ if __name__ == '__main__': | |||
|             time.sleep(1) | ||||
|             continue | ||||
| 
 | ||||
|         filename = message | ||||
|         paste = Paste.Paste(filename) | ||||
|         obj_id = Item.get_item_id(message) | ||||
| 
 | ||||
|         # Do something with the message from the queue | ||||
|         content = paste.get_p_content() | ||||
|         date = str(paste._get_p_date()) | ||||
|         content = Item.get_item_content(obj_id) | ||||
|         date = Item.get_item_date(obj_id) | ||||
| 
 | ||||
|         for decoder in decoder_order: # add threshold and size limit | ||||
| 
 | ||||
|  | @ -233,7 +231,7 @@ if __name__ == '__main__': | |||
|             except TimeoutException: | ||||
|                 encoded_list = [] | ||||
|                 p.incr_module_timeout_statistic() # add encoder type | ||||
|                 print ("{0} processing timeout".format(paste.p_rel_path)) | ||||
|                 print ("{0} processing timeout".format(obj_id)) | ||||
|                 continue | ||||
|             else: | ||||
|                 signal.alarm(0) | ||||
|  |  | |||
|  | @ -29,7 +29,10 @@ num_day_to_look = 5       # the detection of the progression start num_day_to_lo | |||
| def analyse(server, field_name, date, url_parsed): | ||||
|     field = url_parsed[field_name] | ||||
|     if field is not None: | ||||
|         field = field.decode('utf8') | ||||
|         try: # faup version | ||||
|             field = field.decode() | ||||
|         except: | ||||
|             pass | ||||
|         server.hincrby(field, date, 1) | ||||
|         if field_name == "domain": #save domain in a set for the monthly plot | ||||
|             domain_set_name = "domain_set_" + date[0:6] | ||||
|  |  | |||
|  | @ -32,6 +32,9 @@ def exist_item(item_id): | |||
|     else: | ||||
|         return False | ||||
| 
 | ||||
| def get_basename(item_id): | ||||
|     return os.path.basename(item_id) | ||||
| 
 | ||||
| def get_item_id(full_path): | ||||
|     return full_path.replace(PASTES_FOLDER, '', 1) | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Terrtia
						Terrtia