mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			166 lines
		
	
	
		
			6.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			166 lines
		
	
	
		
			6.0 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| #!/usr/bin/env python3
 | |
| # -*-coding:UTF-8 -*
 | |
| 
 | |
| import os
 | |
| import sys
 | |
| import time
 | |
| 
 | |
| sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
 | |
| from lib import ConfigLoader
 | |
| 
 | |
def update_tracked_terms(main_key, tracked_container_key):
    """Strip the pastes-folder prefix from item paths held in tracker sets.

    main_key: key of a set in ``r_serv_term`` listing the tracked entries.
    tracked_container_key: format template; filled with a tracked entry it
        yields the key of the set holding that entry's item paths.

    Every stored path containing ``PASTES_FOLDER`` is re-added with the first
    occurrence of that folder removed, then the old value is removed.
    """
    for tracked_entry in r_serv_term.smembers(main_key):
        container_key = tracked_container_key.format(tracked_entry)
        for old_path in r_serv_term.smembers(container_key):
            if PASTES_FOLDER not in old_path:
                # Path does not contain the folder prefix: nothing to rewrite.
                continue
            shortened_path = old_path.replace(PASTES_FOLDER, '', 1)
            # Add the new value before dropping the old one.
            r_serv_term.sadd(container_key, shortened_path)
            r_serv_term.srem(container_key, old_path)
 | |
| 
 | |
def _rename_or_merge_set_key(old_key, item_path):
    """Move the set at ``old_key`` to the same key without ``PASTES_FOLDER``.

    The key is renamed with RENAMENX. When the target key already exists the
    rename is refused, so the members of the old set are moved one by one
    into the existing target set instead.

    old_key: existing key in ``r_serv_metadata`` that embeds the item path.
    item_path: original item path, only used in the log message.
    """
    new_key = old_key.replace(PASTES_FOLDER, '', 1)
    res = r_serv_metadata.renamenx(old_key, new_key)
    if res == 0:
        print('same key, double name: {}'.format(item_path))
        # fusion: merge the old set into the one already stored under new_key
        for elem in r_serv_metadata.smembers(old_key):
            r_serv_metadata.sadd(new_key, elem)
            r_serv_metadata.srem(old_key, elem)


def update_hash_item(has_type):
    """Rewrite the per-item hash keys of every item tagged with ``has_type``.

    Items are read from the ``infoleak:automatic-detection="<has_type>"`` tag
    set in ``r_serv_tag``. For each item path containing ``PASTES_FOLDER``,
    the keys ``<has_type>_paste:<path>`` and ``hash_paste:<path>`` in
    ``r_serv_metadata`` are renamed (or merged) so that they no longer embed
    the folder prefix.

    has_type: decoder name used in the tag and key names (the script calls
        this with 'base64', 'binary' and 'hexadecimal').
    """
    # get all hash items:
    all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
    for item_path in all_hash_items:
        if PASTES_FOLDER not in item_path:
            continue
        candidate_keys = (
            '{}_paste:{}'.format(has_type, item_path),
            'hash_paste:{}'.format(item_path),
        )
        for old_key in candidate_keys:
            if r_serv_metadata.exists(old_key):
                _rename_or_merge_set_key(old_key, item_path)
 | |
| 
 | |
if __name__ == '__main__':
    # Background update script: rewrites keys and values in the ARDB
    # databases so that stored item paths no longer contain PASTES_FOLDER,
    # publishing its progress under 'ail:current_background_script_stat'.

    config_loader = ConfigLoader.ConfigLoader()

    # Pastes directory (with trailing slash) that is stripped from stored paths.
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'

    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
    r_serv_term = config_loader.get_redis_conn("ARDB_TermFreq")
    # Not used below; kept so the set of opened connections is unchanged.
    r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
    config_loader = None

    r_serv.set('ail:current_background_script', 'metadata')

    ## Update metadata ##
    print('Updating ARDB_Metadata ...')
    start = time.time()

    # update stats
    r_serv.set('ail:current_background_script_stat', 0)

    # Update base64
    update_hash_item('base64')

    # update stats
    r_serv.set('ail:current_background_script_stat', 20)
    # Update binary
    update_hash_item('binary')

    # update stats
    r_serv.set('ail:current_background_script_stat', 40)
    # Update hexadecimal
    update_hash_item('hexadecimal')

    # update stats
    r_serv.set('ail:current_background_script_stat', 60)

    # Update onion metadata
    all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\"')
    # Count the snapshot that is actually iterated, so the progress ratio
    # below can never divide by a stale (or zero) cardinality.
    total_onion = len(all_crawled_items)
    nb_updated = 0
    last_progress = 0

    for item_path in all_crawled_items:
        if PASTES_FOLDER in item_path:
            old_item_metadata = 'paste_metadata:{}'.format(item_path)
            item_path = item_path.replace(PASTES_FOLDER, '', 1)
            new_item_metadata = 'paste_metadata:{}'.format(item_path)
            res = r_serv_metadata.renamenx(old_item_metadata, new_item_metadata)
            # key already exist
            if res == 0:
                r_serv_metadata.delete(old_item_metadata)
        else:
            # Path is already relative: the metadata key must still be set
            # for this item, otherwise the field updates below would hit the
            # previous item's key (or raise NameError on the first item).
            new_item_metadata = 'paste_metadata:{}'.format(item_path)

        # update domain port
        domain = r_serv_metadata.hget(new_item_metadata, 'domain')
        if domain and not domain.endswith(':80'):
            r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain))

        # strip the folder prefix from the parent references as well
        super_father = r_serv_metadata.hget(new_item_metadata, 'super_father')
        if super_father and PASTES_FOLDER in super_father:
            r_serv_metadata.hset(new_item_metadata, 'super_father', super_father.replace(PASTES_FOLDER, '', 1))
        father = r_serv_metadata.hget(new_item_metadata, 'father')
        if father and PASTES_FOLDER in father:
            r_serv_metadata.hset(new_item_metadata, 'father', father.replace(PASTES_FOLDER, '', 1))

        nb_updated += 1
        # This phase covers the 60% -> 90% span of the overall progress.
        progress = (nb_updated * 30) // total_onion
        print('{}/{}    updated    {}%'.format(nb_updated, total_onion, progress + 60))
        # update progress stats
        if progress != last_progress:
            r_serv.set('ail:current_background_script_stat', progress + 60)
            last_progress = progress

    # update stats
    r_serv.set('ail:current_background_script_stat', 90)

    ## update tracked term/set/regex
    # update tracked term
    update_tracked_terms('TrackedSetTermSet', 'tracked_{}')

    # update stats
    r_serv.set('ail:current_background_script_stat', 93)
    # update tracked set
    update_tracked_terms('TrackedSetSet', 'set_{}')

    # update stats
    r_serv.set('ail:current_background_script_stat', 96)
    # update tracked regex
    update_tracked_terms('TrackedRegexSet', 'regex_{}')

    # update stats
    r_serv.set('ail:current_background_script_stat', 100)
    ##

    end = time.time()

    # Report the number of crawled items processed (the former counter was
    # never incremented and always printed 0).
    print('Updating ARDB_Metadata Done => {} paths: {} s'.format(nb_updated, end - start))
    print()

    # Record that the 'metadata' step of the v1.5 update has completed.
    r_serv.sadd('ail:update_v1.5', 'metadata')

    ##
    # Key, Dynamic Update
    ##
    # paste_children
    # nb_seen_hash, base64_hash, binary_hash
    # paste_onion_external_links
    # misp_events, hive_cases
    ##
 |