mirror of https://github.com/CIRCL/AIL-framework
166 lines
6.0 KiB
Python
Executable File
166 lines
6.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*-coding:UTF-8 -*
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
|
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
|
from lib import ConfigLoader
|
|
|
|
def update_tracked_terms(main_key, tracked_container_key):
    """Strip PASTES_FOLDER from the item paths stored for each tracked entry.

    main_key -- name of the set (read through r_serv_term) that lists the
        tracked entries.
    tracked_container_key -- format string with a single '{}' placeholder;
        formatted with a tracked entry it yields the name of the set that
        holds the item paths recorded for that entry.

    Every member containing PASTES_FOLDER is replaced by the same string with
    the first occurrence of PASTES_FOLDER removed.  Members without it are
    left untouched.  Uses the module-level globals r_serv_term and
    PASTES_FOLDER, which are assigned in the __main__ section.
    """
    for term in r_serv_term.smembers(main_key):
        container = tracked_container_key.format(term)
        for old_path in r_serv_term.smembers(container):
            if PASTES_FOLDER not in old_path:
                continue
            # Insert the shortened path first, then drop the old member.
            r_serv_term.sadd(container, old_path.replace(PASTES_FOLDER, '', 1))
            r_serv_term.srem(container, old_path)
|
|
|
|
def _strip_pastes_folder_from_set_key(old_key, item_path):
    """Rename one metadata set key so it no longer contains PASTES_FOLDER.

    old_key -- current key name in r_serv_metadata (contains PASTES_FOLDER).
    item_path -- item path the key belongs to; only used in the log message.

    Nothing is done when old_key does not exist.  When the shortened key name
    is already taken (RENAMENX reports 0), the members of old_key are moved
    one by one into the existing key instead.
    """
    if not r_serv_metadata.exists(old_key):
        return

    new_key = old_key.replace(PASTES_FOLDER, '', 1)
    res = r_serv_metadata.renamenx(old_key, new_key)
    if res == 0:
        print('same key, double name: {}'.format(item_path))
        # fusion: both keys exist, merge the old set into the new one
        for elem in r_serv_metadata.smembers(old_key):
            r_serv_metadata.sadd(new_key, elem)
            r_serv_metadata.srem(old_key, elem)


def update_hash_item(has_type):
    """Migrate the per-item hash keys of every item tagged with a decoder type.

    has_type -- decoder name used both in the tag
        'infoleak:automatic-detection="<has_type>"' and in the
        '<has_type>_paste:<item path>' key prefix.

    For each tagged item whose path contains PASTES_FOLDER, the keys
    '<has_type>_paste:<item path>' and 'hash_paste:<item path>' are renamed
    (or merged) so that PASTES_FOLDER is removed from the key name.  Uses the
    module-level globals r_serv_tag, r_serv_metadata and PASTES_FOLDER, which
    are assigned in the __main__ section.
    """
    # get all hash items
    all_hash_items = r_serv_tag.smembers('infoleak:automatic-detection=\"{}\"'.format(has_type))
    for item_path in all_hash_items:
        if PASTES_FOLDER not in item_path:
            continue
        _strip_pastes_folder_from_set_key('{}_paste:{}'.format(has_type, item_path), item_path)
        _strip_pastes_folder_from_set_key('hash_paste:{}'.format(item_path), item_path)
|
|
|
|
if __name__ == '__main__':
    # Background migration: removes the PASTES_FOLDER prefix from item paths
    # stored in ARDB_Metadata / ARDB_TermFreq and publishes its progress
    # (0-100) in 'ail:current_background_script_stat'.

    config_loader = ConfigLoader.ConfigLoader()

    # Pastes directory under AIL_HOME, with a trailing '/': the prefix that is
    # stripped from stored paths.  Read as a global by update_hash_item() and
    # update_tracked_terms().
    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'

    r_serv = config_loader.get_redis_conn("ARDB_DB")
    r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
    r_serv_tag = config_loader.get_redis_conn("ARDB_Tags")
    r_serv_term = config_loader.get_redis_conn("ARDB_TermFreq")
    config_loader = None

    r_serv.set('ail:current_background_script', 'metadata')

    ## Update metadata ##
    print('Updating ARDB_Metadata ...')
    start = time.time()

    # Update base64 / binary / hexadecimal hash keys; the progress stat is
    # published before each step.
    for stat, hash_type in ((0, 'base64'), (20, 'binary'), (40, 'hexadecimal')):
        #update stats
        r_serv.set('ail:current_background_script_stat', stat)
        update_hash_item(hash_type)

    #update stats
    r_serv.set('ail:current_background_script_stat', 60)

    # Update onion metadata
    all_crawled_items = r_serv_tag.smembers('infoleak:submission=\"crawler\"')
    # Count the collection actually iterated so the progress denominator
    # always matches the loop (and is never 0 inside it).
    total_onion = len(all_crawled_items)
    nb_updated = 0
    last_progress = 0

    for item_path in all_crawled_items:
        if PASTES_FOLDER in item_path:
            old_item_metadata = 'paste_metadata:{}'.format(item_path)
            item_path = item_path.replace(PASTES_FOLDER, '', 1)
            new_item_metadata = 'paste_metadata:{}'.format(item_path)
            res = r_serv_metadata.renamenx(old_item_metadata, new_item_metadata)
            #key already exist
            if res == 0:
                r_serv_metadata.delete(old_item_metadata)

            # update domain port
            domain = r_serv_metadata.hget(new_item_metadata, 'domain')
            if domain and domain[-3:] != ':80':
                r_serv_metadata.hset(new_item_metadata, 'domain', '{}:80'.format(domain))

            # strip the folder prefix from the parent references as well
            super_father = r_serv_metadata.hget(new_item_metadata, 'super_father')
            if super_father and PASTES_FOLDER in super_father:
                r_serv_metadata.hset(new_item_metadata, 'super_father', super_father.replace(PASTES_FOLDER, '', 1))
            father = r_serv_metadata.hget(new_item_metadata, 'father')
            if father and PASTES_FOLDER in father:
                r_serv_metadata.hset(new_item_metadata, 'father', father.replace(PASTES_FOLDER, '', 1))

        nb_updated += 1
        # this phase covers the 60% -> 90% range of the overall progress
        progress = int((nb_updated * 30) / total_onion)
        print('{}/{} updated {}%'.format(nb_updated, total_onion, progress + 60))
        # update progress stats (only when the integer percentage changed)
        if progress != last_progress:
            r_serv.set('ail:current_background_script_stat', progress + 60)
            last_progress = progress

    #update stats
    r_serv.set('ail:current_background_script_stat', 90)

    ## update tracked term/set/regex
    # update tracked term
    update_tracked_terms('TrackedSetTermSet', 'tracked_{}')

    #update stats
    r_serv.set('ail:current_background_script_stat', 93)
    # update tracked set
    update_tracked_terms('TrackedSetSet', 'set_{}')

    #update stats
    r_serv.set('ail:current_background_script_stat', 96)
    # update tracked regex
    update_tracked_terms('TrackedRegexSet', 'regex_{}')

    #update stats
    r_serv.set('ail:current_background_script_stat', 100)
    ##

    end = time.time()

    # Report the number of crawled item paths processed (the original printed
    # a counter that was never incremented, i.e. always 0).
    print('Updating ARDB_Metadata Done => {} paths: {} s'.format(nb_updated, end - start))
    print()

    r_serv.sadd('ail:update_v1.5', 'metadata')

    ##
    # Key, Dynamic Update
    ##
    # paste_children
    # nb_seen_hash, base64_hash, binary_hash
    # paste_onion_external_links
    # misp_events, hive_cases
    ##
|