chg: [migration] migrate Screenshots + Trackers + Duplicates + fix zadd zincrby

pull/594/head
Terrtia 2022-11-29 16:01:01 +01:00
parent 25a8eb09c0
commit af583939d8
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
24 changed files with 191 additions and 252 deletions

View File

@ -43,34 +43,34 @@ config_loader = None
# # - - CONFIGS - - # #
from core import ail_2_ail
spec = importlib.util.find_spec('ail_2_ail')
spec = importlib.util.find_spec('core.ail_2_ail')
old_ail_2_ail = importlib.util.module_from_spec(spec)
spec.loader.exec_module(old_ail_2_ail)
old_ail_2_ail.r_serv_sync = r_serv_db
from lib import Tracker
spec = importlib.util.find_spec('Tracker')
spec = importlib.util.find_spec('lib.Tracker')
old_Tracker = importlib.util.module_from_spec(spec)
spec.loader.exec_module(old_Tracker)
old_Tracker.r_serv_tracker = r_serv_tracker
from lib import Investigations
spec = importlib.util.find_spec('Investigations')
spec = importlib.util.find_spec('lib.Investigations')
old_Investigations = importlib.util.module_from_spec(spec)
spec.loader.exec_module(old_Investigations)
old_Investigations.r_tracking = r_serv_tracker
from lib import crawlers
spec = importlib.util.find_spec('crawlers')
spec = importlib.util.find_spec('lib.crawlers')
old_crawlers = importlib.util.module_from_spec(spec)
spec.loader.exec_module(old_crawlers)
old_crawlers.r_serv_onion = r_crawler
# # TODO: desable features - credentials - stats ? - sentiment analysis
# # TODO: disable features - credentials - stats ? - sentiment analysis
# CREATE FUNCTION BY DB/FEATURES
@ -112,10 +112,10 @@ def core_migration():
r_kvrocks.hset('d4:passivedns', 'update_time', d4_update_time)
# Crawler Manager
manager_url = old_crawlers.get_splash_manager_url()
manager_api_key = old_crawlers.get_splash_api_key()
crawlers.save_splash_manager_url_api(manager_url, manager_api_key)
crawlers.reload_splash_and_proxies_list()
# manager_url = old_crawlers.get_splash_manager_url()
# manager_api_key = old_crawlers.get_splash_api_key()
# crawlers.save_splash_manager_url_api(manager_url, manager_api_key)
# crawlers.reload_splash_and_proxies_list()
# Auto Export Migration
ail_misp = r_serv_db.get('ail:misp')
@ -237,6 +237,7 @@ def trackers_migration():
# object migration # # TODO: in background
for item_id in old_Tracker.get_tracker_items_by_daterange(tracker_uuid, meta['first_seen'], meta['last_seen']):
print(item_id)
Tracker.add_tracked_item(tracker_uuid, item_id)
print('RETRO HUNT MIGRATION...')
@ -269,7 +270,7 @@ def item_submit_migration():
###############################
# #
# ITEMS MIGRATION #
# TAGS MIGRATION #
# #
###############################
@ -340,15 +341,53 @@ def tags_migration():
def get_item_father(item_id):
    """Return the ``father`` field stored in the item's ``paste_metadata`` hash.

    :param item_id: identifier used to build the ``paste_metadata:<item_id>`` key.
    :return: whatever ``hget`` yields for the ``father`` field.
    """
    metadata_key = f'paste_metadata:{item_id}'
    return r_serv_metadata.hget(metadata_key, 'father')
def get_item_duplicate(item_id, r_list=True):
    """Fetch the members of the ``dup:<item_id>`` set.

    :param item_id: identifier used to build the ``dup:<item_id>`` key.
    :param r_list: when true, the result is always a list (empty when the
        set has no members); when false, the raw ``smembers`` result is
        returned unchanged.
    """
    members = r_serv_metadata.smembers(f'dup:{item_id}')
    if not r_list:
        return members
    return list(members) if members else []
def get_item_duplicates_dict(item_id):
    """Build a ``{duplicate_id: {algo: similarity}}`` mapping for an item.

    Every entry returned by ``get_item_duplicate`` is parsed as a string:
    its first and last characters are dropped, quotes and spaces are
    removed, and the remainder is split on commas into
    ``algo, duplicate_id, value``.  For the ``tlsh`` algorithm the
    similarity is ``100 - value``; for any other algorithm the value is
    used as-is.

    :param item_id: identifier of the item whose duplicates are read.
    :return: dict keyed by duplicate id, each value a dict of algo -> int.
    """
    duplicates_by_id = {}
    for raw_entry in get_item_duplicate(item_id):
        # Strip the wrapping characters, then the quoting and padding.
        cleaned = raw_entry[1:-1].replace('\'', '').replace(' ', '')
        fields = cleaned.split(',')
        other_id = fields[1]
        per_algo = duplicates_by_id.setdefault(other_id, {})
        algo = fields[0]
        value = int(fields[2])
        # NOTE(review): presumably tlsh stores a distance rather than a
        # similarity, hence the inversion -- confirm against the producer.
        per_algo[algo] = 100 - value if algo == 'tlsh' else value
    return duplicates_by_id
def items_migration():
print('ITEMS MIGRATION...')
# MIGRATE IMPORTED URLEXTRACT Father
for item_id in Items.get_items_by_source('urlextract'):
father_id = get_item_father(item_id)
if father_id:
item = Items.Item(item_id)
item.set_father(father_id)
# for item_id in Items.get_items_by_source('urlextract'):
# father_id = get_item_father(item_id)
# if father_id:
# item = Items.Item(item_id)
# item.set_father(father_id)
for tag in ['infoleak:automatic-detection="credential"']: # Creditcards, Mail, Keys ???????????????????????????????
print(f'Duplicate migration: {tag}')
tag_first = get_tag_first_seen(tag)
if tag_first:
for date in Date.get_date_range_today(tag_first):
print(date)
for item_id in get_all_items_tags_by_day(tag, date):
item = Items.Item(item_id)
duplicates_dict = get_item_duplicates_dict(item_id)
for id_2 in duplicates_dict:
for algo in duplicates_dict[id_2]:
print(algo, duplicates_dict[id_2][algo], id_2)
item.add_duplicate(algo, duplicates_dict[id_2][algo], id_2)
# TODO: test cookies migration
@ -360,7 +399,7 @@ def items_migration():
# #
###############################
# Retun last crawled domains by type
# Return last crawled domains by type
# domain;epoch
def get_last_crawled_domains(domain_type):
    """Return the full ``last_<domain_type>`` list from the crawler store.

    :param domain_type: suffix used to build the ``last_<domain_type>`` key.
    :return: every element of the list (range ``0`` to ``-1``).
    """
    list_key = f'last_{domain_type}'
    first_index, last_index = 0, -1
    return r_crawler.lrange(list_key, first_index, last_index)
@ -414,9 +453,6 @@ def get_domain_down_by_date(domain_type, date):
def get_item_link(item_id):
    """Return the ``real_link`` field stored in the item's ``paste_metadata`` hash.

    :param item_id: identifier used to build the ``paste_metadata:<item_id>`` key.
    """
    metadata_key = f'paste_metadata:{item_id}'
    return r_serv_metadata.hget(metadata_key, 'real_link')
def get_item_father(item_id):
return r_serv_metadata.hget(f'paste_metadata:{item_id}', 'father')
def get_item_children(item_id):
    """Return the members of the ``paste_children:<item_id>`` set.

    :param item_id: identifier used to build the set key.
    """
    children_key = f'paste_children:{item_id}'
    return r_serv_metadata.smembers(children_key)
@ -564,7 +600,7 @@ def domain_migration():
###############################
# #
# DECODEDS MIGRATION #
# DECODED MIGRATION #
# #
###############################
def get_estimated_type(decoded_id):
@ -809,10 +845,10 @@ if __name__ == '__main__':
#decodeds_migration()
# screenshots_migration()
#subtypes_obj_migration()
ail_2_ail_migration()
# ail_2_ail_migration()
trackers_migration()
investigations_migration()
statistics_migration()
# investigations_migration()
# statistics_migration()

View File

@ -233,6 +233,8 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Onion" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Onion.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./PgpDump.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Telegram" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Telegram.py; read x"
sleep 0.1
@ -281,8 +283,6 @@ function launching_scripts {
sleep 0.1
screen -S "Script_AIL" -X screen -t "Mixer" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Mixer.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "PgpDump" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./PgpDump.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "Tools" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Tools.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "MISPtheHIVEfeeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./MISP_The_Hive_feeder.py; read x"

View File

@ -126,7 +126,7 @@ def feeder(message, count=0):
## FIXME: remove it
if not item_basic.exist_item(item_id):
if count < 10:
r_serv_db.zincrby('mess_not_saved_export', message, 1)
r_serv_db.zincrby('mess_not_saved_export', 1, message)
return 0
else:
r_serv_db.zrem('mess_not_saved_export', message)

View File

@ -34,13 +34,6 @@ r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
## -- ##
def get_ail_uuid():
uuid_ail = r_serv_db.get('ail:uuid')
if uuid_ail is None:
uuid_ail = str(uuid4())
r_serv_db.set('ail:uuid', uuid_ail)
return uuid_ail
def load_tags_to_export_in_cache():
all_exports = ['misp', 'thehive']
for export_target in all_exports:

View File

@ -16,7 +16,7 @@ sys.path.append(os.environ['AIL_BIN'])
from lib.ConfigLoader import ConfigLoader
config_loader = ConfigLoader()
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
r_serv_db = config_loader.get_db_conn("Kvrocks_Duplicates")
MIN_ITEM_SIZE = float(config_loader.get_config_str('Modules_Duplicates', 'min_paste_size')) # # TODO: RENAME ME
config_loader = None
@ -71,33 +71,40 @@ def save_object_hash(algo, date_ymonth, hash, obj_id):
r_serv_db.hset(f'duplicates:hashs:{algo}:{date_ymonth}', hash, obj_id)
def get_duplicates(obj_type, subtype, id):
def get_obj_duplicates(obj_type, subtype, obj_id):
dict_dup = {}
duplicates = r_serv_db.smembers(f'obj:duplicates:{obj_type}:{subtype}:{id}')
duplicates = r_serv_db.smembers(f'obj:duplicates:{obj_type}:{subtype}:{obj_id}')
for str_dup in duplicates:
similarity, algo, id = str_dup.split(':', 2)
if not dict_dup.get(id):
dict_dup[id] = []
dict_dup[id].append({'algo': algo, 'similarity': int(similarity)})
similarity, algo, id_2 = str_dup.split(':', 2)
if not dict_dup.get(id_2):
dict_dup[id_2] = []
dict_dup[id_2].append({'algo': algo, 'similarity': int(similarity)})
return dict_dup
def add_obj_duplicate(algo, similarity, obj_type, subtype, obj_id, id_2):
r_serv_db.sadd(f'obj:duplicates:{obj_type}:{subtype}:{obj_id}', f'{similarity}:{algo}:{id_2}')
def _add_obj_duplicate(algo, similarity, obj_type, subtype, id, id_2):
r_serv_db.sadd(f'obj:duplicates:{obj_type}:{subtype}:{id}', f'{similarity}:{algo}:{id_2}')
def add_obj_duplicate(algo, hash, similarity, obj_type, subtype, id, date_ymonth):
obj2_id = get_object_id_by_hash(algo, hash, date_ymonth)
def add_duplicate(algo, hash_, similarity, obj_type, subtype, id, date_ymonth):
obj2_id = get_object_id_by_hash(algo, hash_, date_ymonth)
# same content
if similarity == 100:
dups = get_duplicates(obj_type, subtype, id)
dups = get_obj_duplicates(obj_type, subtype, id)
for dup_id in dups:
for algo_dict in dups[dup_id]:
if algo_dict['similarity'] == 100 and algo_dict['algo'] == algo:
_add_obj_duplicate(algo, similarity, obj_type, subtype, id, dups[dup_id])
_add_obj_duplicate(algo, similarity, obj_type, subtype, dups[dup_id], id)
_add_obj_duplicate(algo, similarity, obj_type, subtype, id, obj2_id)
_add_obj_duplicate(algo, similarity, obj_type, subtype, obj2_id, id)
add_obj_duplicate(algo, similarity, obj_type, subtype, id, dups[dup_id])
add_obj_duplicate(algo, similarity, obj_type, subtype, dups[dup_id], id)
add_obj_duplicate(algo, similarity, obj_type, subtype, id, obj2_id)
add_obj_duplicate(algo, similarity, obj_type, subtype, obj2_id, id)
# TODO
def delete_obj_duplicates():
    """Placeholder with no behaviour yet; always returns ``None``."""
    return None
# TODO
def delete_obj_duplicate():
    """Placeholder with no behaviour yet; always returns ``None``."""
    return None
def get_last_x_month_dates(nb_months):
now = datetime.datetime.now()

View File

@ -15,7 +15,6 @@ import datetime
import time
import uuid
from abc import ABC
from enum import Enum
from flask import escape
@ -279,12 +278,12 @@ def get_obj_investigations(obj_id, obj_type, subtype=''):
return r_tracking.smembers(f'obj:investigations:{obj_type}:{subtype}:{obj_id}')
def delete_obj_investigations(obj_id, obj_type, subtype=''):
unregistred = False
unregistered = False
for investigation_uuid in get_obj_investigations(obj_id, obj_type, subtype=subtype):
investigation = Investigation(investigation_uuid)
investigation.unregister_object(obj_id, obj_type, subtype)
unregistred = True
return unregistred
unregistered = True
return unregistered
def _set_timestamp(investigation_uuid, timestamp):
@ -304,8 +303,8 @@ def _re_create_investagation(investigation_uuid, user_id, date, name, threat_lev
# # TODO: fix default threat_level analysis
# # TODO: limit description + name
# # TODO: sanityze tags
# # TODO: sanityze date
# # TODO: sanitize tags
# # TODO: sanitize date
def create_investigation(user_id, date, name, threat_level, analysis, info, tags=[], investigation_uuid=None):
if investigation_uuid:
if not is_valid_uuid_v4(investigation_uuid):
@ -472,18 +471,18 @@ def api_unregister_object(json_dict):
##-- API --##
if __name__ == '__main__':
# res = create_star_list(user_id, name, description)
# print(res)
# res = r_tracking.dbsize()
# print(res)
investigation_uuid = 'a6545c38083444eeb9383d357f8fa747'
_set_timestamp(investigation_uuid, int(time.time()))
# investigation = Investigation(investigation_uuid)
# investigation.delete()
#
# if __name__ == '__main__':
# # res = create_star_list(user_id, name, description)
# # print(res)
#
# # res = r_tracking.dbsize()
# # print(res)
#
# investigation_uuid = 'a6545c38083444eeb9383d357f8fa747'
# _set_timestamp(investigation_uuid, int(time.time()))
#
# # investigation = Investigation(investigation_uuid)
# # investigation.delete()
# # TODO: PAGINATION

View File

@ -72,30 +72,31 @@ def update_item_stats_size_nb(item_id, source, size, date):
# TOP Items Size
if r_statistics.zcard(f'top_size_set_{date}') < PIE_CHART_MAX_CARDINALITY:
r_statistics.zadd(f'top_avg_size_set_{date}', new_avg, source)
r_statistics.zadd(f'top_avg_size_set_{date}', {source: new_avg})
else:
member_set = r_statistics.zrangebyscore(f'top_avg_size_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
# Member set is a list of (value, score) pairs
if float(member_set[0][1]) < new_avg:
# remove min from set and add the new one
r_statistics.zrem(f'top_avg_size_set_{date}', member_set[0][0])
r_statistics.zadd(f'top_avg_size_set_{date}', new_avg, source)
r_statistics.zadd(f'top_avg_size_set_{date}', {source: new_avg})
# TOP Nb Items
if r_statistics.zcard(f'providers_set_{date}') < PIE_CHART_MAX_CARDINALITY or r_statistics.zscore(f'providers_set_{date}', source) != None:
r_statistics.zadd(f'providers_set_{date}', float(nb_items), source)
r_statistics.zadd(f'providers_set_{date}', {source: float(nb_items)})
else: # zset at full capacity
member_set = r_statistics.zrangebyscore(f'providers_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
# Member set is a list of (value, score) pairs
if int(member_set[0][1]) < nb_items:
# remove min from set and add the new one
r_statistics.zrem(member_set[0][0])
r_statistics.zadd(f'providers_set_{date}', float(nb_items), source)
r_statistics.zadd(f'providers_set_{date}', {source: float(nb_items)})
# keyword num
def _add_module_stats(module_name, total_sum, keyword, date):
r_statistics.zadd(f'top_{module_name}_set_{date}', float(total_sum), keyword)
r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(total_sum)})
# # TODO: ONE HSET BY MODULE / CUSTOM STATS
def update_module_stats(module_name, num, keyword, date):
@ -111,14 +112,14 @@ def update_module_stats(module_name, num, keyword, date):
keyword_total_sum += int(curr_value) if curr_value is not None else 0
if r_statistics.zcard(f'top_{module_name}_set_{date}') < PIE_CHART_MAX_CARDINALITY:
r_statistics.zadd(f'top_{module_name}_set_{date}', float(keyword_total_sum), keyword)
r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(keyword_total_sum)})
else: # zset at full capacity
member_set = r_statistics.zrangebyscore(f'top_{module_name}_set_{date}', '-inf', '+inf', withscores=True, start=0, num=1)
# Member set is a list of (value, score) pairs
if int(member_set[0][1]) < keyword_total_sum:
#remove min from set and add the new one
r_statistics.zrem(f'top_{module_name}_set_{date}', member_set[0][0])
r_statistics.zadd(f'top_{module_name}_set_{date}', float(keyword_total_sum), keyword)
r_statistics.zadd(f'top_{module_name}_set_{date}', {keyword: float(keyword_total_sum)})
def get_module_tld_stats_by_tld_date(date, tld):
nb_tld = r_statistics.hget(f'credential_by_tld:{date}', tld)
@ -132,5 +133,5 @@ def get_module_tld_stats_by_date(module, date):
def add_module_tld_stats_by_date(module, date, tld, nb):
    """Increment the ``tld`` field of the ``<module>_by_tld:<date>`` hash.

    :param module: prefix of the hash key.
    :param date: suffix of the hash key.
    :param tld: hash field to increment.
    :param nb: increment amount; converted with ``int`` before use.
    """
    stats_key = f'{module}_by_tld:{date}'
    increment = int(nb)
    r_statistics.hincrby(stats_key, tld, increment)
# r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
# r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
# r_stats.zincrby('module:Global:incomplete_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
# r_stats.zincrby('module:Global:invalid_file', 1, datetime.datetime.now().strftime('%Y%m%d'))

View File

@ -21,12 +21,12 @@ from packages import Date
from lib import ConfigLoader
from lib import item_basic
from lib import Tag
from lib.Users import User
config_loader = ConfigLoader.ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
r_serv_tracker = config_loader.get_db_conn("Kvrocks_DB")
r_serv_tracker = config_loader.get_db_conn("Kvrocks_Trackers")
items_dir = config_loader.get_config_str("Directories", "pastes")
if items_dir[-1] == '/':
@ -250,7 +250,7 @@ def add_tracked_item(tracker_uuid, item_id):
res = r_serv_tracker.sadd(f'tracker:item:{tracker_uuid}:{item_date}', item_id)
# track nb item by date
if res == 1:
nb_items = r_serv_tracker.zincrby('tracker:stat:{}'.format(tracker_uuid), int(item_date), 1)
nb_items = r_serv_tracker.zincrby(f'tracker:stat:{tracker_uuid}', 1, int(item_date))
if nb_items == 1:
update_tracker_daterange(tracker_uuid, item_date)
@ -289,7 +289,7 @@ def remove_tracked_item(item_id):
r_serv_tracker.srem(f'obj:trackers:item:{item_id}', tracker_uuid)
res = r_serv_tracker.srem(f'tracker:item:{tracker_uuid}:{item_date}', item_id)
if res:
r_serv_tracker.zincrby('tracker:stat:{}'.format(tracker_uuid), int(item_date), -1)
r_serv_tracker.zincrby(f'tracker:stat:{tracker_uuid}', -1, int(item_date))
def get_item_all_trackers_uuid(obj_id):
#obj_type = 'item'
@ -326,13 +326,6 @@ def trigger_trackers_refresh(tracker_type):
######################
#### TRACKERS ACL ####
# # TODO: use new package => duplicate fct
def is_in_role(user_id, role):
if r_serv_db.sismember('user_role:{}'.format(role), user_id):
return True
else:
return False
def is_tracker_in_global_level(tracker, tracker_type):
res = r_serv_tracker.smembers('all:tracker_uuid:{}:{}'.format(tracker_type, tracker))
if res:
@ -364,11 +357,11 @@ def api_is_allowed_to_edit_tracker(tracker_uuid, user_id):
tracker_creator = r_serv_tracker.hget('tracker:{}'.format(tracker_uuid), 'user_id')
if not tracker_creator:
return {"status": "error", "reason": "Unknown uuid"}, 404
if not is_in_role(user_id, 'admin') and user_id != tracker_creator:
user = User(user_id)
if not user.is_in_role('admin') and user_id != tracker_creator:
return {"status": "error", "reason": "Access Denied"}, 403
return {"uuid": tracker_uuid}, 200
##-- ACL --##
#### FIX DB ####
@ -385,7 +378,7 @@ def fix_tracker_stats_per_day(tracker_uuid):
nb_items = r_serv_tracker.scard(f'tracker:item:{tracker_uuid}:{date_day}')
if nb_items:
r_serv_tracker.zincrby('tracker:stat:{}'.format(tracker_uuid), int(date_day), nb_items)
r_serv_tracker.zincrby(f'tracker:stat:{tracker_uuid}', nb_items, int(date_day))
# update first_seen/last_seen
update_tracker_daterange(tracker_uuid, date_day)
@ -470,9 +463,12 @@ def _re_create_tracker(tracker, tracker_type, user_id, level, tags, mails, descr
def create_tracker(tracker, tracker_type, user_id, level, tags, mails, description, webhook, dashboard=0, tracker_uuid=None, sources=[]):
# edit tracker
if tracker_uuid:
edit_tracker = True
# check if type changed
old_type = get_tracker_type(tracker_uuid)
if not old_type:
edit_tracker = False
else:
edit_tracker = True
old_tracker = get_tracker_by_uuid(tracker_uuid)
old_level = get_tracker_level(tracker_uuid)
tracker_user_id = get_tracker_user_id(tracker_uuid)
@ -497,19 +493,19 @@ def create_tracker(tracker, tracker_type, user_id, level, tags, mails, descripti
tracker_type = 'yara'
# create metadata
r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'tracked', tracker)
r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'type', tracker_type)
r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'date', datetime.date.today().strftime("%Y%m%d"))
r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'level', level)
r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'dashboard', dashboard)
r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'tracked', tracker)
r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'type', tracker_type)
r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'date', datetime.date.today().strftime("%Y%m%d"))
r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'level', level)
r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'dashboard', dashboard)
if not edit_tracker:
r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'user_id', user_id)
r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'user_id', user_id)
if description:
r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'description', description)
r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'description', description)
if webhook:
r_serv_tracker.hset('tracker:{}'.format(tracker_uuid), 'webhook', webhook)
r_serv_tracker.hset(f'tracker:{tracker_uuid}', 'webhook', webhook)
# type change
if edit_tracker:
@ -1165,7 +1161,7 @@ def save_retro_hunt_match(task_uuid, id, object_type='item'):
res = r_serv_tracker.sadd(f'tracker:retro_hunt:task:item:{task_uuid}:{item_date}', id)
# track nb item by date
if res == 1:
r_serv_tracker.zincrby(f'tracker:retro_hunt:task:stat:{task_uuid}', int(item_date), 1)
r_serv_tracker.zincrby(f'tracker:retro_hunt:task:stat:{task_uuid}', 1, int(item_date))
# Add map obj_id -> task_uuid
r_serv_tracker.sadd(f'obj:retro_hunt:item:{id}', task_uuid)

View File

@ -168,11 +168,11 @@ def get_all_roles():
# create role_list
def _create_roles_list():
if not r_serv_db.exists('ail:roles:all'):
r_serv_db.zadd('ail:roles:all', 1, 'admin')
r_serv_db.zadd('ail:roles:all', 2, 'analyst')
r_serv_db.zadd('ail:roles:all', 3, 'user')
r_serv_db.zadd('ail:roles:all', 4, 'user_no_api')
r_serv_db.zadd('ail:roles:all', 5, 'read_only')
r_serv_db.zadd('ail:roles:all', {'admin': 1})
r_serv_db.zadd('ail:roles:all', {'analyst': 2})
r_serv_db.zadd('ail:roles:all', {'user': 3})
r_serv_db.zadd('ail:roles:all', {'user_no_api': 4})
r_serv_db.zadd('ail:roles:all', {'read_only': 5})
def get_role_level(role):
    """Return the score of ``role`` in the ``ail:roles:all`` sorted set as an int.

    :param role: member looked up in the sorted set.
    """
    score = r_serv_db.zscore('ail:roles:all', role)
    return int(score)
@ -236,6 +236,9 @@ class User(UserMixin):
else:
self.id = "__anonymous__"
def exists(self):
    """Return ``True`` unless this user's id is the ``__anonymous__`` placeholder."""
    anonymous_id = "__anonymous__"
    return self.id != anonymous_id
# return True or False
# def is_authenticated():

View File

@ -1,31 +0,0 @@
#!/usr/bin/python3
"""
``basic domain lib``
===================
"""
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ConfigLoader
config_loader = ConfigLoader.ConfigLoader()
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
def get_domain_type(domain):
if str(domain).endswith('.onion'):
return 'onion'
else:
return 'regular'
def delete_domain_item_core(item_id, domain, port):
domain_type = get_domain_type(domain)
r_serv_onion.zrem('crawler_history_{}:{}:{}'.format(domain_type, domain, port), item_id)

View File

@ -220,17 +220,17 @@ class Decoded(AbstractObject):
if not self.is_seen_this_day(date):
# mimetype
r_metadata.zincrby(f'decoded:mimetype:{date}', mimetype, 1)
r_metadata.zincrby(f'decoded:mimetype:{date}', 1, mimetype)
r_metadata.sadd(f'decoded:mimetypes', mimetype)
# filter hash encoded in the same object
if not self.is_correlated('item', None, obj_id):
r_metadata.hincrby(f'metadata_hash:{self.id}', f'{decoder_name}_decoder', 1)
r_metadata.zincrby(f'{decoder_name}_type:{mimetype}', date, 1)
r_metadata.zincrby(f'{decoder_name}_type:{mimetype}', 1, date)
r_metadata.incrby(f'{decoder_name}_decoded:{date}', 1)
r_metadata.zincrby(f'{decoder_name}_date:{date}', self.id, 1)
r_metadata.zincrby(f'{decoder_name}_date:{date}', 1, self.id)
self.update_daterange(date)
@ -268,7 +268,7 @@ class Decoded(AbstractObject):
# mimetype # # # # # # # #
r_metadata.zincrby(f'decoded:mimetype:{date}', mimetype, 1)
r_metadata.zincrby(f'decoded:mimetype:{date}', 1, mimetype)
# create hash metadata
r_metadata.sadd(f'decoded:mimetypes', mimetype)
@ -280,13 +280,13 @@ class Decoded(AbstractObject):
self.update_daterange(date)
r_metadata.incrby(f'{decoder_type}_decoded:{date}', 1)
r_metadata.zincrby(f'{decoder_type}_date:{date}', self.id, 1)
r_metadata.zincrby(f'{decoder_type}_date:{date}', 1, self.id)
r_metadata.hincrby(f'metadata_hash:{self.id}', f'{decoder_type}_decoder', 1)
r_metadata.zincrby(f'{decoder_type}_type:{mimetype}', date, 1) # # TODO: # DUP1
r_metadata.zincrby(f'{decoder_type}_type:{mimetype}', 1, date) # # TODO: # DUP1
################################################################ # TODO: REMOVE ?????????????????????????????????
r_metadata.zincrby(f'{decoder_type}_hash:{self.id}', obj_id, 1) # number of b64 on this item
r_metadata.zincrby(f'{decoder_type}_hash:{self.id}', 1, obj_id) # number of b64 on this item
# first time we see this hash encoding on this item
@ -297,7 +297,7 @@ class Decoded(AbstractObject):
# first time we see this hash encoding today
if not r_metadata.zscore(f'{decoder_type}_date:{date}', self.id):
r_metadata.zincrby(f'{decoder_type}_type:{mimetype}', date, 1) # # TODO: # DUP1
r_metadata.zincrby(f'{decoder_type}_type:{mimetype}', 1, date) # # TODO: # DUP1
# Correlations

View File

@ -17,7 +17,7 @@ from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_object import AbstractObject
config_loader = ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_serv_metadata = config_loader.get_db_conn("Kvrocks_Objects")
SCREENSHOT_FOLDER = config_loader.get_files_directory('screenshot')
config_loader = None

View File

@ -24,7 +24,6 @@ from packages import Date
# LOAD CONFIG
config_loader = ConfigLoader()
# r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
config_loader = None
@ -118,7 +117,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
# NB Object seen by day
r_object.hincrby(f'{self.type}:date:{date}', self.id, 1)
r_object.zincrby(f'{self.type}:date:{date}', self.id, 1) # # # # # # # # # #
r_object.zincrby(f'{self.type}:date:{date}', 1, self.id) # # # # # # # # # #
# NB Object seen
r_object.hincrby(f'{self.type}:meta:{self.id}', 'nb', 1)

View File

@ -74,9 +74,6 @@ class AbstractObject(ABC):
tags = list(tags)
return tags
def get_duplicates(self):
return Duplicate.get_duplicates(self.type, self.get_subtype(r_str=True), self.id)
## ADD TAGS ????
def add_tag(self, tag):
Tag.add_object_tag(tag, self.type, self.id, subtype=self.get_subtype(r_str=True))
@ -88,6 +85,14 @@ class AbstractObject(ABC):
#- Tags -#
## Duplicates ##
def get_duplicates(self):
    """Return ``Duplicate.get_obj_duplicates`` for this object's type, subtype and id."""
    obj_type = self.type
    subtype = self.get_subtype(r_str=True)
    obj_id = self.id
    return Duplicate.get_obj_duplicates(obj_type, subtype, obj_id)
def add_duplicate(self, algo, similarity, id_2):
    """Record ``id_2`` as a duplicate of this object via ``Duplicate.add_obj_duplicate``.

    :param algo: algorithm name forwarded unchanged.
    :param similarity: similarity value forwarded unchanged.
    :param id_2: identifier of the other object.
    :return: the result of ``Duplicate.add_obj_duplicate``.
    """
    obj_type = self.type
    subtype = self.get_subtype(r_str=True)
    obj_id = self.id
    return Duplicate.add_obj_duplicate(algo, similarity, obj_type, subtype, obj_id, id_2)
# -Duplicates -#
## Investigations ##
# # TODO: unregister =====

View File

@ -77,7 +77,7 @@ class Duplicates(AbstractModule):
obj_hash = self.algos[algo]['hash']
for date_ymonth in last_month_dates:
if Duplicate.exists_algo_hash_by_month(algo, obj_hash, date_ymonth):
Duplicate.add_obj_duplicate(algo, obj_hash, 100, 'item', '', item.get_id(), date_ymonth)
Duplicate.add_duplicate(algo, obj_hash, 100, 'item', '', item.get_id(), date_ymonth)
nb_duplicates += 1
else:
for hash in Duplicate.get_algo_hashs_by_month(algo, date_ymonth):
@ -85,7 +85,7 @@ class Duplicates(AbstractModule):
similarity = Duplicate.get_algo_similarity(algo, obj_hash, hash)
print(f'[{algo}] comparing: {obj_hash} and {hash} similarity: {similarity}') # DEBUG:
if similarity >= self.algos[algo]['threshold']:
Duplicate.add_obj_duplicate(algo, hash, similarity, 'item', '', item.get_id(), date_ymonth)
Duplicate.add_duplicate(algo, hash, similarity, 'item', '', item.get_id(), date_ymonth)
nb_duplicates += 1
# Save Hashs

View File

@ -194,12 +194,12 @@ class Global(AbstractModule):
self.redis_logger.warning(f'Global; Incomplete file: {filename}')
print(f'Global; Incomplete file: {filename}')
# save daily stats
self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
self.r_stats.zincrby('module:Global:incomplete_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
except OSError:
self.redis_logger.warning(f'Global; Not a gzipped file: {filename}')
print(f'Global; Not a gzipped file: {filename}')
# save daily stats
self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
self.r_stats.zincrby('module:Global:invalid_file', 1, datetime.datetime.now().strftime('%Y%m%d'))
return curr_file_content

View File

@ -24,7 +24,7 @@ from packages import Date
from lib.objects import Items
config_loader = ConfigLoader.ConfigLoader()
r_serv_term = config_loader.get_db_conn("Kvrocks_DB")
r_serv_term = config_loader.get_db_conn("Kvrocks_Trackers")
config_loader = None
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
@ -387,11 +387,11 @@ def add_tracked_item(term_uuid, item_id, item_date):
# track item
r_serv_term.sadd('tracker:item:{}:{}'.format(term_uuid, item_date), item_id)
# track nb item by date
r_serv_term.zadd('tracker:stat:{}'.format(term_uuid), item_date, int(item_date))
r_serv_term.zadd('tracker:stat:{}'.format(term_uuid), {item_date: item_date})
def create_token_statistics(item_date, word, nb):
r_serv_term.zincrby('stat_token_per_item_by_day:{}'.format(item_date), word, 1)
r_serv_term.zincrby('stat_token_total_by_day:{}'.format(item_date), word, nb)
r_serv_term.zincrby('stat_token_per_item_by_day:{}'.format(item_date), 1, word)
r_serv_term.zincrby('stat_token_total_by_day:{}'.format(item_date), nb, word)
r_serv_term.sadd('stat_token_history', item_date)
def delete_token_statistics_by_date(item_date):

View File

@ -1,68 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
import bcrypt
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
from flask_login import UserMixin
def get_all_users():
return r_serv_db.hkeys('user:all')
class User(UserMixin):
def __init__(self, id):
config_loader = ConfigLoader.ConfigLoader()
self.r_serv_db = config_loader.get_redis_conn("ARDB_DB")
config_loader = None
if self.r_serv_db.hexists('user:all', id):
self.id = id
else:
self.id = "__anonymous__"
# return True or False
#def is_authenticated():
# return True or False
#def is_anonymous():
@classmethod
def get(self_class, id):
return self_class(id)
def user_is_anonymous(self):
if self.id == "__anonymous__":
return True
else:
return False
def check_password(self, password):
if self.user_is_anonymous():
return False
password = password.encode()
hashed_password = self.r_serv_db.hget('user:all', self.id).encode()
if bcrypt.checkpw(password, hashed_password):
return True
else:
return False
def request_password_change(self):
if self.r_serv_db.hget('user_metadata:{}'.format(self.id), 'change_passwd') == 'True':
return True
else:
return False
def is_in_role(self, role):
if self.r_serv_db.sismember('user_role:{}'.format(role), self.id):
return True
else:
return False

View File

@ -102,7 +102,7 @@ if __name__ == '__main__':
# create new history
root_key = get_domain_root_from_paste_childrens(item_father, onion_domain)
if root_key:
r_serv_onion.zadd('crawler_history_onion:{}:80'.format(onion_domain), get_date_epoch(date_history), root_key)
r_serv_onion.zadd(f'crawler_history_onion:{onion_domain}:80', {root_key: get_date_epoch(date_history)})
print('crawler_history_onion:{}:80 {} {}'.format(onion_domain, get_date_epoch(date_history), root_key))
#update service metadata: paste_parent
r_serv_onion.hset('onion_metadata:{}'.format(onion_domain), 'paste_parent', root_key)

View File

@ -19,9 +19,9 @@ if __name__ == '__main__':
r_serv = config_loader.get_redis_conn("ARDB_DB")
config_loader = None
r_serv.zadd('ail:all_role', 3, 'user')
r_serv.zadd('ail:all_role', 4, 'user_no_api')
r_serv.zadd('ail:all_role', 5, 'read_only')
r_serv.zadd('ail:all_role', {'user': 3})
r_serv.zadd('ail:all_role', {'user_no_api': 4})
r_serv.zadd('ail:all_role', {'read_only': 5})
for user in r_serv.hkeys('user:all'):
r_serv.sadd('user_role:user', user)

View File

@ -3,7 +3,6 @@
import os
import sys
import redis
sys.path.append(os.environ['AIL_BIN'])
##################################

View File

@ -191,7 +191,7 @@
<tbody>
{% for decoded in dict_domain['decoded']%}
<tr>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=decoded&correlation_id={{ decoded }}&correlation_objects=domain">{{ decoded }}</a></td>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=decoded&id={{ decoded }}">{{ decoded }}</a></td>
</tr>
{% endfor %}
</tbody>
@ -245,7 +245,7 @@
<i class="{{ var_icon }}"></i>
&nbsp;&nbsp;{{ dict_key }}
</td>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=pgp&correlation_id={{ key_id }}&type_id={{ dict_key }}&correlation_objects=domain">{{ key_id }}</a></td>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=pgp&subtype={{ dict_key }}&id={{ key_id }}">{{ key_id }}</a></td>
</tr>
{% endfor %}
{% endif %}
@ -304,7 +304,7 @@
<i class="{{ var_icon }}"></i>
&nbsp;&nbsp;{{ dict_key }}
</td>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=cryptocurrency&correlation_id={{ key_id }}&type_id={{ dict_key }}&correlation_objects=domain">{{ key_id }}</a></td>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=cryptocurrency&subtype={{ dict_key }}&id={{ key_id }}">{{ key_id }}</a></td>
</tr>
{% endfor %}
{% endif %}
@ -347,7 +347,7 @@
<tbody>
{% for screenshot in dict_domain['screenshot']%}
<tr>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=screenshot&correlation_id={{ screenshot }}&correlation_objects=domain">{{ screenshot }}</a></td>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=screenshot&id={{ screenshot }}">{{ screenshot }}</a></td>
</tr>
{% endfor %}
</tbody>

View File

@ -248,7 +248,7 @@
{% for b64 in l_64 %}
<tr>
<td><i class="fas {{ b64[0] }}"></i>&nbsp;&nbsp;{{ b64[1] }}</td>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=decoded&correlation_id={{ b64[2] }}&correlation_objects=paste"> {{b64[2]}} ({{ b64[4] }})</a></td>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=decoded&id={{ b64[2] }}"> {{b64[2]}} ({{ b64[4] }})</a></td>
</tr>
{% endfor %}
</tbody>

View File

@ -110,7 +110,7 @@
{% for dict_obj in dict_tagged["tagged_obj"] %}
<tr>
<td class="pb-0">
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type=screenshot&correlation_id={{dict_obj['id']}}" class="text-secondary">
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=screenshot&id={{dict_obj['id']}}" class="text-secondary">
<div style="line-height:0.9;">{{ dict_obj['id'] }}</div>
</a>
<div class="mb-2">
@ -153,7 +153,7 @@
<tr>
<td class="pb-0">{{ dict_obj['file_type'] }}</td>
<td class="pb-0">
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?object_type={{dict_tagged['object_type']}}&correlation_id={{dict_obj['id']}}" class="text-secondary">
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type={{dict_tagged['object_type']}}&id={{dict_obj['id']}}" class="text-secondary">
<div style="line-height:0.9;">{{ dict_obj['id'] }}</div>
</a>
<div class="mb-2">