mirror of https://github.com/CIRCL/AIL-framework
chg: [perf] reduce memory usage
parent
6ca4b29329
commit
61701e2fcc
|
@ -9,7 +9,6 @@ The ``Domain``
|
|||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import redis
|
||||
import configparser
|
||||
|
||||
|
|
|
@ -32,6 +32,9 @@ config_loader = None
|
|||
|
||||
# # # # UNSAFE TAGS # # # #
|
||||
|
||||
# set of unsafe tags
|
||||
UNSAFE_TAGS = None
|
||||
|
||||
def build_unsafe_tags():
|
||||
tags = set()
|
||||
# CE content
|
||||
|
@ -52,12 +55,12 @@ def is_tags_safe(ltags):
|
|||
:return: is a tag in the set unsafe
|
||||
:rtype: boolean
|
||||
"""
|
||||
return unsafe_tags.isdisjoint(ltags)
|
||||
global UNSAFE_TAGS
|
||||
if UNSAFE_TAGS is None:
|
||||
UNSAFE_TAGS = build_unsafe_tags()
|
||||
return UNSAFE_TAGS.isdisjoint(ltags)
|
||||
|
||||
|
||||
# set of unsafe tags
|
||||
unsafe_tags = build_unsafe_tags()
|
||||
|
||||
# - - - UNSAFE TAGS - - - #
|
||||
|
||||
# # TODO: verify tags + object_type
|
||||
|
@ -80,16 +83,15 @@ def get_obj_by_tag(key_tag):
|
|||
|
||||
#### Taxonomies ####
|
||||
|
||||
TAXONOMIES = {}
|
||||
TAXONOMIES = None
|
||||
def load_taxonomies():
|
||||
global TAXONOMIES
|
||||
manifest = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json')
|
||||
TAXONOMIES = Taxonomies(manifest_path=manifest)
|
||||
|
||||
|
||||
load_taxonomies()
|
||||
|
||||
def get_taxonomies():
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.keys()
|
||||
|
||||
# TODO rename me to get enabled_taxonomies
|
||||
|
@ -111,12 +113,18 @@ def disable_taxonomy(taxonomy):
|
|||
r_tags.srem('taxonomies:enabled', taxonomy)
|
||||
|
||||
def exists_taxonomy(taxonomy):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.get(taxonomy) is not None
|
||||
|
||||
def get_taxonomy_description(taxonomy):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.get(taxonomy).description
|
||||
|
||||
def get_taxonomy_name(taxonomy):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.get(taxonomy).name
|
||||
|
||||
def get_taxonomy_predicates(taxonomy):
|
||||
|
@ -133,12 +141,18 @@ def get_taxonomy_predicates(taxonomy):
|
|||
return meta
|
||||
|
||||
def get_taxonomy_refs(taxonomy):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.get(taxonomy).refs
|
||||
|
||||
def get_taxonomy_version(taxonomy):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
return TAXONOMIES.get(taxonomy).version
|
||||
|
||||
def get_taxonomy_tags(taxonomy, enabled=False):
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
taxonomy_obj = TAXONOMIES.get(taxonomy)
|
||||
tags = []
|
||||
for p, content in taxonomy_obj.items():
|
||||
|
@ -165,6 +179,8 @@ def get_taxonomy_meta(taxonomy_name, enabled=False, enabled_tags=False, nb_activ
|
|||
meta = {}
|
||||
if not exists_taxonomy(taxonomy_name):
|
||||
return meta
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
taxonomy = TAXONOMIES.get(taxonomy_name)
|
||||
meta['description'] = taxonomy.description
|
||||
meta['name'] = taxonomy.name
|
||||
|
@ -241,6 +257,8 @@ def api_update_taxonomy_tag_enabled(data):
|
|||
if not exists_taxonomy(taxonomy):
|
||||
return {'error': f'taxonomy {taxonomy} not found'}, 404
|
||||
tags = data.get('tags', [])
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags())
|
||||
for tag in tags:
|
||||
if tag not in taxonomy_tags:
|
||||
|
@ -249,6 +267,8 @@ def api_update_taxonomy_tag_enabled(data):
|
|||
|
||||
def enable_taxonomy_tags(taxonomy):
|
||||
enable_taxonomy(taxonomy)
|
||||
if TAXONOMIES is None:
|
||||
load_taxonomies()
|
||||
for tag in TAXONOMIES.get(taxonomy).machinetags():
|
||||
add_taxonomy_tag_enabled(taxonomy, tag)
|
||||
|
||||
|
@ -279,9 +299,8 @@ def api_disable_taxonomy_tags(data):
|
|||
#
|
||||
|
||||
# TODO Synonyms
|
||||
|
||||
GALAXIES = {}
|
||||
CLUSTERS = {}
|
||||
GALAXIES = None
|
||||
CLUSTERS = None
|
||||
def load_galaxies():
|
||||
global GALAXIES
|
||||
galaxies = []
|
||||
|
@ -298,11 +317,10 @@ def load_galaxies():
|
|||
clusters.append(json.load(f))
|
||||
CLUSTERS = Clusters(clusters)
|
||||
|
||||
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
|
||||
def get_galaxies():
|
||||
if GALAXIES is None:
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
return GALAXIES.keys()
|
||||
|
||||
# TODO RENAME ME
|
||||
|
@ -310,9 +328,15 @@ def get_active_galaxies():
|
|||
return r_tags.smembers('galaxies:enabled')
|
||||
|
||||
def get_galaxy(galaxy_name):
|
||||
if GALAXIES is None:
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
return GALAXIES.get(galaxy_name)
|
||||
|
||||
def exists_galaxy(galaxy):
|
||||
if CLUSTERS is None:
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
return CLUSTERS.get(galaxy) is not None
|
||||
|
||||
def is_galaxy_enabled(galaxy):
|
||||
|
@ -369,9 +393,15 @@ def get_galaxy_tag_meta(galaxy_type, tag):
|
|||
|
||||
|
||||
def get_clusters():
|
||||
if CLUSTERS is None:
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
return CLUSTERS.keys()
|
||||
|
||||
def get_cluster(cluster_type):
|
||||
if CLUSTERS is None:
|
||||
# LOAD GALAXY + CLUSTERS
|
||||
load_galaxies()
|
||||
return CLUSTERS.get(cluster_type)
|
||||
|
||||
def get_galaxy_tags(galaxy_type):
|
||||
|
|
|
@ -12,7 +12,6 @@ import yara
|
|||
import datetime
|
||||
import base64
|
||||
|
||||
from ail_typo_squatting import runAll
|
||||
import math
|
||||
|
||||
from collections import defaultdict
|
||||
|
@ -38,24 +37,22 @@ logger = logging.getLogger()
|
|||
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||
|
||||
r_tracker = config_loader.get_db_conn("Kvrocks_Trackers")
|
||||
|
||||
items_dir = config_loader.get_config_str("Directories", "pastes")
|
||||
if items_dir[-1] == '/':
|
||||
items_dir = items_dir[:-1]
|
||||
config_loader = None
|
||||
|
||||
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
|
||||
email_regex = re.compile(email_regex)
|
||||
|
||||
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
|
||||
special_characters.add('\\s')
|
||||
|
||||
# NLTK tokenizer
|
||||
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
|
||||
TOKENIZER = None
|
||||
|
||||
def init_tokenizer():
|
||||
global TOKENIZER
|
||||
TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
|
||||
gaps=True, discard_empty=True)
|
||||
|
||||
def get_special_characters():
|
||||
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
|
||||
special_characters.add('\\s')
|
||||
return special_characters
|
||||
|
||||
###############
|
||||
#### UTILS ####
|
||||
def is_valid_uuid_v4(curr_uuid):
|
||||
|
@ -76,6 +73,8 @@ def is_valid_regex(tracker_regex):
|
|||
return False
|
||||
|
||||
def is_valid_mail(email):
|
||||
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
|
||||
email_regex = re.compile(email_regex)
|
||||
result = email_regex.match(email)
|
||||
if result:
|
||||
return True
|
||||
|
@ -400,6 +399,9 @@ class Tracker:
|
|||
tracker_type = 'yara'
|
||||
|
||||
elif tracker_type == 'typosquatting':
|
||||
|
||||
from ail_typo_squatting import runAll
|
||||
|
||||
domain = to_track.split(" ")[0]
|
||||
typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False) # TODO REPLACE LIMIT BY -1
|
||||
for typo in typo_generation:
|
||||
|
@ -857,7 +859,7 @@ def api_validate_tracker_to_add(to_track, tracker_type, nb_words=1):
|
|||
# force lowercase
|
||||
to_track = to_track.lower()
|
||||
word_set = set(to_track)
|
||||
set_inter = word_set.intersection(special_characters)
|
||||
set_inter = word_set.intersection(get_special_characters())
|
||||
if set_inter:
|
||||
return {"status": "error",
|
||||
"reason": f'special character(s) not allowed: {set_inter}',
|
||||
|
@ -1113,7 +1115,9 @@ def get_text_word_frequency(content, filtering=True):
|
|||
words_dict = defaultdict(int)
|
||||
|
||||
if filtering:
|
||||
blob = TextBlob(content, tokenizer=tokenizer)
|
||||
if TOKENIZER is None:
|
||||
init_tokenizer()
|
||||
blob = TextBlob(content, tokenizer=TOKENIZER)
|
||||
else:
|
||||
blob = TextBlob(content)
|
||||
for word in blob.tokens:
|
||||
|
@ -1800,9 +1804,9 @@ def _fix_db_custom_tags():
|
|||
#### -- ####
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# if __name__ == '__main__':
|
||||
|
||||
_fix_db_custom_tags()
|
||||
# _fix_db_custom_tags()
|
||||
# fix_all_tracker_uuid_list()
|
||||
# res = get_all_tracker_uuid()
|
||||
# print(len(res))
|
||||
|
|
|
@ -8,7 +8,6 @@ import sys
|
|||
import requests
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
from lib.objects.CryptoCurrencies import CryptoCurrency
|
||||
|
||||
logger = logging.getLogger()
|
||||
|
||||
|
@ -53,9 +52,11 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50):
|
|||
|
||||
# filter btc seen in ail
|
||||
def filter_btc_seen(btc_addr_set):
|
||||
from lib.objects import CryptoCurrencies
|
||||
|
||||
list_seen_btc = []
|
||||
for btc_addr in btc_addr_set:
|
||||
cryptocurrency = CryptoCurrency(btc_addr, 'bitcoin')
|
||||
cryptocurrency = CryptoCurrencies.CryptoCurrency(btc_addr, 'bitcoin')
|
||||
if cryptocurrency.exists():
|
||||
list_seen_btc.append(btc_addr)
|
||||
return list_seen_btc
|
||||
|
|
|
@ -18,13 +18,10 @@ from lib.ConfigLoader import ConfigLoader
|
|||
from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects
|
||||
|
||||
|
||||
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
|
||||
from lib.data_retention_engine import update_obj_date
|
||||
from lib.objects import ail_objects
|
||||
from lib.objects.abstract_subtype_object import get_all_id
|
||||
# from lib.data_retention_engine import update_obj_date
|
||||
from lib.timeline_engine import Timeline
|
||||
|
||||
from lib.correlations_engine import get_correlation_by_correl_type
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||
r_object = config_loader.get_db_conn("Kvrocks_Objects")
|
||||
|
|
|
@ -213,10 +213,10 @@ class Ocr(AbstractObject):
|
|||
draw = ImageDraw.Draw(img)
|
||||
for bbox in self.get_coords():
|
||||
c1, c2, c3, c4 = bbox
|
||||
draw.line((tuple(c1), tuple(c2)), fill="yellow")
|
||||
draw.line((tuple(c2), tuple(c3)), fill="yellow")
|
||||
draw.line((tuple(c3), tuple(c4)), fill="yellow")
|
||||
draw.line((tuple(c4), tuple(c1)), fill="yellow")
|
||||
draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2)
|
||||
draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2)
|
||||
draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2)
|
||||
draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2)
|
||||
# img.show()
|
||||
buff = BytesIO()
|
||||
img.save(buff, "PNG")
|
||||
|
|
|
@ -37,7 +37,7 @@ from lib.objects import Ocrs
|
|||
from lib.objects import Pgps
|
||||
from lib.objects.Screenshots import Screenshot
|
||||
from lib.objects import Titles
|
||||
from lib.objects.UsersAccount import UserAccount
|
||||
from lib.objects import UsersAccount
|
||||
from lib.objects import Usernames
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
|
@ -113,7 +113,7 @@ def get_object(obj_type, subtype, obj_id):
|
|||
elif obj_type == 'pgp':
|
||||
return Pgps.Pgp(obj_id, subtype)
|
||||
elif obj_type == 'user-account':
|
||||
return UserAccount(obj_id, subtype)
|
||||
return UsersAccount.UserAccount(obj_id, subtype)
|
||||
elif obj_type == 'username':
|
||||
return Usernames.Username(obj_id, subtype)
|
||||
else:
|
||||
|
|
|
@ -26,7 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages #
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.objects.Items import Item
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
# from lib import Statistics
|
||||
|
||||
|
|
Loading…
Reference in New Issue