mirror of https://github.com/CIRCL/AIL-framework
chg: [perf] reduce memory usage
parent 6ca4b29329
commit 61701e2fcc
@@ -9,7 +9,6 @@ The ``Domain``
 
 import os
 import sys
-import time
 import redis
 import configparser
 
@@ -32,6 +32,9 @@ config_loader = None
 
 # # # # UNSAFE TAGS # # # #
 
+# set of unsafe tags
+UNSAFE_TAGS = None
+
 def build_unsafe_tags():
     tags = set()
     # CE content
@@ -52,12 +55,12 @@ def is_tags_safe(ltags):
     :return: is a tag in the set unsafe
     :rtype: boolean
     """
-    return unsafe_tags.isdisjoint(ltags)
+    global UNSAFE_TAGS
+    if UNSAFE_TAGS is None:
+        UNSAFE_TAGS = build_unsafe_tags()
+    return UNSAFE_TAGS.isdisjoint(ltags)
 
 
-# set of unsafe tags
-unsafe_tags = build_unsafe_tags()
-
 # - - - UNSAFE TAGS - - - #
 
 # # TODO: verify tags + object_type
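The two hunks above replace the module-level `unsafe_tags = build_unsafe_tags()`, which ran at import time in every process that loaded the module, with an `UNSAFE_TAGS = None` sentinel that is only filled on the first call to `is_tags_safe()`. A minimal, self-contained sketch of that lazy-initialization pattern; the tag literal below is a stand-in, not the real content of the set:

# Lazy module-level initialization: built on first use, never at import time.
UNSAFE_TAGS = None

def build_unsafe_tags():
    # Stand-in builder; the real one assembles CE-related taxonomy/galaxy tags.
    return {'dark-web:topic="example-unsafe-topic"'}

def is_tags_safe(ltags):
    """Return True if none of the given tags is in the unsafe set."""
    global UNSAFE_TAGS
    if UNSAFE_TAGS is None:           # only the first caller pays the build cost
        UNSAFE_TAGS = build_unsafe_tags()
    return UNSAFE_TAGS.isdisjoint(ltags)

# Processes that never call is_tags_safe() never allocate the set at all.
print(is_tags_safe({'infoleak:automatic-detection="credit-card"'}))  # True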
@@ -80,16 +83,15 @@ def get_obj_by_tag(key_tag):
 
 #### Taxonomies ####
 
-TAXONOMIES = {}
+TAXONOMIES = None
 def load_taxonomies():
     global TAXONOMIES
     manifest = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json')
     TAXONOMIES = Taxonomies(manifest_path=manifest)
 
 
-load_taxonomies()
-
-
 def get_taxonomies():
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.keys()
 
 # TODO rename me to get enabled_taxonomies
@@ -111,12 +113,18 @@ def disable_taxonomy(taxonomy):
     r_tags.srem('taxonomies:enabled', taxonomy)
 
 def exists_taxonomy(taxonomy):
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.get(taxonomy) is not None
 
 def get_taxonomy_description(taxonomy):
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.get(taxonomy).description
 
 def get_taxonomy_name(taxonomy):
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.get(taxonomy).name
 
 def get_taxonomy_predicates(taxonomy):
@@ -133,12 +141,18 @@ def get_taxonomy_predicates(taxonomy):
     return meta
 
 def get_taxonomy_refs(taxonomy):
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.get(taxonomy).refs
 
 def get_taxonomy_version(taxonomy):
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.get(taxonomy).version
 
 def get_taxonomy_tags(taxonomy, enabled=False):
+    if TAXONOMIES is None:
+        load_taxonomies()
     taxonomy_obj = TAXONOMIES.get(taxonomy)
     tags = []
     for p, content in taxonomy_obj.items():
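Each taxonomy accessor in the hunks above now repeats the same two-line guard before touching `TAXONOMIES`. A hedged sketch of how that guard could be centralized in one place; `_taxonomies()` is a hypothetical helper name, not part of the AIL code base, and it reuses the `load_taxonomies()` defined earlier in the diff:

def _taxonomies():
    # Hypothetical helper: make sure the MISP taxonomies are loaded, then return them.
    if TAXONOMIES is None:
        load_taxonomies()
    return TAXONOMIES

def get_taxonomy_refs(taxonomy):
    return _taxonomies().get(taxonomy).refs

def get_taxonomy_version(taxonomy):
    return _taxonomies().get(taxonomy).version

The inline guard the commit uses avoids an extra function call in each accessor; the helper is only a readability trade-off, not a behavioural change.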
@@ -165,6 +179,8 @@ def get_taxonomy_meta(taxonomy_name, enabled=False, enabled_tags=False, nb_activ
     meta = {}
     if not exists_taxonomy(taxonomy_name):
         return meta
+    if TAXONOMIES is None:
+        load_taxonomies()
     taxonomy = TAXONOMIES.get(taxonomy_name)
     meta['description'] = taxonomy.description
     meta['name'] = taxonomy.name
@@ -241,6 +257,8 @@ def api_update_taxonomy_tag_enabled(data):
     if not exists_taxonomy(taxonomy):
         return {'error': f'taxonomy {taxonomy} not found'}, 404
     tags = data.get('tags', [])
+    if TAXONOMIES is None:
+        load_taxonomies()
     taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags())
     for tag in tags:
         if tag not in taxonomy_tags:
@@ -249,6 +267,8 @@ def api_update_taxonomy_tag_enabled(data):
 
 def enable_taxonomy_tags(taxonomy):
     enable_taxonomy(taxonomy)
+    if TAXONOMIES is None:
+        load_taxonomies()
     for tag in TAXONOMIES.get(taxonomy).machinetags():
         add_taxonomy_tag_enabled(taxonomy, tag)
 
@@ -279,9 +299,8 @@ def api_disable_taxonomy_tags(data):
 #
 
 # TODO Synonyms
-GALAXIES = {}
-CLUSTERS = {}
+GALAXIES = None
+CLUSTERS = None
 def load_galaxies():
     global GALAXIES
     galaxies = []
@@ -298,11 +317,10 @@ def load_galaxies():
             clusters.append(json.load(f))
     CLUSTERS = Clusters(clusters)
 
-# LOAD GALAXY + CLUSTERS
-load_galaxies()
-
-def get_galaxies():
+def get_galaxies():
+    if GALAXIES is None:
+        # LOAD GALAXY + CLUSTERS
+        load_galaxies()
     return GALAXIES.keys()
 
 # TODO RENAME ME
@@ -310,9 +328,15 @@ def get_active_galaxies():
     return r_tags.smembers('galaxies:enabled')
 
 def get_galaxy(galaxy_name):
+    if GALAXIES is None:
+        # LOAD GALAXY + CLUSTERS
+        load_galaxies()
     return GALAXIES.get(galaxy_name)
 
 def exists_galaxy(galaxy):
+    if CLUSTERS is None:
+        # LOAD GALAXY + CLUSTERS
+        load_galaxies()
     return CLUSTERS.get(galaxy) is not None
 
 def is_galaxy_enabled(galaxy):
@@ -369,9 +393,15 @@ def get_galaxy_tag_meta(galaxy_type, tag):
 
 
 def get_clusters():
+    if CLUSTERS is None:
+        # LOAD GALAXY + CLUSTERS
+        load_galaxies()
     return CLUSTERS.keys()
 
 def get_cluster(cluster_type):
+    if CLUSTERS is None:
+        # LOAD GALAXY + CLUSTERS
+        load_galaxies()
     return CLUSTERS.get(cluster_type)
 
 def get_galaxy_tags(galaxy_type):
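`GALAXIES` and `CLUSTERS` get the same check-then-load treatment as the taxonomies. If two threads evaluate the guard at the same moment, both may call `load_galaxies()`; because the load is idempotent, the worst case should be doing the work twice rather than corrupting state. A sketch of how the guard could be hardened with a lock, purely as an illustration under that assumption (the lock is not something this commit adds):

import threading

_GALAXIES_LOCK = threading.Lock()  # illustrative only

def get_galaxy(galaxy_name):
    if GALAXIES is None:
        with _GALAXIES_LOCK:
            if GALAXIES is None:   # re-check inside the lock so only one thread loads
                load_galaxies()
    return GALAXIES.get(galaxy_name)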
@@ -12,7 +12,6 @@ import yara
 import datetime
 import base64
 
-from ail_typo_squatting import runAll
 import math
 
 from collections import defaultdict
@@ -38,23 +37,21 @@ logger = logging.getLogger()
 
 config_loader = ConfigLoader.ConfigLoader()
 r_cache = config_loader.get_redis_conn("Redis_Cache")
 
 r_tracker = config_loader.get_db_conn("Kvrocks_Trackers")
 
-items_dir = config_loader.get_config_str("Directories", "pastes")
-if items_dir[-1] == '/':
-    items_dir = items_dir[:-1]
 config_loader = None
 
-email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
-email_regex = re.compile(email_regex)
+# NLTK tokenizer
+TOKENIZER = None
+
+def init_tokenizer():
+    global TOKENIZER
+    TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
+                                gaps=True, discard_empty=True)
 
-special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
-special_characters.add('\\s')
-
-# NLTK tokenizer
-tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
-                            gaps=True, discard_empty=True)
+def get_special_characters():
+    special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
+    special_characters.add('\\s')
+    return special_characters
 
 ###############
 #### UTILS ####
@@ -76,6 +73,8 @@ def is_valid_regex(tracker_regex):
         return False
 
 def is_valid_mail(email):
+    email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
+    email_regex = re.compile(email_regex)
     result = email_regex.match(email)
     if result:
         return True
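Moving the `re.compile()` into `is_valid_mail()` means the compiled pattern is no longer kept alive in every process that merely imports the module. Repeated calls should stay cheap because Python's `re` module keeps an internal cache of recently compiled patterns. A self-contained sketch of the resulting function; the return handling is simplified and the addresses are made up:

import re

def is_valid_mail(email):
    # Compiled on each call (or fetched from re's internal pattern cache).
    email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
    email_regex = re.compile(email_regex)
    return bool(email_regex.match(email))

print(is_valid_mail('analyst@example.org'))  # True
print(is_valid_mail('not-an-address'))       # False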
@@ -400,6 +399,9 @@ class Tracker:
             tracker_type = 'yara'
 
         elif tracker_type == 'typosquatting':
+
+            from ail_typo_squatting import runAll
+
             domain = to_track.split(" ")[0]
             typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False)  # TODO REPLACE LIMIT BY -1
             for typo in typo_generation:
@@ -857,7 +859,7 @@ def api_validate_tracker_to_add(to_track, tracker_type, nb_words=1):
         # force lowercase
         to_track = to_track.lower()
         word_set = set(to_track)
-        set_inter = word_set.intersection(special_characters)
+        set_inter = word_set.intersection(get_special_characters())
         if set_inter:
             return {"status": "error",
                     "reason": f'special character(s) not allowed: {set_inter}',
@@ -1113,7 +1115,9 @@ def get_text_word_frequency(content, filtering=True):
     words_dict = defaultdict(int)
 
     if filtering:
-        blob = TextBlob(content, tokenizer=tokenizer)
+        if TOKENIZER is None:
+            init_tokenizer()
+        blob = TextBlob(content, tokenizer=TOKENIZER)
     else:
         blob = TextBlob(content)
     for word in blob.tokens:
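With the NLTK tokenizer now built lazily, `get_text_word_frequency()` only creates it the first time filtering is actually requested. A self-contained sketch of the pattern, reusing the tokenizer definition from the earlier hunk; the word-counting body and the sample text are simplifications, and `nltk` plus `textblob` must be installed:

from collections import defaultdict

from nltk.tokenize import RegexpTokenizer
from textblob import TextBlob

TOKENIZER = None

def init_tokenizer():
    # Built on first use instead of at import time.
    global TOKENIZER
    TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
                                gaps=True, discard_empty=True)

def get_text_word_frequency(content, filtering=True):
    words_dict = defaultdict(int)
    if filtering:
        if TOKENIZER is None:
            init_tokenizer()
        blob = TextBlob(content, tokenizer=TOKENIZER)
    else:
        blob = TextBlob(content)
    for word in blob.tokens:          # simplified counting body
        words_dict[word.lower()] += 1
    return words_dict

print(dict(get_text_word_frequency('leak: password admin password')))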
@@ -1800,9 +1804,9 @@ def _fix_db_custom_tags():
 #### -- ####
 
 
-if __name__ == '__main__':
+# if __name__ == '__main__':
 
-    _fix_db_custom_tags()
+# _fix_db_custom_tags()
 # fix_all_tracker_uuid_list()
 # res = get_all_tracker_uuid()
 # print(len(res))
@@ -8,7 +8,6 @@ import sys
 import requests
 
 sys.path.append(os.environ['AIL_BIN'])
-from lib.objects.CryptoCurrencies import CryptoCurrency
 
 logger = logging.getLogger()
 
@@ -53,9 +52,11 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50):
 
 # filter btc seen in ail
 def filter_btc_seen(btc_addr_set):
+    from lib.objects import CryptoCurrencies
+
     list_seen_btc = []
     for btc_addr in btc_addr_set:
-        cryptocurrency = CryptoCurrency(btc_addr, 'bitcoin')
+        cryptocurrency = CryptoCurrencies.CryptoCurrency(btc_addr, 'bitcoin')
         if cryptocurrency.exists():
             list_seen_btc.append(btc_addr)
     return list_seen_btc
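This hunk and the typosquatting hunk earlier apply the same idea to imports: `CryptoCurrencies` is now imported inside `filter_btc_seen()` and `ail_typo_squatting` inside the typosquatting branch of the tracker, so AIL processes that import these modules without ever hitting those code paths never load the dependencies. A generic sketch of a function-scope import; the module and function names here are placeholders:

def rarely_used_feature(data):
    # Loaded only when this code path actually runs; Python caches the module,
    # so later calls do not pay the import cost again.
    import json as heavy_dependency  # placeholder for a genuinely heavy import
    return heavy_dependency.dumps(data)

print(rarely_used_feature({'status': 'ok'}))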
@@ -18,13 +18,10 @@ from lib.ConfigLoader import ConfigLoader
 from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects
 
-
-from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
-from lib.data_retention_engine import update_obj_date
-from lib.objects import ail_objects
+from lib.objects.abstract_subtype_object import get_all_id
+# from lib.data_retention_engine import update_obj_date
 from lib.timeline_engine import Timeline
 
-from lib.correlations_engine import get_correlation_by_correl_type
 
 config_loader = ConfigLoader()
 baseurl = config_loader.get_config_str("Notifications", "ail_domain")
 r_object = config_loader.get_db_conn("Kvrocks_Objects")
@@ -213,10 +213,10 @@ class Ocr(AbstractObject):
         draw = ImageDraw.Draw(img)
         for bbox in self.get_coords():
             c1, c2, c3, c4 = bbox
-            draw.line((tuple(c1), tuple(c2)), fill="yellow")
-            draw.line((tuple(c2), tuple(c3)), fill="yellow")
-            draw.line((tuple(c3), tuple(c4)), fill="yellow")
-            draw.line((tuple(c4), tuple(c1)), fill="yellow")
+            draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2)
+            draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2)
+            draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2)
+            draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2)
         # img.show()
         buff = BytesIO()
         img.save(buff, "PNG")
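The `Ocr` hunk is a small drawing tweak rather than a memory change: each edge of an OCR bounding box is now drawn with `width=2`, presumably so the outline stays visible on large screenshots. A self-contained sketch of the same PIL calls on a dummy image; the box coordinates are made up:

from io import BytesIO

from PIL import Image, ImageDraw

img = Image.new('RGB', (200, 100), color='black')
draw = ImageDraw.Draw(img)

# One bounding box as four corner points, drawn edge by edge like in the Ocr object.
c1, c2, c3, c4 = (10, 10), (150, 10), (150, 60), (10, 60)
draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2)
draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2)
draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2)
draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2)

buff = BytesIO()
img.save(buff, "PNG")
print(len(buff.getvalue()), 'bytes of PNG written')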
@@ -37,7 +37,7 @@ from lib.objects import Ocrs
 from lib.objects import Pgps
 from lib.objects.Screenshots import Screenshot
 from lib.objects import Titles
-from lib.objects.UsersAccount import UserAccount
+from lib.objects import UsersAccount
 from lib.objects import Usernames
 
 config_loader = ConfigLoader()
@@ -113,7 +113,7 @@ def get_object(obj_type, subtype, obj_id):
     elif obj_type == 'pgp':
         return Pgps.Pgp(obj_id, subtype)
     elif obj_type == 'user-account':
-        return UserAccount(obj_id, subtype)
+        return UsersAccount.UserAccount(obj_id, subtype)
     elif obj_type == 'username':
         return Usernames.Username(obj_id, subtype)
     else:
@@ -26,7 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
 # Import Project packages #
 ##################################
 from modules.abstract_module import AbstractModule
-from lib.objects.Items import Item
 from lib.ConfigLoader import ConfigLoader
 # from lib import Statistics
 