mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			chg: [perf] reduce memory usage
							parent
							
								
									6ca4b29329
								
							
						
					
					
						commit
						61701e2fcc
					
				| 
						 | 
				
			
			@ -9,7 +9,6 @@ The ``Domain``
 | 
			
		|||
 | 
			
		||||
import os
 | 
			
		||||
import sys
 | 
			
		||||
import time
 | 
			
		||||
import redis
 | 
			
		||||
import configparser
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -32,6 +32,9 @@ config_loader = None
 | 
			
		|||
 | 
			
		||||
# # # # UNSAFE TAGS # # # #
 | 
			
		||||
 | 
			
		||||
# set of unsafe tags
 | 
			
		||||
UNSAFE_TAGS = None
 | 
			
		||||
 | 
			
		||||
def build_unsafe_tags():
 | 
			
		||||
    tags = set()
 | 
			
		||||
    # CE content
 | 
			
		||||
| 
						 | 
				
			
			@ -52,12 +55,12 @@ def is_tags_safe(ltags):
 | 
			
		|||
    :return: is a tag in the set unsafe
 | 
			
		||||
    :rtype: boolean
 | 
			
		||||
    """
 | 
			
		||||
    return unsafe_tags.isdisjoint(ltags)
 | 
			
		||||
    global UNSAFE_TAGS
 | 
			
		||||
    if UNSAFE_TAGS is None:
 | 
			
		||||
        UNSAFE_TAGS = build_unsafe_tags()
 | 
			
		||||
    return UNSAFE_TAGS.isdisjoint(ltags)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# set of unsafe tags
 | 
			
		||||
unsafe_tags = build_unsafe_tags()
 | 
			
		||||
 | 
			
		||||
# - - - UNSAFE TAGS - - - #
 | 
			
		||||
 | 
			
		||||
# # TODO: verify tags + object_type
 | 
			
		||||
| 
						 | 
				
			
			@ -80,16 +83,15 @@ def get_obj_by_tag(key_tag):
 | 
			
		|||
 | 
			
		||||
#### Taxonomies ####
 | 
			
		||||
 | 
			
		||||
TAXONOMIES = {}
 | 
			
		||||
TAXONOMIES = None
 | 
			
		||||
def load_taxonomies():
 | 
			
		||||
    global TAXONOMIES
 | 
			
		||||
    manifest = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json')
 | 
			
		||||
    TAXONOMIES = Taxonomies(manifest_path=manifest)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
load_taxonomies()
 | 
			
		||||
 | 
			
		||||
def get_taxonomies():
 | 
			
		||||
    if TAXONOMIES is None:
 | 
			
		||||
        load_taxonomies()
 | 
			
		||||
    return TAXONOMIES.keys()
 | 
			
		||||
 | 
			
		||||
# TODO rename me to get enabled_taxonomies
 | 
			
		||||
| 
						 | 
				
			
			@ -111,12 +113,18 @@ def disable_taxonomy(taxonomy):
 | 
			
		|||
    r_tags.srem('taxonomies:enabled', taxonomy)
 | 
			
		||||
 | 
			
		||||
def exists_taxonomy(taxonomy):
 | 
			
		||||
    if TAXONOMIES is None:
 | 
			
		||||
        load_taxonomies()
 | 
			
		||||
    return TAXONOMIES.get(taxonomy) is not None
 | 
			
		||||
 | 
			
		||||
def get_taxonomy_description(taxonomy):
 | 
			
		||||
    if TAXONOMIES is None:
 | 
			
		||||
        load_taxonomies()
 | 
			
		||||
    return TAXONOMIES.get(taxonomy).description
 | 
			
		||||
 | 
			
		||||
def get_taxonomy_name(taxonomy):
 | 
			
		||||
    if TAXONOMIES is None:
 | 
			
		||||
        load_taxonomies()
 | 
			
		||||
    return TAXONOMIES.get(taxonomy).name
 | 
			
		||||
 | 
			
		||||
def get_taxonomy_predicates(taxonomy):
 | 
			
		||||
| 
						 | 
				
			
			@ -133,12 +141,18 @@ def get_taxonomy_predicates(taxonomy):
 | 
			
		|||
    return meta
 | 
			
		||||
 | 
			
		||||
def get_taxonomy_refs(taxonomy):
 | 
			
		||||
    if TAXONOMIES is None:
 | 
			
		||||
        load_taxonomies()
 | 
			
		||||
    return TAXONOMIES.get(taxonomy).refs
 | 
			
		||||
 | 
			
		||||
def get_taxonomy_version(taxonomy):
 | 
			
		||||
    if TAXONOMIES is None:
 | 
			
		||||
        load_taxonomies()
 | 
			
		||||
    return TAXONOMIES.get(taxonomy).version
 | 
			
		||||
 | 
			
		||||
def get_taxonomy_tags(taxonomy, enabled=False):
 | 
			
		||||
    if TAXONOMIES is None:
 | 
			
		||||
        load_taxonomies()
 | 
			
		||||
    taxonomy_obj = TAXONOMIES.get(taxonomy)
 | 
			
		||||
    tags = []
 | 
			
		||||
    for p, content in taxonomy_obj.items():
 | 
			
		||||
| 
						 | 
				
			
			@ -165,6 +179,8 @@ def get_taxonomy_meta(taxonomy_name, enabled=False, enabled_tags=False, nb_activ
 | 
			
		|||
    meta = {}
 | 
			
		||||
    if not exists_taxonomy(taxonomy_name):
 | 
			
		||||
        return meta
 | 
			
		||||
    if TAXONOMIES is None:
 | 
			
		||||
        load_taxonomies()
 | 
			
		||||
    taxonomy = TAXONOMIES.get(taxonomy_name)
 | 
			
		||||
    meta['description'] = taxonomy.description
 | 
			
		||||
    meta['name'] = taxonomy.name
 | 
			
		||||
| 
						 | 
				
			
			@ -241,6 +257,8 @@ def api_update_taxonomy_tag_enabled(data):
 | 
			
		|||
    if not exists_taxonomy(taxonomy):
 | 
			
		||||
        return {'error': f'taxonomy {taxonomy} not found'}, 404
 | 
			
		||||
    tags = data.get('tags', [])
 | 
			
		||||
    if TAXONOMIES is None:
 | 
			
		||||
        load_taxonomies()
 | 
			
		||||
    taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags())
 | 
			
		||||
    for tag in tags:
 | 
			
		||||
        if tag not in taxonomy_tags:
 | 
			
		||||
| 
						 | 
				
			
			@ -249,6 +267,8 @@ def api_update_taxonomy_tag_enabled(data):
 | 
			
		|||
 | 
			
		||||
def enable_taxonomy_tags(taxonomy):
 | 
			
		||||
    enable_taxonomy(taxonomy)
 | 
			
		||||
    if TAXONOMIES is None:
 | 
			
		||||
        load_taxonomies()
 | 
			
		||||
    for tag in TAXONOMIES.get(taxonomy).machinetags():
 | 
			
		||||
        add_taxonomy_tag_enabled(taxonomy, tag)
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -279,9 +299,8 @@ def api_disable_taxonomy_tags(data):
 | 
			
		|||
#
 | 
			
		||||
 | 
			
		||||
# TODO Synonyms
 | 
			
		||||
 | 
			
		||||
GALAXIES = {}
 | 
			
		||||
CLUSTERS = {}
 | 
			
		||||
GALAXIES = None
 | 
			
		||||
CLUSTERS = None
 | 
			
		||||
def load_galaxies():
 | 
			
		||||
    global GALAXIES
 | 
			
		||||
    galaxies = []
 | 
			
		||||
| 
						 | 
				
			
			@ -298,11 +317,10 @@ def load_galaxies():
 | 
			
		|||
            clusters.append(json.load(f))
 | 
			
		||||
    CLUSTERS = Clusters(clusters)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# LOAD GALAXY + CLUSTERS
 | 
			
		||||
load_galaxies()
 | 
			
		||||
 | 
			
		||||
def get_galaxies():
 | 
			
		||||
    if GALAXIES is None:
 | 
			
		||||
        # LOAD GALAXY + CLUSTERS
 | 
			
		||||
        load_galaxies()
 | 
			
		||||
    return GALAXIES.keys()
 | 
			
		||||
 | 
			
		||||
# TODO RENAME ME
 | 
			
		||||
| 
						 | 
				
			
			@ -310,9 +328,15 @@ def get_active_galaxies():
 | 
			
		|||
    return r_tags.smembers('galaxies:enabled')
 | 
			
		||||
 | 
			
		||||
def get_galaxy(galaxy_name):
 | 
			
		||||
    if GALAXIES is None:
 | 
			
		||||
        # LOAD GALAXY + CLUSTERS
 | 
			
		||||
        load_galaxies()
 | 
			
		||||
    return GALAXIES.get(galaxy_name)
 | 
			
		||||
 | 
			
		||||
def exists_galaxy(galaxy):
 | 
			
		||||
    if CLUSTERS is None:
 | 
			
		||||
        # LOAD GALAXY + CLUSTERS
 | 
			
		||||
        load_galaxies()
 | 
			
		||||
    return CLUSTERS.get(galaxy) is not None
 | 
			
		||||
 | 
			
		||||
def is_galaxy_enabled(galaxy):
 | 
			
		||||
| 
						 | 
				
			
			@ -369,9 +393,15 @@ def get_galaxy_tag_meta(galaxy_type, tag):
 | 
			
		|||
 | 
			
		||||
 | 
			
		||||
def get_clusters():
 | 
			
		||||
    if CLUSTERS is None:
 | 
			
		||||
        # LOAD GALAXY + CLUSTERS
 | 
			
		||||
        load_galaxies()
 | 
			
		||||
    return CLUSTERS.keys()
 | 
			
		||||
 | 
			
		||||
def get_cluster(cluster_type):
 | 
			
		||||
    if CLUSTERS is None:
 | 
			
		||||
        # LOAD GALAXY + CLUSTERS
 | 
			
		||||
        load_galaxies()
 | 
			
		||||
    return CLUSTERS.get(cluster_type)
 | 
			
		||||
 | 
			
		||||
def get_galaxy_tags(galaxy_type):
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -12,7 +12,6 @@ import yara
 | 
			
		|||
import datetime
 | 
			
		||||
import base64
 | 
			
		||||
 | 
			
		||||
from ail_typo_squatting import runAll
 | 
			
		||||
import math
 | 
			
		||||
 | 
			
		||||
from collections import defaultdict
 | 
			
		||||
| 
						 | 
				
			
			@ -38,24 +37,22 @@ logger = logging.getLogger()
 | 
			
		|||
 | 
			
		||||
config_loader = ConfigLoader.ConfigLoader()
 | 
			
		||||
r_cache = config_loader.get_redis_conn("Redis_Cache")
 | 
			
		||||
 | 
			
		||||
r_tracker = config_loader.get_db_conn("Kvrocks_Trackers")
 | 
			
		||||
 | 
			
		||||
items_dir = config_loader.get_config_str("Directories", "pastes")
 | 
			
		||||
if items_dir[-1] == '/':
 | 
			
		||||
    items_dir = items_dir[:-1]
 | 
			
		||||
config_loader = None
 | 
			
		||||
 | 
			
		||||
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
 | 
			
		||||
email_regex = re.compile(email_regex)
 | 
			
		||||
 | 
			
		||||
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
 | 
			
		||||
special_characters.add('\\s')
 | 
			
		||||
 | 
			
		||||
# NLTK tokenizer
 | 
			
		||||
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
 | 
			
		||||
TOKENIZER = None
 | 
			
		||||
 | 
			
		||||
def init_tokenizer():
 | 
			
		||||
    global TOKENIZER
 | 
			
		||||
    TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
 | 
			
		||||
                            gaps=True, discard_empty=True)
 | 
			
		||||
 | 
			
		||||
def get_special_characters():
 | 
			
		||||
    special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
 | 
			
		||||
    special_characters.add('\\s')
 | 
			
		||||
    return special_characters
 | 
			
		||||
 | 
			
		||||
###############
 | 
			
		||||
#### UTILS ####
 | 
			
		||||
def is_valid_uuid_v4(curr_uuid):
 | 
			
		||||
| 
						 | 
				
			
			@ -76,6 +73,8 @@ def is_valid_regex(tracker_regex):
 | 
			
		|||
        return False
 | 
			
		||||
 | 
			
		||||
def is_valid_mail(email):
 | 
			
		||||
    email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
 | 
			
		||||
    email_regex = re.compile(email_regex)
 | 
			
		||||
    result = email_regex.match(email)
 | 
			
		||||
    if result:
 | 
			
		||||
        return True
 | 
			
		||||
| 
						 | 
				
			
			@ -400,6 +399,9 @@ class Tracker:
 | 
			
		|||
            tracker_type = 'yara'
 | 
			
		||||
 | 
			
		||||
        elif tracker_type == 'typosquatting':
 | 
			
		||||
 | 
			
		||||
            from ail_typo_squatting import runAll
 | 
			
		||||
 | 
			
		||||
            domain = to_track.split(" ")[0]
 | 
			
		||||
            typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False) # TODO REPLACE LIMIT BY -1
 | 
			
		||||
            for typo in typo_generation:
 | 
			
		||||
| 
						 | 
				
			
			@ -857,7 +859,7 @@ def api_validate_tracker_to_add(to_track, tracker_type, nb_words=1):
 | 
			
		|||
        # force lowercase
 | 
			
		||||
        to_track = to_track.lower()
 | 
			
		||||
        word_set = set(to_track)
 | 
			
		||||
        set_inter = word_set.intersection(special_characters)
 | 
			
		||||
        set_inter = word_set.intersection(get_special_characters())
 | 
			
		||||
        if set_inter:
 | 
			
		||||
            return {"status": "error",
 | 
			
		||||
                    "reason": f'special character(s) not allowed: {set_inter}',
 | 
			
		||||
| 
						 | 
				
			
			@ -1113,7 +1115,9 @@ def get_text_word_frequency(content, filtering=True):
 | 
			
		|||
    words_dict = defaultdict(int)
 | 
			
		||||
 | 
			
		||||
    if filtering:
 | 
			
		||||
        blob = TextBlob(content, tokenizer=tokenizer)
 | 
			
		||||
        if TOKENIZER is None:
 | 
			
		||||
            init_tokenizer()
 | 
			
		||||
        blob = TextBlob(content, tokenizer=TOKENIZER)
 | 
			
		||||
    else:
 | 
			
		||||
        blob = TextBlob(content)
 | 
			
		||||
    for word in blob.tokens:
 | 
			
		||||
| 
						 | 
				
			
			@ -1800,9 +1804,9 @@ def _fix_db_custom_tags():
 | 
			
		|||
#### -- ####
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
# if __name__ == '__main__':
 | 
			
		||||
 | 
			
		||||
    _fix_db_custom_tags()
 | 
			
		||||
    # _fix_db_custom_tags()
 | 
			
		||||
    # fix_all_tracker_uuid_list()
 | 
			
		||||
    # res = get_all_tracker_uuid()
 | 
			
		||||
    # print(len(res))
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,7 +8,6 @@ import sys
 | 
			
		|||
import requests
 | 
			
		||||
 | 
			
		||||
sys.path.append(os.environ['AIL_BIN'])
 | 
			
		||||
from lib.objects.CryptoCurrencies import CryptoCurrency
 | 
			
		||||
 | 
			
		||||
logger = logging.getLogger()
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -53,9 +52,11 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50):
 | 
			
		|||
 | 
			
		||||
# filter btc seen in ail
 | 
			
		||||
def filter_btc_seen(btc_addr_set):
 | 
			
		||||
    from lib.objects import CryptoCurrencies
 | 
			
		||||
 | 
			
		||||
    list_seen_btc = []
 | 
			
		||||
    for btc_addr in btc_addr_set:
 | 
			
		||||
        cryptocurrency = CryptoCurrency(btc_addr, 'bitcoin')
 | 
			
		||||
        cryptocurrency = CryptoCurrencies.CryptoCurrency(btc_addr, 'bitcoin')
 | 
			
		||||
        if cryptocurrency.exists():
 | 
			
		||||
            list_seen_btc.append(btc_addr)
 | 
			
		||||
    return list_seen_btc
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -18,13 +18,10 @@ from lib.ConfigLoader import ConfigLoader
 | 
			
		|||
from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
 | 
			
		||||
from lib.data_retention_engine import update_obj_date
 | 
			
		||||
from lib.objects import ail_objects
 | 
			
		||||
from lib.objects.abstract_subtype_object import get_all_id
 | 
			
		||||
# from lib.data_retention_engine import update_obj_date
 | 
			
		||||
from lib.timeline_engine import Timeline
 | 
			
		||||
 | 
			
		||||
from lib.correlations_engine import get_correlation_by_correl_type
 | 
			
		||||
 | 
			
		||||
config_loader = ConfigLoader()
 | 
			
		||||
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
 | 
			
		||||
r_object = config_loader.get_db_conn("Kvrocks_Objects")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -213,10 +213,10 @@ class Ocr(AbstractObject):
 | 
			
		|||
        draw = ImageDraw.Draw(img)
 | 
			
		||||
        for bbox in self.get_coords():
 | 
			
		||||
            c1, c2, c3, c4 = bbox
 | 
			
		||||
            draw.line((tuple(c1), tuple(c2)), fill="yellow")
 | 
			
		||||
            draw.line((tuple(c2), tuple(c3)), fill="yellow")
 | 
			
		||||
            draw.line((tuple(c3), tuple(c4)), fill="yellow")
 | 
			
		||||
            draw.line((tuple(c4), tuple(c1)), fill="yellow")
 | 
			
		||||
            draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2)
 | 
			
		||||
            draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2)
 | 
			
		||||
            draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2)
 | 
			
		||||
            draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2)
 | 
			
		||||
        # img.show()
 | 
			
		||||
        buff = BytesIO()
 | 
			
		||||
        img.save(buff, "PNG")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -37,7 +37,7 @@ from lib.objects import Ocrs
 | 
			
		|||
from lib.objects import Pgps
 | 
			
		||||
from lib.objects.Screenshots import Screenshot
 | 
			
		||||
from lib.objects import Titles
 | 
			
		||||
from lib.objects.UsersAccount import UserAccount
 | 
			
		||||
from lib.objects import UsersAccount
 | 
			
		||||
from lib.objects import Usernames
 | 
			
		||||
 | 
			
		||||
config_loader = ConfigLoader()
 | 
			
		||||
| 
						 | 
				
			
			@ -113,7 +113,7 @@ def get_object(obj_type, subtype, obj_id):
 | 
			
		|||
        elif obj_type == 'pgp':
 | 
			
		||||
            return Pgps.Pgp(obj_id, subtype)
 | 
			
		||||
        elif obj_type == 'user-account':
 | 
			
		||||
            return UserAccount(obj_id, subtype)
 | 
			
		||||
            return UsersAccount.UserAccount(obj_id, subtype)
 | 
			
		||||
        elif obj_type == 'username':
 | 
			
		||||
            return Usernames.Username(obj_id, subtype)
 | 
			
		||||
        else:
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -26,7 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
 | 
			
		|||
# Import Project packages        #
 | 
			
		||||
##################################
 | 
			
		||||
from modules.abstract_module import AbstractModule
 | 
			
		||||
from lib.objects.Items import Item
 | 
			
		||||
from lib.ConfigLoader import ConfigLoader
 | 
			
		||||
# from lib import Statistics
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue