mirror of https://github.com/CIRCL/AIL-framework
chg: [perf] reduce memory usage
parent 6ca4b29329
commit 61701e2fcc
@@ -9,7 +9,6 @@ The ``Domain``
 
 import os
 import sys
-import time
 import redis
 import configparser
 
@@ -32,6 +32,9 @@ config_loader = None
 
 # # # # UNSAFE TAGS # # # #
 
+# set of unsafe tags
+UNSAFE_TAGS = None
+
 def build_unsafe_tags():
     tags = set()
     # CE content
@@ -52,12 +55,12 @@ def is_tags_safe(ltags):
     :return: is a tag in the set unsafe
     :rtype: boolean
     """
-    return unsafe_tags.isdisjoint(ltags)
+    global UNSAFE_TAGS
+    if UNSAFE_TAGS is None:
+        UNSAFE_TAGS = build_unsafe_tags()
+    return UNSAFE_TAGS.isdisjoint(ltags)
 
 
-# set of unsafe tags
-unsafe_tags = build_unsafe_tags()
-
 # - - - UNSAFE TAGS - - - #
 
 # # TODO: verify tags + object_type
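The two hunks above replace the module-level `unsafe_tags = build_unsafe_tags()`, which ran at import time in every process that loaded the module, with an `UNSAFE_TAGS = None` sentinel that is only filled on the first call to `is_tags_safe()`. A minimal, self-contained sketch of that lazy-initialization pattern; the tag literal below is a stand-in, not the real content of the set:

# Lazy module-level initialization: built on first use, never at import time.
UNSAFE_TAGS = None

def build_unsafe_tags():
    # Stand-in builder; the real one assembles CE-related taxonomy/galaxy tags.
    return {'dark-web:topic="example-unsafe-topic"'}

def is_tags_safe(ltags):
    """Return True if none of the given tags is in the unsafe set."""
    global UNSAFE_TAGS
    if UNSAFE_TAGS is None:           # only the first caller pays the build cost
        UNSAFE_TAGS = build_unsafe_tags()
    return UNSAFE_TAGS.isdisjoint(ltags)

# Processes that never call is_tags_safe() never allocate the set at all.
print(is_tags_safe({'infoleak:automatic-detection="credit-card"'}))  # True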
@@ -80,16 +83,15 @@ def get_obj_by_tag(key_tag):
 
 #### Taxonomies ####
 
-TAXONOMIES = {}
+TAXONOMIES = None
 def load_taxonomies():
     global TAXONOMIES
     manifest = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json')
     TAXONOMIES = Taxonomies(manifest_path=manifest)
 
 
-load_taxonomies()
-
-
 def get_taxonomies():
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.keys()
 
 # TODO rename me to get enabled_taxonomies
@@ -111,12 +113,18 @@ def disable_taxonomy(taxonomy):
     r_tags.srem('taxonomies:enabled', taxonomy)
 
 def exists_taxonomy(taxonomy):
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.get(taxonomy) is not None
 
 def get_taxonomy_description(taxonomy):
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.get(taxonomy).description
 
 def get_taxonomy_name(taxonomy):
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.get(taxonomy).name
 
 def get_taxonomy_predicates(taxonomy):
@@ -133,12 +141,18 @@ def get_taxonomy_predicates(taxonomy):
     return meta
 
 def get_taxonomy_refs(taxonomy):
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.get(taxonomy).refs
 
 def get_taxonomy_version(taxonomy):
+    if TAXONOMIES is None:
+        load_taxonomies()
     return TAXONOMIES.get(taxonomy).version
 
 def get_taxonomy_tags(taxonomy, enabled=False):
+    if TAXONOMIES is None:
+        load_taxonomies()
     taxonomy_obj = TAXONOMIES.get(taxonomy)
     tags = []
     for p, content in taxonomy_obj.items():
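Each taxonomy accessor in the hunks above now repeats the same two-line guard before touching `TAXONOMIES`. A hedged sketch of how that guard could be centralized in one place; `_taxonomies()` is a hypothetical helper name, not part of the AIL code base, and it reuses the `load_taxonomies()` defined earlier in the diff:

def _taxonomies():
    # Hypothetical helper: make sure the MISP taxonomies are loaded, then return them.
    if TAXONOMIES is None:
        load_taxonomies()
    return TAXONOMIES

def get_taxonomy_refs(taxonomy):
    return _taxonomies().get(taxonomy).refs

def get_taxonomy_version(taxonomy):
    return _taxonomies().get(taxonomy).version

The inline guard the commit uses avoids an extra function call in each accessor; the helper is only a readability trade-off, not a behavioural change.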
@@ -165,6 +179,8 @@ def get_taxonomy_meta(taxonomy_name, enabled=False, enabled_tags=False, nb_activ
     meta = {}
     if not exists_taxonomy(taxonomy_name):
         return meta
+    if TAXONOMIES is None:
+        load_taxonomies()
     taxonomy = TAXONOMIES.get(taxonomy_name)
     meta['description'] = taxonomy.description
     meta['name'] = taxonomy.name
@@ -241,6 +257,8 @@ def api_update_taxonomy_tag_enabled(data):
     if not exists_taxonomy(taxonomy):
         return {'error': f'taxonomy {taxonomy} not found'}, 404
     tags = data.get('tags', [])
+    if TAXONOMIES is None:
+        load_taxonomies()
     taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags())
     for tag in tags:
         if tag not in taxonomy_tags:
@@ -249,6 +267,8 @@ def api_update_taxonomy_tag_enabled(data):
 
 def enable_taxonomy_tags(taxonomy):
     enable_taxonomy(taxonomy)
+    if TAXONOMIES is None:
+        load_taxonomies()
     for tag in TAXONOMIES.get(taxonomy).machinetags():
         add_taxonomy_tag_enabled(taxonomy, tag)
 
@@ -279,9 +299,8 @@ def api_disable_taxonomy_tags(data):
 #
 
 # TODO Synonyms
-GALAXIES = {}
-CLUSTERS = {}
+GALAXIES = None
+CLUSTERS = None
 def load_galaxies():
     global GALAXIES
     galaxies = []
@@ -298,11 +317,10 @@ def load_galaxies():
             clusters.append(json.load(f))
     CLUSTERS = Clusters(clusters)
 
-# LOAD GALAXY + CLUSTERS
-load_galaxies()
-
-def get_galaxies():
+def get_galaxies():
+    if GALAXIES is None:
+        # LOAD GALAXY + CLUSTERS
+        load_galaxies()
     return GALAXIES.keys()
 
 # TODO RENAME ME
@@ -310,9 +328,15 @@ def get_active_galaxies():
     return r_tags.smembers('galaxies:enabled')
 
 def get_galaxy(galaxy_name):
+    if GALAXIES is None:
+        # LOAD GALAXY + CLUSTERS
+        load_galaxies()
     return GALAXIES.get(galaxy_name)
 
 def exists_galaxy(galaxy):
+    if CLUSTERS is None:
+        # LOAD GALAXY + CLUSTERS
+        load_galaxies()
     return CLUSTERS.get(galaxy) is not None
 
 def is_galaxy_enabled(galaxy):
@@ -369,9 +393,15 @@ def get_galaxy_tag_meta(galaxy_type, tag):
 
 
 def get_clusters():
+    if CLUSTERS is None:
+        # LOAD GALAXY + CLUSTERS
+        load_galaxies()
     return CLUSTERS.keys()
 
 def get_cluster(cluster_type):
+    if CLUSTERS is None:
+        # LOAD GALAXY + CLUSTERS
+        load_galaxies()
     return CLUSTERS.get(cluster_type)
 
 def get_galaxy_tags(galaxy_type):
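`GALAXIES` and `CLUSTERS` get the same check-then-load treatment as the taxonomies. If two threads evaluate the guard at the same moment, both may call `load_galaxies()`; because the load is idempotent, the worst case should be doing the work twice rather than corrupting state. A sketch of how the guard could be hardened with a lock, purely as an illustration under that assumption (the lock is not something this commit adds):

import threading

_GALAXIES_LOCK = threading.Lock()  # illustrative only

def get_galaxy(galaxy_name):
    if GALAXIES is None:
        with _GALAXIES_LOCK:
            if GALAXIES is None:   # re-check inside the lock so only one thread loads
                load_galaxies()
    return GALAXIES.get(galaxy_name)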
@@ -12,7 +12,6 @@ import yara
 import datetime
 import base64
 
-from ail_typo_squatting import runAll
 import math
 
 from collections import defaultdict
@@ -38,23 +37,21 @@ logger = logging.getLogger()
 
 config_loader = ConfigLoader.ConfigLoader()
 r_cache = config_loader.get_redis_conn("Redis_Cache")
 
 r_tracker = config_loader.get_db_conn("Kvrocks_Trackers")
 
-items_dir = config_loader.get_config_str("Directories", "pastes")
-if items_dir[-1] == '/':
-    items_dir = items_dir[:-1]
 config_loader = None
 
-email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
-email_regex = re.compile(email_regex)
+# NLTK tokenizer
+TOKENIZER = None
+
+def init_tokenizer():
+    global TOKENIZER
+    TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
+                                gaps=True, discard_empty=True)
 
-special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
-special_characters.add('\\s')
-
-# NLTK tokenizer
-tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
-                            gaps=True, discard_empty=True)
+def get_special_characters():
+    special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
+    special_characters.add('\\s')
+    return special_characters
 
 ###############
 #### UTILS ####
@@ -76,6 +73,8 @@ def is_valid_regex(tracker_regex):
         return False
 
 def is_valid_mail(email):
+    email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
+    email_regex = re.compile(email_regex)
     result = email_regex.match(email)
     if result:
         return True
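Moving the `re.compile()` into `is_valid_mail()` means the compiled pattern is no longer kept alive in every process that merely imports the module. Repeated calls should stay cheap because Python's `re` module keeps an internal cache of recently compiled patterns. A self-contained sketch of the resulting function; the return handling is simplified and the addresses are made up:

import re

def is_valid_mail(email):
    # Compiled on each call (or fetched from re's internal pattern cache).
    email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
    email_regex = re.compile(email_regex)
    return bool(email_regex.match(email))

print(is_valid_mail('analyst@example.org'))  # True
print(is_valid_mail('not-an-address'))       # False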
@@ -400,6 +399,9 @@ class Tracker:
             tracker_type = 'yara'
 
         elif tracker_type == 'typosquatting':
+
+            from ail_typo_squatting import runAll
+
             domain = to_track.split(" ")[0]
             typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False)  # TODO REPLACE LIMIT BY -1
             for typo in typo_generation:
@@ -857,7 +859,7 @@ def api_validate_tracker_to_add(to_track, tracker_type, nb_words=1):
         # force lowercase
         to_track = to_track.lower()
         word_set = set(to_track)
-        set_inter = word_set.intersection(special_characters)
+        set_inter = word_set.intersection(get_special_characters())
         if set_inter:
             return {"status": "error",
                     "reason": f'special character(s) not allowed: {set_inter}',
@@ -1113,7 +1115,9 @@ def get_text_word_frequency(content, filtering=True):
     words_dict = defaultdict(int)
 
     if filtering:
-        blob = TextBlob(content, tokenizer=tokenizer)
+        if TOKENIZER is None:
+            init_tokenizer()
+        blob = TextBlob(content, tokenizer=TOKENIZER)
     else:
         blob = TextBlob(content)
     for word in blob.tokens:
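With the NLTK tokenizer now built lazily, `get_text_word_frequency()` only creates it the first time filtering is actually requested. A self-contained sketch of the pattern, reusing the tokenizer definition from the earlier hunk; the word-counting body and the sample text are simplifications, and `nltk` plus `textblob` must be installed:

from collections import defaultdict

from nltk.tokenize import RegexpTokenizer
from textblob import TextBlob

TOKENIZER = None

def init_tokenizer():
    # Built on first use instead of at import time.
    global TOKENIZER
    TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
                                gaps=True, discard_empty=True)

def get_text_word_frequency(content, filtering=True):
    words_dict = defaultdict(int)
    if filtering:
        if TOKENIZER is None:
            init_tokenizer()
        blob = TextBlob(content, tokenizer=TOKENIZER)
    else:
        blob = TextBlob(content)
    for word in blob.tokens:          # simplified counting body
        words_dict[word.lower()] += 1
    return words_dict

print(dict(get_text_word_frequency('leak: password admin password')))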
@@ -1800,9 +1804,9 @@ def _fix_db_custom_tags():
 #### -- ####
 
 
-if __name__ == '__main__':
+# if __name__ == '__main__':
 
-    _fix_db_custom_tags()
+# _fix_db_custom_tags()
 # fix_all_tracker_uuid_list()
 # res = get_all_tracker_uuid()
 # print(len(res))
@@ -8,7 +8,6 @@ import sys
 import requests
 
 sys.path.append(os.environ['AIL_BIN'])
-from lib.objects.CryptoCurrencies import CryptoCurrency
 
 logger = logging.getLogger()
 
@@ -53,9 +52,11 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50):
 
 # filter btc seen in ail
 def filter_btc_seen(btc_addr_set):
+    from lib.objects import CryptoCurrencies
+
     list_seen_btc = []
     for btc_addr in btc_addr_set:
-        cryptocurrency = CryptoCurrency(btc_addr, 'bitcoin')
+        cryptocurrency = CryptoCurrencies.CryptoCurrency(btc_addr, 'bitcoin')
         if cryptocurrency.exists():
             list_seen_btc.append(btc_addr)
     return list_seen_btc
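This hunk and the typosquatting hunk earlier apply the same idea to imports: `CryptoCurrencies` is now imported inside `filter_btc_seen()` and `ail_typo_squatting` inside the typosquatting branch of the tracker, so AIL processes that import these modules without ever hitting those code paths never load the dependencies. A generic sketch of a function-scope import; the module and function names here are placeholders:

def rarely_used_feature(data):
    # Loaded only when this code path actually runs; Python caches the module,
    # so later calls do not pay the import cost again.
    import json as heavy_dependency  # placeholder for a genuinely heavy import
    return heavy_dependency.dumps(data)

print(rarely_used_feature({'status': 'ok'}))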
@@ -18,13 +18,10 @@ from lib.ConfigLoader import ConfigLoader
 from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects
 
-
-from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
-from lib.data_retention_engine import update_obj_date
-from lib.objects import ail_objects
+from lib.objects.abstract_subtype_object import get_all_id
+# from lib.data_retention_engine import update_obj_date
 from lib.timeline_engine import Timeline
 
-from lib.correlations_engine import get_correlation_by_correl_type
 
 config_loader = ConfigLoader()
 baseurl = config_loader.get_config_str("Notifications", "ail_domain")
 r_object = config_loader.get_db_conn("Kvrocks_Objects")
@@ -213,10 +213,10 @@ class Ocr(AbstractObject):
         draw = ImageDraw.Draw(img)
         for bbox in self.get_coords():
             c1, c2, c3, c4 = bbox
-            draw.line((tuple(c1), tuple(c2)), fill="yellow")
-            draw.line((tuple(c2), tuple(c3)), fill="yellow")
-            draw.line((tuple(c3), tuple(c4)), fill="yellow")
-            draw.line((tuple(c4), tuple(c1)), fill="yellow")
+            draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2)
+            draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2)
+            draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2)
+            draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2)
         # img.show()
         buff = BytesIO()
         img.save(buff, "PNG")
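The `Ocr` hunk is a small drawing tweak rather than a memory change: each edge of an OCR bounding box is now drawn with `width=2`, presumably so the outline stays visible on large screenshots. A self-contained sketch of the same PIL calls on a dummy image; the box coordinates are made up:

from io import BytesIO

from PIL import Image, ImageDraw

img = Image.new('RGB', (200, 100), color='black')
draw = ImageDraw.Draw(img)

# One bounding box as four corner points, drawn edge by edge like in the Ocr object.
c1, c2, c3, c4 = (10, 10), (150, 10), (150, 60), (10, 60)
draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2)
draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2)
draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2)
draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2)

buff = BytesIO()
img.save(buff, "PNG")
print(len(buff.getvalue()), 'bytes of PNG written')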
@@ -37,7 +37,7 @@ from lib.objects import Ocrs
 from lib.objects import Pgps
 from lib.objects.Screenshots import Screenshot
 from lib.objects import Titles
-from lib.objects.UsersAccount import UserAccount
+from lib.objects import UsersAccount
 from lib.objects import Usernames
 
 config_loader = ConfigLoader()
@@ -113,7 +113,7 @@ def get_object(obj_type, subtype, obj_id):
     elif obj_type == 'pgp':
         return Pgps.Pgp(obj_id, subtype)
     elif obj_type == 'user-account':
-        return UserAccount(obj_id, subtype)
+        return UsersAccount.UserAccount(obj_id, subtype)
     elif obj_type == 'username':
         return Usernames.Username(obj_id, subtype)
     else:
@@ -26,7 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
 # Import Project packages #
 ##################################
 from modules.abstract_module import AbstractModule
-from lib.objects.Items import Item
 from lib.ConfigLoader import ConfigLoader
 # from lib import Statistics
 