chg: [perf] reduce memory usage

ocr
terrtia 2024-04-09 14:22:11 +02:00
parent 6ca4b29329
commit 61701e2fcc
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
8 changed files with 77 additions and 47 deletions

View File

@ -9,7 +9,6 @@ The ``Domain``
import os import os
import sys import sys
import time
import redis import redis
import configparser import configparser

View File

@ -32,6 +32,9 @@ config_loader = None
# # # # UNSAFE TAGS # # # # # # # # UNSAFE TAGS # # # #
# set of unsafe tags
UNSAFE_TAGS = None
def build_unsafe_tags(): def build_unsafe_tags():
tags = set() tags = set()
# CE content # CE content
@ -52,12 +55,12 @@ def is_tags_safe(ltags):
:return: is a tag in the set unsafe :return: is a tag in the set unsafe
:rtype: boolean :rtype: boolean
""" """
return unsafe_tags.isdisjoint(ltags) global UNSAFE_TAGS
if UNSAFE_TAGS is None:
UNSAFE_TAGS = build_unsafe_tags()
return UNSAFE_TAGS.isdisjoint(ltags)
# set of unsafe tags
unsafe_tags = build_unsafe_tags()
# - - - UNSAFE TAGS - - - # # - - - UNSAFE TAGS - - - #
# # TODO: verify tags + object_type # # TODO: verify tags + object_type
@ -80,16 +83,15 @@ def get_obj_by_tag(key_tag):
#### Taxonomies #### #### Taxonomies ####
TAXONOMIES = {} TAXONOMIES = None
def load_taxonomies(): def load_taxonomies():
global TAXONOMIES global TAXONOMIES
manifest = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json') manifest = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json')
TAXONOMIES = Taxonomies(manifest_path=manifest) TAXONOMIES = Taxonomies(manifest_path=manifest)
load_taxonomies()
def get_taxonomies(): def get_taxonomies():
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.keys() return TAXONOMIES.keys()
# TODO rename me to get enabled_taxonomies # TODO rename me to get enabled_taxonomies
@ -111,12 +113,18 @@ def disable_taxonomy(taxonomy):
r_tags.srem('taxonomies:enabled', taxonomy) r_tags.srem('taxonomies:enabled', taxonomy)
def exists_taxonomy(taxonomy): def exists_taxonomy(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy) is not None return TAXONOMIES.get(taxonomy) is not None
def get_taxonomy_description(taxonomy): def get_taxonomy_description(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).description return TAXONOMIES.get(taxonomy).description
def get_taxonomy_name(taxonomy): def get_taxonomy_name(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).name return TAXONOMIES.get(taxonomy).name
def get_taxonomy_predicates(taxonomy): def get_taxonomy_predicates(taxonomy):
@ -133,12 +141,18 @@ def get_taxonomy_predicates(taxonomy):
return meta return meta
def get_taxonomy_refs(taxonomy): def get_taxonomy_refs(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).refs return TAXONOMIES.get(taxonomy).refs
def get_taxonomy_version(taxonomy): def get_taxonomy_version(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).version return TAXONOMIES.get(taxonomy).version
def get_taxonomy_tags(taxonomy, enabled=False): def get_taxonomy_tags(taxonomy, enabled=False):
if TAXONOMIES is None:
load_taxonomies()
taxonomy_obj = TAXONOMIES.get(taxonomy) taxonomy_obj = TAXONOMIES.get(taxonomy)
tags = [] tags = []
for p, content in taxonomy_obj.items(): for p, content in taxonomy_obj.items():
@ -165,6 +179,8 @@ def get_taxonomy_meta(taxonomy_name, enabled=False, enabled_tags=False, nb_activ
meta = {} meta = {}
if not exists_taxonomy(taxonomy_name): if not exists_taxonomy(taxonomy_name):
return meta return meta
if TAXONOMIES is None:
load_taxonomies()
taxonomy = TAXONOMIES.get(taxonomy_name) taxonomy = TAXONOMIES.get(taxonomy_name)
meta['description'] = taxonomy.description meta['description'] = taxonomy.description
meta['name'] = taxonomy.name meta['name'] = taxonomy.name
@ -241,6 +257,8 @@ def api_update_taxonomy_tag_enabled(data):
if not exists_taxonomy(taxonomy): if not exists_taxonomy(taxonomy):
return {'error': f'taxonomy {taxonomy} not found'}, 404 return {'error': f'taxonomy {taxonomy} not found'}, 404
tags = data.get('tags', []) tags = data.get('tags', [])
if TAXONOMIES is None:
load_taxonomies()
taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags()) taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags())
for tag in tags: for tag in tags:
if tag not in taxonomy_tags: if tag not in taxonomy_tags:
@ -249,6 +267,8 @@ def api_update_taxonomy_tag_enabled(data):
def enable_taxonomy_tags(taxonomy): def enable_taxonomy_tags(taxonomy):
enable_taxonomy(taxonomy) enable_taxonomy(taxonomy)
if TAXONOMIES is None:
load_taxonomies()
for tag in TAXONOMIES.get(taxonomy).machinetags(): for tag in TAXONOMIES.get(taxonomy).machinetags():
add_taxonomy_tag_enabled(taxonomy, tag) add_taxonomy_tag_enabled(taxonomy, tag)
@ -279,9 +299,8 @@ def api_disable_taxonomy_tags(data):
# #
# TODO Synonyms # TODO Synonyms
GALAXIES = None
GALAXIES = {} CLUSTERS = None
CLUSTERS = {}
def load_galaxies(): def load_galaxies():
global GALAXIES global GALAXIES
galaxies = [] galaxies = []
@ -298,11 +317,10 @@ def load_galaxies():
clusters.append(json.load(f)) clusters.append(json.load(f))
CLUSTERS = Clusters(clusters) CLUSTERS = Clusters(clusters)
def get_galaxies():
if GALAXIES is None:
# LOAD GALAXY + CLUSTERS # LOAD GALAXY + CLUSTERS
load_galaxies() load_galaxies()
def get_galaxies():
return GALAXIES.keys() return GALAXIES.keys()
# TODO RENAME ME # TODO RENAME ME
@ -310,9 +328,15 @@ def get_active_galaxies():
return r_tags.smembers('galaxies:enabled') return r_tags.smembers('galaxies:enabled')
def get_galaxy(galaxy_name): def get_galaxy(galaxy_name):
if GALAXIES is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return GALAXIES.get(galaxy_name) return GALAXIES.get(galaxy_name)
def exists_galaxy(galaxy): def exists_galaxy(galaxy):
if CLUSTERS is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return CLUSTERS.get(galaxy) is not None return CLUSTERS.get(galaxy) is not None
def is_galaxy_enabled(galaxy): def is_galaxy_enabled(galaxy):
@ -369,9 +393,15 @@ def get_galaxy_tag_meta(galaxy_type, tag):
def get_clusters(): def get_clusters():
if CLUSTERS is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return CLUSTERS.keys() return CLUSTERS.keys()
def get_cluster(cluster_type): def get_cluster(cluster_type):
if CLUSTERS is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return CLUSTERS.get(cluster_type) return CLUSTERS.get(cluster_type)
def get_galaxy_tags(galaxy_type): def get_galaxy_tags(galaxy_type):

View File

@ -12,7 +12,6 @@ import yara
import datetime import datetime
import base64 import base64
from ail_typo_squatting import runAll
import math import math
from collections import defaultdict from collections import defaultdict
@ -38,23 +37,21 @@ logger = logging.getLogger()
config_loader = ConfigLoader.ConfigLoader() config_loader = ConfigLoader.ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache") r_cache = config_loader.get_redis_conn("Redis_Cache")
r_tracker = config_loader.get_db_conn("Kvrocks_Trackers") r_tracker = config_loader.get_db_conn("Kvrocks_Trackers")
items_dir = config_loader.get_config_str("Directories", "pastes")
if items_dir[-1] == '/':
items_dir = items_dir[:-1]
config_loader = None config_loader = None
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}' # NLTK tokenizer
email_regex = re.compile(email_regex) TOKENIZER = None
def init_tokenizer():
global TOKENIZER
TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
gaps=True, discard_empty=True)
def get_special_characters():
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\') special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
special_characters.add('\\s') special_characters.add('\\s')
return special_characters
# NLTK tokenizer
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
gaps=True, discard_empty=True)
############### ###############
#### UTILS #### #### UTILS ####
@ -76,6 +73,8 @@ def is_valid_regex(tracker_regex):
return False return False
def is_valid_mail(email): def is_valid_mail(email):
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
email_regex = re.compile(email_regex)
result = email_regex.match(email) result = email_regex.match(email)
if result: if result:
return True return True
@ -400,6 +399,9 @@ class Tracker:
tracker_type = 'yara' tracker_type = 'yara'
elif tracker_type == 'typosquatting': elif tracker_type == 'typosquatting':
from ail_typo_squatting import runAll
domain = to_track.split(" ")[0] domain = to_track.split(" ")[0]
typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False) # TODO REPLACE LIMIT BY -1 typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False) # TODO REPLACE LIMIT BY -1
for typo in typo_generation: for typo in typo_generation:
@ -857,7 +859,7 @@ def api_validate_tracker_to_add(to_track, tracker_type, nb_words=1):
# force lowercase # force lowercase
to_track = to_track.lower() to_track = to_track.lower()
word_set = set(to_track) word_set = set(to_track)
set_inter = word_set.intersection(special_characters) set_inter = word_set.intersection(get_special_characters())
if set_inter: if set_inter:
return {"status": "error", return {"status": "error",
"reason": f'special character(s) not allowed: {set_inter}', "reason": f'special character(s) not allowed: {set_inter}',
@ -1113,7 +1115,9 @@ def get_text_word_frequency(content, filtering=True):
words_dict = defaultdict(int) words_dict = defaultdict(int)
if filtering: if filtering:
blob = TextBlob(content, tokenizer=tokenizer) if TOKENIZER is None:
init_tokenizer()
blob = TextBlob(content, tokenizer=TOKENIZER)
else: else:
blob = TextBlob(content) blob = TextBlob(content)
for word in blob.tokens: for word in blob.tokens:
@ -1800,9 +1804,9 @@ def _fix_db_custom_tags():
#### -- #### #### -- ####
if __name__ == '__main__': # if __name__ == '__main__':
_fix_db_custom_tags() # _fix_db_custom_tags()
# fix_all_tracker_uuid_list() # fix_all_tracker_uuid_list()
# res = get_all_tracker_uuid() # res = get_all_tracker_uuid()
# print(len(res)) # print(len(res))

View File

@ -8,7 +8,6 @@ import sys
import requests import requests
sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
from lib.objects.CryptoCurrencies import CryptoCurrency
logger = logging.getLogger() logger = logging.getLogger()
@ -53,9 +52,11 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50):
# filter btc seen in ail # filter btc seen in ail
def filter_btc_seen(btc_addr_set): def filter_btc_seen(btc_addr_set):
from lib.objects import CryptoCurrencies
list_seen_btc = [] list_seen_btc = []
for btc_addr in btc_addr_set: for btc_addr in btc_addr_set:
cryptocurrency = CryptoCurrency(btc_addr, 'bitcoin') cryptocurrency = CryptoCurrencies.CryptoCurrency(btc_addr, 'bitcoin')
if cryptocurrency.exists(): if cryptocurrency.exists():
list_seen_btc.append(btc_addr) list_seen_btc.append(btc_addr)
return list_seen_btc return list_seen_btc

View File

@ -18,13 +18,10 @@ from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id from lib.objects.abstract_subtype_object import get_all_id
from lib.data_retention_engine import update_obj_date # from lib.data_retention_engine import update_obj_date
from lib.objects import ail_objects
from lib.timeline_engine import Timeline from lib.timeline_engine import Timeline
from lib.correlations_engine import get_correlation_by_correl_type
config_loader = ConfigLoader() config_loader = ConfigLoader()
baseurl = config_loader.get_config_str("Notifications", "ail_domain") baseurl = config_loader.get_config_str("Notifications", "ail_domain")
r_object = config_loader.get_db_conn("Kvrocks_Objects") r_object = config_loader.get_db_conn("Kvrocks_Objects")

View File

@ -213,10 +213,10 @@ class Ocr(AbstractObject):
draw = ImageDraw.Draw(img) draw = ImageDraw.Draw(img)
for bbox in self.get_coords(): for bbox in self.get_coords():
c1, c2, c3, c4 = bbox c1, c2, c3, c4 = bbox
draw.line((tuple(c1), tuple(c2)), fill="yellow") draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2)
draw.line((tuple(c2), tuple(c3)), fill="yellow") draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2)
draw.line((tuple(c3), tuple(c4)), fill="yellow") draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2)
draw.line((tuple(c4), tuple(c1)), fill="yellow") draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2)
# img.show() # img.show()
buff = BytesIO() buff = BytesIO()
img.save(buff, "PNG") img.save(buff, "PNG")

View File

@ -37,7 +37,7 @@ from lib.objects import Ocrs
from lib.objects import Pgps from lib.objects import Pgps
from lib.objects.Screenshots import Screenshot from lib.objects.Screenshots import Screenshot
from lib.objects import Titles from lib.objects import Titles
from lib.objects.UsersAccount import UserAccount from lib.objects import UsersAccount
from lib.objects import Usernames from lib.objects import Usernames
config_loader = ConfigLoader() config_loader = ConfigLoader()
@ -113,7 +113,7 @@ def get_object(obj_type, subtype, obj_id):
elif obj_type == 'pgp': elif obj_type == 'pgp':
return Pgps.Pgp(obj_id, subtype) return Pgps.Pgp(obj_id, subtype)
elif obj_type == 'user-account': elif obj_type == 'user-account':
return UserAccount(obj_id, subtype) return UsersAccount.UserAccount(obj_id, subtype)
elif obj_type == 'username': elif obj_type == 'username':
return Usernames.Username(obj_id, subtype) return Usernames.Username(obj_id, subtype)
else: else:

View File

@ -26,7 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages # # Import Project packages #
################################## ##################################
from modules.abstract_module import AbstractModule from modules.abstract_module import AbstractModule
from lib.objects.Items import Item
from lib.ConfigLoader import ConfigLoader from lib.ConfigLoader import ConfigLoader
# from lib import Statistics # from lib import Statistics