chg: [perf] reduce memory usage

ocr
terrtia 2024-04-09 14:22:11 +02:00
parent 6ca4b29329
commit 61701e2fcc
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
8 changed files with 77 additions and 47 deletions

View File

@ -9,7 +9,6 @@ The ``Domain``
import os
import sys
import time
import redis
import configparser

View File

@ -32,6 +32,9 @@ config_loader = None
# # # # UNSAFE TAGS # # # #
# set of unsafe tags
UNSAFE_TAGS = None
def build_unsafe_tags():
tags = set()
# CE content
@ -52,12 +55,12 @@ def is_tags_safe(ltags):
:return: is a tag in the set unsafe
:rtype: boolean
"""
return unsafe_tags.isdisjoint(ltags)
global UNSAFE_TAGS
if UNSAFE_TAGS is None:
UNSAFE_TAGS = build_unsafe_tags()
return UNSAFE_TAGS.isdisjoint(ltags)
# set of unsafe tags
unsafe_tags = build_unsafe_tags()
# - - - UNSAFE TAGS - - - #
# # TODO: verify tags + object_type
@ -80,16 +83,15 @@ def get_obj_by_tag(key_tag):
#### Taxonomies ####
TAXONOMIES = {}
TAXONOMIES = None
def load_taxonomies():
global TAXONOMIES
manifest = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json')
TAXONOMIES = Taxonomies(manifest_path=manifest)
load_taxonomies()
def get_taxonomies():
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.keys()
# TODO rename me to get enabled_taxonomies
@ -111,12 +113,18 @@ def disable_taxonomy(taxonomy):
r_tags.srem('taxonomies:enabled', taxonomy)
def exists_taxonomy(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy) is not None
def get_taxonomy_description(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).description
def get_taxonomy_name(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).name
def get_taxonomy_predicates(taxonomy):
@ -133,12 +141,18 @@ def get_taxonomy_predicates(taxonomy):
return meta
def get_taxonomy_refs(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).refs
def get_taxonomy_version(taxonomy):
if TAXONOMIES is None:
load_taxonomies()
return TAXONOMIES.get(taxonomy).version
def get_taxonomy_tags(taxonomy, enabled=False):
if TAXONOMIES is None:
load_taxonomies()
taxonomy_obj = TAXONOMIES.get(taxonomy)
tags = []
for p, content in taxonomy_obj.items():
@ -165,6 +179,8 @@ def get_taxonomy_meta(taxonomy_name, enabled=False, enabled_tags=False, nb_activ
meta = {}
if not exists_taxonomy(taxonomy_name):
return meta
if TAXONOMIES is None:
load_taxonomies()
taxonomy = TAXONOMIES.get(taxonomy_name)
meta['description'] = taxonomy.description
meta['name'] = taxonomy.name
@ -241,6 +257,8 @@ def api_update_taxonomy_tag_enabled(data):
if not exists_taxonomy(taxonomy):
return {'error': f'taxonomy {taxonomy} not found'}, 404
tags = data.get('tags', [])
if TAXONOMIES is None:
load_taxonomies()
taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags())
for tag in tags:
if tag not in taxonomy_tags:
@ -249,6 +267,8 @@ def api_update_taxonomy_tag_enabled(data):
def enable_taxonomy_tags(taxonomy):
enable_taxonomy(taxonomy)
if TAXONOMIES is None:
load_taxonomies()
for tag in TAXONOMIES.get(taxonomy).machinetags():
add_taxonomy_tag_enabled(taxonomy, tag)
@ -279,9 +299,8 @@ def api_disable_taxonomy_tags(data):
#
# TODO Synonyms
GALAXIES = {}
CLUSTERS = {}
GALAXIES = None
CLUSTERS = None
def load_galaxies():
global GALAXIES
galaxies = []
@ -298,11 +317,10 @@ def load_galaxies():
clusters.append(json.load(f))
CLUSTERS = Clusters(clusters)
# LOAD GALAXY + CLUSTERS
load_galaxies()
def get_galaxies():
if GALAXIES is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return GALAXIES.keys()
# TODO RENAME ME
@ -310,9 +328,15 @@ def get_active_galaxies():
return r_tags.smembers('galaxies:enabled')
def get_galaxy(galaxy_name):
if GALAXIES is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return GALAXIES.get(galaxy_name)
def exists_galaxy(galaxy):
if CLUSTERS is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return CLUSTERS.get(galaxy) is not None
def is_galaxy_enabled(galaxy):
@ -369,9 +393,15 @@ def get_galaxy_tag_meta(galaxy_type, tag):
def get_clusters():
if CLUSTERS is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return CLUSTERS.keys()
def get_cluster(cluster_type):
if CLUSTERS is None:
# LOAD GALAXY + CLUSTERS
load_galaxies()
return CLUSTERS.get(cluster_type)
def get_galaxy_tags(galaxy_type):

View File

@ -12,7 +12,6 @@ import yara
import datetime
import base64
from ail_typo_squatting import runAll
import math
from collections import defaultdict
@ -38,24 +37,22 @@ logger = logging.getLogger()
config_loader = ConfigLoader.ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_tracker = config_loader.get_db_conn("Kvrocks_Trackers")
items_dir = config_loader.get_config_str("Directories", "pastes")
if items_dir[-1] == '/':
items_dir = items_dir[:-1]
config_loader = None
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
email_regex = re.compile(email_regex)
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
special_characters.add('\\s')
# NLTK tokenizer
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
TOKENIZER = None
def init_tokenizer():
global TOKENIZER
TOKENIZER = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
gaps=True, discard_empty=True)
def get_special_characters():
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
special_characters.add('\\s')
return special_characters
###############
#### UTILS ####
def is_valid_uuid_v4(curr_uuid):
@ -76,6 +73,8 @@ def is_valid_regex(tracker_regex):
return False
def is_valid_mail(email):
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
email_regex = re.compile(email_regex)
result = email_regex.match(email)
if result:
return True
@ -400,6 +399,9 @@ class Tracker:
tracker_type = 'yara'
elif tracker_type == 'typosquatting':
from ail_typo_squatting import runAll
domain = to_track.split(" ")[0]
typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False) # TODO REPLACE LIMIT BY -1
for typo in typo_generation:
@ -857,7 +859,7 @@ def api_validate_tracker_to_add(to_track, tracker_type, nb_words=1):
# force lowercase
to_track = to_track.lower()
word_set = set(to_track)
set_inter = word_set.intersection(special_characters)
set_inter = word_set.intersection(get_special_characters())
if set_inter:
return {"status": "error",
"reason": f'special character(s) not allowed: {set_inter}',
@ -1113,7 +1115,9 @@ def get_text_word_frequency(content, filtering=True):
words_dict = defaultdict(int)
if filtering:
blob = TextBlob(content, tokenizer=tokenizer)
if TOKENIZER is None:
init_tokenizer()
blob = TextBlob(content, tokenizer=TOKENIZER)
else:
blob = TextBlob(content)
for word in blob.tokens:
@ -1800,9 +1804,9 @@ def _fix_db_custom_tags():
#### -- ####
if __name__ == '__main__':
# if __name__ == '__main__':
_fix_db_custom_tags()
# _fix_db_custom_tags()
# fix_all_tracker_uuid_list()
# res = get_all_tracker_uuid()
# print(len(res))

View File

@ -8,7 +8,6 @@ import sys
import requests
sys.path.append(os.environ['AIL_BIN'])
from lib.objects.CryptoCurrencies import CryptoCurrency
logger = logging.getLogger()
@ -53,9 +52,11 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50):
# filter btc seen in ail
def filter_btc_seen(btc_addr_set):
from lib.objects import CryptoCurrencies
list_seen_btc = []
for btc_addr in btc_addr_set:
cryptocurrency = CryptoCurrency(btc_addr, 'bitcoin')
cryptocurrency = CryptoCurrencies.CryptoCurrency(btc_addr, 'bitcoin')
if cryptocurrency.exists():
list_seen_btc.append(btc_addr)
return list_seen_btc

View File

@ -18,13 +18,10 @@ from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
from lib.data_retention_engine import update_obj_date
from lib.objects import ail_objects
from lib.objects.abstract_subtype_object import get_all_id
# from lib.data_retention_engine import update_obj_date
from lib.timeline_engine import Timeline
from lib.correlations_engine import get_correlation_by_correl_type
config_loader = ConfigLoader()
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
r_object = config_loader.get_db_conn("Kvrocks_Objects")

View File

@ -213,10 +213,10 @@ class Ocr(AbstractObject):
draw = ImageDraw.Draw(img)
for bbox in self.get_coords():
c1, c2, c3, c4 = bbox
draw.line((tuple(c1), tuple(c2)), fill="yellow")
draw.line((tuple(c2), tuple(c3)), fill="yellow")
draw.line((tuple(c3), tuple(c4)), fill="yellow")
draw.line((tuple(c4), tuple(c1)), fill="yellow")
draw.line((tuple(c1), tuple(c2)), fill="yellow", width=2)
draw.line((tuple(c2), tuple(c3)), fill="yellow", width=2)
draw.line((tuple(c3), tuple(c4)), fill="yellow", width=2)
draw.line((tuple(c4), tuple(c1)), fill="yellow", width=2)
# img.show()
buff = BytesIO()
img.save(buff, "PNG")

View File

@ -37,7 +37,7 @@ from lib.objects import Ocrs
from lib.objects import Pgps
from lib.objects.Screenshots import Screenshot
from lib.objects import Titles
from lib.objects.UsersAccount import UserAccount
from lib.objects import UsersAccount
from lib.objects import Usernames
config_loader = ConfigLoader()
@ -113,7 +113,7 @@ def get_object(obj_type, subtype, obj_id):
elif obj_type == 'pgp':
return Pgps.Pgp(obj_id, subtype)
elif obj_type == 'user-account':
return UserAccount(obj_id, subtype)
return UsersAccount.UserAccount(obj_id, subtype)
elif obj_type == 'username':
return Usernames.Username(obj_id, subtype)
else:

View File

@ -26,7 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages #
##################################
from modules.abstract_module import AbstractModule
from lib.objects.Items import Item
from lib.ConfigLoader import ConfigLoader
# from lib import Statistics