mirror of https://github.com/CIRCL/AIL-framework
chg: [ocr] add cache + correlation ocr-chats-messages + launch ocr extractor by default
parent
8bd1ae3815
commit
c25ccb8618
|
@ -275,8 +275,11 @@ function launching_scripts {
|
||||||
screen -S "Script_AIL" -X screen -t "MISP_Thehive_Auto_Push" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./MISP_Thehive_Auto_Push.py; read x"
|
screen -S "Script_AIL" -X screen -t "MISP_Thehive_Auto_Push" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./MISP_Thehive_Auto_Push.py; read x"
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
|
|
||||||
|
# IMAGES
|
||||||
screen -S "Script_AIL" -X screen -t "Exif" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Exif.py; read x"
|
screen -S "Script_AIL" -X screen -t "Exif" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Exif.py; read x"
|
||||||
sleep 0.1
|
sleep 0.1
|
||||||
|
screen -S "Script_AIL" -X screen -t "OcrExtractor" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./OcrExtractor.py; read x"
|
||||||
|
sleep 0.1
|
||||||
|
|
||||||
##################################
|
##################################
|
||||||
# TRACKERS MODULES #
|
# TRACKERS MODULES #
|
||||||
|
|
|
@ -41,26 +41,26 @@ config_loader = None
|
||||||
##################################
|
##################################
|
||||||
|
|
||||||
CORRELATION_TYPES_BY_OBJ = {
|
CORRELATION_TYPES_BY_OBJ = {
|
||||||
"chat": ["chat-subchannel", "chat-thread", "image", "user-account"], # message or direct correlation like cve, bitcoin, ... ???
|
"chat": ["chat-subchannel", "chat-thread", "image", "message", "ocr", "user-account"], # message or direct correlation like cve, bitcoin, ... ???
|
||||||
"chat-subchannel": ["chat", "chat-thread", "image", "message", "user-account"],
|
"chat-subchannel": ["chat", "chat-thread", "image", "message", "ocr", "user-account"],
|
||||||
"chat-thread": ["chat", "chat-subchannel", "image", "message", "user-account"], # TODO user account
|
"chat-thread": ["chat", "chat-subchannel", "image", "message", "ocr", "user-account"], # TODO user account
|
||||||
"cookie-name": ["domain"],
|
"cookie-name": ["domain"],
|
||||||
"cryptocurrency": ["domain", "item", "message"],
|
"cryptocurrency": ["domain", "item", "message", "ocr"],
|
||||||
"cve": ["domain", "item", "message"],
|
"cve": ["domain", "item", "message", "ocr"],
|
||||||
"decoded": ["domain", "item", "message"],
|
"decoded": ["domain", "item", "message", "ocr"],
|
||||||
"domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
|
"domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
|
||||||
"etag": ["domain"],
|
"etag": ["domain"],
|
||||||
"favicon": ["domain", "item"], # TODO Decoded
|
"favicon": ["domain", "item"], # TODO Decoded
|
||||||
"file-name": ["chat", "message"],
|
"file-name": ["chat", "message"],
|
||||||
"hhhash": ["domain"],
|
"hhhash": ["domain"],
|
||||||
"image": ["chat", "message", "ocr", "user-account"],
|
"image": ["chat", "chat-subchannel", "chat-thread", "message", "ocr", "user-account"], # TODO subchannel + threads ????
|
||||||
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
|
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
|
||||||
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "pgp", "user-account"], # chat ??
|
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "ocr", "pgp", "user-account"],
|
||||||
"ocr": ["image"],
|
"ocr": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "image", "message", "pgp", "user-account"],
|
||||||
"pgp": ["domain", "item", "message"],
|
"pgp": ["domain", "item", "message", "ocr"],
|
||||||
"screenshot": ["domain", "item"],
|
"screenshot": ["domain", "item"],
|
||||||
"title": ["domain", "item"],
|
"title": ["domain", "item"],
|
||||||
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "username"],
|
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "ocr", "username"],
|
||||||
"username": ["domain", "item", "message", "user-account"],
|
"username": ["domain", "item", "message", "user-account"],
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -209,7 +209,7 @@ class Domain(AbstractObject):
|
||||||
def get_screenshot(self):
|
def get_screenshot(self):
|
||||||
last_item = self.get_last_item_root()
|
last_item = self.get_last_item_root()
|
||||||
if last_item:
|
if last_item:
|
||||||
screenshot = self._get_external_correlation('item', '', last_item, 'screenshot').get('screenshot')
|
screenshot = self.get_obj_correlations('item', '', last_item, ['screenshot']).get('screenshot')
|
||||||
if screenshot:
|
if screenshot:
|
||||||
return screenshot.pop()[1:]
|
return screenshot.pop()[1:]
|
||||||
|
|
||||||
|
@ -392,7 +392,7 @@ class Domain(AbstractObject):
|
||||||
print(har)
|
print(har)
|
||||||
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz')
|
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz')
|
||||||
# Screenshot
|
# Screenshot
|
||||||
screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
|
screenshot = self.get_obj_correlations('item', '', item_id, ['screenshot'])
|
||||||
if screenshot and screenshot['screenshot']:
|
if screenshot and screenshot['screenshot']:
|
||||||
screenshot = screenshot['screenshot'].pop()[1:]
|
screenshot = screenshot['screenshot'].pop()[1:]
|
||||||
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
|
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],
|
||||||
|
|
|
@ -141,7 +141,7 @@ class Message(AbstractObject):
|
||||||
# TODO get thread ID
|
# TODO get thread ID
|
||||||
|
|
||||||
def _get_image_ocr(self, obj_id):
|
def _get_image_ocr(self, obj_id):
|
||||||
return bool(self._get_external_correlation('image', '', obj_id, 'ocr').get('ocr'))
|
return bool(self.get_correlation('ocr').get('ocr'))
|
||||||
|
|
||||||
def get_images(self):
|
def get_images(self):
|
||||||
images = []
|
images = []
|
||||||
|
|
|
@ -228,6 +228,14 @@ class Ocr(AbstractObject):
|
||||||
def remove(self, val):
|
def remove(self, val):
|
||||||
return r_object.srem(f'ocr:{self.id}', val)
|
return r_object.srem(f'ocr:{self.id}', val)
|
||||||
|
|
||||||
|
def update_correlation(self):
    """
    Mirror the correlations of the image sharing this OCR's id onto the OCR.

    Every object correlated with image ``self.id`` gets a correlation added
    on this object as well; the image's own 'ocr' entries are skipped.
    """
    for correl_type, correl_objs in self.get_obj_correlations('image', '', self.id).items():
        if correl_type == 'ocr':
            continue
        for raw in correl_objs:
            # Entries are '<subtype>:<id>'; split on the first ':' only so
            # an id that itself contains ':' stays intact.
            subtype, correl_id = raw.split(':', 1)
            self.add_correlation(correl_type, subtype, correl_id)
|
||||||
|
|
||||||
def create(self, extracted_texts, tags=[]):
|
def create(self, extracted_texts, tags=[]):
|
||||||
r_object.sadd(f'{self.type}:all', self.id)
|
r_object.sadd(f'{self.type}:all', self.id)
|
||||||
for extracted in extracted_texts:
|
for extracted in extracted_texts:
|
||||||
|
@ -235,6 +243,9 @@ class Ocr(AbstractObject):
|
||||||
if len(text) > 1:
|
if len(text) > 1:
|
||||||
str_coords = self.create_coord_str(bbox)
|
str_coords = self.create_coord_str(bbox)
|
||||||
self.add(str_coords, text)
|
self.add(str_coords, text)
|
||||||
|
|
||||||
|
# Correlations
|
||||||
|
self.update_correlation()
|
||||||
self.add_correlation('image', '', self.id)
|
self.add_correlation('image', '', self.id)
|
||||||
|
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
|
|
|
@ -225,11 +225,11 @@ class AbstractObject(ABC):
|
||||||
|
|
||||||
## Correlation ##
|
## Correlation ##
|
||||||
|
|
||||||
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
|
def get_obj_correlations(self, obj_type, obj_subtype, obj_id, filter_types=[]):
|
||||||
"""
|
"""
|
||||||
Get object correlation
|
Get object correlation
|
||||||
"""
|
"""
|
||||||
return get_correlations(req_type, req_subtype, req_id, filter_types=[obj_type])
|
return get_correlations(obj_type, obj_subtype, obj_id, filter_types=filter_types)
|
||||||
|
|
||||||
def get_correlation(self, obj_type):
|
def get_correlation(self, obj_type):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -17,6 +17,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
# Import Project packages
|
# Import Project packages
|
||||||
##################################
|
##################################
|
||||||
from modules.abstract_module import AbstractModule
|
from modules.abstract_module import AbstractModule
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
from lib import chats_viewer
|
from lib import chats_viewer
|
||||||
from lib.objects import Messages
|
from lib.objects import Messages
|
||||||
from lib.objects import Ocrs
|
from lib.objects import Ocrs
|
||||||
|
@ -68,30 +69,45 @@ class OcrExtractor(AbstractModule):
|
||||||
# Waiting time in seconds between two processed messages
|
# Waiting time in seconds between two processed messages
|
||||||
self.pending_seconds = 1
|
self.pending_seconds = 1
|
||||||
|
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
self.r_cache = config_loader.get_redis_conn("Redis_Cache")
|
||||||
|
|
||||||
# Send module state to logs
|
# Send module state to logs
|
||||||
self.logger.info(f'Module {self.module_name} initialized')
|
self.logger.info(f'Module {self.module_name} initialized')
|
||||||
|
|
||||||
|
def is_cached(self):
    """
    Check whether the current object carries the 'ocr:no' cache marker.

    :return: result of the cache EXISTS call on key ``ocr:no:<object id>``
    """
    cache_key = f'ocr:no:{self.obj.id}'
    return self.r_cache.exists(cache_key)
|
||||||
|
|
||||||
|
def add_to_cache(self):
    """
    Set the 'ocr:no' cache marker for the current object.

    The key ``ocr:no:<object id>`` is written with value 0 and an expiry of
    86400 seconds (24 hours).
    """
    cache_key = f'ocr:no:{self.obj.id}'
    self.r_cache.setex(cache_key, 86400, 0)
|
||||||
|
|
||||||
def compute(self, message):
|
def compute(self, message):
|
||||||
image = self.get_obj()
|
image = self.get_obj()
|
||||||
path = image.get_filepath()
|
print(image.id)
|
||||||
print(image)
|
|
||||||
|
|
||||||
languages = get_model_languages(image)
|
|
||||||
print(languages)
|
|
||||||
|
|
||||||
ocr = Ocrs.Ocr(image.id)
|
ocr = Ocrs.Ocr(image.id)
|
||||||
ocr.delete()
|
if self.is_cached():
|
||||||
|
return None
|
||||||
|
|
||||||
if not ocr.exists():
|
if not ocr.exists():
|
||||||
|
path = image.get_filepath()
|
||||||
|
languages = get_model_languages(image)
|
||||||
|
print(languages)
|
||||||
texts = Ocrs.extract_text(path, languages)
|
texts = Ocrs.extract_text(path, languages)
|
||||||
if texts:
|
if texts:
|
||||||
|
print('create')
|
||||||
ocr = Ocrs.create(image.id, texts)
|
ocr = Ocrs.create(image.id, texts)
|
||||||
self.add_message_to_queue(ocr)
|
self.add_message_to_queue(ocr)
|
||||||
|
# Save in cache
|
||||||
|
else:
|
||||||
|
print('no text detected')
|
||||||
|
self.add_to_cache()
|
||||||
|
else:
|
||||||
|
print('update correlation')
|
||||||
|
ocr.update_correlation()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
module = OcrExtractor()
|
module = OcrExtractor()
|
||||||
module.run()
|
module.run()
|
||||||
# from lib.objects import Images
|
|
||||||
# module.obj = Images.Image('')
|
|
||||||
# module.compute('')
|
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_HOME'])
|
||||||
|
##################################
|
||||||
|
# Import Project packages
|
||||||
|
##################################
|
||||||
|
from update.bin.ail_updater import AIL_Updater
|
||||||
|
from lib import ail_updates
|
||||||
|
from lib import chats_viewer
|
||||||
|
|
||||||
|
class Updater(AIL_Updater):
    """Updater for this release; adds nothing on top of AIL_Updater."""

    def __init__(self, version):
        # Only forwards the target version to the base updater.
        super().__init__(version)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Run the chats_viewer correlation fix before the v5.5 updater itself.
    # NOTE(review): presumably repairs subchannel <-> message correlations
    # created before this release - confirm in lib/chats_viewer.
    chats_viewer.fix_correlations_subchannel_message()
    Updater('v5.5').run_update()
|
||||||
|
|
|
@ -0,0 +1,40 @@
|
||||||
|
#!/bin/bash
# AIL v5.5 update script: shut AIL down, refresh the git submodules and the
# easyocr package, call LAUNCH.sh again, then run update/v5.5/Update.py.

# Every path below is built from these variables; abort early if one is unset.
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colour escape sequences, interpreted by `echo -e`.
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e "${GREEN}Shutting down AIL ...${DEFAULT}"
bash "${AIL_BIN}/LAUNCH.sh" -ks
wait

# SUBMODULES #
git submodule update

echo ""
echo -e "${GREEN}Updating python packages ...${DEFAULT}"
echo ""
pip install -U easyocr

# NOTE(review): -lrv / -lkv presumably start the Redis and Kvrocks back-ends
# needed by the Python updater - confirm against LAUNCH.sh.
bash "${AIL_BIN}/LAUNCH.sh" -lrv
bash "${AIL_BIN}/LAUNCH.sh" -lkv

echo ""
echo -e "${GREEN}Updating AIL VERSION ...${DEFAULT}"
echo ""
python "${AIL_HOME}/update/v5.5/Update.py"
wait
echo ""
echo ""

exit 0
|
Loading…
Reference in New Issue