Compare commits

...

13 Commits

44 changed files with 1863 additions and 86 deletions

View File

@ -29,6 +29,8 @@ Contributions are welcome! Fork the repository, experiment with the code, and su
AIL supports crawling of websites and Tor hidden services. Ensure your Tor client's proxy configuration is correct, especially the SOCKS5 proxy settings.
![Crawler](./doc/screenshots/ail-lacus.png?raw=true "AIL framework Crawler")
### Installation
[Install Lacus](https://github.com/ail-project/lacus)

View File

@ -29,6 +29,8 @@ AIL framework - Framework for Analysis of Information Leaks
AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services or unstructured data streams. AIL framework is flexible and can be extended to support other functionalities to mine or process sensitive information (e.g. data leak prevention).
![Overview](./doc/screenshots/ail-overview.png?raw=true "AIL framework Overview")
![Dashboard](./doc/screenshots/dashboard0.png?raw=true "AIL framework dashboard")
@ -55,6 +57,8 @@ Allow easy creation and customization by extending an abstract class.
## Features
![Internal](./doc/screenshots/ail-internal.png?raw=true "AIL framework Internal")
- Modular architecture to handle streams of unstructured or structured information
- Default support for external ZMQ feeds, such as those provided by CIRCL or other providers
- Multiple Importers and feeds support

View File

@ -275,8 +275,11 @@ function launching_scripts {
screen -S "Script_AIL" -X screen -t "MISP_Thehive_Auto_Push" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./MISP_Thehive_Auto_Push.py; read x"
sleep 0.1
# IMAGES
screen -S "Script_AIL" -X screen -t "Exif" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./Exif.py; read x"
sleep 0.1
screen -S "Script_AIL" -X screen -t "OcrExtractor" bash -c "cd ${AIL_BIN}/modules; ${ENV_PY} ./OcrExtractor.py; read x"
sleep 0.1
##################################
# TRACKERS MODULES #

View File

@ -109,6 +109,9 @@ class FeederImporter(AbstractImporter):
gzip64_content = feeder.get_gzip64_content()
relay_message = f'{feeder_name} {gzip64_content}'
objs_messages.append({'obj': obj, 'message': relay_message})
elif obj.type == 'image':
date = feeder.get_date()
objs_messages.append({'obj': obj, 'message': f'{feeder_name} {date}'})
else: # Messages save on DB
if obj.exists() and obj.type != 'chat':
objs_messages.append({'obj': obj, 'message': feeder_name})

View File

@ -41,6 +41,9 @@ class DefaultFeeder:
def get_source(self):
    """Return the ``source`` entry of the feeder JSON payload, or ``None`` when it is absent."""
    payload = self.json_data
    return payload.get('source')
def get_date(self):
    """Return today's date formatted as a ``YYYYMMDD`` string."""
    today = datetime.date.today()
    return today.strftime("%Y%m%d")
def get_json_data(self):
"""
Return the JSON data,

View File

@ -92,6 +92,14 @@ class AbstractChatFeeder(DefaultFeeder, ABC):
def get_reactions(self):
    """Return the ``reactions`` entry of the message metadata, or an empty list when absent."""
    meta = self.json_data['meta']
    return meta.get('reactions', [])
def get_date(self):
    """
    Return the message date as a ``YYYYMMDD`` string.

    The date is derived from ``meta['date']['timestamp']`` (converted with
    ``datetime.fromtimestamp``, i.e. in local time). When the metadata carries
    no date, today's date is returned instead.
    """
    meta = self.json_data['meta']
    if not meta.get('date'):
        # No date in the payload: fall back to the current day
        return datetime.date.today().strftime("%Y%m%d")
    message_datetime = datetime.datetime.fromtimestamp(meta['date']['timestamp'])
    return message_datetime.strftime('%Y%m%d')
def get_message_timestamp(self):
if not self.json_data['meta'].get('date'):
return None

View File

@ -9,7 +9,6 @@ The ``Domain``
import os
import sys
import time
import redis
import configparser

View File

@ -330,6 +330,11 @@ def get_obj_languages(obj_type, obj_subtype, obj_id):
def get_obj_language_stats(obj_type, obj_subtype, obj_id):
    """Return every member of the object's language statistics sorted set together with its score."""
    stats_key = f'obj:langs:stat:{obj_type}:{obj_subtype}:{obj_id}'
    return r_lang.zrange(stats_key, 0, -1, withscores=True)
def get_obj_main_language(obj_type, obj_subtype, obj_id):
    """
    Return the highest-scored language of an object.

    Reads the first element of the object's language statistics sorted set in
    reverse (descending) order; returns ``None`` when no language is recorded.
    """
    stats_key = f'obj:langs:stat:{obj_type}:{obj_subtype}:{obj_id}'
    top_language = r_lang.zrevrange(stats_key, 0, 0)
    return top_language[0] if top_language else None
# TODO ADD language to CHAT GLOBAL SET
def add_obj_language(language, obj_type, obj_subtype, obj_id, objs_containers=set()): # (s)
if not obj_subtype:

View File

@ -32,6 +32,9 @@ config_loader = None
# # # # UNSAFE TAGS # # # #
# set of unsafe tags
UNSAFE_TAGS = None
def build_unsafe_tags():
tags = set()
# CE content
@ -52,12 +55,12 @@ def is_tags_safe(ltags):
:return: is a tag in the set unsafe
:rtype: boolean
"""
return unsafe_tags.isdisjoint(ltags)
global UNSAFE_TAGS
if UNSAFE_TAGS is None:
UNSAFE_TAGS = build_unsafe_tags()
return UNSAFE_TAGS.isdisjoint(ltags)
# set of unsafe tags
unsafe_tags = build_unsafe_tags()
# - - - UNSAFE TAGS - - - #
# # TODO: verify tags + object_type
@ -80,16 +83,15 @@ def get_obj_by_tag(key_tag):
#### Taxonomies ####
TAXONOMIES = {}
TAXONOMIES = None
def load_taxonomies():
    """Populate the module-level ``TAXONOMIES`` from the MISP taxonomies manifest under ``AIL_HOME``."""
    global TAXONOMIES
    manifest_path = os.path.join(os.environ['AIL_HOME'], 'files/misp-taxonomies/MANIFEST.json')
    TAXONOMIES = Taxonomies(manifest_path=manifest_path)
load_taxonomies()
def get_taxonomies():
    """Return the keys of the taxonomies collection, loading it on first use."""
    if TAXONOMIES is None:
        # Lazy load: the manifest is only parsed when a taxonomy is first requested
        load_taxonomies()
    return TAXONOMIES.keys()
# TODO rename me to get enabled_taxonomies
@ -111,12 +113,18 @@ def disable_taxonomy(taxonomy):
r_tags.srem('taxonomies:enabled', taxonomy)
def exists_taxonomy(taxonomy):
    """Return ``True`` when ``taxonomy`` is a key of the (lazily loaded) taxonomies collection."""
    if TAXONOMIES is None:
        load_taxonomies()
    taxonomy_obj = TAXONOMIES.get(taxonomy)
    return taxonomy_obj is not None
def get_taxonomy_description(taxonomy):
    """Return the ``description`` attribute of a taxonomy, loading the taxonomies on first use."""
    if TAXONOMIES is None:
        load_taxonomies()
    taxonomy_obj = TAXONOMIES.get(taxonomy)
    return taxonomy_obj.description
def get_taxonomy_name(taxonomy):
    """Return the ``name`` attribute of a taxonomy, loading the taxonomies on first use."""
    if TAXONOMIES is None:
        load_taxonomies()
    taxonomy_obj = TAXONOMIES.get(taxonomy)
    return taxonomy_obj.name
def get_taxonomy_predicates(taxonomy):
@ -133,12 +141,18 @@ def get_taxonomy_predicates(taxonomy):
return meta
def get_taxonomy_refs(taxonomy):
    """Return the ``refs`` attribute of a taxonomy, loading the taxonomies on first use."""
    if TAXONOMIES is None:
        load_taxonomies()
    taxonomy_obj = TAXONOMIES.get(taxonomy)
    return taxonomy_obj.refs
def get_taxonomy_version(taxonomy):
    """Return the ``version`` attribute of a taxonomy, loading the taxonomies on first use."""
    if TAXONOMIES is None:
        load_taxonomies()
    taxonomy_obj = TAXONOMIES.get(taxonomy)
    return taxonomy_obj.version
def get_taxonomy_tags(taxonomy, enabled=False):
if TAXONOMIES is None:
load_taxonomies()
taxonomy_obj = TAXONOMIES.get(taxonomy)
tags = []
for p, content in taxonomy_obj.items():
@ -165,6 +179,8 @@ def get_taxonomy_meta(taxonomy_name, enabled=False, enabled_tags=False, nb_activ
meta = {}
if not exists_taxonomy(taxonomy_name):
return meta
if TAXONOMIES is None:
load_taxonomies()
taxonomy = TAXONOMIES.get(taxonomy_name)
meta['description'] = taxonomy.description
meta['name'] = taxonomy.name
@ -241,6 +257,8 @@ def api_update_taxonomy_tag_enabled(data):
if not exists_taxonomy(taxonomy):
return {'error': f'taxonomy {taxonomy} not found'}, 404
tags = data.get('tags', [])
if TAXONOMIES is None:
load_taxonomies()
taxonomy_tags = set(TAXONOMIES.get(taxonomy).machinetags())
for tag in tags:
if tag not in taxonomy_tags:
@ -249,6 +267,8 @@ def api_update_taxonomy_tag_enabled(data):
def enable_taxonomy_tags(taxonomy):
    """Enable a taxonomy, then enable each machinetag that taxonomy defines."""
    enable_taxonomy(taxonomy)
    if TAXONOMIES is None:
        load_taxonomies()
    machinetags = TAXONOMIES.get(taxonomy).machinetags()
    for machinetag in machinetags:
        add_taxonomy_tag_enabled(taxonomy, machinetag)
@ -279,9 +299,8 @@ def api_disable_taxonomy_tags(data):
#
# TODO Synonyms
GALAXIES = {}
CLUSTERS = {}
GALAXIES = None
CLUSTERS = None
def load_galaxies():
global GALAXIES
galaxies = []
@ -298,11 +317,10 @@ def load_galaxies():
clusters.append(json.load(f))
CLUSTERS = Clusters(clusters)
# LOAD GALAXY + CLUSTERS
load_galaxies()
def get_galaxies():
    """Return the keys of the galaxies collection, loading galaxies and clusters on first use."""
    if GALAXIES is None:
        # LOAD GALAXY + CLUSTERS
        load_galaxies()
    return GALAXIES.keys()
# TODO RENAME ME
@ -310,9 +328,15 @@ def get_active_galaxies():
return r_tags.smembers('galaxies:enabled')
def get_galaxy(galaxy_name):
    """Return the galaxy stored under ``galaxy_name``, loading galaxies and clusters on first use."""
    if GALAXIES is None:
        # LOAD GALAXY + CLUSTERS
        load_galaxies()
    galaxy = GALAXIES.get(galaxy_name)
    return galaxy
def exists_galaxy(galaxy):
    """
    Return ``True`` when ``galaxy`` is a key of the (lazily loaded) clusters collection.

    NOTE(review): the lookup is done in ``CLUSTERS`` rather than ``GALAXIES`` --
    presumably intentional, confirm against the callers.
    """
    if CLUSTERS is None:
        # LOAD GALAXY + CLUSTERS
        load_galaxies()
    cluster = CLUSTERS.get(galaxy)
    return cluster is not None
def is_galaxy_enabled(galaxy):
@ -369,9 +393,15 @@ def get_galaxy_tag_meta(galaxy_type, tag):
def get_clusters():
    """Return the keys of the clusters collection, loading galaxies and clusters on first use."""
    if CLUSTERS is None:
        # LOAD GALAXY + CLUSTERS
        load_galaxies()
    return CLUSTERS.keys()
def get_cluster(cluster_type):
    """Return the cluster stored under ``cluster_type``, loading galaxies and clusters on first use."""
    if CLUSTERS is None:
        # LOAD GALAXY + CLUSTERS
        load_galaxies()
    cluster = CLUSTERS.get(cluster_type)
    return cluster
def get_galaxy_tags(galaxy_type):

View File

@ -12,7 +12,6 @@ import yara
import datetime
import base64
from ail_typo_squatting import runAll
import math
from collections import defaultdict
@ -38,24 +37,22 @@ logger = logging.getLogger()
config_loader = ConfigLoader.ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_tracker = config_loader.get_db_conn("Kvrocks_Trackers")
items_dir = config_loader.get_config_str("Directories", "pastes")
if items_dir[-1] == '/':
items_dir = items_dir[:-1]
config_loader = None
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
email_regex = re.compile(email_regex)
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
special_characters.add('\\s')
# NLTK tokenizer
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
TOKENIZER = None
def init_tokenizer():
    """
    Build the NLTK ``RegexpTokenizer`` used for word-frequency extraction
    and store it in the module-level ``TOKENIZER``.

    The tokenizer runs in ``gaps=True`` mode: the pattern describes the
    separators (punctuation and whitespace), not the tokens, and empty
    tokens are discarded.
    """
    global TOKENIZER
    # Raw string: the previous plain literal relied on invalid escape sequences
    # (``\&``, ``\~``, ``\s``, ...) being passed through unchanged, which emits a
    # warning on recent Python versions. The character class matched is the same.
    separators = r'[\&\~\:\;\,\.\(\)\{\}\|\[\]\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+'
    TOKENIZER = RegexpTokenizer(separators, gaps=True, discard_empty=True)
def get_special_characters():
    """Return a fresh set of the characters that are rejected in tracked words, plus the ``'\\s'`` marker."""
    return set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\') | {'\\s'}
###############
#### UTILS ####
def is_valid_uuid_v4(curr_uuid):
@ -76,6 +73,8 @@ def is_valid_regex(tracker_regex):
return False
def is_valid_mail(email):
email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}'
email_regex = re.compile(email_regex)
result = email_regex.match(email)
if result:
return True
@ -400,6 +399,9 @@ class Tracker:
tracker_type = 'yara'
elif tracker_type == 'typosquatting':
from ail_typo_squatting import runAll
domain = to_track.split(" ")[0]
typo_generation = runAll(domain=domain, limit=math.inf, formatoutput="text", pathOutput="-", verbose=False) # TODO REPLACE LIMIT BY -1
for typo in typo_generation:
@ -857,7 +859,7 @@ def api_validate_tracker_to_add(to_track, tracker_type, nb_words=1):
# force lowercase
to_track = to_track.lower()
word_set = set(to_track)
set_inter = word_set.intersection(special_characters)
set_inter = word_set.intersection(get_special_characters())
if set_inter:
return {"status": "error",
"reason": f'special character(s) not allowed: {set_inter}',
@ -1113,7 +1115,9 @@ def get_text_word_frequency(content, filtering=True):
words_dict = defaultdict(int)
if filtering:
blob = TextBlob(content, tokenizer=tokenizer)
if TOKENIZER is None:
init_tokenizer()
blob = TextBlob(content, tokenizer=TOKENIZER)
else:
blob = TextBlob(content)
for word in blob.tokens:
@ -1800,9 +1804,9 @@ def _fix_db_custom_tags():
#### -- ####
if __name__ == '__main__':
# if __name__ == '__main__':
_fix_db_custom_tags()
# _fix_db_custom_tags()
# fix_all_tracker_uuid_list()
# res = get_all_tracker_uuid()
# print(len(res))

View File

@ -18,14 +18,14 @@ config_loader = None
AIL_OBJECTS = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cookie-name', 'cve', 'cryptocurrency', 'decoded',
'domain', 'etag', 'favicon', 'file-name', 'hhhash',
'item', 'image', 'message', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
'item', 'image', 'message', 'ocr', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
AIL_OBJECTS_WITH_SUBTYPES = {'chat', 'chat-subchannel', 'cryptocurrency', 'pgp', 'username', 'user-account'}
# TODO by object TYPE ????
AIL_OBJECTS_CORRELATIONS_DEFAULT = sorted({'chat', 'chat-subchannel', 'chat-thread', 'cve', 'cryptocurrency', 'decoded',
'domain', 'favicon', 'file-name',
'item', 'image', 'message', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
'item', 'image', 'message', 'ocr', 'pgp', 'screenshot', 'title', 'user-account', 'username'})
def get_ail_uuid():
ail_uuid = r_serv_db.get('ail:uuid')
@ -105,7 +105,7 @@ def unpack_obj_global_id(global_id, r_type='tuple'):
obj = global_id.split(':', 2)
return {'type': obj[0], 'subtype': obj[1], 'id': obj[2]}
else: # tuple(type, subtype, id)
return global_id.split(':', 2)
return global_id.split(':', 2) # TODO REPLACE get_obj_type_subtype_id_from_global_id(global_id)
def unpack_objs_global_id(objs_global_id, r_type='tuple'):
objs = []

View File

@ -8,7 +8,6 @@ import sys
import requests
sys.path.append(os.environ['AIL_BIN'])
from lib.objects.CryptoCurrencies import CryptoCurrency
logger = logging.getLogger()
@ -53,9 +52,11 @@ def get_bitcoin_info(bitcoin_address, nb_transaction=50):
# filter btc seen in ail
def filter_btc_seen(btc_addr_set):
from lib.objects import CryptoCurrencies
list_seen_btc = []
for btc_addr in btc_addr_set:
cryptocurrency = CryptoCurrency(btc_addr, 'bitcoin')
cryptocurrency = CryptoCurrencies.CryptoCurrency(btc_addr, 'bitcoin')
if cryptocurrency.exists():
list_seen_btc.append(btc_addr)
return list_seen_btc

View File

@ -288,6 +288,10 @@ def get_obj_chat(chat_type, chat_subtype, chat_id):
elif chat_type == 'chat-thread':
return ChatThreads.ChatThread(chat_id, chat_subtype)
def get_obj_chat_from_global_id(chat_gid):
    """
    Build a chat object from its global id.

    :param chat_gid: global id formatted as ``type:subtype:id`` (the id part may itself contain ``:``)
    :return: whatever ``get_obj_chat`` returns for the unpacked type, subtype and id
    """
    gid_parts = chat_gid.split(':', 2)
    chat_type, chat_subtype, chat_id = gid_parts
    return get_obj_chat(chat_type, chat_subtype, chat_id)
def get_obj_chat_meta(obj_chat, new_options=set()):
options = {}
if obj_chat.type == 'chat':

View File

@ -41,25 +41,26 @@ config_loader = None
##################################
CORRELATION_TYPES_BY_OBJ = {
"chat": ["chat-subchannel", "chat-thread", "image", "user-account"], # message or direct correlation like cve, bitcoin, ... ???
"chat-subchannel": ["chat", "chat-thread", "image", "message", "user-account"],
"chat-thread": ["chat", "chat-subchannel", "image", "message", "user-account"], # TODO user account
"chat": ["chat-subchannel", "chat-thread", "image", "message", "ocr", "user-account"], # message or direct correlation like cve, bitcoin, ... ???
"chat-subchannel": ["chat", "chat-thread", "image", "message", "ocr", "user-account"],
"chat-thread": ["chat", "chat-subchannel", "image", "message", "ocr", "user-account"], # TODO user account
"cookie-name": ["domain"],
"cryptocurrency": ["domain", "item", "message"],
"cve": ["domain", "item", "message"],
"decoded": ["domain", "item", "message"],
"cryptocurrency": ["domain", "item", "message", "ocr"],
"cve": ["domain", "item", "message", "ocr"],
"decoded": ["domain", "item", "message", "ocr"],
"domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
"etag": ["domain"],
"favicon": ["domain", "item"], # TODO Decoded
"file-name": ["chat", "message"],
"hhhash": ["domain"],
"image": ["chat", "message", "user-account"],
"image": ["chat", "chat-subchannel", "chat-thread", "message", "ocr", "user-account"], # TODO subchannel + threads ????
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], # chat ???
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "pgp", "user-account"], # chat ??
"pgp": ["domain", "item", "message"],
"message": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "file-name", "image", "ocr", "pgp", "user-account"],
"ocr": ["chat", "chat-subchannel", "chat-thread", "cve", "cryptocurrency", "decoded", "image", "message", "pgp", "user-account"],
"pgp": ["domain", "item", "message", "ocr"],
"screenshot": ["domain", "item"],
"title": ["domain", "item"],
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "username"],
"user-account": ["chat", "chat-subchannel", "chat-thread", "image", "message", "ocr", "username"],
"username": ["domain", "item", "message", "user-account"],
}

View File

@ -18,13 +18,10 @@ from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_chat_object import AbstractChatObject, AbstractChatObjects
from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id
from lib.data_retention_engine import update_obj_date
from lib.objects import ail_objects
from lib.objects.abstract_subtype_object import get_all_id
# from lib.data_retention_engine import update_obj_date
from lib.timeline_engine import Timeline
from lib.correlations_engine import get_correlation_by_correl_type
config_loader = ConfigLoader()
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
r_object = config_loader.get_db_conn("Kvrocks_Objects")

View File

@ -209,7 +209,7 @@ class Domain(AbstractObject):
def get_screenshot(self):
last_item = self.get_last_item_root()
if last_item:
screenshot = self._get_external_correlation('item', '', last_item, 'screenshot').get('screenshot')
screenshot = self.get_obj_correlations('item', '', last_item, ['screenshot']).get('screenshot')
if screenshot:
return screenshot.pop()[1:]
@ -392,7 +392,7 @@ class Domain(AbstractObject):
print(har)
_write_in_zip_buffer(zf, os.path.join(hars_dir, har), f'{basename}.json.gz')
# Screenshot
screenshot = self._get_external_correlation('item', '', item_id, 'screenshot')
screenshot = self.get_obj_correlations('item', '', item_id, ['screenshot'])
if screenshot and screenshot['screenshot']:
screenshot = screenshot['screenshot'].pop()[1:]
screenshot = os.path.join(screenshot[0:2], screenshot[2:4], screenshot[4:6], screenshot[6:8],

View File

@ -2,6 +2,7 @@
# -*-coding:UTF-8 -*
import base64
import magic
import os
import sys
@ -50,7 +51,7 @@ class Image(AbstractDaterangeObject):
if flask_context:
url = url_for('correlation.show_correlation', type=self.type, id=self.id)
else:
url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
url = f'/correlation/show?type={self.type}&id={self.id}'
return url
def get_svg_icon(self):
@ -64,6 +65,14 @@ class Image(AbstractDaterangeObject):
filename = os.path.join(IMAGE_FOLDER, self.get_rel_path())
return os.path.realpath(filename)
def is_gif(self, filepath=None):
    """
    Tell whether the image file is a GIF, based on its detected MIME type.

    :param filepath: file to inspect; defaults to this object's own file path
    :return: ``True`` when the detected MIME type is ``image/gif``
    """
    path = filepath or self.get_filepath()
    return magic.from_file(path, mime=True) == 'image/gif'
def get_file_content(self):
filepath = self.get_filepath()
with open(filepath, 'rb') as f:
@ -109,6 +118,20 @@ class Image(AbstractDaterangeObject):
def get_screenshot_dir():
    """Return the images root folder (``IMAGE_FOLDER``)."""
    images_root = IMAGE_FOLDER
    return images_root
def get_all_images():
    """
    Return the ids of every image file found under the images folder.

    An id is rebuilt from the file location: the images folder prefix is
    removed and every ``/`` is dropped, so the nested directory names and
    the file name are concatenated back together.
    """
    image_ids = []
    for folder, _subdirs, filenames in os.walk(get_screenshot_dir()):
        for filename in filenames:
            # No separator is needed here: all '/' are stripped just below
            full_path = f'{folder}{filename}'
            image_ids.append(full_path.replace(IMAGE_FOLDER, '').replace('/', ''))
    return image_ids
def get_all_images_objects(filters={}):
    """
    Lazily yield an ``Image`` object for every image id on disk.

    :param filters: accepted but not used by this generator
    """
    yield from map(Image, get_all_images())
def create(content, size_limit=5000000, b64=False, force=False):
size = (len(content)*3) / 4
@ -134,5 +157,6 @@ class Images(AbstractDaterangeObjects):
# if __name__ == '__main__':
# print(json.dumps(get_all_images()))
# name_to_search = '29ba'
# print(search_screenshots_by_name(name_to_search))

View File

@ -140,12 +140,15 @@ class Message(AbstractObject):
# TODO get channel ID
# TODO get thread ID
def _get_image_ocr(self, obj_id):
return bool(self.get_correlation('ocr').get('ocr'))
def get_images(self):
images = []
for child in self.get_childrens():
obj_type, _, obj_id = child.split(':', 2)
if obj_type == 'image':
images.append(obj_id)
images.append({'id': obj_id, 'ocr': self._get_image_ocr(obj_id)})
return images
def get_user_account(self, meta=False):
@ -206,12 +209,6 @@ class Message(AbstractObject):
else:
return None
def _set_translation(self, translation):
"""
Set translated content
"""
return self._set_field('translated', translation) # translation by hash ??? -> avoid translating multiple time
# def get_ail_2_ail_payload(self):
# payload = {'raw': self.get_gzip_content(b64=True)}
# return payload
@ -323,7 +320,6 @@ class Message(AbstractObject):
# content = self.get_content()
# translated = argostranslate.translate.translate(content, 'ru', 'en')
# # Save translation
# self._set_translation(translated)
# return translated
## Language ##
@ -347,7 +343,6 @@ class Message(AbstractObject):
if not language and content:
language = self.detect_language()
if translation and content:
self._set_translation(translation)
self.set_translation(language, translation)
for tag in tags:
self.add_tag(tag)

325
bin/lib/objects/Ocrs.py Executable file
View File

@ -0,0 +1,325 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
from datetime import datetime
from io import BytesIO
from PIL import Image
from PIL import ImageDraw
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
from lib.ConfigLoader import ConfigLoader
from packages import Date
# from lib import Language
# from lib.data_retention_engine import update_obj_date, get_obj_date_first
from flask import url_for
config_loader = ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
r_object = config_loader.get_db_conn("Kvrocks_Objects")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
IMAGE_FOLDER = config_loader.get_files_directory('images')
config_loader = None
# SET x1,y1:x2,y2:x3,y3:x4,y4:extracted_text
class Ocr(AbstractDaterangeObject):
    """
    AIL OCR Object: the text regions extracted from an image.

    The object id is also used as the id of the source image (see
    ``get_image_path`` and ``create``). Every extracted region is one member
    of the ``ocr:<id>`` set, encoded as
    ``x1,y1:x2,y2:x3,y3:x4,y4:extracted_text``.
    """

    def __init__(self, id):
        super().__init__('ocr', id)

    def exists(self):
        """Return a truthy value when at least one text region is stored for this id."""
        return r_object.exists(f'ocr:{self.id}')

    def get_content(self, r_type='str'):
        """
        Return the extracted text, rebuilt line by line.

        Regions are grouped into lines by rounding the y of their first corner
        to the nearest multiple of 20, lines are ordered top to bottom and the
        regions of a line are ordered by the x of their first corner. The
        result is cached for 300 seconds.

        :param r_type: ``'str'`` or ``'bytes'``
        :return: the text as ``str``; as ``bytes`` when ``r_type`` is ``'bytes'``
                 and the text is not empty; ``None`` otherwise
        """
        cache_key = f'content:{self.get_global_id()}'
        content = r_cache.get(cache_key)
        if not content:
            text_lines = {}
            for extracted in r_object.smembers(f'ocr:{self.id}'):
                fields = extracted.split(':', 4)
                x, y = fields[0].split(',', 1)
                # get text line, y +- 20
                rounded_y = round(int(y) / 20) * 20
                text_lines.setdefault(rounded_y, []).append((int(x), int(y), fields[-1]))
            content = ''.join(
                ' '.join(text for _, _, text in sorted(text_lines[line_y], key=lambda c: c[0])) + '\n'
                for line_y in sorted(text_lines)
            )
            # Set Cache: value and TTL are written in a single command
            if content:
                r_cache.set(cache_key, content, ex=300)
        if r_type == 'str':
            return content
        elif r_type == 'bytes':
            if content:
                return content.encode()
        return None

    def get_date(self):  # TODO
        """Return today's date string (placeholder until a real date is stored)."""
        return Date.get_today_date_str()

    def get_source(self):  # TODO
        """
        Returns source/feeder name
        """
        return 'ocr'

    def get_basename(self):  # TODO
        return 'ocr'

    def get_language(self):
        """Return one of the languages recorded for this object, or ``None`` when there is none."""
        languages = self.get_languages()
        return languages.pop() if languages else None

    def get_link(self, flask_context=False):
        """Return the URL of the correlation page of this object."""
        if flask_context:
            return url_for('correlation.show_correlation', type=self.type, id=self.id)
        return f'{baseurl}/correlation/show?type={self.type}&id={self.id}'

    def get_svg_icon(self):
        return {'style': 'fas', 'icon': '\uf065', 'color': 'yellow', 'radius': 5}

    def get_image_path(self):
        """
        Return the resolved path of the source image.

        The first 12 characters of the id are split into six 2-character
        directory levels; the remainder of the id is the file name.
        """
        levels = [self.id[start:start + 2] for start in range(0, 12, 2)]
        rel_path = os.path.join(*levels, self.id[12:])
        return os.path.realpath(os.path.join(IMAGE_FOLDER, rel_path))

    def get_misp_object(self):  # TODO
        """Return a MISP ``instant-message`` object carrying only the first-seen date."""
        obj = MISPObject('instant-message', standalone=True)
        obj_date = self.get_date()
        if obj_date:
            obj.first_seen = obj_date
        else:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={obj_date}')

        # TODO: no attribute is exported yet, so the tagging loop below is currently a no-op
        obj_attrs = []
        for obj_attr in obj_attrs:
            for tag in self.get_tags():
                obj_attr.add_tag(tag)
        return obj

    # options: set of optional meta fields
    def get_meta(self, options=None, translation_target=''):
        """
        Return the object metadata, always including the extracted ``content``.

        :type options: set
        :param options: optional fields to add: ``investigations``, ``link``,
                        ``icon``, ``img``, ``map``, ``language``, ``translation``
        :param translation_target: target language; required for ``translation``
        """
        if options is None:
            options = set()
        meta = self._get_meta(options=options)
        meta['content'] = self.get_content()

        # optional meta fields
        if 'investigations' in options:
            meta['investigations'] = self.get_investigations()
        if 'link' in options:
            meta['link'] = self.get_link(flask_context=True)
        if 'icon' in options:
            meta['svg_icon'] = self.get_svg_icon()
        if 'img' in options:
            meta['img'] = self.draw_bounding_boxs()
        if 'map' in options:
            meta['map'] = self.get_img_map_coords()
        # language is resolved before translation: it is used as the translation source
        if 'language' in options:
            meta['language'] = self.get_language()
        if 'translation' in options and translation_target:
            source = meta.get('language') or None
            meta['translation'] = self.translate(content=meta.get('content'), source=source,
                                                 target=translation_target)
        return meta

    def get_objs_container(self):
        """Return the global ids of the chat, subchannel and thread correlated to this object."""
        objs_containers = set()
        for container_type in ('chat', 'chat-subchannel', 'chat-thread'):
            container = self.get_first_correlation(container_type)
            # skip missing correlations so that None never ends up in the set
            if container:
                objs_containers.add(container)
        return objs_containers

    def create_coord_str(self, bbox):
        """Encode a 4-corner bounding box as ``x1,y1:x2,y2:x3,y3:x4,y4`` (integer coordinates)."""
        c1, c2, c3, c4 = bbox
        return ':'.join(f'{int(x)},{int(y)}' for x, y in (c1, c2, c3, c4))

    def _unpack_coord(self, coord):
        """Split an ``x,y`` string into its two components (still strings)."""
        return coord.split(',', 1)

    def get_coords(self):
        """Return, for each stored region, the list of its corners as ``(x, y)`` integer tuples."""
        coords = []
        for extracted in r_object.smembers(f'ocr:{self.id}'):
            # the last field is the extracted text, everything before is a corner
            corners = extracted.split(':', 4)[:-1]
            coord = []
            for corner in corners:
                x, y = self._unpack_coord(corner)
                coord.append((int(x), int(y)))
            coords.append(coord)
        return coords

    def get_img_map_coords(self):
        """Return, for each stored region, a ``('x1,y1,...,x4,y4', text)`` tuple."""
        coords = []
        for extracted in r_object.smembers(f'ocr:{self.id}'):
            extract = extracted.split(':', 4)
            flat = []
            for corner in extract[:4]:
                x, y = self._unpack_coord(corner)
                flat.extend((x, y))
            coords.append((','.join(flat), extract[4]))
        return coords

    def edit_text(self, coordinates, text, new_text, new_coordinates=None):
        """Not implemented yet."""
        pass

    def add_text(self, coordinates, text):
        """Store one region (``coordinates:text``) and return the result of the set insertion."""
        val = f'{coordinates}:{text}'
        return r_object.sadd(f'ocr:{self.id}', val)

    def remove_text(self, val):
        """Remove one stored region, given its full ``coordinates:text`` value."""
        return r_object.srem(f'ocr:{self.id}', val)

    def update_correlation(self, date=None):
        """
        Copy every correlation of the source image (except ``ocr``) onto this object.

        :param date: when given, the object is first registered as seen at this date
        """
        if date:
            self.add(date, None)
        image_correl = self.get_obj_correlations('image', '', self.id)
        for obj_type in image_correl:
            if obj_type != 'ocr':
                for obj_raw in image_correl[obj_type]:
                    obj_subtype, obj_id = obj_raw.split(':', 1)
                    self.add_correlation(obj_type, obj_subtype, obj_id)

    def create(self, extracted_texts, tags=None):
        """
        Store the extracted regions and link the object to its source image.

        Regions whose text is one character or less are ignored. When at least
        one region is stored, the date information and the correlations of the
        image are copied, the image itself is correlated and the tags are added.

        :param extracted_texts: iterable of ``(bbox, text)`` pairs
        :param tags: tags to add to the created object
        :return: the object id when something was stored, else ``None``
        """
        created = False
        for bbox, text in extracted_texts:
            if len(text) > 1:
                self.add_text(self.create_coord_str(bbox), text)
                created = True

        if not created:
            return None

        # Correlations
        self._copy_from('image', self.id)
        self.update_correlation()
        self.add_correlation('image', '', self.id)

        for tag in tags or ():
            self.add_tag(tag)
        return self.id

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        r_object.delete(f'ocr:{self.id}')

    def draw_bounding_boxs(self):
        """Return the source image as PNG bytes, with every text region outlined in yellow."""
        # the context manager releases the image file once the RGBA copy is made
        with Image.open(self.get_image_path()) as source_img:
            img = source_img.convert("RGBA")
        draw = ImageDraw.Draw(img)
        for bbox in self.get_coords():
            c1, c2, c3, c4 = bbox
            corners = [tuple(c1), tuple(c2), tuple(c3), tuple(c4)]
            # one segment per side, closing the shape back on the first corner
            for start, end in zip(corners, corners[1:] + corners[:1]):
                draw.line((start, end), fill="yellow", width=2)
        buff = BytesIO()
        img.save(buff, "PNG")
        return buff.getvalue()
# Create and persist the OCR object of an image, unless one already exists.
# obj_id: id of the OCR object to create
# detections: iterable of (bbox, text) pairs, forwarded to Ocr.create()
# tags: tags forwarded to Ocr.create()
# Returns the Ocr object when Ocr.create() reports an id.
# NOTE(review): the nesting of the final `if obj_id:` is not recoverable from this
# flattened view -- confirm whether an already-existing object is returned or None.
def create(obj_id, detections, tags=[]):
obj = Ocr(obj_id)
if not obj.exists():
obj_id = obj.create(detections, tags=tags)
if obj_id:
return obj
# TODO preload languages
def extract_text(image_path, languages, threshold=0.2):
    """
    Run OCR on an image and keep the detections that are confident enough.

    :param image_path: path of the image to read
    :param languages: languages passed to the ``easyocr`` reader
    :param threshold: detections whose score is not strictly above it are dropped
    :return: list of ``(bbox, text)`` tuples
    """
    # imported here so the module can be loaded without pulling in easyocr
    import easyocr
    ocr_reader = easyocr.Reader(languages, verbose=False)
    detections = ocr_reader.readtext(image_path)
    return [(bbox, text) for bbox, text, score in detections if score > threshold]
# TODO OCRS Class
def get_ids():
    """Return the members of the ``ocr:all`` set."""
    all_ocrs_key = 'ocr:all'
    return r_object.smembers(all_ocrs_key)
def get_all_ocrs_objects(filters={}):
    """
    Lazily yield an ``Ocr`` object for every registered OCR id.

    :param filters: accepted but not used by this generator
    """
    yield from map(Ocr, get_ids())
class Ocrs(AbstractDaterangeObjects):
    """
    OCR Objects
    """

    def __init__(self):
        # register the 'ocr' type together with the class used to build single objects
        super().__init__('ocr', Ocr)

    def sanitize_id_to_search(self, name_to_search):
        """Return the search term unchanged (no sanitization is applied yet)."""
        return name_to_search  # TODO
#### API ####
def api_get_ocr(obj_id, translation_target=None):
    """
    API: return the metadata of an OCR object.

    :param obj_id: OCR object id
    :param translation_target: optional language to translate the content into
    :return: ``(meta, 200)`` on success, ``(error dict, 404)`` when the object does not exist
    """
    ocr = Ocr(obj_id)
    if ocr.exists():
        options = {'content', 'icon', 'img', 'language', 'link', 'map', 'translation'}
        return ocr.get_meta(options, translation_target=translation_target), 200
    return {"status": "error", "reason": "Unknown ocr"}, 404

View File

@ -71,7 +71,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
else:
return last_seen
def get_nb_seen(self): # TODO REPLACE ME -> correlation image
def get_nb_seen(self): # TODO REPLACE ME -> correlation image chats
return self.get_nb_correlation('item') + self.get_nb_correlation('message')
def get_nb_seen_by_date(self, date):
@ -127,6 +127,20 @@ class AbstractDaterangeObject(AbstractObject, ABC):
def _add_create(self):
    """Register this object's id in the ``<type>:all`` set."""
    all_ids_key = f'{self.type}:all'
    r_object.sadd(all_ids_key, self.id)
def _copy_from(self, obj_type, obj_id):
    """
    Copy the date information of another object onto this one.

    Reads ``first_seen`` / ``last_seen`` of the source object and, for each
    day of that range, adds the source object's per-day score to this
    object's per-day score. The type-level dates are then updated, the
    object is registered and its own first/last seen are set.
    Nothing is done when the source object has no first or last seen date.

    :param obj_type: type of the source object
    :param obj_id: id of the source object
    """
    source_meta_key = f'meta:{obj_type}:{obj_id}'
    first_seen = r_object.hget(source_meta_key, 'first_seen')
    last_seen = r_object.hget(source_meta_key, 'last_seen')
    if not (first_seen and last_seen):
        return

    for date in Date.get_daterange(first_seen, last_seen):
        # the score is read from the SOURCE object: look it up with obj_id, not self.id
        nb = r_object.zscore(f'{obj_type}:date:{date}', obj_id)
        if nb:
            r_object.zincrby(f'{self.type}:date:{date}', nb, self.id)
    update_obj_date(first_seen, self.type)
    update_obj_date(last_seen, self.type)
    self._add_create()
    self.set_first_seen(first_seen)
    self.set_last_seen(last_seen)
def _add(self, date, obj): # TODO OBJ=None
if not self.exists():
self._add_create()

View File

@ -25,7 +25,7 @@ from lib import Duplicate
from lib.correlations_engine import get_nb_correlations, get_correlations, add_obj_correlation, delete_obj_correlation, delete_obj_correlations, exists_obj_correlation, is_obj_correlated, get_nb_correlation_by_correl_type, get_obj_inter_correlation
from lib.Investigations import is_object_investigated, get_obj_investigations, delete_obj_investigations
from lib.relationships_engine import get_obj_nb_relationships, add_obj_relationship
from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, detect_obj_language, get_obj_language_stats, get_obj_translation, set_obj_translation, delete_obj_translation
from lib.Language import get_obj_languages, add_obj_language, remove_obj_language, detect_obj_language, get_obj_language_stats, get_obj_translation, set_obj_translation, delete_obj_translation, get_obj_main_language
from lib.Tracker import is_obj_tracked, get_obj_trackers, delete_obj_trackers
logging.config.dictConfig(ail_logger.get_config(name='ail'))
@ -225,11 +225,11 @@ class AbstractObject(ABC):
## Correlation ##
def _get_external_correlation(self, req_type, req_subtype, req_id, obj_type):
def get_obj_correlations(self, obj_type, obj_subtype, obj_id, filter_types=[]):
"""
Get object correlation
"""
return get_correlations(req_type, req_subtype, req_id, filter_types=[obj_type])
return get_correlations(obj_type, obj_subtype, obj_id, filter_types=filter_types)
def get_correlation(self, obj_type):
"""
@ -237,6 +237,11 @@ class AbstractObject(ABC):
"""
return get_correlations(self.type, self.subtype, self.id, filter_types=[obj_type])
def get_first_correlation(self, obj_type):
    """
    Return the global id (``type:subtype:id``) of one object of type
    ``obj_type`` correlated to this object, or ``None`` when there is none.

    The entry is popped from the collection returned by ``get_correlation``.
    """
    correlated = self.get_correlation(obj_type).get(obj_type)
    if not correlated:
        return None
    return f'{obj_type}:{correlated.pop()}'
def get_correlations(self, filter_types=[], unpack=False):
"""
Get object correlations
@ -330,6 +335,9 @@ class AbstractObject(ABC):
def get_obj_language_stats(self):
    """Return the language statistics of this object (delegates to the Language helper of the same name)."""
    obj_type = self.type
    subtype = self.get_subtype(r_str=True)
    return get_obj_language_stats(obj_type, subtype, self.id)
def get_main_language(self):
    """Return the main language of this object, as computed by ``get_obj_main_language``."""
    obj_type = self.type
    subtype = self.get_subtype(r_str=True)
    return get_obj_main_language(obj_type, subtype, self.id)
def get_translation(self, language, field=''):
    """
    Return the stored translation of this object.

    :param language: language of the translation to fetch
    :param field: optional field name the translation is attached to
    """
    global_id = self.get_global_id()
    containers = self.get_objs_container()
    return get_obj_translation(global_id, language, field=field, objs_containers=containers)

View File

@ -9,13 +9,12 @@ sys.path.append(os.environ['AIL_BIN'])
##################################
from lib.exceptions import AILObjectUnknown
from lib.ConfigLoader import ConfigLoader
from lib.ail_core import get_all_objects, get_object_all_subtypes, get_objects_with_subtypes, get_default_correlation_objects
from lib import correlations_engine
from lib import relationships_engine
from lib import btc_ail
from lib import Language
from lib import Tag
from lib import chats_viewer
@ -35,10 +34,11 @@ from lib.objects import HHHashs
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
from lib.objects import Images
from lib.objects import Messages
from lib.objects import Ocrs
from lib.objects import Pgps
from lib.objects.Screenshots import Screenshot
from lib.objects import Titles
from lib.objects.UsersAccount import UserAccount
from lib.objects import UsersAccount
from lib.objects import Usernames
config_loader = ConfigLoader()
@ -93,6 +93,8 @@ def get_object(obj_type, subtype, obj_id):
return Images.Image(obj_id)
elif obj_type == 'message':
return Messages.Message(obj_id)
elif obj_type == 'ocr':
return Ocrs.Ocr(obj_id)
elif obj_type == 'screenshot':
return Screenshot(obj_id)
elif obj_type == 'title':
@ -112,7 +114,7 @@ def get_object(obj_type, subtype, obj_id):
elif obj_type == 'pgp':
return Pgps.Pgp(obj_id, subtype)
elif obj_type == 'user-account':
return UserAccount(obj_id, subtype)
return UsersAccount.UserAccount(obj_id, subtype)
elif obj_type == 'username':
return Usernames.Username(obj_id, subtype)
else:
@ -254,7 +256,7 @@ def get_objects_meta(objs, options=set(), flask_context=False):
def get_object_card_meta(obj_type, subtype, id, related_btc=False):
obj = get_object(obj_type, subtype, id)
meta = obj.get_meta(options={'chat', 'chats', 'created_at', 'icon', 'info', 'nb_messages', 'nb_participants', 'threads', 'username'})
meta = obj.get_meta(options={'chat', 'chats', 'created_at', 'icon', 'info', 'map', 'nb_messages', 'nb_participants', 'threads', 'username'})
# meta['icon'] = obj.get_svg_icon()
meta['svg_icon'] = obj.get_svg_icon()
if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash':
@ -274,6 +276,34 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
meta["add_tags_modal"] = Tag.get_modal_add_tags(obj.id, obj.get_type(), obj.get_subtype(r_str=True))
return meta
#### OBJ LANGUAGES ####
def api_detect_language(obj_type, subtype, obj_id):
    """
    Run language detection on an object

    :return: tuple (response dict, HTTP status code):
             ({"language": <detected language>}, 200) on success,
             an error dict with 404 if the object does not exist
    """
    obj = get_object(obj_type, subtype, obj_id)
    if obj.exists():
        return {"language": obj.detect_language()}, 200
    return {"status": "error", "reason": "Unknown obj"}, 404
def api_manually_translate(obj_type, subtype, obj_id, source, translation_target, translation):
    """
    Manually set the source language and, optionally, a translation of an object

    :param source: language to set as the object language
    :param translation_target: language of the provided translation
    :param translation: translated content, nothing is saved if empty
    :return: tuple (response, HTTP status code): (None, 200) on success,
             an error dict with 404 (unknown object) or 400 (size limit, unknown language)
    """
    obj = get_object(obj_type, subtype, obj_id)
    if not obj.exists():
        return {"status": "error", "reason": "Unknown obj"}, 404

    has_translation = bool(translation)
    if has_translation and len(translation) > 200000:  # TODO REVIEW LIMIT
        return {"status": "error", "reason": "Max Size reached"}, 400

    supported_languages = Language.get_translation_languages()
    if source not in supported_languages:
        return {"status": "error", "reason": "Unknown source Language"}, 400

    # Replace the stored language when it differs from the one provided
    current_language = obj.get_language()
    if current_language != source:
        obj.edit_language(current_language, source)

    if has_translation:
        if translation_target not in supported_languages:
            return {"status": "error", "reason": "Unknown target Language"}, 400
        obj.set_translation(translation_target, translation)
        # TODO SANITYZE translation
    return None, 200
#### OBJ FILTERS ####
@ -295,6 +325,8 @@ def is_filtered(obj, filters):
def obj_iterator(obj_type, filters):
if obj_type == 'decoded':
return get_all_decodeds_objects(filters=filters)
elif obj_type == 'image':
return Images.get_all_images_objects(filters=filters)
elif obj_type == 'item':
return get_all_items_objects(filters=filters)
elif obj_type == 'pgp':

View File

@ -89,7 +89,7 @@ class Categ(AbstractModule):
# Search for pattern categories in obj content
for categ, pattern in self.categ_words:
if obj.type == 'message':
if obj.type == 'message' or obj.type == 'ocr':
self.add_message_to_queue(message='0', queue=categ)
else:

View File

@ -128,11 +128,11 @@ class Global(AbstractModule):
else:
self.logger.info(f"Empty Item: {message} not processed")
elif self.obj.type == 'message':
elif self.obj.type == 'message' or self.obj.type == 'ocr':
# TODO send to specific object queue => image, ...
self.add_message_to_queue(obj=self.obj, queue='Item')
elif self.obj.type == 'image':
self.add_message_to_queue(obj=self.obj, queue='Image')
self.add_message_to_queue(obj=self.obj, queue='Image', message=message)
else:
self.logger.critical(f"Empty obj: {self.obj} {message} not processed")

View File

@ -26,7 +26,6 @@ sys.path.append(os.environ['AIL_BIN'])
# Import Project packages #
##################################
from modules.abstract_module import AbstractModule
from lib.objects.Items import Item
from lib.ConfigLoader import ConfigLoader
# from lib import Statistics

View File

@ -218,7 +218,7 @@ class Mixer(AbstractModule):
if self.obj.type == 'item':
self.add_message_to_queue(obj=self.obj, message=gzip64encoded)
else:
self.add_message_to_queue(obj=self.obj)
self.add_message_to_queue(obj=self.obj, message=gzip64encoded)
if __name__ == "__main__":

122
bin/modules/OcrExtractor.py Executable file
View File

@ -0,0 +1,122 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The OcrExtractor Module
======================
"""
##################################
# Import External packages
##################################
import os
import sys
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from lib import chats_viewer
from lib.objects import Messages
from lib.objects import Ocrs
# Default to eng
def get_model_languages(obj, add_en=True):
    """
    Select the languages to use for the OCR model of an object

    The first language found is added, looking in this order at the correlated
    message, chat-subchannel and chat of the object.

    :param obj: object exposing get_first_correlation()
    :param add_en: always include 'en' in the returned languages
    :return: set of languages
    """
    model_languages = {'en'} if add_en else set()

    # Language of the correlated message
    global_id = obj.get_first_correlation('message')
    if global_id:
        lang = Messages.Message(global_id.split(':', 2)[-1]).get_language()
        if lang:
            model_languages.add(lang)
            return model_languages

    # Fallback: main language of the correlated subchannel, then of the chat
    for chat_type in ('chat-subchannel', 'chat'):
        global_id = obj.get_first_correlation(chat_type)
        if not global_id:
            continue
        lang = chats_viewer.get_obj_chat_from_global_id(global_id).get_main_language()
        if lang:
            model_languages.add(lang)
            return model_languages

    return model_languages
# TODO thread
class OcrExtractor(AbstractModule):
    """
    OcrExtractor for AIL framework

    Extracts the text of image objects and creates the matching Ocr objects.
    Images without extracted text are remembered in the Redis cache.
    """

    def __init__(self):
        super().__init__()

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 1

        self.r_cache = ConfigLoader().get_redis_conn("Redis_Cache")

        # Send module state to logs
        self.logger.info(f'Module {self.module_name} initialized')

    def is_cached(self):
        """Check if the current image is flagged in the cache as having no OCR."""
        cache_key = f'ocr:no:{self.obj.id}'
        return self.r_cache.exists(cache_key)

    def add_to_cache(self):
        """Flag the current image in the cache as having no OCR, the key expires after 86400."""
        cache_key = f'ocr:no:{self.obj.id}'
        self.r_cache.setex(cache_key, 86400, 0)

    def compute(self, message):
        """
        Process the current image object

        :param message: queue message, passed as date to Ocr.update_correlation()
                        when the Ocr object already exists
        """
        image = self.get_obj()
        date = message
        ocr = Ocrs.Ocr(image.id)

        if self.is_cached():
            return None

        if self.obj.is_gif():
            self.logger.warning(f'Ignoring GIF: {self.obj.id}')
            return None

        # Already extracted: only refresh the correlation
        if ocr.exists():
            # print(image.id)
            # print('update correlation', date)
            ocr.update_correlation(date=date)
            return None

        path = image.get_filepath()
        languages = get_model_languages(image)
        print(image.id, languages)
        texts = Ocrs.extract_text(path, languages)
        if not texts:
            # Save in cache
            print('no text detected')
            self.add_to_cache()
            return None

        print('create')
        ocr = Ocrs.create(image.id, texts)
        if ocr:
            self.add_message_to_queue(ocr)
        else:
            print('no text')
            self.add_to_cache()
if __name__ == '__main__':
    # Run the module when the file is executed as a script
    OcrExtractor().run()

View File

@ -162,6 +162,9 @@ publish = Tags
subscribe = Image
publish = Tags
[OcrExtractor]
subscribe = Image
publish = Item
######## CORE ########

Binary file not shown.

After

Width:  |  Height:  |  Size: 208 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 143 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 144 KiB

View File

@ -82,6 +82,9 @@ bcrypt>3.1.6
# Ail typo squatting
ail_typo_squatting
# OCR
easyocr
# Tests
nose2>=0.12.0
coverage>=5.5

View File

@ -30,15 +30,21 @@ from lib.objects import ail_objects
# from modules.Telegram import Telegram
from modules.Languages import Languages
from modules.OcrExtractor import OcrExtractor
MODULES = {
'Languages': Languages
'Languages': Languages,
'OcrExtractor': OcrExtractor
}
def reprocess_message_objects(object_type, module_name=None):
if module_name:
module = MODULES[module_name]()
for obj in ail_objects.obj_iterator(object_type, filters={}):
if not obj.exists():
print(f'ERROR: object does not exist, {obj.id}')
continue
module.obj = obj
module.compute(None)
else:
@ -62,7 +68,7 @@ if __name__ == "__main__":
obj_type = args.type
if not is_object_type(obj_type):
raise Exception(f'Invalid Object Type: {obj_type}')
if obj_type not in ['item', 'message']: # TODO image
if obj_type not in ['image', 'item', 'message']:
raise Exception(f'Currently not supported Object Type: {obj_type}')
modulename = args.module

26
update/v5.5/Update.py Executable file
View File

@ -0,0 +1,26 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
sys.path.append(os.environ['AIL_HOME'])
##################################
# Import Project packages
##################################
from update.bin.ail_updater import AIL_Updater
from lib import ail_updates
from lib import chats_viewer
class Updater(AIL_Updater):
    """Default Updater, no version specific step: everything is delegated to AIL_Updater."""

    def __init__(self, version):
        super().__init__(version)
if __name__ == '__main__':
    # Fix the subchannel/message correlations, then run the v5.5 updater
    chats_viewer.fix_correlations_subchannel_message()
    Updater('v5.5').run_update()

40
update/v5.5/Update.sh Executable file
View File

@ -0,0 +1,40 @@
#!/bin/bash

# AIL v5.5 update: stop AIL, refresh the git submodules, install the OCR
# dependency (easyocr) and run the v5.5 Python updater.

# Abort early if the AIL environment is not loaded
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_BIN. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;

export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH

# ANSI colors used by the progress messages
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"

echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait

# SUBMODULES #
git submodule update

echo ""
echo -e $GREEN"Updating python packages ..."$DEFAULT
echo ""
pip install -U easyocr

# NOTE(review): presumably starts the databases required by the Python updater - confirm against LAUNCH.sh
bash ${AIL_BIN}/LAUNCH.sh -lrv
bash ${AIL_BIN}/LAUNCH.sh -lkv

echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v5.5/Update.py
wait
echo ""
echo ""

exit 0

View File

@ -35,6 +35,7 @@ import Flask_config
from blueprints.root import root
from blueprints.crawler_splash import crawler_splash
from blueprints.correlation import correlation
from blueprints.languages_ui import languages_ui
from blueprints.tags_ui import tags_ui
from blueprints.import_export import import_export
from blueprints.investigations_b import investigations_b
@ -52,6 +53,7 @@ from blueprints.objects_etag import objects_etag
from blueprints.objects_hhhash import objects_hhhash
from blueprints.chats_explorer import chats_explorer
from blueprints.objects_image import objects_image
from blueprints.objects_ocr import objects_ocr
from blueprints.objects_favicon import objects_favicon
from blueprints.api_rest import api_rest
@ -97,6 +99,7 @@ app.config['MAX_CONTENT_LENGTH'] = 900 * 1024 * 1024
app.register_blueprint(root, url_prefix=baseUrl)
app.register_blueprint(crawler_splash, url_prefix=baseUrl)
app.register_blueprint(correlation, url_prefix=baseUrl)
app.register_blueprint(languages_ui, url_prefix=baseUrl)
app.register_blueprint(tags_ui, url_prefix=baseUrl)
app.register_blueprint(import_export, url_prefix=baseUrl)
app.register_blueprint(investigations_b, url_prefix=baseUrl)
@ -114,6 +117,7 @@ app.register_blueprint(objects_etag, url_prefix=baseUrl)
app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
app.register_blueprint(chats_explorer, url_prefix=baseUrl)
app.register_blueprint(objects_image, url_prefix=baseUrl)
app.register_blueprint(objects_ocr, url_prefix=baseUrl)
app.register_blueprint(objects_favicon, url_prefix=baseUrl)
app.register_blueprint(api_rest, url_prefix=baseUrl)

View File

@ -0,0 +1,83 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Blueprint Flask: languages endpoints: manual translation and language detection of objects
"""
import os
import sys
import json
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort
from flask_login import login_required, current_user
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import ail_core
from lib import Language
from lib import Tag
from lib.objects import ail_objects
# ============ BLUEPRINT ============
languages_ui = Blueprint('languages_ui', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/chats_explorer'))
# ============ VARIABLES ============
# bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
def create_json_response(data, status_code):
    """
    Build a JSON Flask response

    :param data: JSON serializable payload
    :param status_code: HTTP status code of the response
    :return: tuple (Response, status_code)
    """
    body = json.dumps(data, indent=2, sort_keys=True)
    response = Response(body, mimetype='application/json')
    return response, status_code
# ============ FUNCTIONS ============
# ============= ROUTES ==============
@languages_ui.route("/languages/object/translate", methods=['POST'])
@login_required
@login_read_only
def translate_object():
    """
    Manually set the language and/or the translation of an object

    Form fields: type, subtype, id, language_target (source language of the object),
    target (translation language) and translation.
    Returns the API error as JSON on failure, else redirects back to the referrer.
    """
    # NOTE(review): this endpoint edits an object but only requires the read-only role - confirm
    obj_type = request.form.get('type')
    subtype = request.form.get('subtype')
    obj_id = request.form.get('id')
    source = request.form.get('language_target')
    target = request.form.get('target')
    translation = request.form.get('translation')
    if target == "Don't Translate":
        target = None

    resp = ail_objects.api_manually_translate(obj_type, subtype, obj_id, source, target, translation)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])

    if request.referrer:
        return redirect(request.referrer)
    if obj_type == 'ocr':
        return redirect(url_for('objects_ocr.object_ocr', id=obj_id, target=target))  # TODO change to support all objects
    # A view must always return a response: without referrer, other objects
    # are sent to their correlation page instead of returning None (HTTP 500)
    return redirect(url_for('correlation.show_correlation', type=obj_type, subtype=subtype, id=obj_id))
@languages_ui.route("/languages/object/detect/language", methods=['GET'])
@login_required
@login_read_only
def detect_object_language():
    """
    Run the language detection on an object

    Query parameters: type, subtype, id and target (translation language, only
    forwarded to the OCR page on redirect).
    Returns the API error as JSON on failure, else redirects back to the referrer.
    """
    obj_type = request.args.get('type')
    subtype = request.args.get('subtype')
    obj_id = request.args.get('id')
    target = request.args.get('target')

    resp = ail_objects.api_detect_language(obj_type, subtype, obj_id)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])

    if request.referrer:
        return redirect(request.referrer)
    if obj_type == 'ocr':
        return redirect(url_for('objects_ocr.object_ocr', id=obj_id, target=target))  # TODO change to support all objects
    # A view must always return a response: without referrer, other objects
    # are sent to their correlation page instead of returning None (HTTP 500)
    return redirect(url_for('correlation.show_correlation', type=obj_type, subtype=subtype, id=obj_id))

View File

@ -0,0 +1,116 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
Blueprint Flask: OCR objects endpoints: annotated image, date range search, OCR view ...
'''
import json
import os
import sys
from io import BytesIO
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort, send_file, send_from_directory
from flask_login import login_required, current_user
# Import Role_Manager
from Role_Manager import login_admin, login_analyst, login_read_only, no_cache
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib import Language
from lib import Tag
from lib.objects import Ocrs
from packages import Date
# ============ BLUEPRINT ============
objects_ocr = Blueprint('objects_ocr', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/ocr'))
# ============ VARIABLES ============
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
def create_json_response(data, status_code):
    """
    Build a JSON Flask response

    :param data: JSON serializable payload
    :param status_code: HTTP status code of the response
    :return: tuple (Response, status_code)
    """
    body = json.dumps(data, indent=2, sort_keys=True)
    response = Response(body, mimetype='application/json')
    return response, status_code
# ============ FUNCTIONS ============
@objects_ocr.route('/ocr/<path:filename>')
@login_required
@login_read_only
@no_cache
def ocr_image(filename):
    """
    Serve the PNG returned by Ocr.draw_bounding_boxs() for an OCR object

    :param filename: OCR id, '/' characters are stripped; 404 unless its raw length is between 64 and 70
    """
    if not filename or not 64 <= len(filename) <= 70:
        abort(404)
    ocr_id = filename.replace('/', '')
    png_content = Ocrs.Ocr(ocr_id).draw_bounding_boxs()
    return send_file(BytesIO(png_content), mimetype='image/png')
@objects_ocr.route("/objects/ocrs", methods=['GET'])
@login_required
@login_read_only
def objects_ocrs():
    """
    OCR objects page for a date range

    Query parameters: date_from, date_to and show_objects (the objects metadata
    are only loaded when show_objects is set).
    """
    args = request.args
    date_from = args.get('date_from')
    date_to = args.get('date_to')
    show_objects = args.get('show_objects')

    date_range = Date.sanitise_date_range(date_from, date_to)
    date_from, date_to = date_range['date_from'], date_range['date_to']

    dict_objects = Ocrs.Ocrs().api_get_meta_by_daterange(date_from, date_to) if show_objects else {}

    return render_template("OcrDaterange.html", date_from=date_from, date_to=date_to,
                           dict_objects=dict_objects, show_objects=show_objects)
@objects_ocr.route("/objects/ocrs/post", methods=['POST'])
@login_required
@login_read_only
def objects_ocrs_post():
    """Redirect the submitted date range form (date_from, date_to, show_objects) to the GET OCR page."""
    form = request.form
    search_url = url_for('objects_ocr.objects_ocrs',
                         date_from=form.get('date_from'),
                         date_to=form.get('date_to'),
                         show_objects=form.get('show_objects'))
    return redirect(search_url)
@objects_ocr.route("/objects/ocrs/range/json", methods=['GET'])
@login_required
@login_read_only
def objects_ocrs_range_json():
    """
    JSON chart data of the OCR objects for a date range

    Query parameters: date_from and date_to, sanitised before use.
    """
    date_range = Date.sanitise_date_range(request.args.get('date_from'), request.args.get('date_to'))
    chart_data = Ocrs.Ocrs().api_get_chart_nb_by_daterange(date_range['date_from'], date_range['date_to'])
    return jsonify(chart_data)
@objects_ocr.route("/objects/ocr", methods=['GET'])
@login_required
@login_read_only
def object_ocr():
    """
    OCR object page

    Query parameters: id (OCR id) and target (translation language,
    "Don't Translate" disables the translation).
    Returns the API error as JSON if the OCR cannot be loaded.
    """
    obj_id = request.args.get('id')
    target = request.args.get('target')
    if target == "Don't Translate":
        target = None

    resp = Ocrs.api_get_ocr(obj_id, target)
    if resp[1] != 200:
        return create_json_response(resp[0], resp[1])

    meta = resp[0]
    languages = Language.get_translation_languages()
    tags_modal = Tag.get_modal_add_tags(meta['id'], meta['type'], meta['subtype'])
    return render_template("ShowOcr.html", meta=meta,
                           ail_tags=tags_modal,
                           translation_languages=languages, translation_target=target)
# ============= ROUTES ==============

View File

@ -67,7 +67,12 @@
{% endif %}
{% if message['images'] %}
{% for message_image in message['images'] %}
<img class="object_image mb-1" src="{{ url_for('objects_image.image', filename=message_image)}}">
<img class="object_image mb-1" src="{{ url_for('objects_image.image', filename=message_image['id'])}}">
{% if message_image['ocr'] %}
<span>
<a class="btn btn-info" target="_blank" href="{{ url_for('objects_ocr.object_ocr', id=message_image['id'])}}"><i class="fas fa-expand"></i> OCR</a>
</span>
{% endif %}
{% endfor %}
{% endif %}
{% if message['files-names'] %}

View File

@ -130,6 +130,8 @@
{% include 'correlation/metadata_card_hhhash.html' %}
{% elif dict_object["object_type"] == "image" %}
{% include 'chats_explorer/card_image.html' %}
{% elif dict_object["object_type"] == "ocr" %}
{% include 'objects/ocr/card_ocr.html' %}
{% elif dict_object["object_type"] == "item" %}
{% include 'correlation/metadata_card_item.html' %}
{% elif dict_object["object_type"] == "favicon" %}
@ -313,10 +315,10 @@
<input class="form-check-input" type="checkbox" value="True" id="image_Check" name="image_Check" {%if "image" in dict_object["filter"]%}checked{%endif%}>
<label class="form-check-label" for="image_Check">Image</label>
</div>
{# <div class="form-check">#}
{# <input class="form-check-input" type="checkbox" value="True" id="ocr_Check" name="ocr_Check" {%if "ocr" in dict_object["filter"]%}checked{%endif%}>#}
{# <label class="form-check-label" for="ocr_Check">OCR</label>#}
{# </div>#}
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="ocr_Check" name="ocr_Check" {%if "ocr" in dict_object["filter"]%}checked{%endif%}>
<label class="form-check-label" for="ocr_Check">OCR</label>
</div>
<hr>
<div class="form-check">

View File

@ -0,0 +1,602 @@
<!DOCTYPE html>
<html>
<head>
<title>Ocrs - AIL</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/d3/sparklines.js')}}"></script>
<style>
.input-group .form-control {
position: unset;
}
.line {
fill: none;
stroke: #000;
stroke-width: 2.0px;
}
.bar {
fill: steelblue;
}
.bar:hover{
fill: brown;
cursor: pointer;
}
.bar_stack:hover{
cursor: pointer;
}
.pie_path:hover{
cursor: pointer;
}
.svgText {
pointer-events: none;
}
div.tooltip {
position: absolute;
text-align: center;
padding: 2px;
font: 12px sans-serif;
background: #ebf4fb;
border: 2px solid #b7ddf2;
border-radius: 8px;
pointer-events: none;
color: #000000;
}
</style>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'sidebars/sidebar_objects.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="row">
<div class="col-xl-10">
<div class="mt-1" id="barchart_type"></div>
{# {% include 'image/block_images_search.html' %}#}
</div>
<div class="col-xl-2">
<div class="card mb-3 mt-2" style="background-color:#d9edf7;">
<div class="card-body text-center py-2">
<h6 class="card-title" style="color:#286090;">Select a date range :</h6>
<form action="{{ url_for('objects_ocr.objects_ocrs_post') }}" id="hash_selector_form" method='post'>
<div class="input-group" id="date-range-from">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-from-input" placeholder="yyyy-mm-dd" value="{{ date_from }}" name="date_from" autocomplete="off">
</div>
<div class="input-group" id="date-range-to">
<div class="input-group-prepend"><span class="input-group-text"><i class="far fa-calendar-alt" aria-hidden="true"></i></span></div>
<input class="form-control" id="date-range-to-input" placeholder="yyyy-mm-dd" value="{{ date_to }}" name="date_to" autocomplete="off">
</div>
<div class="form-check my-1">
<input class="form-check-input" type="checkbox" id="checkbox-input-show" name="show_objects" value="True" {% if show_objects %}checked{% endif %}>
<label class="form-check-label" for="checkbox-input-show">
<span style="color:#286090; font-size: 14px;">
Show Ocrs <i class="fas fa-key"></i>
</span>
</label>
</div>
<button class="btn btn-primary" style="text-align:center;">
<i class="fas fa-copy"></i> Search
</button>
</form>
</div>
</div>
<div id="pie_chart_encoded">
</div>
<div id="pie_chart_top5_types">
</div>
</div>
</div>
{% if dict_objects %}
{% if date_from|string == date_to|string %}
<h3> {{ date_from }} Ocrs Name: </h3>
{% else %}
<h3> {{ date_from }} to {{ date_to }} Ocrs Name: </h3>
{% endif %}
<table id="tableb64" class="table table-striped table-bordered">
<thead class="bg-dark text-white">
<tr>
<th></th>
<th>First Seen</th>
<th>Last Seen</th>
<th>Total</th>
<th>Last days</th>
</tr>
</thead>
<tbody style="font-size: 15px;">
{% for obj_id in dict_objects %}
<tr>
<td><a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=ocr&id={{ obj_id }}">{{ dict_objects[obj_id]['id'] }}</a></td>
<td>{{ dict_objects[obj_id]['first_seen'] }}</td>
<td>{{ dict_objects[obj_id]['last_seen'] }}</td>
<td>{{ dict_objects[obj_id]['nb_seen'] }}</td>
<td id="sparklines_{{ obj_id }}" style="text-align:center;"></td>
</tr>
{% endfor %}
</tbody>
</table>
{% else %}
{% if show_objects %}
{% if date_from|string == date_to|string %}
<h3> {{ date_from }}, No OCR</h3>
{% else %}
<h3> {{ date_from }} to {{ date_to }}, No OCR</h3>
{% endif %}
{% endif %}
{% endif %}
</div>
</div>
</div>
<script>
var chart = {};
$(document).ready(function(){
    $("#page-Decoded").addClass("active");
    $("#nav_ocr").addClass("active");

    // Options shared by both date range pickers: the range is stored in the
    // two text inputs, 'from' and 'to'
    function date_range_picker_options() {
        return {
            separator : ' to ',
            getValue: function()
            {
                // Read the value of the inputs, not of their wrapper div
                // (a div has no value, the range was always reported empty)
                if ($('#date-range-from-input').val() && $('#date-range-to-input').val() )
                    return $('#date-range-from-input').val() + ' to ' + $('#date-range-to-input').val();
                else
                    return '';
            },
            setValue: function(s,s1,s2)
            {
                $('#date-range-from-input').val(s1);
                $('#date-range-to-input').val(s2);
            },
        };
    }

    $('#date-range-from').dateRangePicker(date_range_picker_options());
    $('#date-range-to').dateRangePicker(date_range_picker_options());

    $('#date-range-from').data('dateRangePicker').setDateRange('{{date_from}}','{{date_to}}');
    $('#date-range-to').data('dateRangePicker').setDateRange('{{date_from}}','{{date_to}}');

    $('#tableb64').DataTable({
        "aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
        "iDisplayLength": 10,
        "order": [[ 3, "desc" ]]
    });

    chart.stackBarChart = barchart_type_stack("{{ url_for('objects_ocr.objects_ocrs_range_json') }}?date_from={{date_from}}&date_to={{date_to}}", 'id');

    // Fit the chart to its container, now and on every window resize
    chart.onResize();
    $(window).on("resize", function() {
        chart.onResize();
    });
});
// Show or hide the side navigation menu and resize the main content accordingly
function toggle_sidebar(){
    var nav_menu = $('#nav_menu');
    var side_menu = $('#side_menu');
    var core_content = $('#core_content');

    if (nav_menu.is(':visible')) {
        nav_menu.hide();
        side_menu.removeClass('border-right');
        side_menu.removeClass('col-lg-2');
        core_content.removeClass('col-lg-10');
        return;
    }

    nav_menu.show();
    side_menu.addClass('border-right');
    side_menu.addClass('col-lg-2');
    core_content.addClass('col-lg-10');
}
</script>
<script>
{% for obj_id in dict_objects %}
sparkline("sparklines_{{ obj_id }}", {{ dict_objects[obj_id]['sparkline'] }}, {});
{% endfor %}
</script>
<script>
var margin = {top: 20, right: 100, bottom: 55, left: 45},
width = 1000 - margin.left - margin.right,
height = 500 - margin.top - margin.bottom;
var x = d3.scaleBand().rangeRound([0, width]).padding(0.1);
var y = d3.scaleLinear().rangeRound([height, 0]);
var xAxis = d3.axisBottom(x);
var yAxis = d3.axisLeft(y);
var color = d3.scaleOrdinal(d3.schemeSet3);
var svg = d3.select("#barchart_type").append("svg")
.attr("id", "thesvg")
.attr("viewBox", "0 0 1000 500")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.append("g")
.attr("transform", "translate(" + margin.left + "," + margin.top + ")");
// Draw a stacked bar chart in the global `svg` from the JSON array fetched at `url`.
// Every entry of the array is one bar: its `date` key is the x label, all the other
// keys (taken from the first entry) are stacked as series.
// The `id` parameter is not used by this function.
function barchart_type_stack(url, id) {
d3.json(url)
.then(function(data){
var labelVar = 'date'; //A
var varNames = d3.keys(data[0])
.filter(function (key) { return key !== labelVar;}); //B
// Compute, for each bar, the [y0, y1] interval of every stacked series and the bar total
data.forEach(function (d) { //D
var y0 = 0;
d.mapping = varNames.map(function (name) {
return {
name: name,
label: d[labelVar],
y0: y0,
y1: y0 += +d[name]
};
});
d.total = d.mapping[d.mapping.length - 1].y1;
});
x.domain(data.map(function (d) { return (d.date); })); //E
y.domain([0, d3.max(data, function (d) { return d.total; })]);
// X axis: the tick labels are clickable, the target URL depends on the template date range
svg.append("g")
.attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis)
.selectAll("text")
.attr("class", "bar")
{% if date_from|string == date_to|string and type is none %}
.on("click", function (d) { window.location.href = "{{ url_for('objects_ocr.objects_ocrs') }}?date_from={{date_from}}&date_to={{date_to}}&type_id="+d })
.attr("transform", "rotate(-18)" )
{% elif date_from|string == date_to|string and type is not none %}
.on("click", function (d) { window.location.href = "{{ url_for('objects_ocr.objects_ocrs') }}?date_from="+d+'&date_to='+d })
.attr("transform", "rotate(-18)" )
{% else %}
.on("click", function (d) { window.location.href = "{{ url_for('objects_ocr.objects_ocrs') }}?date_from="+d+'&date_to='+d })
.attr("transform", "rotate(-40)" )
{% endif %}
.style("text-anchor", "end");
// Y axis
svg.append("g")
.attr("class", "y axis")
.call(yAxis)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 6)
.attr("dy", ".71em")
.style("text-anchor", "end");
// One group per bar, translated to its x position
var selection = svg.selectAll(".series")
.data(data)
.enter().append("g")
.attr("class", "series")
.attr("transform", function (d) { return "translate(" + x((d.date)) + ",0)"; });
// One rectangle per stacked series, with popover on hover and navigation on click
selection.selectAll("rect")
.data(function (d) { return d.mapping; })
.enter().append("rect")
.attr("class", "bar_stack")
.attr("width", x.bandwidth())
.attr("y", function (d) { return y(d.y1); })
.attr("height", function (d) { return y(d.y0) - y(d.y1); })
.style("fill", function (d) { return color(d.name); })
.style("stroke", "grey")
.on("mouseover", function (d) { showPopover.call(this, d); })
.on("mouseout", function (d) { removePopovers(); })
{% if date_from|string == date_to|string and type is none %}
.on("click", function(d){ window.location.href = "{{ url_for('objects_ocr.objects_ocrs') }}" +'?date_from={{date_from}}&date_to={{date_to}}&type_id='+d.label+'&encoding='+d.name; });
{% elif date_from|string == date_to|string and type is not none %}
.on("click", function(d){ window.location.href = "{{ url_for('objects_ocr.objects_ocrs') }}" +'?type_id={{type_id}}&date_from='+d.label+'&date_to='+d.label+'&encoding='+d.name; });
{% else %}
.on("click", function(d){ window.location.href = "{{ url_for('objects_ocr.objects_ocrs') }}" +'?type_id='+ d.name +'&date_from='+d.label+'&date_to='+d.label; });
{% endif %}
// Total displayed above every non-empty bar
data.forEach(function(d) {
if(d.total !== 0){
svg.append("text")
.attr("class", "bar")
.attr("dy", "-.35em")
.attr('x', x(d.date) + x.bandwidth()/2)
.attr('y', y(d.total))
{% if date_from|string == date_to|string and type is none %}
.on("click", function () {window.location.href = "{{ url_for('objects_ocr.objects_ocrs') }}"+'?date_from={{date_from}}&date_to={{date_to}}&type_id='+d.date })
{% elif date_from|string == date_to|string and type is not none %}
.on("click", function () {window.location.href = "{{ url_for('objects_ocr.objects_ocrs') }}?type_id={{type_id}}&date_from="+d.date+'&date_to='+d.date })
{% else %}
.on("click", function () {window.location.href = "{{ url_for('objects_ocr.objects_ocrs') }}"+'?date_from='+d.date+'&date_to='+d.date })
{% endif %}
.style("text-anchor", "middle")
.text(d.total);
}
});
drawLegend(varNames);
});
}
// Draw the chart legend: one colored square and one label per series name,
// listed in reverse order, one row every 20px
function drawLegend (varNames) {
    var reversed_names = varNames.slice().reverse();

    var legend_rows = svg.selectAll(".legend")
        .data(reversed_names)
        .enter().append("g")
        .attr("class", "legend")
        .attr("transform", function (d, i) { return "translate(0," + i * 20 + ")"; });

    // Series color
    legend_rows.append("rect")
        .attr("x", 943)
        .attr("width", 10)
        .attr("height", 10)
        .style("fill", color)
        .style("stroke", "grey");

    // Series name, right-aligned next to the colored square
    legend_rows.append("text")
        .attr("class", "svgText")
        .attr("x", 941)
        .attr("y", 6)
        .attr("dy", ".35em")
        .style("text-anchor", "end")
        .text(function (name) { return name; });
}
// Remove every popover element from the page
function removePopovers () {
    $('.popover').remove();
}
/**
 * Attach and show a Bootstrap popover on the element bound to `this`,
 * then fill it with the datum's name, label and value.
 *
 * The popover markup only contains empty placeholder spans; the values are
 * written afterwards with .text(), so they are inserted as text, not HTML.
 *
 * @param {Object} d  Datum with `name`, `label`, and either `value` or the
 *                    pair `y0` / `y1` (the value then shown is y1 - y0).
 */
function showPopover (d) {
    var $target = $(this);

    $target.popover({
        title: "<b><span id='tooltip-id-name-bar'></span></b>",
        placement: 'top',
        container: 'body',
        trigger: 'manual',
        html : true,
        content: function() {
            var labelPlaceholder = "<span id='tooltip-id-label'></span>";
            var valuePlaceholder = "<br/>num: <span id='tooltip-id-value-bar'></span>";
            return labelPlaceholder + valuePlaceholder;
        }
    });
    $target.popover('show');

    // The placeholders exist only once the popover is shown.
    $("#tooltip-id-name-bar").text(d.name);
    $("#tooltip-id-label").text(d.label);

    var amount;
    if (d.value) {
        amount = d.value;
    } else {
        amount = d.y1 - d.y0;
    }
    $("#tooltip-id-value-bar").text(d3.format(",")(amount));
}
/**
 * Resize the element with id "thesvg" to the width of its parent while
 * keeping a 1000:500 aspect ratio.
 */
chart.onResize = function () {
    var aspectRatio = 1000 / 500;
    var $svgElement = $("#thesvg");
    var availableWidth = $svgElement.parent().width();

    $svgElement.attr("width", availableWidth);
    $svgElement.attr("height", availableWidth / aspectRatio);
};

// Expose the chart object globally.
window.chart = chart;
</script>
<script>
/**
 * Draw a pie chart inside the element whose id is `id`, using JSON fetched
 * from `url_json`.
 *
 * Each entry of the fetched array is read through its `name` and `value`
 * properties. Hovering a slice shows a tooltip; clicking it navigates to
 * `pie_on_click_url` with the slice name appended.
 *
 * @param {string} id                Id of the container element (without "#").
 * @param {string} url_json          URL returning the JSON data.
 * @param {string} pie_on_click_url  URL prefix the slice name is appended to.
 */
function draw_pie_chart(id, url_json, pie_on_click_url) {
    var pieWidth = 200;
    var pieHeight = 200;
    var piePadding = 10;
    var sliceOpacity = .8;
    var viewBoxSide = Math.min(pieWidth, pieHeight);
    var outerRadius = Math.min(pieWidth - piePadding, pieHeight - piePadding) / 2;
    var sliceColor = d3.scaleOrdinal(d3.schemeSet3);

    // Tooltip container, appended to <body> and hidden until a slice is hovered.
    var tooltip = d3.select("body")
        .append("div")
        .attr("class", "tooltip")
        .style("opacity", 0);

    var pieSvg = d3.select("#" + id)
        .append('svg')
        .attr("width", '100%')
        .attr("height", '100%')
        .attr('viewBox', '0 0 ' + viewBoxSide + ' ' + viewBoxSide)
        .attr('preserveAspectRatio', 'xMinYMin');

    // Group centred in the viewBox so that arcs are drawn around the middle.
    var pieGroup = pieSvg.append('g')
        .attr('transform', 'translate(' + (pieWidth / 2) + ',' + (pieHeight / 2) + ')');

    var sliceArc = d3.arc()
        .innerRadius(0)
        .outerRadius(outerRadius);

    // Show the tooltip next to the pointer and fill it with the slice data.
    function showTooltip(slice) {
        // Drop the placeholders left over from the previous hover.
        $("#tooltip-id-name").remove();
        $("#tooltip-id-value").remove();

        var markup = "<b><span id='tooltip-id-name'></span></b><br/>" +
                     "<br/>" +
                     "<i>Decoded</i>: <span id='tooltip-id-value'></span><br/>";

        tooltip.transition()
            .duration(200)
            .style("opacity", .9);
        tooltip.html(markup)
            .style("left", (d3.event.pageX) + "px")
            .style("top", (d3.event.pageY - 28) + "px");

        // Values are written with .text() so they are inserted as plain text.
        $("#tooltip-id-name").text(slice.data.name);
        $("#tooltip-id-value").text(slice.data.value);
    }

    // Fade the tooltip out.
    function hideTooltip() {
        tooltip.transition()
            .duration(500)
            .style("opacity", 0);
    }

    // Navigate to the click URL for the given slice.
    function openSlice(slice) {
        window.location.href = pie_on_click_url + slice.data.name;
    }

    d3.json(url_json).then(function (data) {
        // sort(null) keeps the slices in the order of the fetched data.
        var layout = d3.pie()
            .value(function (entry) { return entry.value; })
            .sort(null);

        pieGroup.selectAll('path')
            .data(layout(data))
            .enter()
            .append("g")
            .append('path')
            .attr('d', sliceArc)
            .attr('fill', function (slice, index) { return sliceColor(index); })
            .attr('class', 'pie_path')
            .on("mouseover", showTooltip)
            .on("mouseout", hideTooltip)
            .on("click", openSlice)
            .style('opacity', sliceOpacity)
            .style('stroke', 'white');
    });
}
</script>
<script>
// Draw a bar chart from the JSON array fetched at `url`.
// Each entry is read through its `date` (x band) and `value` (bar height) properties.
// NOTE(review): `id` is only referenced by the commented-out svg creation below, so it is
// currently unused; the function draws into a variable named `svg` that is not declared
// here and must exist in an outer scope - confirm this is intended.
function barchart_type(url, id) {
// Drawing area: 960x500 minus the margins.
var margin = {top: 20, right: 20, bottom: 70, left: 40};
var width = 960 - margin.left - margin.right;
var height = 500 - margin.top - margin.bottom;
// Band scale for the categories on x, linear scale for the values on y (inverted range: 0 at the bottom).
var x = d3.scaleBand().rangeRound([0, width]).padding(0.1);
var y = d3.scaleLinear().rangeRound([height, 0]);
var xAxis = d3.axisBottom(x)
//.tickFormat(d3.time.format("%Y-%m"));
var yAxis = d3.axisLeft(y)
.ticks(10);
// Disabled: creation of a dedicated svg inside `id`.
/*var svg = d3.select(id).append("svg")
.attr("width", width + margin.left + margin.right)
.attr("height", height + margin.top + margin.bottom)
.attr("id", "thesvg")
.append("g")
.attr("transform",
"translate(" + margin.left + "," + margin.top + ")");*/
d3.json(url)
.then(function(data){
// Coerce every value to a number before computing the scales.
data.forEach(function(d) {
d.value = +d.value;
});
// One band per date; y runs from 0 to the largest value.
x.domain(data.map(function(d) { return d.date; }));
y.domain([0, d3.max(data, function(d) { return d.value; })]);
// X axis at the bottom of the drawing area. Tick labels are rotated by -20 degrees when the
// template variable daily_type_chart is set, otherwise by -70 degrees and given the class "bar".
var label = svg.append("g")
.attr("class", "x axis")
.attr("transform", "translate(0," + height + ")")
.call(xAxis)
.selectAll("text")
.style("text-anchor", "end")
.attr("dx", "-.8em")
.attr("dy", "-.55em")
{% if daily_type_chart %}
.attr("transform", "rotate(-20)" );
{% else %}
.attr("transform", "rotate(-70)" )
.attr("class", "bar")
{% endif %}
// Y axis with a rotated caption.
// NOTE(review): the caption text is "Value ($)" - confirm this is the intended label for this chart.
svg.append("g")
.attr("class", "y axis")
.call(yAxis)
.append("text")
.attr("transform", "rotate(-90)")
.attr("y", 6)
.attr("dy", ".71em")
.style("text-anchor", "end")
.text("Value ($)");
// One rect per entry. The selector "bar" is an element-type selector, not the class ".bar";
// presumably it matches nothing so that every entry ends up in the enter selection - verify.
var bar = svg.selectAll("bar")
.data(data)
.enter().append("rect")
.attr("class", "bar")
//.style("fill", "steelblue")
.attr("x", function(d) { return x(d.date); })
.attr("width", x.bandwidth())
.attr("y", function(d) { return y(d.value); })
.attr("height", function(d) { return height - y(d.value); })
// Print the value centred above every bar whose value is not 0.
data.forEach(function(d) {
if(d.value != 0){
svg.append("text")
.attr("class", "bar")
.attr("dy", "-.35em")
//.text(function(d) { return d.value; });
.text(d.value)
.style("text-anchor", "middle")
.attr('x', x(d.date) + x.bandwidth()/2)
.attr('y', y(d.value));
}
});
});
}
</script>
</body>
</html>

View File

@ -0,0 +1,133 @@
<!DOCTYPE html>
<html>
<head>
<title>OCR - AIL</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
<link href="{{ url_for('static', filename='css/ail-project.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js') }}"></script>
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'sidebars/sidebar_objects.html' %}
<div class="col-12 col-lg-10" id="core_content">
{# Render the OCR object card in standalone mode: is_correlation=False enables the card's language/translation editor #}
{% with meta=meta, is_correlation=False %}
{% include 'objects/ocr/card_ocr.html' %}
{% endwith %}
{# {% if meta['extracted_matches'] %}#}
{# <div id="accordion_extracted" class="mb-3 mx-3">#}
{# <div class="card">#}
{# <div class="card-header py-1" id="heading_extracted">#}
{# <div class="row">#}
{# <div class="col-11">#}
{# <div class="mt-2">#}
{# <img id="misp-logo" src="{{ url_for('static', filename='image/ail-icon.png')}}" height="32"> Extracted&nbsp;&nbsp;#}
{# <div class="badge badge-warning">{{meta['extracted_matches']|length}}</div>#}
{# </div>#}
{# </div>#}
{# <div class="col-1">#}
{# <button class="btn btn-link btn-lg py-2 float-right rotate down" data-toggle="collapse" data-target="#collapse_extracted" aria-expanded="true" aria-controls="collapseDecoded">#}
{# <i class="fas fa-chevron-circle-down"></i>#}
{# </button>#}
{# </div>#}
{# </div>#}
{# </div>#}
{##}
{# <div id="collapse_extracted" class="collapse" aria-labelledby="heading_extracted" data-parent="#accordion_extracted">#}
{# <div class="card-body">#}
{# <table id="table_extracted" class="table table-striped">#}
{# <thead class="thead-dark">#}
{# <tr>#}
{# <th>Type</th>#}
{# <th>ID</th>#}
{# <th>Extracted</th>#}
{# </tr>#}
{# </thead>#}
{# <tbody>#}
{# {% for match in meta['extracted_matches'] %}#}
{# <tr>#}
{# <td>#}
{# <svg height="26" width="26">#}
{# <g class="nodes">#}
{# <circle cx="13" cy="13" r="13" fill="{{ meta['extracted_matches'][match]['icon']['color'] }}"></circle>#}
{# <text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon {{ meta['extracted_matches'][match]['icon']['style'] }}" font-size="16px">{{ meta['extracted_matches'][match]['icon']['icon'] }}</text>#}
{# </g>#}
{# </svg>#}
{# {{ meta['extracted_matches'][match]['subtype'] }}#}
{# </td>#}
{# <td>{{ meta['extracted_matches'][match]['id'] }}</td>#}
{# <td>#}
{# {% for row in meta['extracted_matches'][match]['matches'] %}#}
{# <a href="#{{ row[0] }}:{{row[1] }}">{{ row[2] }}</a><br>#}
{# {% endfor %}#}
{# </td>#}
{# </tr>#}
{# {% endfor %}#}
{# </tbody>#}
{# </table>#}
{# </div>#}
{# </div>#}
{##}
{# </div>#}
{# </div>#}
{# {% endif %}#}
{# Translation block shared with the chats explorer; translate_url points back to this OCR object's own page #}
{% with translate_url=url_for('objects_ocr.object_ocr', id=meta['id']), obj_id=meta['id'] %}
{% include 'chats_explorer/block_translation.html' %}
{% endwith %}
<div class="mb-4"></div>
</div>
</div>
</div>
<script>
var chart = {};
$(document).ready(function(){
    // Highlight the top navigation tab for this page.
    $("#page-Decoded").addClass("active");
    // Highlight the OCR entry of the objects sidebar included above.
    // The sidebar link for this page carries the id "nav_ocr"; the previous
    // selector "#nav_chat" targeted a different entry, so OCR was never marked active.
    $("#nav_ocr").addClass("active");
});
/**
 * Show or hide the side navigation menu and adjust the layout classes of the
 * sidebar column and of the main content column accordingly.
 */
function toggle_sidebar(){
    var $menu = $('#nav_menu');
    var $sidebar = $('#side_menu');
    var $content = $('#core_content');

    if ($menu.is(':visible')) {
        // Collapse: hide the menu and release the column widths.
        $menu.hide();
        $sidebar.removeClass('border-right col-lg-2');
        $content.removeClass('col-lg-10');
        return;
    }

    // Expand: show the menu and restore the two-column layout.
    $menu.show();
    $sidebar.addClass('border-right col-lg-2');
    $content.addClass('col-lg-10');
}
</script>
</body>
</html>

View File

@ -0,0 +1,165 @@
<link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
{% with modal_add_tags=ail_tags %}
{% include 'modals/add_tags.html' %}
{% endwith %}
{% include 'modals/edit_tag.html' %}
<style>
.object_image {
filter: blur(5px);
}
</style>
<div class="card my-1">
<div class="card-header">
<h4 class="text-secondary">{{ meta["id"] }} :</h4>
<ul class="list-group mb-2">
{# Object type row: the object's icon drawn on a fixed orange disc, followed by the object type #}
<li class="list-group-item py-0">
<table class="table">
<tbody style="font-size: 15px;">
<tr>
<td>
<svg height="26" width="26">
<g class="nodes">
<circle cx="13" cy="13" r="13" fill="orange"></circle>
<text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="{{ meta["svg_icon"]["style"] }}" font-size="16px">{{ meta["svg_icon"]["icon"] }}</text>
</g>
</svg>
{{ meta['type'] }}
</td>
</tr>
</tbody>
</table>
</li>
{# Collapsible panel showing the source image with an image map: one polygon area per entry of meta['map'], titled with its text #}
<li class="list-group-item py-0">
    <div id="accordion_image" class="my-3">
        <div class="card">
            <div class="card-header py-1" id="headingImage">
                {# The panel below is rendered open (class "show"), so the toggle must start in the expanded state: no "collapsed" class and aria-expanded="true" #}
                <button class="btn w-100 rotate" data-toggle="collapse" data-target="#collapseImage" aria-expanded="true" aria-controls="collapseImage">
                    <span class="row text-left">
                        <div class="col-11">
                            <span class="mt-2">
                                <i class="far fa-image"></i> Show Image&nbsp;&nbsp;
                            </span>
                        </div>
                        <div class="col-1 text-primary">
                            <i class="fas fa-chevron-circle-down"></i>
                        </div>
                    </span>
                </button>
            </div>
            <div id="collapseImage" class="collapse show" aria-labelledby="headingImage" data-parent="#accordion_image">
                <div class="card-body text-center">
                    {% include 'objects/image/block_blur_img_slider.html' %}
                    {# The image is blurred by the .object_image rule of this template; alt gives assistive technologies a text equivalent #}
                    <img class="object_image mb-1" usemap="#image-map" alt="OCR source image" src="{{ url_for('objects_ocr.ocr_image', filename=meta['id'])}}">
                    <map name="image-map">
                        {% for c in meta['map'] %}
                        <area shape="poly" coords="{{ c[0] }}" title="{{ c[1] }}">
                        {% endfor %}
                    </map>
                </div>
            </div>
        </div>
    </div>
</li>
{# Extracted text of the object, its translation when one is available, and (outside the correlation view) the language / translation editor #}
<li class="list-group-item py-0">
<pre class="my-0" style="white-space: pre-wrap;">{{ meta['content'] }}</pre>
{% if meta['translation'] %}
<hr class="m-1">
<pre class="my-0 text-secondary" style="white-space: pre-wrap;">{{ meta['translation'] }}</pre>
{% endif %}
{# The editor is only rendered when the card is not embedded with is_correlation set #}
{% if not is_correlation %}
<div class="my-1">
{# "/" is replaced by "_" in the object id before it is used in the collapse element id and its data-target selector #}
{% set mess_id_escape= meta['id'] | replace("/", "_") %}
<span class="btn btn-outline-dark p-0 px-1" type="button" data-toggle="collapse" data-target="#collapseTrans{{ mess_id_escape }}" aria-expanded="false" aria-controls="collapseTrans{{ mess_id_escape }}">
<i class="fas fa-language"></i> {% if meta['language'] %}{{ meta['language'] }}{% endif %}
</span>
<div class="collapse" id="collapseTrans{{ mess_id_escape }}">
<div class="card card-body">
{# Posts the object type and id together with the selected language and, when a translation target is set, the edited translation #}
{# NOTE(review): the hidden inputs use the generic ids "type" and "id"; confirm they cannot collide when several cards are rendered on one page #}
<form method="post" action="{{ url_for('languages_ui.translate_object') }}">
<input type="text" id="type" name="type" value="{{meta['type']}}" hidden>
<input type="text" id="id" name="id" value="{{meta['id']}}" hidden>
<span class="badge badge-primary">Source:</span>
<span class="">
{# NOTE(review): this select is labelled "Source" but is submitted as language_target - confirm against the form handler #}
{# Changing the language clears the translation field #}
<select id="language_target" name="language_target" class="form-select" aria-label="Message Language" onchange="$('#translation').val('');">
<option selected value="{{ meta['language'] }}">{{ meta['language'] }}</option>
{% for language in translation_languages %}
<option value="{{ language }}">{{ translation_languages[language] }}</option>
{% endfor %}
</select>
</span>
{# With a translation target the translation text is editable too; otherwise only the language can be updated #}
{% if translation_target %}
<input type="text" id="target" name="target" value="{{translation_target}}" hidden>
&nbsp;&nbsp;&nbsp;&nbsp;<span class="badge badge-primary">Target:</span><span>{{translation_target}}</span>
<textarea class="form-control" id="translation" name="translation">{{ meta['translation'] }}</textarea>
<button class="btn btn-dark" type="submit">
<i class="fas fa-pen-alt"> Update Language or Translation</i>
</button>
{% else %}
<button class="btn btn-dark" type="submit">
<i class="fas fa-pen-alt"> Update Language</i>
</button>
{% endif %}
</form>
{# Link to the language detection endpoint for this object #}
<div>
<a class="btn btn-primary" href="{{ url_for('languages_ui.detect_object_language')}}?type={{ meta['type'] }}&id={{ meta['id'] }}">
<i class="fas fa-redo"></i> Detect Language
</a>
</div>
</div>
</div>
</div>
{% endif %}
</li>
{# Tags row: one button per tag opening the edit-tag modal, followed by a button opening the add-tags modal #}
<li class="list-group-item py-0">
<div class="my-2">
Tags:
{% for tag in meta['tags'] %}
{# Button colour cycles through the first five entries of bootstrap_label; the data attributes identify the tag and object for the modal #}
<button class="btn btn-{{ bootstrap_label[loop.index0 % 5] }}"
data-toggle="modal" data-target="#edit_tags_modal"
data-tagid="{{ tag }}" data-objtype="{{ meta['type'] }}" data-objsubtype="" data-objid="{{ meta["id"] }}">
{{ tag }}
</button>
{% endfor %}
<button type="button" class="btn btn-light" data-toggle="modal" data-target="#add_tags_modal">
<i class="far fa-plus-square"></i>
</button>
</div>
</li>
</ul>
{# Investigations: include the registration modal for this OCR object, then the button that opens it #}
{% with obj_type='ocr', obj_id=meta['id'], obj_subtype='' %}
{% include 'modals/investigations_register_obj.html' %}
{% endwith %}
<button type="button" class="btn btn-primary" data-toggle="modal" data-target="#investigations_register_obj_modal">
<i class="fas fa-microscope"></i> Investigations
</button>
{# Right-aligned navigation: from the correlation view link to the object page, otherwise link to the object's correlations #}
<span class="mb-2 float-right">
{% if is_correlation %}
<a href="{{ url_for('objects_ocr.object_ocr')}}?subtype={{ meta['subtype'] }}&id={{ meta['id'] }}">
<button class="btn btn-info"><i class="fas fa-expand"></i> Show Object</button>
</a>
{% else %}
<a href="{{ url_for('correlation.show_correlation')}}?type={{ meta['type'] }}&subtype={{ meta['subtype'] }}&id={{ meta['id'] }}">
<button class="btn btn-info"><i class="far fa-eye"></i> Correlations &nbsp;
</button>
</a>
{% endif %}
</span>
</div>
</div>

View File

@ -82,6 +82,12 @@
<span>Image</span>
</a>
</li>
<li class="nav-item">
<a class="nav-link" href="{{url_for('objects_ocr.objects_ocrs')}}" id="nav_ocr">
<i class="fas fa-expand"></i>
<span>OCR</span>
</a>
</li>
<li class="nav-item">
<a class="nav-link" href="{{url_for('objects_title.objects_titles')}}" id="nav_title">
<i class="fas fa-heading"></i>