mirror of https://github.com/CIRCL/AIL-framework
chg: [message] show trackers + modules matches
parent
a3a664b7f1
commit
5ec0d7f0cf
|
@ -40,6 +40,11 @@ r_key = regex_helper.generate_redis_cache_key('extractor')
|
|||
|
||||
# TODO UI Link
|
||||
|
||||
CORRELATION_TO_EXTRACT = {
|
||||
'item': ['cve', 'cryptocurrency', 'title', 'username'],
|
||||
'message': ['cve', 'cryptocurrency', 'username']
|
||||
}
|
||||
|
||||
MODULES = {
|
||||
'infoleak:automatic-detection="credit-card"': CreditCards(queue=False),
|
||||
'infoleak:automatic-detection="iban"': Iban(queue=False),
|
||||
|
@ -57,9 +62,9 @@ tools = Tools(queue=False)
|
|||
for tool_name in tools.get_tools():
|
||||
MODULES[f'infoleak:automatic-detection="{tool_name}-tool"'] = tools
|
||||
|
||||
def get_correl_match(extract_type, obj_id, content):
|
||||
def get_correl_match(extract_type, obj, content):
|
||||
extracted = []
|
||||
correl = correlations_engine.get_correlation_by_correl_type('item', '', obj_id, extract_type)
|
||||
correl = correlations_engine.get_correlation_by_correl_type(obj.type, obj.get_subtype(r_str=True), obj.id, extract_type)
|
||||
to_extract = []
|
||||
map_subtype = {}
|
||||
map_value_id = {}
|
||||
|
@ -75,18 +80,18 @@ def get_correl_match(extract_type, obj_id, content):
|
|||
sha256_val = sha256(value.encode()).hexdigest()
|
||||
map_value_id[sha256_val] = value
|
||||
if to_extract:
|
||||
objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
|
||||
for obj in objs:
|
||||
if map_subtype.get(obj[2]):
|
||||
subtype = map_subtype[obj[2]]
|
||||
objs = regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj.get_global_id(), content)
|
||||
for ob in objs:
|
||||
if map_subtype.get(ob[2]):
|
||||
subtype = map_subtype[ob[2]]
|
||||
else:
|
||||
subtype = ''
|
||||
sha256_val = sha256(obj[2].encode()).hexdigest()
|
||||
sha256_val = sha256(ob[2].encode()).hexdigest()
|
||||
value_id = map_value_id.get(sha256_val)
|
||||
if not value_id:
|
||||
logger.critical(f'Error module extractor: {sha256_val}\n{extract_type}\n{subtype}\n{value_id}\n{map_value_id}\n{objs}')
|
||||
value_id = 'ERROR'
|
||||
extracted.append([obj[0], obj[1], obj[2], f'{extract_type}:{subtype}:{value_id}'])
|
||||
extracted.append([ob[0], ob[1], ob[2], f'{extract_type}:{subtype}:{value_id}'])
|
||||
return extracted
|
||||
|
||||
def _get_yara_match(data):
|
||||
|
@ -100,7 +105,7 @@ def _get_yara_match(data):
|
|||
return yara.CALLBACK_CONTINUE
|
||||
|
||||
def _get_word_regex(word):
|
||||
return '(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
|
||||
return '(?i)(?:^|(?<=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))' + word + '(?:$|(?=[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]))'
|
||||
|
||||
def convert_byte_offset_to_string(b_content, offset):
|
||||
byte_chunk = b_content[:offset + 1]
|
||||
|
@ -115,17 +120,18 @@ def convert_byte_offset_to_string(b_content, offset):
|
|||
|
||||
# TODO RETRO HUNTS
|
||||
# TODO TRACKER TYPE IN UI
|
||||
def get_tracker_match(obj_id, content):
|
||||
def get_tracker_match(obj, content):
|
||||
extracted = []
|
||||
extracted_yara = []
|
||||
trackers = Tracker.get_obj_trackers('item', '', obj_id)
|
||||
obj_gid = obj.get_global_id()
|
||||
trackers = Tracker.get_obj_trackers(obj.type, obj.get_subtype(r_str=True), obj.id)
|
||||
for tracker_uuid in trackers:
|
||||
tracker = Tracker.Tracker(tracker_uuid)
|
||||
tracker_type = tracker.get_type()
|
||||
# print(tracker_type)
|
||||
tracked = tracker.get_tracked()
|
||||
if tracker_type == 'regex': # TODO Improve word detection -> word delimiter
|
||||
regex_match = regex_helper.regex_finditer(r_key, tracked, obj_id, content)
|
||||
regex_match = regex_helper.regex_finditer(r_key, tracked, obj_gid, content)
|
||||
for match in regex_match:
|
||||
extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
|
||||
elif tracker_type == 'yara':
|
||||
|
@ -147,13 +153,13 @@ def get_tracker_match(obj_id, content):
|
|||
words = [tracked]
|
||||
for word in words:
|
||||
regex = _get_word_regex(word)
|
||||
regex_match = regex_helper.regex_finditer(r_key, regex, obj_id, content)
|
||||
regex_match = regex_helper.regex_finditer(r_key, regex, obj_gid, content)
|
||||
# print(regex_match)
|
||||
for match in regex_match:
|
||||
extracted.append([int(match[0]), int(match[1]), match[2], f'tracker:{tracker.uuid}'])
|
||||
|
||||
# Retro Hunt
|
||||
retro_hunts = Tracker.get_obj_retro_hunts('item', '', obj_id)
|
||||
retro_hunts = Tracker.get_obj_retro_hunts(obj.type, obj.get_subtype(r_str=True), obj.id)
|
||||
for retro_uuid in retro_hunts:
|
||||
retro_hunt = Tracker.RetroHunt(retro_uuid)
|
||||
rule = retro_hunt.get_rule(r_compile=True)
|
||||
|
@ -182,35 +188,36 @@ def get_tracker_match(obj_id, content):
|
|||
# Type:subtype:id
|
||||
# tag:iban
|
||||
# tracker:uuid
|
||||
|
||||
def extract(obj_id, content=None):
|
||||
item = Item(obj_id)
|
||||
if not item.exists():
|
||||
# def extract(obj_id, content=None):
|
||||
def extract(obj_type, subtype, obj_id, content=None):
|
||||
obj = ail_objects.get_object(obj_type, subtype, obj_id)
|
||||
if not obj.exists():
|
||||
return []
|
||||
obj_gid = obj.get_global_id()
|
||||
|
||||
# CHECK CACHE
|
||||
cached = r_cache.get(f'extractor:cache:{obj_id}')
|
||||
cached = r_cache.get(f'extractor:cache:{obj_gid}')
|
||||
# cached = None
|
||||
if cached:
|
||||
r_cache.expire(f'extractor:cache:{obj_id}', 300)
|
||||
r_cache.expire(f'extractor:cache:{obj_gid}', 300)
|
||||
return json.loads(cached)
|
||||
|
||||
if not content:
|
||||
content = item.get_content()
|
||||
content = obj.get_content()
|
||||
|
||||
extracted = get_tracker_match(obj_id, content)
|
||||
extracted = get_tracker_match(obj, content)
|
||||
|
||||
# print(item.get_tags())
|
||||
for tag in item.get_tags():
|
||||
for tag in obj.get_tags():
|
||||
if MODULES.get(tag):
|
||||
# print(tag)
|
||||
module = MODULES.get(tag)
|
||||
matches = module.extract(obj_id, content, tag)
|
||||
matches = module.extract(obj, content, tag)
|
||||
if matches:
|
||||
extracted = extracted + matches
|
||||
|
||||
for obj_t in ['cve', 'cryptocurrency', 'title', 'username']: # Decoded, PGP->extract bloc
|
||||
matches = get_correl_match(obj_t, obj_id, content)
|
||||
for obj_t in CORRELATION_TO_EXTRACT[obj.type]:
|
||||
matches = get_correl_match(obj_t, obj, content)
|
||||
if matches:
|
||||
extracted = extracted + matches
|
||||
|
||||
|
@ -221,8 +228,8 @@ def extract(obj_id, content=None):
|
|||
# Save In Cache
|
||||
if extracted:
|
||||
extracted_dump = json.dumps(extracted)
|
||||
r_cache.set(f'extractor:cache:{obj_id}', extracted_dump)
|
||||
r_cache.expire(f'extractor:cache:{obj_id}', 300) # TODO Reduce CACHE ???????????????
|
||||
r_cache.set(f'extractor:cache:{obj_gid}', extracted_dump)
|
||||
r_cache.expire(f'extractor:cache:{obj_gid}', 300) # TODO Reduce CACHE ???????????????
|
||||
|
||||
return extracted
|
||||
|
||||
|
@ -271,15 +278,7 @@ def get_extracted_by_match(extracted):
|
|||
|
||||
# if __name__ == '__main__':
|
||||
# t0 = time.time()
|
||||
# obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
|
||||
# obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
|
||||
# obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
|
||||
# # obj_id = 'tests/2021/01/01/credit_cards.gz'
|
||||
# # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
|
||||
# obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
|
||||
# obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
|
||||
# obj_id = 'crawled/2023/02/21/circl.lu1c300acb-0cbe-480f-917e-9afe3ec958e8'
|
||||
#
|
||||
# extract(obj_id)
|
||||
#
|
||||
# # get_obj_correl('cve', obj_id, content)
|
||||
|
|
|
@ -58,9 +58,9 @@ class CreditCards(AbstractModule):
|
|||
if lib_refine.is_luhn_valid(clean_card):
|
||||
return clean_card
|
||||
|
||||
def extract(self, obj_id, content, tag):
|
||||
def extract(self, obj, content, tag):
|
||||
extracted = []
|
||||
cards = self.regex_finditer(self.regex, obj_id, content)
|
||||
cards = self.regex_finditer(self.regex, obj.get_global_id(), content)
|
||||
for card in cards:
|
||||
start, end, value = card
|
||||
if self.get_valid_card(value):
|
||||
|
|
|
@ -62,9 +62,9 @@ class Iban(AbstractModule):
|
|||
return True
|
||||
return False
|
||||
|
||||
def extract(self, obj_id, content, tag):
|
||||
def extract(self, obj, content, tag):
|
||||
extracted = []
|
||||
ibans = self.regex_finditer(self.iban_regex, obj_id, content)
|
||||
ibans = self.regex_finditer(self.iban_regex, obj.get_global_id(), content)
|
||||
for iban in ibans:
|
||||
start, end, value = iban
|
||||
value = ''.join(e for e in value if e.isalnum())
|
||||
|
|
|
@ -118,10 +118,10 @@ class Mail(AbstractModule):
|
|||
print(e)
|
||||
return valid_mxdomain
|
||||
|
||||
def extract(self, obj_id, content, tag):
|
||||
def extract(self, obj, content, tag):
|
||||
extracted = []
|
||||
mxdomains = {}
|
||||
mails = self.regex_finditer(self.email_regex, obj_id, content)
|
||||
mails = self.regex_finditer(self.email_regex, obj.get_global_id(), content)
|
||||
for mail in mails:
|
||||
start, end, value = mail
|
||||
mxdomain = value.rsplit('@', 1)[1].lower()
|
||||
|
|
|
@ -55,9 +55,9 @@ class Onion(AbstractModule):
|
|||
# TEMP var: SAVE I2P Domain (future I2P crawler)
|
||||
# self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
|
||||
|
||||
def extract(self, obj_id, content, tag):
|
||||
def extract(self, obj, content, tag):
|
||||
extracted = []
|
||||
onions = self.regex_finditer(self.onion_regex, obj_id, content)
|
||||
onions = self.regex_finditer(self.onion_regex, obj.get_global_id(), content)
|
||||
for onion in onions:
|
||||
start, end, value = onion
|
||||
url_unpack = crawlers.unpack_url(value)
|
||||
|
|
|
@ -41,9 +41,9 @@ class Phone(AbstractModule):
|
|||
# Waiting time in seconds between two processed messages
|
||||
self.pending_seconds = 1
|
||||
|
||||
def extract(self, obj, content, tag):
    """Return the phone-number matches found in *content*, labelled with *tag*.

    Runs ``self.regex_phone_iter`` with the ``'ZZ'`` region code, the object's
    global id and the content, then turns every result into a
    ``[match[0], match[1], match[2], 'tag:<tag>']`` row.
    NOTE(review): the three leading fields are presumably (start, end, value),
    as in the sibling modules' ``extract`` methods - confirm against
    ``regex_phone_iter``.
    """
    phones = self.regex_phone_iter('ZZ', obj.get_global_id(), content)
    return [[phone[0], phone[1], phone[2], f'tag:{tag}'] for phone in phones]
|
||||
|
|
|
@ -23,6 +23,7 @@ from lib import ail_core
|
|||
from lib import chats_viewer
|
||||
from lib import Language
|
||||
from lib import Tag
|
||||
from lib import module_extractor
|
||||
|
||||
# ============ BLUEPRINT ============
|
||||
chats_explorer = Blueprint('chats_explorer', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/chats_explorer'))
|
||||
|
@ -235,6 +236,10 @@ def objects_message():
|
|||
else:
|
||||
message = message[0]
|
||||
languages = Language.get_translation_languages()
|
||||
extracted = module_extractor.extract('message', '', message['id'], content=message['content'])
|
||||
extracted_matches = module_extractor.get_extracted_by_match(extracted)
|
||||
message['extracted'] = extracted
|
||||
message['extracted_matches'] = extracted_matches
|
||||
return render_template('ChatMessage.html', meta=message, bootstrap_label=bootstrap_label,
|
||||
translation_languages=languages, translation_target=target,
|
||||
modal_add_tags=Tag.get_modal_add_tags(message['id'], object_type='message'))
|
||||
|
|
|
@ -85,7 +85,7 @@ def showItem(): # # TODO: support post
|
|||
else:
|
||||
meta['investigations'] = []
|
||||
|
||||
extracted = module_extractor.extract(item.id, content=meta['content'])
|
||||
extracted = module_extractor.extract('item', '', item.id, content=meta['content'])
|
||||
extracted_matches = module_extractor.get_extracted_by_match(extracted)
|
||||
|
||||
return render_template("show_item.html", bootstrap_label=bootstrap_label,
|
||||
|
|
|
@ -14,8 +14,8 @@
|
|||
|
||||
<!-- JS -->
|
||||
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/bootstrap4.min.js') }}"></script>
|
||||
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/tags.js') }}"></script>
|
||||
|
@ -134,6 +134,65 @@
|
|||
</div>
|
||||
|
||||
|
||||
{% if meta['extracted_matches'] %}
|
||||
<div id="accordion_extracted" class="mb-3 mx-3">
|
||||
<div class="card">
|
||||
<div class="card-header py-1" id="heading_extracted">
|
||||
<div class="row">
|
||||
<div class="col-11">
|
||||
<div class="mt-2">
|
||||
<img id="misp-logo" src="{{ url_for('static', filename='image/ail-icon.png')}}" height="32"> Extracted
|
||||
<div class="badge badge-warning">{{meta['extracted_matches']|length}}</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-1">
|
||||
<button class="btn btn-link btn-lg py-2 float-right rotate down" data-toggle="collapse" data-target="#collapse_extracted" aria-expanded="true" aria-controls="collapseDecoded">
|
||||
<i class="fas fa-chevron-circle-down"></i>
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div id="collapse_extracted" class="collapse" aria-labelledby="heading_extracted" data-parent="#accordion_extracted">
|
||||
<div class="card-body">
|
||||
<table id="table_extracted" class="table table-striped">
|
||||
<thead class="thead-dark">
|
||||
<tr>
|
||||
<th>Type</th>
|
||||
<th>ID</th>
|
||||
<th>Extracted</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for match in meta['extracted_matches'] %}
|
||||
<tr>
|
||||
<td>
|
||||
<svg height="26" width="26">
|
||||
<g class="nodes">
|
||||
<circle cx="13" cy="13" r="13" fill="{{ meta['extracted_matches'][match]['icon']['color'] }}"></circle>
|
||||
<text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon {{ meta['extracted_matches'][match]['icon']['style'] }}" font-size="16px">{{ meta['extracted_matches'][match]['icon']['icon'] }}</text>
|
||||
</g>
|
||||
</svg>
|
||||
{{ meta['extracted_matches'][match]['subtype'] }}
|
||||
</td>
|
||||
<td>{{ meta['extracted_matches'][match]['id'] }}</td>
|
||||
<td>
|
||||
{% for row in meta['extracted_matches'][match]['matches'] %}
|
||||
<a href="#{{ row[0] }}:{{row[1] }}">{{ row[2] }}</a><br>
|
||||
{% endfor %}
|
||||
</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
||||
{% include 'objects/image/block_blur_img_slider.html' %}
|
||||
{% with translate_url=url_for('chats_explorer.objects_message', id=meta['id']), obj_id=meta['id'] %}
|
||||
{% include 'chats_explorer/block_translation.html' %}
|
||||
|
@ -169,6 +228,10 @@
|
|||
|
||||
});
|
||||
|
||||
$(function () {
|
||||
$('[data-toggle="popover"]').popover()
|
||||
})
|
||||
|
||||
function toggle_sidebar(){
|
||||
if($('#nav_menu').is(':visible')){
|
||||
$('#nav_menu').hide();
|
||||
|
|
|
@ -76,7 +76,11 @@
|
|||
</div>
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
<pre class="my-0">{{ message['content'] }}</pre>
|
||||
{% if not message['extracted'] %}
|
||||
<pre class="my-0">{{ message['content'] }}</pre>
|
||||
{% else %}
|
||||
<pre class="my-0">{{ message['content'][:message['extracted'][0][0]] }}{% for row in message['extracted'] %}<span class="hg-text" data-toggle="popover" data-trigger="hover" data-html="true" title="<svg height="26" width="26"><g class="nodes"><circle cx="13" cy="13" r="13" fill="{{ message['extracted_matches'][row[3]]['icon']['color'] }}"></circle><text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon {{ message['extracted_matches'][row[3]]['icon']['style'] }}" font-size="16px">{{ message['extracted_matches'][row[3]]['icon']['icon'] }}</text></g></svg> {{ message['extracted_matches'][row[3]]['subtype'] }}" data-content="{{ message['extracted_matches'][row[3]]['id'] }}" id="{{ row[0] }}:{{ row[1] }}">{{ message['content'][row[0]:row[1]] }}</span>{% if loop.index + 1 > message['extracted']|length %}{{ message['content'][message['extracted'][-1][1]:] }}{% else %}{{ message['content'][row[1]:message['extracted'][loop.index][0]] }}{% endif %}{% endfor %}</pre>
|
||||
{% endif %}
|
||||
{% if message['translation'] %}
|
||||
<hr class="m-1">
|
||||
<pre class="my-0 text-secondary">{{ message['translation'] }}</pre>
|
||||
|
|
Loading…
Reference in New Issue