diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py
index db1fc0fe..ab972c68 100755
--- a/bin/crawlers/Crawler.py
+++ b/bin/crawlers/Crawler.py
@@ -301,6 +301,7 @@ class Crawler(AbstractModule):
print(etag_content)
etag = Etags.create(etag_content)
etag.add(self.date.replace('/', ''), self.domain.id)
+ crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))
# Next Children
entries_children = entries.get('children')
diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py
index e0fd3a17..75520a2b 100755
--- a/bin/lib/ail_core.py
+++ b/bin/lib/ail_core.py
@@ -15,8 +15,8 @@ config_loader = ConfigLoader()
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
-AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'item', 'pgp',
- 'screenshot', 'title', 'username'})
+AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'hhhash', 'item',
+ 'pgp', 'screenshot', 'title', 'username'})
def get_ail_uuid():
ail_uuid = r_serv_db.get('ail:uuid')
diff --git a/bin/lib/correlations_engine.py b/bin/lib/correlations_engine.py
index 39ccaa4e..609aa8c6 100755
--- a/bin/lib/correlations_engine.py
+++ b/bin/lib/correlations_engine.py
@@ -45,9 +45,10 @@ CORRELATION_TYPES_BY_OBJ = {
"cryptocurrency": ["domain", "item"],
"cve": ["domain", "item"],
"decoded": ["domain", "item"],
- "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "item", "pgp", "title", "screenshot", "username"],
+ "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"],
"etag": ["domain"],
"favicon": ["domain", "item"], # TODO Decoded
+ "hhhash": ["domain"],
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
"pgp": ["domain", "item"],
"screenshot": ["domain", "item"],
diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index 8b5113f6..571af921 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -39,6 +39,7 @@ from packages import git_status
from packages import Date
from lib.ConfigLoader import ConfigLoader
from lib.objects.Domains import Domain
+from lib.objects import HHHashs
from lib.objects.Items import Item
config_loader = ConfigLoader()
@@ -335,7 +336,7 @@ def _reprocess_all_hars_cookie_name():
from lib.objects import CookiesNames
for har_id in get_all_har_ids():
domain = har_id.split('/')[-1]
- domain = domain[:-41]
+ domain = domain[:-44]
date = har_id.split('/')
date = f'{date[-4]}{date[-3]}{date[-2]}'
for cookie_name in extract_cookies_names_from_har(get_har_content(har_id)):
@@ -358,7 +359,7 @@ def _reprocess_all_hars_etag():
from lib.objects import Etags
for har_id in get_all_har_ids():
domain = har_id.split('/')[-1]
- domain = domain[:-41]
+ domain = domain[:-44]
date = har_id.split('/')
date = f'{date[-4]}{date[-3]}{date[-2]}'
for etag_content in extract_etag_from_har(get_har_content(har_id)):
@@ -366,6 +367,56 @@ def _reprocess_all_hars_etag():
etag = Etags.create(etag_content)
etag.add(date, domain)
+def extract_hhhash_by_id(har_id, domain, date):
+ return extract_hhhash(get_har_content(har_id), domain, date)
+
+def extract_hhhash(har, domain, date):
+ hhhashs = set()
+ urls = set()
+ for entrie in har.get('log', {}).get('entries', []):
+ url = entrie.get('request').get('url')
+ if url not in urls:
+ # filter redirect
+            if entrie.get('response').get('status') == 200: # keep only successful responses; filters out redirects (301, ...)
+ # print(url, entrie.get('response').get('status'))
+
+ f = get_faup()
+ f.decode(url)
+ domain_url = f.get().get('domain')
+ if domain_url == domain:
+
+ headers = entrie.get('response').get('headers')
+
+ hhhash_header = HHHashs.build_hhhash_headers(headers)
+ hhhash = HHHashs.hhhash_headers(hhhash_header)
+
+ if hhhash not in hhhashs:
+ print('', url, hhhash)
+
+ # -----
+ obj = HHHashs.create(hhhash_header, hhhash)
+ obj.add(date, domain)
+
+ hhhashs.add(hhhash)
+ urls.add(url)
+ print()
+ print()
+ print('HHHASH:')
+ for hhhash in hhhashs:
+ print(hhhash)
+ return hhhashs
+
+def _reprocess_all_hars_hhhashs():
+ for har_id in get_all_har_ids():
+ print()
+ print(har_id)
+ domain = har_id.split('/')[-1]
+ domain = domain[:-44]
+ date = har_id.split('/')
+ date = f'{date[-4]}{date[-3]}{date[-2]}'
+ extract_hhhash_by_id(har_id, domain, date)
+
+
def _gzip_har(har_id):
har_path = os.path.join(HAR_DIR, har_id)
@@ -1957,15 +2008,16 @@ def test_ail_crawlers():
# TODO MOVE ME IN CRAWLER OR FLASK
load_blacklist()
-# if __name__ == '__main__':
-# delete_captures()
+if __name__ == '__main__':
+ # delete_captures()
-# item_id = 'crawled/2023/02/20/data.gz'
-# item = Item(item_id)
-# content = item.get_content()
-# temp_url = ''
-# r = extract_favicon_from_html(content, temp_url)
-# print(r)
-# _reprocess_all_hars_cookie_name()
-# _reprocess_all_hars_etag()
-# _gzip_all_hars()
+ # item_id = 'crawled/2023/02/20/data.gz'
+ # item = Item(item_id)
+ # content = item.get_content()
+ # temp_url = ''
+ # r = extract_favicon_from_html(content, temp_url)
+ # print(r)
+ # _reprocess_all_hars_cookie_name()
+ # _reprocess_all_hars_etag()
+ # _gzip_all_hars()
+ _reprocess_all_hars_hhhashs()
diff --git a/bin/lib/objects/HHHashs.py b/bin/lib/objects/HHHashs.py
new file mode 100755
index 00000000..021ac451
--- /dev/null
+++ b/bin/lib/objects/HHHashs.py
@@ -0,0 +1,138 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import hashlib
+import os
+import sys
+
+from flask import url_for
+
+from pymisp import MISPObject
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
+
+config_loader = ConfigLoader()
+r_objects = config_loader.get_db_conn("Kvrocks_Objects")
+baseurl = config_loader.get_config_str("Notifications", "ail_domain")
+config_loader = None
+
+
+class HHHash(AbstractDaterangeObject):
+ """
+ AIL HHHash Object.
+ """
+
+ def __init__(self, obj_id):
+ super(HHHash, self).__init__('hhhash', obj_id)
+
+ # def get_ail_2_ail_payload(self):
+ # payload = {'raw': self.get_gzip_content(b64=True),
+ # 'compress': 'gzip'}
+ # return payload
+
+ # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
+ def delete(self):
+ # # TODO:
+ pass
+
+ def get_content(self, r_type='str'):
+ if r_type == 'str':
+ return self._get_field('content')
+
+ def get_link(self, flask_context=False):
+ if flask_context:
+ url = url_for('correlation.show_correlation', type=self.type, id=self.id)
+ else:
+ url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
+ return url
+
+ # TODO # CHANGE COLOR
+ def get_svg_icon(self):
+ return {'style': 'fas', 'icon': '\uf036', 'color': '#71D090', 'radius': 5}
+
+ def get_misp_object(self):
+ obj_attrs = []
+ obj = MISPObject('hhhash')
+ first_seen = self.get_first_seen()
+ last_seen = self.get_last_seen()
+ if first_seen:
+ obj.first_seen = first_seen
+ if last_seen:
+ obj.last_seen = last_seen
+ if not first_seen or not last_seen:
+ self.logger.warning(
+ f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
+
+ obj_attrs.append(obj.add_attribute('hhhash', value=self.get_id()))
+ obj_attrs.append(obj.add_attribute('hhhash-headers', value=self.get_content()))
+ obj_attrs.append(obj.add_attribute('hhhash-tool', value='lacus'))
+ for obj_attr in obj_attrs:
+ for tag in self.get_tags():
+ obj_attr.add_tag(tag)
+ return obj
+
+ def get_nb_seen(self):
+ return self.get_nb_correlation('domain')
+
+ def get_meta(self, options=set()):
+ meta = self._get_meta(options=options)
+ meta['id'] = self.id
+ meta['tags'] = self.get_tags(r_list=True)
+ meta['content'] = self.get_content()
+ return meta
+
+ def add(self, date, obj_id): # date = HAR Date
+ self._add(date, 'domain', '', obj_id)
+
+ def create(self, hhhash_header, _first_seen=None, _last_seen=None): # TODO CREATE ADD FUNCTION -> urls set
+ self._set_field('content', hhhash_header)
+ self._create()
+
+
+def create(hhhash_header, hhhash=None):
+ if not hhhash:
+ hhhash = hhhash_headers(hhhash_header)
+ hhhash = HHHash(hhhash)
+ if not hhhash.exists():
+ hhhash.create(hhhash_header)
+ return hhhash
+
+def build_hhhash_headers(dict_headers): # filter_dup=True
+ hhhash = ''
+ previous_header = ''
+ for header in dict_headers:
+ header_name = header.get('name')
+ if header_name:
+ if header_name != previous_header: # remove dup headers, filter playwright invalid splitting
+ hhhash = f'{hhhash}:{header_name}'
+ previous_header = header_name
+ hhhash = hhhash[1:]
+ # print(hhhash)
+ return hhhash
+
+def hhhash_headers(header_hhhash):
+ m = hashlib.sha256()
+ m.update(header_hhhash.encode())
+ digest = m.hexdigest()
+ return f"hhh:1:{digest}"
+
+
+class HHHashs(AbstractDaterangeObjects):
+ """
+ HHHashs Objects
+ """
+ def __init__(self):
+ super().__init__('hhhash', HHHash)
+
+ def sanitize_id_to_search(self, name_to_search):
+ return name_to_search # TODO
+
+
+# if __name__ == '__main__':
+#     obj = HHHash('hhh:1:...')  # TODO: add a search/lookup example for HHHash objects
+#     print(obj.get_meta())
diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py
index e0eeda63..f12708fb 100755
--- a/bin/lib/objects/ail_objects.py
+++ b/bin/lib/objects/ail_objects.py
@@ -20,6 +20,7 @@ from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decod
from lib.objects.Domains import Domain
from lib.objects import Etags
from lib.objects.Favicons import Favicon
+from lib.objects import HHHashs
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
from lib.objects import Pgps
from lib.objects.Screenshots import Screenshot
@@ -62,6 +63,8 @@ def get_object(obj_type, subtype, id):
return Etags.Etag(id)
elif obj_type == 'favicon':
return Favicon(id)
+ elif obj_type == 'hhhash':
+ return HHHashs.HHHash(id)
elif obj_type == 'screenshot':
return Screenshot(id)
elif obj_type == 'cryptocurrency':
@@ -104,9 +107,12 @@ def get_obj_global_id(obj_type, subtype, obj_id):
obj = get_object(obj_type, subtype, obj_id)
return obj.get_global_id()
+def get_obj_type_subtype_id_from_global_id(global_id):
+ obj_type, subtype, obj_id = global_id.split(':', 2)
+ return obj_type, subtype, obj_id
def get_obj_from_global_id(global_id):
- obj = global_id.split(':', 3)
+ obj = get_obj_type_subtype_id_from_global_id(global_id)
return get_object(obj[0], obj[1], obj[2])
@@ -162,7 +168,7 @@ def get_objects_meta(objs, options=set(), flask_context=False):
subtype = obj[1]
obj_id = obj[2]
else:
- obj_type, subtype, obj_id = obj.split(':', 2)
+ obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(obj)
metas.append(get_object_meta(obj_type, subtype, obj_id, options=options, flask_context=flask_context))
return metas
@@ -171,7 +177,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
obj = get_object(obj_type, subtype, id)
meta = obj.get_meta()
meta['icon'] = obj.get_svg_icon()
- if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon':
+ if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash':
meta['sparkline'] = obj.get_sparkline()
if obj_type == 'cve':
meta['cve_search'] = obj.get_cve_search()
@@ -402,7 +408,7 @@ def create_correlation_graph_links(links_set):
def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True):
graph_nodes_list = []
for node_id in nodes_set:
- obj_type, subtype, obj_id = node_id.split(':', 2)
+ obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(node_id)
dict_node = {'id': node_id}
dict_node['style'] = get_object_svg(obj_type, subtype, obj_id)
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index b40b5cc8..93d3a47d 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -52,6 +52,7 @@ from blueprints.objects_subtypes import objects_subtypes
from blueprints.objects_title import objects_title
from blueprints.objects_cookie_name import objects_cookie_name
from blueprints.objects_etag import objects_etag
+from blueprints.objects_hhhash import objects_hhhash
Flask_dir = os.environ['AIL_FLASK']
@@ -108,6 +109,7 @@ app.register_blueprint(objects_subtypes, url_prefix=baseUrl)
app.register_blueprint(objects_title, url_prefix=baseUrl)
app.register_blueprint(objects_cookie_name, url_prefix=baseUrl)
app.register_blueprint(objects_etag, url_prefix=baseUrl)
+app.register_blueprint(objects_hhhash, url_prefix=baseUrl)
# ========= =========#
diff --git a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py
index 594dc660..d5d672b1 100644
--- a/var/www/blueprints/correlation.py
+++ b/var/www/blueprints/correlation.py
@@ -99,6 +99,9 @@ def show_correlation():
correl_option = request.form.get('CryptocurrencyCheck')
if correl_option:
filter_types.append('cryptocurrency')
+ correl_option = request.form.get('HHHashCheck')
+ if correl_option:
+ filter_types.append('hhhash')
correl_option = request.form.get('PgpCheck')
if correl_option:
filter_types.append('pgp')
@@ -177,26 +180,15 @@ def show_correlation():
@login_read_only
def get_description():
object_id = request.args.get('object_id')
- object_id = object_id.split(':')
- # unpack object_id # # TODO: put me in lib
- if len(object_id) == 3:
- object_type = object_id[0]
- type_id = object_id[1]
- correlation_id = object_id[2]
- elif len(object_id) == 2:
- object_type = object_id[0]
- type_id = None
- correlation_id = object_id[1]
- else:
- return jsonify({})
+ obj_type, subtype, obj_id = ail_objects.get_obj_type_subtype_id_from_global_id(object_id)
- # check if correlation_id exist
+ # check if obj exist
# # TODO: return error json
- if not ail_objects.exists_obj(object_type, type_id, correlation_id):
+ if not ail_objects.exists_obj(obj_type, subtype, obj_id):
return Response(json.dumps({"status": "error", "reason": "404 Not Found"}, indent=2, sort_keys=True), mimetype='application/json'), 404
# object exist
else:
- res = ail_objects.get_object_meta(object_type, type_id, correlation_id, options={'tags', 'tags_safe'},
+ res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'tags', 'tags_safe'},
flask_context=True)
if 'tags' in res:
res['tags'] = list(res['tags'])
diff --git a/var/www/blueprints/objects_hhhash.py b/var/www/blueprints/objects_hhhash.py
new file mode 100644
index 00000000..9d5bd320
--- /dev/null
+++ b/var/www/blueprints/objects_hhhash.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+    Blueprint Flask: HHHash objects endpoints: daterange listing, charts ...
+'''
+
+import os
+import sys
+
+from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort
+from flask_login import login_required, current_user
+
+# Import Role_Manager
+from Role_Manager import login_admin, login_analyst, login_read_only
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.objects import HHHashs
+from packages import Date
+
+# ============ BLUEPRINT ============
+objects_hhhash = Blueprint('objects_hhhash', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/hhhash'))
+
+# ============ VARIABLES ============
+bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
+
+
+# ============ FUNCTIONS ============
+@objects_hhhash.route("/objects/hhhashs", methods=['GET'])
+@login_required
+@login_read_only
+def objects_hhhashs():
+ date_from = request.args.get('date_from')
+ date_to = request.args.get('date_to')
+ show_objects = request.args.get('show_objects')
+ date = Date.sanitise_date_range(date_from, date_to)
+ date_from = date['date_from']
+ date_to = date['date_to']
+
+ if show_objects:
+ dict_objects = HHHashs.HHHashs().api_get_meta_by_daterange(date_from, date_to)
+ else:
+ dict_objects = {}
+
+ return render_template("HHHashDaterange.html", date_from=date_from, date_to=date_to,
+ dict_objects=dict_objects, show_objects=show_objects)
+
+@objects_hhhash.route("/objects/hhhash/post", methods=['POST'])
+@login_required
+@login_read_only
+def objects_hhhashs_post():
+ date_from = request.form.get('date_from')
+ date_to = request.form.get('date_to')
+ show_objects = request.form.get('show_objects')
+ return redirect(url_for('objects_hhhash.objects_hhhashs', date_from=date_from, date_to=date_to, show_objects=show_objects))
+
+@objects_hhhash.route("/objects/hhhash/range/json", methods=['GET'])
+@login_required
+@login_read_only
+def objects_hhhash_range_json():
+ date_from = request.args.get('date_from')
+ date_to = request.args.get('date_to')
+ date = Date.sanitise_date_range(date_from, date_to)
+ date_from = date['date_from']
+ date_to = date['date_to']
+ return jsonify(HHHashs.HHHashs().api_get_chart_nb_by_daterange(date_from, date_to))
+
+# @objects_hhhash.route("/objects/hhhash/search", methods=['POST'])
+# @login_required
+# @login_read_only
+# def objects_hhhashs_names_search():
+# to_search = request.form.get('object_id')
+#
+# # TODO SANITIZE ID
+# # TODO Search all
+# cve = Cves.Cve(to_search)
+# if not cve.exists():
+# abort(404)
+# else:
+# return redirect(cve.get_link(flask_context=True))
+
+# ============= ROUTES ==============
+
diff --git a/var/www/templates/correlation/metadata_card_hhhash.html b/var/www/templates/correlation/metadata_card_hhhash.html
new file mode 100644
index 00000000..c1474605
--- /dev/null
+++ b/var/www/templates/correlation/metadata_card_hhhash.html
@@ -0,0 +1,173 @@
+
+
+
+{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%}
+ {% include 'modals/add_tags.html' %}
+{% endwith %}
+
+{% include 'modals/edit_tag.html' %}
+
+
+
+
+
+
+
+
+
+
diff --git a/var/www/templates/correlation/show_correlation.html b/var/www/templates/correlation/show_correlation.html
index b243bd35..11a85cd7 100644
--- a/var/www/templates/correlation/show_correlation.html
+++ b/var/www/templates/correlation/show_correlation.html
@@ -119,6 +119,8 @@
{% include 'correlation/metadata_card_cookie_name.html' %}
{% elif dict_object["object_type"] == "etag" %}
{% include 'correlation/metadata_card_etag.html' %}
+ {% elif dict_object["object_type"] == "hhhash" %}
+ {% include 'correlation/metadata_card_hhhash.html' %}
{% elif dict_object["object_type"] == "item" %}
{% include 'correlation/metadata_card_item.html' %}
{% endif %}
@@ -230,6 +232,10 @@
+
+
+
+