diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py index db1fc0fe..ab972c68 100755 --- a/bin/crawlers/Crawler.py +++ b/bin/crawlers/Crawler.py @@ -301,6 +301,7 @@ class Crawler(AbstractModule): print(etag_content) etag = Etags.create(etag_content) etag.add(self.date.replace('/', ''), self.domain.id) + crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', '')) # Next Children entries_children = entries.get('children') diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py index e0fd3a17..75520a2b 100755 --- a/bin/lib/ail_core.py +++ b/bin/lib/ail_core.py @@ -15,8 +15,8 @@ config_loader = ConfigLoader() r_serv_db = config_loader.get_db_conn("Kvrocks_DB") config_loader = None -AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'item', 'pgp', - 'screenshot', 'title', 'username'}) +AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'hhhash', 'item', + 'pgp', 'screenshot', 'title', 'username'}) def get_ail_uuid(): ail_uuid = r_serv_db.get('ail:uuid') diff --git a/bin/lib/correlations_engine.py b/bin/lib/correlations_engine.py index 39ccaa4e..609aa8c6 100755 --- a/bin/lib/correlations_engine.py +++ b/bin/lib/correlations_engine.py @@ -45,9 +45,10 @@ CORRELATION_TYPES_BY_OBJ = { "cryptocurrency": ["domain", "item"], "cve": ["domain", "item"], "decoded": ["domain", "item"], - "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "item", "pgp", "title", "screenshot", "username"], + "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "hhhash", "item", "pgp", "title", "screenshot", "username"], "etag": ["domain"], "favicon": ["domain", "item"], # TODO Decoded + "hhhash": ["domain"], "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], "pgp": ["domain", "item"], "screenshot": ["domain", "item"], diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index 8b5113f6..571af921 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -39,6 +39,7 @@ from packages import git_status from packages import Date from lib.ConfigLoader import ConfigLoader from lib.objects.Domains import Domain +from lib.objects import HHHashs from lib.objects.Items import Item config_loader = ConfigLoader() @@ -335,7 +336,7 @@ def _reprocess_all_hars_cookie_name(): from lib.objects import CookiesNames for har_id in get_all_har_ids(): domain = har_id.split('/')[-1] - domain = domain[:-41] + domain = domain[:-44] date = har_id.split('/') date = f'{date[-4]}{date[-3]}{date[-2]}' for cookie_name in extract_cookies_names_from_har(get_har_content(har_id)): @@ -358,7 +359,7 @@ def _reprocess_all_hars_etag(): from lib.objects import Etags for har_id in get_all_har_ids(): domain = har_id.split('/')[-1] - domain = domain[:-41] + domain = domain[:-44] date = har_id.split('/') date = f'{date[-4]}{date[-3]}{date[-2]}' for etag_content in extract_etag_from_har(get_har_content(har_id)): @@ -366,6 +367,56 @@ def _reprocess_all_hars_etag(): etag = Etags.create(etag_content) etag.add(date, domain) +def extract_hhhash_by_id(har_id, domain, date): + return extract_hhhash(get_har_content(har_id), domain, date) + +def extract_hhhash(har, domain, date): + hhhashs = set() + urls = set() + for entrie in har.get('log', {}).get('entries', []): + url = entrie.get('request').get('url') + if url not in urls: + # filter redirect + if entrie.get('response').get('status') == 200: # != 301: + # print(url, entrie.get('response').get('status')) + + f = get_faup() + f.decode(url) + domain_url = f.get().get('domain') + if domain_url == domain: + + headers = entrie.get('response').get('headers') + + hhhash_header = HHHashs.build_hhhash_headers(headers) + hhhash = HHHashs.hhhash_headers(hhhash_header) + + if hhhash not in hhhashs: + print('', url, hhhash) + + # ----- + obj = HHHashs.create(hhhash_header, hhhash) + obj.add(date, domain) + + hhhashs.add(hhhash) + urls.add(url) + print() + print() + print('HHHASH:') + for hhhash in hhhashs: + print(hhhash) + return hhhashs + +def _reprocess_all_hars_hhhashs(): + for har_id in get_all_har_ids(): + print() + print(har_id) + domain = har_id.split('/')[-1] + domain = domain[:-44] + date = har_id.split('/') + date = f'{date[-4]}{date[-3]}{date[-2]}' + extract_hhhash_by_id(har_id, domain, date) + + def _gzip_har(har_id): har_path = os.path.join(HAR_DIR, har_id) @@ -1957,15 +2008,16 @@ def test_ail_crawlers(): # TODO MOVE ME IN CRAWLER OR FLASK load_blacklist() -# if __name__ == '__main__': -# delete_captures() +if __name__ == '__main__': + # delete_captures() -# item_id = 'crawled/2023/02/20/data.gz' -# item = Item(item_id) -# content = item.get_content() -# temp_url = '' -# r = extract_favicon_from_html(content, temp_url) -# print(r) -# _reprocess_all_hars_cookie_name() -# _reprocess_all_hars_etag() -# _gzip_all_hars() + # item_id = 'crawled/2023/02/20/data.gz' + # item = Item(item_id) + # content = item.get_content() + # temp_url = '' + # r = extract_favicon_from_html(content, temp_url) + # print(r) + # _reprocess_all_hars_cookie_name() + # _reprocess_all_hars_etag() + # _gzip_all_hars() + _reprocess_all_hars_hhhashs() diff --git a/bin/lib/objects/HHHashs.py b/bin/lib/objects/HHHashs.py new file mode 100755 index 00000000..021ac451 --- /dev/null +++ b/bin/lib/objects/HHHashs.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import hashlib +import os +import sys + +from flask import url_for + +from pymisp import MISPObject + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.ConfigLoader import ConfigLoader +from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects + +config_loader = ConfigLoader() +r_objects = config_loader.get_db_conn("Kvrocks_Objects") +baseurl = config_loader.get_config_str("Notifications", "ail_domain") +config_loader = None + + +class HHHash(AbstractDaterangeObject): + """ + AIL HHHash Object. + """ + + def __init__(self, obj_id): + super(HHHash, self).__init__('hhhash', obj_id) + + # def get_ail_2_ail_payload(self): + # payload = {'raw': self.get_gzip_content(b64=True), + # 'compress': 'gzip'} + # return payload + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ + def delete(self): + # # TODO: + pass + + def get_content(self, r_type='str'): + if r_type == 'str': + return self._get_field('content') + + def get_link(self, flask_context=False): + if flask_context: + url = url_for('correlation.show_correlation', type=self.type, id=self.id) + else: + url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}' + return url + + # TODO # CHANGE COLOR + def get_svg_icon(self): + return {'style': 'fas', 'icon': '\uf036', 'color': '#71D090', 'radius': 5} + + def get_misp_object(self): + obj_attrs = [] + obj = MISPObject('hhhash') + first_seen = self.get_first_seen() + last_seen = self.get_last_seen() + if first_seen: + obj.first_seen = first_seen + if last_seen: + obj.last_seen = last_seen + if not first_seen or not last_seen: + self.logger.warning( + f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}') + + obj_attrs.append(obj.add_attribute('hhhash', value=self.get_id())) + obj_attrs.append(obj.add_attribute('hhhash-headers', value=self.get_content())) + obj_attrs.append(obj.add_attribute('hhhash-tool', value='lacus')) + for obj_attr in obj_attrs: + for tag in self.get_tags(): + obj_attr.add_tag(tag) + return obj + + def get_nb_seen(self): + return self.get_nb_correlation('domain') + + def get_meta(self, options=set()): + meta = self._get_meta(options=options) + meta['id'] = self.id + meta['tags'] = self.get_tags(r_list=True) + meta['content'] = self.get_content() + return meta + + def add(self, date, obj_id): # date = HAR Date + self._add(date, 'domain', '', obj_id) + + def create(self, hhhash_header, _first_seen=None, _last_seen=None): # TODO CREATE ADD FUNCTION -> urls set + self._set_field('content', hhhash_header) + self._create() + + +def create(hhhash_header, hhhash=None): + if not hhhash: + hhhash = hhhash_headers(hhhash_header) + hhhash = HHHash(hhhash) + if not hhhash.exists(): + hhhash.create(hhhash_header) + return hhhash + +def build_hhhash_headers(dict_headers): # filter_dup=True + hhhash = '' + previous_header = '' + for header in dict_headers: + header_name = header.get('name') + if header_name: + if header_name != previous_header: # remove dup headers, filter playwright invalid splitting + hhhash = f'{hhhash}:{header_name}' + previous_header = header_name + hhhash = hhhash[1:] + # print(hhhash) + return hhhash + +def hhhash_headers(header_hhhash): + m = hashlib.sha256() + m.update(header_hhhash.encode()) + digest = m.hexdigest() + return f"hhh:1:{digest}" + + +class HHHashs(AbstractDaterangeObjects): + """ + HHHashs Objects + """ + def __init__(self): + super().__init__('hhhash', HHHash) + + def sanitize_id_to_search(self, name_to_search): + return name_to_search # TODO + + +# if __name__ == '__main__': +# name_to_search = '98' +# print(search_cves_by_name(name_to_search)) diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py index e0eeda63..f12708fb 100755 --- a/bin/lib/objects/ail_objects.py +++ b/bin/lib/objects/ail_objects.py @@ -20,6 +20,7 @@ from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decod from lib.objects.Domains import Domain from lib.objects import Etags from lib.objects.Favicons import Favicon +from lib.objects import HHHashs from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects from lib.objects import Pgps from lib.objects.Screenshots import Screenshot @@ -62,6 +63,8 @@ def get_object(obj_type, subtype, id): return Etags.Etag(id) elif obj_type == 'favicon': return Favicon(id) + elif obj_type == 'hhhash': + return HHHashs.HHHash(id) elif obj_type == 'screenshot': return Screenshot(id) elif obj_type == 'cryptocurrency': @@ -104,9 +107,12 @@ def get_obj_global_id(obj_type, subtype, obj_id): obj = get_object(obj_type, subtype, obj_id) return obj.get_global_id() +def get_obj_type_subtype_id_from_global_id(global_id): + obj_type, subtype, obj_id = global_id.split(':', 2) + return obj_type, subtype, obj_id def get_obj_from_global_id(global_id): - obj = global_id.split(':', 3) + obj = get_obj_type_subtype_id_from_global_id(global_id) return get_object(obj[0], obj[1], obj[2]) @@ -162,7 +168,7 @@ def get_objects_meta(objs, options=set(), flask_context=False): subtype = obj[1] obj_id = obj[2] else: - obj_type, subtype, obj_id = obj.split(':', 2) + obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(obj) metas.append(get_object_meta(obj_type, subtype, obj_id, options=options, flask_context=flask_context)) return metas @@ -171,7 +177,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False): obj = get_object(obj_type, subtype, id) meta = obj.get_meta() meta['icon'] = obj.get_svg_icon() - if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon': + if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon' or obj_type == 'hhhash': meta['sparkline'] = obj.get_sparkline() if obj_type == 'cve': meta['cve_search'] = obj.get_cve_search() @@ -402,7 +408,7 @@ def create_correlation_graph_links(links_set): def create_correlation_graph_nodes(nodes_set, obj_str_id, flask_context=True): graph_nodes_list = [] for node_id in nodes_set: - obj_type, subtype, obj_id = node_id.split(':', 2) + obj_type, subtype, obj_id = get_obj_type_subtype_id_from_global_id(node_id) dict_node = {'id': node_id} dict_node['style'] = get_object_svg(obj_type, subtype, obj_id) diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index b40b5cc8..93d3a47d 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -52,6 +52,7 @@ from blueprints.objects_subtypes import objects_subtypes from blueprints.objects_title import objects_title from blueprints.objects_cookie_name import objects_cookie_name from blueprints.objects_etag import objects_etag +from blueprints.objects_hhhash import objects_hhhash Flask_dir = os.environ['AIL_FLASK'] @@ -108,6 +109,7 @@ app.register_blueprint(objects_subtypes, url_prefix=baseUrl) app.register_blueprint(objects_title, url_prefix=baseUrl) app.register_blueprint(objects_cookie_name, url_prefix=baseUrl) app.register_blueprint(objects_etag, url_prefix=baseUrl) +app.register_blueprint(objects_hhhash, url_prefix=baseUrl) # ========= =========# diff --git a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py index 594dc660..d5d672b1 100644 --- a/var/www/blueprints/correlation.py +++ b/var/www/blueprints/correlation.py @@ -99,6 +99,9 @@ def show_correlation(): correl_option = request.form.get('CryptocurrencyCheck') if correl_option: filter_types.append('cryptocurrency') + correl_option = request.form.get('HHHashCheck') + if correl_option: + filter_types.append('hhhash') correl_option = request.form.get('PgpCheck') if correl_option: filter_types.append('pgp') @@ -177,26 +180,15 @@ def show_correlation(): @login_read_only def get_description(): object_id = request.args.get('object_id') - object_id = object_id.split(':') - # unpack object_id # # TODO: put me in lib - if len(object_id) == 3: - object_type = object_id[0] - type_id = object_id[1] - correlation_id = object_id[2] - elif len(object_id) == 2: - object_type = object_id[0] - type_id = None - correlation_id = object_id[1] - else: - return jsonify({}) + obj_type, subtype, obj_id = ail_objects.get_obj_type_subtype_id_from_global_id(object_id) - # check if correlation_id exist + # check if obj exist # # TODO: return error json - if not ail_objects.exists_obj(object_type, type_id, correlation_id): + if not ail_objects.exists_obj(obj_type, subtype, obj_id): return Response(json.dumps({"status": "error", "reason": "404 Not Found"}, indent=2, sort_keys=True), mimetype='application/json'), 404 # object exist else: - res = ail_objects.get_object_meta(object_type, type_id, correlation_id, options={'tags', 'tags_safe'}, + res = ail_objects.get_object_meta(obj_type, subtype, obj_id, options={'tags', 'tags_safe'}, flask_context=True) if 'tags' in res: res['tags'] = list(res['tags']) diff --git a/var/www/blueprints/objects_hhhash.py b/var/www/blueprints/objects_hhhash.py new file mode 100644 index 00000000..9d5bd320 --- /dev/null +++ b/var/www/blueprints/objects_hhhash.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... +''' + +import os +import sys + +from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort +from flask_login import login_required, current_user + +# Import Role_Manager +from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.objects import HHHashs +from packages import Date + +# ============ BLUEPRINT ============ +objects_hhhash = Blueprint('objects_hhhash', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/hhhash')) + +# ============ VARIABLES ============ +bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] + + +# ============ FUNCTIONS ============ +@objects_hhhash.route("/objects/hhhashs", methods=['GET']) +@login_required +@login_read_only +def objects_hhhashs(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + show_objects = request.args.get('show_objects') + date = Date.sanitise_date_range(date_from, date_to) + date_from = date['date_from'] + date_to = date['date_to'] + + if show_objects: + dict_objects = HHHashs.HHHashs().api_get_meta_by_daterange(date_from, date_to) + else: + dict_objects = {} + + return render_template("HHHashDaterange.html", date_from=date_from, date_to=date_to, + dict_objects=dict_objects, show_objects=show_objects) + +@objects_hhhash.route("/objects/hhhash/post", methods=['POST']) +@login_required +@login_read_only +def objects_hhhashs_post(): + date_from = request.form.get('date_from') + date_to = request.form.get('date_to') + show_objects = request.form.get('show_objects') + return redirect(url_for('objects_hhhash.objects_hhhashs', date_from=date_from, date_to=date_to, show_objects=show_objects)) + +@objects_hhhash.route("/objects/hhhash/range/json", methods=['GET']) +@login_required +@login_read_only +def objects_hhhash_range_json(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + date = Date.sanitise_date_range(date_from, date_to) + date_from = date['date_from'] + date_to = date['date_to'] + return jsonify(HHHashs.HHHashs().api_get_chart_nb_by_daterange(date_from, date_to)) + +# @objects_hhhash.route("/objects/hhhash/search", methods=['POST']) +# @login_required +# @login_read_only +# def objects_hhhashs_names_search(): +# to_search = request.form.get('object_id') +# +# # TODO SANITIZE ID +# # TODO Search all +# cve = Cves.Cve(to_search) +# if not cve.exists(): +# abort(404) +# else: +# return redirect(cve.get_link(flask_context=True)) + +# ============= ROUTES ============== + diff --git a/var/www/templates/correlation/metadata_card_hhhash.html b/var/www/templates/correlation/metadata_card_hhhash.html new file mode 100644 index 00000000..c1474605 --- /dev/null +++ b/var/www/templates/correlation/metadata_card_hhhash.html @@ -0,0 +1,173 @@ + + + +{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%} + {% include 'modals/add_tags.html' %} +{% endwith %} + +{% include 'modals/edit_tag.html' %} + +
+
+

{{ dict_object["metadata"]["content"] }}

+
{{ dict_object["correlation_id"] }}
+ + + {% with obj_type='hhhash', obj_id=dict_object['correlation_id'], obj_subtype='' %} + {% include 'modals/investigations_register_obj.html' %} + {% endwith %} + + +
+
+ + + + + + diff --git a/var/www/templates/correlation/show_correlation.html b/var/www/templates/correlation/show_correlation.html index b243bd35..11a85cd7 100644 --- a/var/www/templates/correlation/show_correlation.html +++ b/var/www/templates/correlation/show_correlation.html @@ -119,6 +119,8 @@ {% include 'correlation/metadata_card_cookie_name.html' %} {% elif dict_object["object_type"] == "etag" %} {% include 'correlation/metadata_card_etag.html' %} + {% elif dict_object["object_type"] == "hhhash" %} + {% include 'correlation/metadata_card_hhhash.html' %} {% elif dict_object["object_type"] == "item" %} {% include 'correlation/metadata_card_item.html' %} {% endif %} @@ -230,6 +232,10 @@ +
+ + +
diff --git a/var/www/templates/objects/hhhash/HHHashDaterange.html b/var/www/templates/objects/hhhash/HHHashDaterange.html new file mode 100644 index 00000000..79e12238 --- /dev/null +++ b/var/www/templates/objects/hhhash/HHHashDaterange.html @@ -0,0 +1,602 @@ + + + + + HHHashs - AIL + + + + + + + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'sidebars/sidebar_objects.html' %} + +
+ +
+
+
+ +{# {% include 'hhhash/block_hhhash_search.html' %}#} + +
+ + +
+ +
+
+
Select a date range :
+
+
+
+ +
+
+
+ +
+
+ + +
+ +
+
+
+ +
+
+
+
+
+
+ + {% if dict_objects %} + {% if date_from|string == date_to|string %} +

{{ date_from }} HHHashs Name:

+ {% else %} +

{{ date_from }} to {{ date_to }} HHHashs Name:

+ {% endif %} + + + + + + + + + + + + {% for obj_id in dict_objects %} + + + + + + + + {% endfor %} + +
First SeenLast SeenTotalLast days
{{ dict_objects[obj_id]['content'] }}{{ dict_objects[obj_id]['first_seen'] }}{{ dict_objects[obj_id]['last_seen'] }}{{ dict_objects[obj_id]['nb_seen'] }}
+ + + {% else %} + {% if show_objects %} + {% if date_from|string == date_to|string %} +

{{ date_from }}, No HHHash Name

+ {% else %} +

{{ date_from }} to {{ date_to }}, No HHHash Name

+ {% endif %} + {% endif %} + {% endif %} +
+ +
+
+ + + + + + + + + + + + + + + + + diff --git a/var/www/templates/sidebars/sidebar_objects.html b/var/www/templates/sidebars/sidebar_objects.html index 5f7b00b5..12b5abc0 100644 --- a/var/www/templates/sidebars/sidebar_objects.html +++ b/var/www/templates/sidebars/sidebar_objects.html @@ -46,6 +46,12 @@ Etag +