diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py index be615993..2b2f35c6 100755 --- a/bin/crawlers/Crawler.py +++ b/bin/crawlers/Crawler.py @@ -17,6 +17,7 @@ from lib import ail_logger from lib import crawlers from lib.ConfigLoader import ConfigLoader from lib.objects import CookiesNames +from lib.objects import Etags from lib.objects.Domains import Domain from lib.objects.Items import Item from lib.objects import Screenshots @@ -288,6 +289,10 @@ class Crawler(AbstractModule): print(cookie_name) cookie = CookiesNames.create(cookie_name) cookie.add(self.date.replace('/', ''), self.domain.id) + for etag_content in crawlers.extract_etag_from_har(entries['har']): + print(etag_content) + etag = Etags.create(etag_content) + etag.add(self.date.replace('/', ''), self.domain.id) # Next Children entries_children = entries.get('children') diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py index c52db274..e0fd3a17 100755 --- a/bin/lib/ail_core.py +++ b/bin/lib/ail_core.py @@ -15,7 +15,7 @@ config_loader = ConfigLoader() r_serv_db = config_loader.get_db_conn("Kvrocks_DB") config_loader = None -AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'favicon', 'item', 'pgp', +AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'item', 'pgp', 'screenshot', 'title', 'username'}) def get_ail_uuid(): diff --git a/bin/lib/correlations_engine.py b/bin/lib/correlations_engine.py index 8e29837d..1a2081ac 100755 --- a/bin/lib/correlations_engine.py +++ b/bin/lib/correlations_engine.py @@ -45,7 +45,8 @@ CORRELATION_TYPES_BY_OBJ = { "cryptocurrency": ["domain", "item"], "cve": ["domain", "item"], "decoded": ["domain", "item"], - "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "favicon", "item", "pgp", "title", "screenshot", "username"], + "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "item", "pgp", "title", "screenshot", "username"], + "etag": ["domain"], "favicon": ["domain", "item"], # TODO Decoded "item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"], "pgp": ["domain", "item"], diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py index 6f6bf5b7..7c98537e 100755 --- a/bin/lib/crawlers.py +++ b/bin/lib/crawlers.py @@ -264,6 +264,7 @@ def extract_author_from_html(html): if keywords: return keywords['content'] return '' + # # # - - # # # @@ -299,9 +300,10 @@ def get_all_har_ids(): except (TypeError, ValueError): pass - for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]: - har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR) - har_ids.append(har_id) + if os.path.exists(today_root_dir): + for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]: + har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR) + har_ids.append(har_id) for ydir in sorted(dirs_year, reverse=False): search_dear = os.path.join(HAR_DIR, ydir) @@ -312,14 +314,13 @@ def get_all_har_ids(): har_ids.append(har_id) return har_ids -def extract_cookies_names_from_har_by_har_id(har_id): +def get_har_content(har_id): har_path = os.path.join(HAR_DIR, har_id) with open(har_path) as f: try: - har_content = json.loads(f.read()) + return json.loads(f.read()) except json.decoder.JSONDecodeError: - har_content = {} - return extract_cookies_names_from_har(har_content) + return {} def extract_cookies_names_from_har(har): cookies = set() @@ -334,18 +335,41 @@ def extract_cookies_names_from_har(har): cookies.add(name) return cookies -def _reprocess_all_hars(): +def _reprocess_all_hars_cookie_name(): from lib.objects import CookiesNames for har_id in get_all_har_ids(): domain = har_id.split('/')[-1] domain = domain[:-41] date = har_id.split('/') date = f'{date[-4]}{date[-3]}{date[-2]}' - for cookie_name in extract_cookies_names_from_har_by_har_id(har_id): + for cookie_name in extract_cookies_names_from_har(get_har_content(har_id)): print(domain, date, cookie_name) cookie = CookiesNames.create(cookie_name) cookie.add(date, domain) +def extract_etag_from_har(har): # TODO check response url + etags = set() + for entrie in har.get('log', {}).get('entries', []): + for header in entrie.get('response', {}).get('headers', []): + if header.get('name') == 'etag': + # print(header) + etag = header.get('value') + if etag: + etags.add(etag) + return etags + +def _reprocess_all_hars_etag(): + from lib.objects import Etags + for har_id in get_all_har_ids(): + domain = har_id.split('/')[-1] + domain = domain[:-41] + date = har_id.split('/') + date = f'{date[-4]}{date[-3]}{date[-2]}' + for etag_content in extract_etag_from_har(get_har_content(har_id)): + print(domain, date, etag_content) + etag = Etags.create(etag_content) + etag.add(date, domain) + # # # - - # # # ################################################################################ @@ -1913,5 +1937,5 @@ load_blacklist() # temp_url = '' # r = extract_favicon_from_html(content, temp_url) # print(r) -# _reprocess_all_hars() - +# _reprocess_all_hars_cookie_name() +# _reprocess_all_hars_etag() diff --git a/bin/lib/objects/Etags.py b/bin/lib/objects/Etags.py new file mode 100755 index 00000000..eb41f68c --- /dev/null +++ b/bin/lib/objects/Etags.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +import os +import sys + +from hashlib import sha256 +from flask import url_for + +from pymisp import MISPObject + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.ConfigLoader import ConfigLoader +from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects + +config_loader = ConfigLoader() +r_objects = config_loader.get_db_conn("Kvrocks_Objects") +baseurl = config_loader.get_config_str("Notifications", "ail_domain") +config_loader = None + +# TODO NEW ABSTRACT OBJECT -> daterange for all objects ???? + +class Etag(AbstractDaterangeObject): + """ + AIL Etag Object. + """ + + def __init__(self, obj_id): + super(Etag, self).__init__('etag', obj_id) + + # def get_ail_2_ail_payload(self): + # payload = {'raw': self.get_gzip_content(b64=True), + # 'compress': 'gzip'} + # return payload + + # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\ + def delete(self): + # # TODO: + pass + + def get_content(self, r_type='str'): + if r_type == 'str': + return self._get_field('content') + + def get_link(self, flask_context=False): + if flask_context: + url = url_for('correlation.show_correlation', type=self.type, id=self.id) + else: + url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}' + return url + + # TODO # CHANGE COLOR + def get_svg_icon(self): + return {'style': 'fas', 'icon': '\uf02b', 'color': '#556F65', 'radius': 5} + + def get_misp_object(self): + obj_attrs = [] + obj = MISPObject('etag') + first_seen = self.get_first_seen() + last_seen = self.get_last_seen() + if first_seen: + obj.first_seen = first_seen + if last_seen: + obj.last_seen = last_seen + if not first_seen or not last_seen: + self.logger.warning( + f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}') + + obj_attrs.append(obj.add_attribute('etag', value=self.get_content())) + for obj_attr in obj_attrs: + for tag in self.get_tags(): + obj_attr.add_tag(tag) + return obj + + def get_nb_seen(self): + return self.get_nb_correlation('domain') + + def get_meta(self, options=set()): + meta = self._get_meta(options=options) + meta['id'] = self.id + meta['tags'] = self.get_tags(r_list=True) + meta['content'] = self.get_content() + return meta + + def add(self, date, obj_id): # date = HAR Date + self._add(date, 'domain', '', obj_id) + + def create(self, content, _first_seen=None, _last_seen=None): + if not isinstance(content, str): + content = content.decode() + self._set_field('content', content) + self._create() + + +def create(content): + if isinstance(content, str): + content = content.encode() + obj_id = sha256(content).hexdigest() + etag = Etag(obj_id) + if not etag.exists(): + etag.create(content) + return etag + + +class Etags(AbstractDaterangeObjects): + """ + Etags Objects + """ + def __init__(self): + super().__init__('etag', Etag) + + def sanitize_id_to_search(self, name_to_search): + return name_to_search # TODO + + +# if __name__ == '__main__': +# name_to_search = '98' +# print(search_cves_by_name(name_to_search)) diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py index c54b7dd4..4279c776 100755 --- a/bin/lib/objects/ail_objects.py +++ b/bin/lib/objects/ail_objects.py @@ -18,6 +18,7 @@ from lib.objects import CookiesNames from lib.objects.Cves import Cve from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects from lib.objects.Domains import Domain +from lib.objects import Etags from lib.objects.Favicons import Favicon from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects from lib.objects import Pgps @@ -57,6 +58,8 @@ def get_object(obj_type, subtype, id): return CookiesNames.CookieName(id) elif obj_type == 'cve': return Cve(id) + elif obj_type == 'etag': + return Etags.Etag(id) elif obj_type == 'favicon': return Favicon(id) elif obj_type == 'screenshot': @@ -168,7 +171,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False): obj = get_object(obj_type, subtype, id) meta = obj.get_meta() meta['icon'] = obj.get_svg_icon() - if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'title' or obj_type == 'favicon': + if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon': meta['sparkline'] = obj.get_sparkline() if obj_type == 'cve': meta['cve_search'] = obj.get_cve_search() diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index cc110c35..b40b5cc8 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -51,6 +51,7 @@ from blueprints.objects_decoded import objects_decoded from blueprints.objects_subtypes import objects_subtypes from blueprints.objects_title import objects_title from blueprints.objects_cookie_name import objects_cookie_name +from blueprints.objects_etag import objects_etag Flask_dir = os.environ['AIL_FLASK'] @@ -106,6 +107,7 @@ app.register_blueprint(objects_decoded, url_prefix=baseUrl) app.register_blueprint(objects_subtypes, url_prefix=baseUrl) app.register_blueprint(objects_title, url_prefix=baseUrl) app.register_blueprint(objects_cookie_name, url_prefix=baseUrl) +app.register_blueprint(objects_etag, url_prefix=baseUrl) # ========= =========# diff --git a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py index f6e7feda..d5bb1c82 100644 --- a/var/www/blueprints/correlation.py +++ b/var/www/blueprints/correlation.py @@ -83,6 +83,9 @@ def show_correlation(): correl_option = request.form.get('CookieNameCheck') if correl_option: filter_types.append('cookie-name') + correl_option = request.form.get('EtagCheck') + if correl_option: + filter_types.append('etag') correl_option = request.form.get('CveCheck') if correl_option: filter_types.append('cve') diff --git a/var/www/blueprints/objects_cookie_name.py b/var/www/blueprints/objects_cookie_name.py index ab111ff2..06d6743a 100644 --- a/var/www/blueprints/objects_cookie_name.py +++ b/var/www/blueprints/objects_cookie_name.py @@ -45,8 +45,6 @@ def objects_cookies_names(): else: dict_objects = {} - print(dict_objects) - return render_template("CookieNameDaterange.html", date_from=date_from, date_to=date_to, dict_objects=dict_objects, show_objects=show_objects) diff --git a/var/www/blueprints/objects_etag.py b/var/www/blueprints/objects_etag.py new file mode 100644 index 00000000..ad2b24fd --- /dev/null +++ b/var/www/blueprints/objects_etag.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... +''' + +import os +import sys + +from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort +from flask_login import login_required, current_user + +# Import Role_Manager +from Role_Manager import login_admin, login_analyst, login_read_only + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from lib.objects import Etags +from packages import Date + +# ============ BLUEPRINT ============ +objects_etag = Blueprint('objects_etag', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/etag')) + +# ============ VARIABLES ============ +bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info'] + + +# ============ FUNCTIONS ============ +@objects_etag.route("/objects/etags", methods=['GET']) +@login_required +@login_read_only +def objects_etags(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + show_objects = request.args.get('show_objects') + date = Date.sanitise_date_range(date_from, date_to) + date_from = date['date_from'] + date_to = date['date_to'] + + if show_objects: + dict_objects = Etags.Etags().api_get_meta_by_daterange(date_from, date_to) + else: + dict_objects = {} + + return render_template("EtagDaterange.html", date_from=date_from, date_to=date_to, + dict_objects=dict_objects, show_objects=show_objects) + +@objects_etag.route("/objects/etag/post", methods=['POST']) +@login_required +@login_read_only +def objects_etags_post(): + date_from = request.form.get('date_from') + date_to = request.form.get('date_to') + show_objects = request.form.get('show_objects') + return redirect(url_for('objects_etag.objects_etags', date_from=date_from, date_to=date_to, show_objects=show_objects)) + +@objects_etag.route("/objects/etag/range/json", methods=['GET']) +@login_required +@login_read_only +def objects_etag_range_json(): + date_from = request.args.get('date_from') + date_to = request.args.get('date_to') + date = Date.sanitise_date_range(date_from, date_to) + date_from = date['date_from'] + date_to = date['date_to'] + return jsonify(Etags.Etags().api_get_chart_nb_by_daterange(date_from, date_to)) + +# @objects_etag.route("/objects/etag/search", methods=['POST']) +# @login_required +# @login_read_only +# def objects_etags_names_search(): +# to_search = request.form.get('object_id') +# +# # TODO SANITIZE ID +# # TODO Search all +# cve = Cves.Cve(to_search) +# if not cve.exists(): +# abort(404) +# else: +# return redirect(cve.get_link(flask_context=True)) + +# ============= ROUTES ============== + diff --git a/var/www/templates/correlation/metadata_card_etag.html b/var/www/templates/correlation/metadata_card_etag.html new file mode 100644 index 00000000..cc599227 --- /dev/null +++ b/var/www/templates/correlation/metadata_card_etag.html @@ -0,0 +1,173 @@ + + + +{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%} + {% include 'modals/add_tags.html' %} +{% endwith %} + +{% include 'modals/edit_tag.html' %} + +
+
+

{{ dict_object["metadata"]["content"] }}

+
{{ dict_object["correlation_id"] }}
+ + + {% with obj_type='etag', obj_id=dict_object['correlation_id'], obj_subtype='' %} + {% include 'modals/investigations_register_obj.html' %} + {% endwith %} + + +
+
+ + + + + + diff --git a/var/www/templates/correlation/show_correlation.html b/var/www/templates/correlation/show_correlation.html index 95aa922c..326b637f 100644 --- a/var/www/templates/correlation/show_correlation.html +++ b/var/www/templates/correlation/show_correlation.html @@ -117,6 +117,8 @@ {% include 'correlation/metadata_card_title.html' %} {% elif dict_object["object_type"] == "cookie-name" %} {% include 'correlation/metadata_card_cookie_name.html' %} + {% elif dict_object["object_type"] == "etag" %} + {% include 'correlation/metadata_card_etag.html' %} {% elif dict_object["object_type"] == "item" %} {% include 'correlation/metadata_card_item.html' %} {% endif %} @@ -211,6 +213,10 @@ +
+ + +
diff --git a/var/www/templates/objects/etag/EtagDaterange.html b/var/www/templates/objects/etag/EtagDaterange.html new file mode 100644 index 00000000..9fc7f3e7 --- /dev/null +++ b/var/www/templates/objects/etag/EtagDaterange.html @@ -0,0 +1,602 @@ + + + + + Etags - AIL + + + + + + + + + + + + + + + + + + + + + + + + {% include 'nav_bar.html' %} + +
+
+ + {% include 'sidebars/sidebar_objects.html' %} + +
+ +
+
+
+ +{# {% include 'etag/block_etag_search.html' %}#} + +
+ + +
+ +
+
+
Select a date range :
+
+
+
+ +
+
+
+ +
+
+ + +
+ +
+
+
+ +
+
+
+
+
+
+ + {% if dict_objects %} + {% if date_from|string == date_to|string %} +

{{ date_from }} Etags Name:

+ {% else %} +

{{ date_from }} to {{ date_to }} Etags Name:

+ {% endif %} + + + + + + + + + + + + {% for obj_id in dict_objects %} + + + + + + + + {% endfor %} + +
First SeenLast SeenTotalLast days
{{ dict_objects[obj_id]['content'] }}{{ dict_objects[obj_id]['first_seen'] }}{{ dict_objects[obj_id]['last_seen'] }}{{ dict_objects[obj_id]['nb_seen'] }}
+ + + {% else %} + {% if show_objects %} + {% if date_from|string == date_to|string %} +

{{ date_from }}, No Etag Name

+ {% else %} +

{{ date_from }} to {{ date_to }}, No Etag Name

+ {% endif %} + {% endif %} + {% endif %} +
+ +
+
+ + + + + + + + + + + + + + + + + diff --git a/var/www/templates/sidebars/sidebar_objects.html b/var/www/templates/sidebars/sidebar_objects.html index 89239701..5f7b00b5 100644 --- a/var/www/templates/sidebars/sidebar_objects.html +++ b/var/www/templates/sidebars/sidebar_objects.html @@ -40,6 +40,12 @@ Cookie Name +