diff --git a/bin/crawlers/Crawler.py b/bin/crawlers/Crawler.py
index be615993..2b2f35c6 100755
--- a/bin/crawlers/Crawler.py
+++ b/bin/crawlers/Crawler.py
@@ -17,6 +17,7 @@ from lib import ail_logger
from lib import crawlers
from lib.ConfigLoader import ConfigLoader
from lib.objects import CookiesNames
+from lib.objects import Etags
from lib.objects.Domains import Domain
from lib.objects.Items import Item
from lib.objects import Screenshots
@@ -288,6 +289,10 @@ class Crawler(AbstractModule):
print(cookie_name)
cookie = CookiesNames.create(cookie_name)
cookie.add(self.date.replace('/', ''), self.domain.id)
+ for etag_content in crawlers.extract_etag_from_har(entries['har']):
+ print(etag_content)
+ etag = Etags.create(etag_content)
+ etag.add(self.date.replace('/', ''), self.domain.id)
# Next Children
entries_children = entries.get('children')
diff --git a/bin/lib/ail_core.py b/bin/lib/ail_core.py
index c52db274..e0fd3a17 100755
--- a/bin/lib/ail_core.py
+++ b/bin/lib/ail_core.py
@@ -15,7 +15,7 @@ config_loader = ConfigLoader()
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
-AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'favicon', 'item', 'pgp',
+AIL_OBJECTS = sorted({'cookie-name', 'cve', 'cryptocurrency', 'decoded', 'domain', 'etag', 'favicon', 'item', 'pgp',
'screenshot', 'title', 'username'})
def get_ail_uuid():
diff --git a/bin/lib/correlations_engine.py b/bin/lib/correlations_engine.py
index 8e29837d..1a2081ac 100755
--- a/bin/lib/correlations_engine.py
+++ b/bin/lib/correlations_engine.py
@@ -45,7 +45,8 @@ CORRELATION_TYPES_BY_OBJ = {
"cryptocurrency": ["domain", "item"],
"cve": ["domain", "item"],
"decoded": ["domain", "item"],
- "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "favicon", "item", "pgp", "title", "screenshot", "username"],
+ "domain": ["cve", "cookie-name", "cryptocurrency", "decoded", "etag", "favicon", "item", "pgp", "title", "screenshot", "username"],
+ "etag": ["domain"],
"favicon": ["domain", "item"], # TODO Decoded
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
"pgp": ["domain", "item"],
diff --git a/bin/lib/crawlers.py b/bin/lib/crawlers.py
index 6f6bf5b7..7c98537e 100755
--- a/bin/lib/crawlers.py
+++ b/bin/lib/crawlers.py
@@ -264,6 +264,7 @@ def extract_author_from_html(html):
if keywords:
return keywords['content']
return ''
+
# # # - - # # #
@@ -299,9 +300,10 @@ def get_all_har_ids():
except (TypeError, ValueError):
pass
- for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]:
- har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR)
- har_ids.append(har_id)
+ if os.path.exists(today_root_dir):
+ for file in [f for f in os.listdir(today_root_dir) if os.path.isfile(os.path.join(today_root_dir, f))]:
+ har_id = os.path.relpath(os.path.join(today_root_dir, file), HAR_DIR)
+ har_ids.append(har_id)
for ydir in sorted(dirs_year, reverse=False):
search_dear = os.path.join(HAR_DIR, ydir)
@@ -312,14 +314,13 @@ def get_all_har_ids():
har_ids.append(har_id)
return har_ids
-def extract_cookies_names_from_har_by_har_id(har_id):
+def get_har_content(har_id):
har_path = os.path.join(HAR_DIR, har_id)
with open(har_path) as f:
try:
- har_content = json.loads(f.read())
+ return json.loads(f.read())
except json.decoder.JSONDecodeError:
- har_content = {}
- return extract_cookies_names_from_har(har_content)
+ return {}
def extract_cookies_names_from_har(har):
cookies = set()
@@ -334,18 +335,41 @@ def extract_cookies_names_from_har(har):
cookies.add(name)
return cookies
-def _reprocess_all_hars():
+def _reprocess_all_hars_cookie_name():
from lib.objects import CookiesNames
for har_id in get_all_har_ids():
domain = har_id.split('/')[-1]
domain = domain[:-41]
date = har_id.split('/')
date = f'{date[-4]}{date[-3]}{date[-2]}'
- for cookie_name in extract_cookies_names_from_har_by_har_id(har_id):
+ for cookie_name in extract_cookies_names_from_har(get_har_content(har_id)):
print(domain, date, cookie_name)
cookie = CookiesNames.create(cookie_name)
cookie.add(date, domain)
+def extract_etag_from_har(har):  # TODO check response url
+    """Return the set of non-empty ETag response-header values found in the HAR dict `har`."""
+    etags = set()
+    for entry in har.get('log', {}).get('entries', []):
+        for header in entry.get('response', {}).get('headers', []):
+            # HTTP header field names are case-insensitive: accept 'ETag', 'Etag', ... as well as 'etag'.
+            name, etag = header.get('name'), header.get('value')
+            if isinstance(name, str) and name.lower() == 'etag' and etag:
+                etags.add(etag)
+    return etags
+
+def _reprocess_all_hars_etag():
+    """Re-read every stored HAR and register each ETag found in it for the HAR's domain and date."""
+    from lib.objects import Etags
+    for har_id in get_all_har_ids():
+        parts = har_id.split('/')
+        # date = the three directory names preceding the file name, concatenated
+        date = f'{parts[-4]}{parts[-3]}{parts[-2]}'
+        domain = parts[-1][:-41]  # file name without its 41-character suffix
+        for etag_content in extract_etag_from_har(get_har_content(har_id)):
+            print(domain, date, etag_content)
+            Etags.create(etag_content).add(date, domain)
+
# # # - - # # #
################################################################################
@@ -1913,5 +1937,5 @@ load_blacklist()
# temp_url = ''
# r = extract_favicon_from_html(content, temp_url)
# print(r)
-# _reprocess_all_hars()
-
+# _reprocess_all_hars_cookie_name()
+# _reprocess_all_hars_etag()
diff --git a/bin/lib/objects/Etags.py b/bin/lib/objects/Etags.py
new file mode 100755
index 00000000..eb41f68c
--- /dev/null
+++ b/bin/lib/objects/Etags.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+
+from hashlib import sha256
+from flask import url_for
+
+from pymisp import MISPObject
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.ConfigLoader import ConfigLoader
+from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
+
+config_loader = ConfigLoader()
+r_objects = config_loader.get_db_conn("Kvrocks_Objects")
+baseurl = config_loader.get_config_str("Notifications", "ail_domain")
+config_loader = None
+
+# TODO NEW ABSTRACT OBJECT -> daterange for all objects ????
+
+class Etag(AbstractDaterangeObject):
+    """AIL Etag Object."""
+
+    def __init__(self, obj_id):
+        super(Etag, self).__init__('etag', obj_id)
+
+    # def get_ail_2_ail_payload(self):
+    #     payload = {'raw': self.get_gzip_content(b64=True),
+    #                'compress': 'gzip'}
+    #     return payload
+
+    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
+    def delete(self):
+        """Not implemented yet (TODO): currently does nothing."""
+
+    def get_content(self, r_type='str'):
+        """Return the stored 'content' field when r_type is 'str', otherwise None."""
+        if r_type == 'str':
+            return self._get_field('content')
+        return None
+
+    def get_link(self, flask_context=False):
+        """Return the correlation page URL: via url_for() in a Flask context, else built on baseurl."""
+        if flask_context:
+            return url_for('correlation.show_correlation', type=self.type, id=self.id)
+        return f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
+
+    def get_svg_icon(self):
+        """Return the icon style dict (style, icon, color, radius) for this object. TODO: change color."""
+        return {'style': 'fas', 'icon': '\uf02b', 'color': '#556F65', 'radius': 5}
+
+    def get_misp_object(self):
+        """Build a MISPObject('etag'): seen dates when available, an 'etag' attribute carrying all tags."""
+        obj = MISPObject('etag')
+        first_seen = self.get_first_seen()
+        last_seen = self.get_last_seen()
+        if first_seen:
+            obj.first_seen = first_seen
+        if last_seen:
+            obj.last_seen = last_seen
+        if not first_seen or not last_seen:
+            self.logger.warning(
+                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')
+        obj_attr = obj.add_attribute('etag', value=self.get_content())
+        for tag in self.get_tags():
+            obj_attr.add_tag(tag)
+        return obj
+
+    def get_nb_seen(self):
+        """Return the value reported by get_nb_correlation('domain')."""
+        return self.get_nb_correlation('domain')
+
+    def get_meta(self, options=None):
+        """Return the base metadata for `options` (None -> empty set) plus 'id', 'tags' and 'content'."""
+        meta = self._get_meta(options=set() if options is None else options)
+        meta['id'] = self.id
+        meta['tags'] = self.get_tags(r_list=True)
+        meta['content'] = self.get_content()
+        return meta
+
+    def add(self, date, obj_id):  # date = HAR Date
+        """Pass `date` and the domain id `obj_id` to the inherited `_add` with object type 'domain'."""
+        self._add(date, 'domain', '', obj_id)
+
+    def create(self, content, _first_seen=None, _last_seen=None):
+        """Store `content` in the 'content' field (non-str input is decoded first), then call `_create`."""
+        if not isinstance(content, str):
+            content = content.decode()
+        self._set_field('content', content)
+        self._create()
+
+
+def create(content):
+    """Return the Etag whose id is the SHA-256 hex digest of `content`, creating it when it does not exist."""
+    raw = content.encode() if isinstance(content, str) else content
+    etag = Etag(sha256(raw).hexdigest())
+    if etag.exists():
+        return etag
+    etag.create(raw)
+    return etag
+
+
+class Etags(AbstractDaterangeObjects):
+    """Etags Objects: daterange objects helper initialised for type 'etag' and the Etag class."""
+
+    def __init__(self):
+        super(Etags, self).__init__('etag', Etag)
+
+    def sanitize_id_to_search(self, name_to_search):
+        """Return `name_to_search` unchanged; no sanitization is done yet (TODO)."""
+        return name_to_search
+
+
+# if __name__ == '__main__':
+# name_to_search = '98'
+# print(search_cves_by_name(name_to_search))
diff --git a/bin/lib/objects/ail_objects.py b/bin/lib/objects/ail_objects.py
index c54b7dd4..4279c776 100755
--- a/bin/lib/objects/ail_objects.py
+++ b/bin/lib/objects/ail_objects.py
@@ -18,6 +18,7 @@ from lib.objects import CookiesNames
from lib.objects.Cves import Cve
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
from lib.objects.Domains import Domain
+from lib.objects import Etags
from lib.objects.Favicons import Favicon
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
from lib.objects import Pgps
@@ -57,6 +58,8 @@ def get_object(obj_type, subtype, id):
return CookiesNames.CookieName(id)
elif obj_type == 'cve':
return Cve(id)
+ elif obj_type == 'etag':
+ return Etags.Etag(id)
elif obj_type == 'favicon':
return Favicon(id)
elif obj_type == 'screenshot':
@@ -168,7 +171,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
obj = get_object(obj_type, subtype, id)
meta = obj.get_meta()
meta['icon'] = obj.get_svg_icon()
- if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'title' or obj_type == 'favicon':
+ if subtype or obj_type == 'cookie-name' or obj_type == 'cve' or obj_type == 'etag' or obj_type == 'title' or obj_type == 'favicon':
meta['sparkline'] = obj.get_sparkline()
if obj_type == 'cve':
meta['cve_search'] = obj.get_cve_search()
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index cc110c35..b40b5cc8 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -51,6 +51,7 @@ from blueprints.objects_decoded import objects_decoded
from blueprints.objects_subtypes import objects_subtypes
from blueprints.objects_title import objects_title
from blueprints.objects_cookie_name import objects_cookie_name
+from blueprints.objects_etag import objects_etag
Flask_dir = os.environ['AIL_FLASK']
@@ -106,6 +107,7 @@ app.register_blueprint(objects_decoded, url_prefix=baseUrl)
app.register_blueprint(objects_subtypes, url_prefix=baseUrl)
app.register_blueprint(objects_title, url_prefix=baseUrl)
app.register_blueprint(objects_cookie_name, url_prefix=baseUrl)
+app.register_blueprint(objects_etag, url_prefix=baseUrl)
# ========= =========#
diff --git a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py
index f6e7feda..d5bb1c82 100644
--- a/var/www/blueprints/correlation.py
+++ b/var/www/blueprints/correlation.py
@@ -83,6 +83,9 @@ def show_correlation():
correl_option = request.form.get('CookieNameCheck')
if correl_option:
filter_types.append('cookie-name')
+ correl_option = request.form.get('EtagCheck')
+ if correl_option:
+ filter_types.append('etag')
correl_option = request.form.get('CveCheck')
if correl_option:
filter_types.append('cve')
diff --git a/var/www/blueprints/objects_cookie_name.py b/var/www/blueprints/objects_cookie_name.py
index ab111ff2..06d6743a 100644
--- a/var/www/blueprints/objects_cookie_name.py
+++ b/var/www/blueprints/objects_cookie_name.py
@@ -45,8 +45,6 @@ def objects_cookies_names():
else:
dict_objects = {}
- print(dict_objects)
-
return render_template("CookieNameDaterange.html", date_from=date_from, date_to=date_to,
dict_objects=dict_objects, show_objects=show_objects)
diff --git a/var/www/blueprints/objects_etag.py b/var/www/blueprints/objects_etag.py
new file mode 100644
index 00000000..ad2b24fd
--- /dev/null
+++ b/var/www/blueprints/objects_etag.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+'''
+    Blueprint Flask: etag objects endpoints: daterange view, daterange chart json ...
+'''
+
+import os
+import sys
+
+from flask import render_template, jsonify, request, Blueprint, redirect, url_for, Response, abort
+from flask_login import login_required, current_user
+
+# Import Role_Manager
+from Role_Manager import login_admin, login_analyst, login_read_only
+
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from lib.objects import Etags
+from packages import Date
+
+# ============ BLUEPRINT ============
+objects_etag = Blueprint('objects_etag', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/etag'))
+
+# ============ VARIABLES ============
+bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
+
+
+# ============ FUNCTIONS ============
+@objects_etag.route("/objects/etags", methods=['GET'])
+@login_required
+@login_read_only
+def objects_etags():
+    """
+    Render the etag daterange page (EtagDaterange.html).
+
+    Query args: date_from, date_to (passed through Date.sanitise_date_range) and show_objects;
+    etag metadata for the range is only loaded when show_objects is set.
+    """
+    args = request.args
+    show_objects = args.get('show_objects')
+    date_range = Date.sanitise_date_range(args.get('date_from'), args.get('date_to'))
+    date_from, date_to = date_range['date_from'], date_range['date_to']
+    # Metadata is fetched on demand only; the template receives an empty dict otherwise.
+    dict_objects = Etags.Etags().api_get_meta_by_daterange(date_from, date_to) if show_objects else {}
+    return render_template("EtagDaterange.html", date_from=date_from, date_to=date_to,
+                           dict_objects=dict_objects, show_objects=show_objects)
+
+@objects_etag.route("/objects/etag/post", methods=['POST'])
+@login_required
+@login_read_only
+def objects_etags_post():
+    """Redirect the posted date_from / date_to / show_objects form fields to the GET etag daterange view."""
+    form = request.form
+    params = {field: form.get(field) for field in ('date_from', 'date_to', 'show_objects')}
+    return redirect(url_for('objects_etag.objects_etags', **params))
+
+@objects_etag.route("/objects/etag/range/json", methods=['GET'])
+@login_required
+@login_read_only
+def objects_etag_range_json():
+    """Return, as JSON, the result of Etags.api_get_chart_nb_by_daterange for the requested date range."""
+    # date_from / date_to come from the query string and are passed through Date.sanitise_date_range.
+    date_range = Date.sanitise_date_range(request.args.get('date_from'), request.args.get('date_to'))
+    date_from, date_to = date_range['date_from'], date_range['date_to']
+    chart_data = Etags.Etags().api_get_chart_nb_by_daterange(date_from, date_to)
+    return jsonify(chart_data)
+
+# @objects_etag.route("/objects/etag/search", methods=['POST'])
+# @login_required
+# @login_read_only
+# def objects_etags_names_search():
+# to_search = request.form.get('object_id')
+#
+# # TODO SANITIZE ID
+# # TODO Search all
+# cve = Cves.Cve(to_search)
+# if not cve.exists():
+# abort(404)
+# else:
+# return redirect(cve.get_link(flask_context=True))
+
+# ============= ROUTES ==============
+
diff --git a/var/www/templates/correlation/metadata_card_etag.html b/var/www/templates/correlation/metadata_card_etag.html
new file mode 100644
index 00000000..cc599227
--- /dev/null
+++ b/var/www/templates/correlation/metadata_card_etag.html
@@ -0,0 +1,173 @@
+
+
+
+{% with modal_add_tags=dict_object['metadata_card']['add_tags_modal']%}
+ {% include 'modals/add_tags.html' %}
+{% endwith %}
+
+{% include 'modals/edit_tag.html' %}
+
+
+
+
+
+
+
+
+
+
diff --git a/var/www/templates/correlation/show_correlation.html b/var/www/templates/correlation/show_correlation.html
index 95aa922c..326b637f 100644
--- a/var/www/templates/correlation/show_correlation.html
+++ b/var/www/templates/correlation/show_correlation.html
@@ -117,6 +117,8 @@
{% include 'correlation/metadata_card_title.html' %}
{% elif dict_object["object_type"] == "cookie-name" %}
{% include 'correlation/metadata_card_cookie_name.html' %}
+ {% elif dict_object["object_type"] == "etag" %}
+ {% include 'correlation/metadata_card_etag.html' %}
{% elif dict_object["object_type"] == "item" %}
{% include 'correlation/metadata_card_item.html' %}
{% endif %}
@@ -211,6 +213,10 @@
+
+
+
+