From bb03ef532bb1af337f4a345da78db2fc1f005f34 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Thu, 14 Nov 2019 17:05:58 +0100 Subject: [PATCH] chg: [Correlation UI] add correlation blueprint + UI graph correlation --- bin/Crawler.py | 7 + bin/lib/Correlate_object.py | 44 +++-- bin/lib/Decoded.py | 6 + bin/packages/Correlation.py | 12 -- bin/torcrawler/TorSplashCrawler.py | 2 + var/www/Flask_server.py | 2 + var/www/blueprints/correlation.py | 187 ++++++++++++++++++ .../correlation/show_correlation.html | 105 +++++++++- 8 files changed, 333 insertions(+), 32 deletions(-) create mode 100644 var/www/blueprints/correlation.py diff --git a/bin/Crawler.py b/bin/Crawler.py index 848d2b67..c7051b75 100755 --- a/bin/Crawler.py +++ b/bin/Crawler.py @@ -128,6 +128,13 @@ def get_elem_to_crawl(rotation_mode): if message is not None: domain_service_type = service_type break + #load_discovery_queue + if message is None: + for service_type in rotation_mode: + message = redis_crawler.spop('{}_crawler_discovery_queue'.format(service_type)) + if message is not None: + domain_service_type = service_type + break #load_normal_queue if message is None: for service_type in rotation_mode: diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py index eb8dbdc6..95c25f3f 100755 --- a/bin/lib/Correlate_object.py +++ b/bin/lib/Correlate_object.py @@ -22,6 +22,29 @@ config_loader = ConfigLoader.ConfigLoader() r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None +def get_all_correlation_names(): + ''' + Return a list of all available correlations + ''' + return ['pgp', 'cryptocurrency', 'decoded'] + +def get_all_correlation_objects(): + ''' + Return a list of all correllated objects + ''' + return ['domain', 'paste'] + +def get_object_metadata(object_type, correlation_id, type_id=None): + if object_type == 'domain': + return Domain.Domain(correlation_id).get_domain_metadata() + elif object_type == 'paste': + return None + elif object_type == 'decoded': + 
return Decoded.get_decoded_metadata(correlation_id) + elif object_type == 'pgp': + return Pgp.pgp._get_metadata(type_id, correlation_id) + elif object_type == 'cryptocurrency': + return Cryptocurrency.cryptocurrency._get_metadata(type_id, correlation_id) def get_object_correlation(object_type, value, correlation_names, correlation_objects, requested_correl_type=None): if object_type == 'domain': @@ -37,9 +60,6 @@ def get_object_correlation(object_type, value, correlation_names, correlation_ob return {} - - - def get_correlation_node_icon(correlation_name, correlation_type=None, value=None): ''' Used in UI Graph. @@ -165,14 +185,14 @@ def create_node_id(correlation_name, value, correlation_type=''): # # TODO: filter by correlation type => bitcoin, mail, ... -def get_graph_node_object_correlation(object_type, domain, mode, correlation_names, correlation_objects, max_nodes=300, requested_correl_type=None): +def get_graph_node_object_correlation(object_type, root_value, mode, correlation_names, correlation_objects, max_nodes=300, requested_correl_type=None): links = set() nodes = set() - root_node_id = create_node_id(object_type, domain, requested_correl_type) + root_node_id = create_node_id(object_type, root_value, requested_correl_type) nodes.add(root_node_id) - root_correlation = get_object_correlation(object_type, domain, correlation_names, correlation_objects, requested_correl_type=requested_correl_type) + root_correlation = get_object_correlation(object_type, root_value, correlation_names, correlation_objects, requested_correl_type=requested_correl_type) for correl in root_correlation: if correl in ('pgp', 'cryptocurrency'): for correl_type in root_correlation[correl]: @@ -192,8 +212,8 @@ def get_graph_node_object_correlation(object_type, domain, mode, correlation_nam if res: for corr_obj in res: for correl_key_val in res[corr_obj]: - #filter root domain - if correl_key_val == domain: + #filter root value + if correl_key_val == root_value: continue if len(nodes) 
> max_nodes: @@ -222,8 +242,8 @@ def get_graph_node_object_correlation(object_type, domain, mode, correlation_nam for corr_obj in res: if corr_obj in ('decoded', 'domain', 'paste'): for correl_key_val in res[corr_obj]: - #filter root domain - if correl_key_val == domain: + #filter root value + if correl_key_val == root_value: continue if len(nodes) > max_nodes: @@ -241,8 +261,8 @@ def get_graph_node_object_correlation(object_type, domain, mode, correlation_nam if corr_obj in ('pgp', 'cryptocurrency'): for correl_key_type in res[corr_obj]: for correl_key_val in res[corr_obj][correl_key_type]: - #filter root domain - if correl_key_val == domain: + #filter root value + if correl_key_val == root_value: continue if len(nodes) > max_nodes: diff --git a/bin/lib/Decoded.py b/bin/lib/Decoded.py index 1dc73825..40ab9fd9 100755 --- a/bin/lib/Decoded.py +++ b/bin/lib/Decoded.py @@ -24,6 +24,12 @@ def get_decoded_item_type(sha1_string): ''' return r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'estimated_type') +def get_decoded_metadata(sha1_string): + metadata_dict = {} + metadata_dict['first_seen'] = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'first_seen') + metadata_dict['last_seen'] = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'last_seen') + return metadata_dict + def get_decoded_items_list(sha1_string): return r_serv_metadata.zrange('nb_seen_hash:{}'.format(sha1_string), 0, -1) diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py index 97f02d2f..3921163e 100755 --- a/bin/packages/Correlation.py +++ b/bin/packages/Correlation.py @@ -12,18 +12,6 @@ config_loader = ConfigLoader.ConfigLoader() r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None -def get_all_correlation_names(): - ''' - Return a list of all available correlations - ''' - return ['pgp', 'cryptocurrency', 'decoded'] - -def get_all_correlation_objects(): - ''' - Return a list of all correllated objects - ''' - 
return ['domain', 'paste'] - class Correlation(object): def __init__(self, correlation_name, all_correlation_types): diff --git a/bin/torcrawler/TorSplashCrawler.py b/bin/torcrawler/TorSplashCrawler.py index 4e36c1c9..9d20f1f3 100644 --- a/bin/torcrawler/TorSplashCrawler.py +++ b/bin/torcrawler/TorSplashCrawler.py @@ -68,9 +68,11 @@ class TorSplashCrawler(): self.date_month = date['date_month'] self.date_epoch = int(date['epoch']) + # # TODO: timeout in config self.arg_crawler = { 'html': crawler_options['html'], 'wait': 10, 'render_all': 1, + 'timeout': 30, 'har': crawler_options['har'], 'png': crawler_options['png']} diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 63c593b4..aa6d3268 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -35,6 +35,7 @@ import Flask_config # Import Blueprint from blueprints.root import root from blueprints.crawler_splash import crawler_splash +from blueprints.correlation import correlation Flask_dir = os.environ['AIL_FLASK'] @@ -80,6 +81,7 @@ app.config['MAX_CONTENT_LENGTH'] = 900 * 1024 * 1024 # ========= BLUEPRINT =========# app.register_blueprint(root, url_prefix=baseUrl) app.register_blueprint(crawler_splash, url_prefix=baseUrl) +app.register_blueprint(correlation, url_prefix=baseUrl) # ========= =========# # ========= session ======== diff --git a/var/www/blueprints/correlation.py b/var/www/blueprints/correlation.py new file mode 100644 index 00000000..826f0809 --- /dev/null +++ b/var/www/blueprints/correlation.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: correlation endpoints: show correlation, graph node json ...
+''' + +import os +import sys +import json +import random + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response +from flask_login import login_required, current_user, login_user, logout_user + +sys.path.append('modules') +import Flask_config + +# Import Role_Manager +from Role_Manager import create_user_db, check_password_strength, check_user_role_integrity +from Role_Manager import login_admin, login_analyst + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) +import Correlate_object + +bootstrap_label = Flask_config.bootstrap_label + +# ============ BLUEPRINT ============ +correlation = Blueprint('correlation', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/correlation')) + +# ============ VARIABLES ============ + +def show_correlation(correlation_type, type_id, key_id): + if is_valid_type_id(correlation_type, type_id): + key_id_metadata = get_key_id_metadata(correlation_type, type_id, key_id) + if key_id_metadata: + + num_day_sparkline = 6 + date_range_sparkline = get_date_range(num_day_sparkline) + + sparkline_values = list_sparkline_type_id_values(date_range_sparkline, correlation_type, type_id, key_id) + return render_template('show_correlation.html', key_id=key_id, type_id=type_id, + correlation_type=correlation_type, + graph_line_endpoint=get_graph_line_json_endpoint(correlation_type), + key_id_metadata=key_id_metadata, + type_icon=get_icon(correlation_type, type_id), + sparkline_values=sparkline_values) + else: + return '404' + else: + return 'error' + +# ============ FUNCTIONS ============ + +def sanitise_graph_mode(graph_mode): + if graph_mode not in ('inter', 'union'): + return 'union' + else: + return graph_mode + +def sanitise_nb_max_nodes(nb_max_nodes): + try: + nb_max_nodes = int(nb_max_nodes) + if nb_max_nodes < 2: + nb_max_nodes = 300 + except: + nb_max_nodes = 300 + return nb_max_nodes + +def sanitise_correlation_names(correlation_names): + ''' + 
correlation_names ex = 'pgp,cryptocurrency' + ''' + all_correlation_names = Correlate_object.get_all_correlation_names() + if correlation_names is None: + return all_correlation_names + else: + l_correlation_names = [] + for correl in correlation_names.split(','): + if correl in all_correlation_names: + l_correlation_names.append(correl) + if l_correlation_names: + return l_correlation_names + else: + return all_correlation_names + +def sanitise_correlation_objects(correlation_objects): + ''' + correlation_objects ex = 'domain,paste' + ''' + all_correlation_objects = Correlate_object.get_all_correlation_objects() + if correlation_objects is None: + return all_correlation_objects + else: + l_correlation_objects = [] + for correl in correlation_objects.split(','): + if correl in all_correlation_objects: + l_correlation_objects.append(correl) + if l_correlation_objects: + return l_correlation_objects + else: + return all_correlation_objects + +# ============= ROUTES ============== +@correlation.route('/correlation/show_correlation', methods=['GET', 'POST']) # GET + POST +@login_required +@login_analyst +def showCorrelationDomain(): + if request.method == 'POST': + object_type = request.form.get('object_type') + type_id = request.form.get('type_id') + correlation_id = request.form.get('correlation_id') + max_nodes = request.form.get('max_nb_nodes_in') + mode = request.form.get('mode') + if mode: + mode = 'inter' + else: + mode = 'union' + + ## get all selected correlations + correlation_names = [] + correlation_objects = [] + #correlation_names + correl_option = request.form.get('CryptocurrencyCheck') + if correl_option: + correlation_names.append('cryptocurrency') + correl_option = request.form.get('PgpCheck') + if correl_option: + correlation_names.append('pgp') + correl_option = request.form.get('DecodedCheck') + if correl_option: + correlation_names.append('decoded') + # correlation_objects + correl_option = request.form.get('DomainCheck') + if correl_option: +
correlation_objects.append('domain') + correl_option = request.form.get('PasteCheck') + if correl_option: + correlation_objects.append('paste') + + # list as params + correlation_names = ",".join(correlation_names) + correlation_objects = ",".join(correlation_objects) + + # redirect to keep history and bookmark + return redirect(url_for('correlation.showCorrelationDomain', object_type=object_type, type_id=type_id, correlation_id=correlation_id, mode=mode, + max_nodes=max_nodes, correlation_names=correlation_names, correlation_objects=correlation_objects)) + + # request.method == 'GET' + else: + object_type = request.args.get('object_type') + type_id = request.args.get('type_id') + correlation_id = request.args.get('correlation_id') + max_nodes = sanitise_nb_max_nodes(request.args.get('max_nodes')) + mode = sanitise_graph_mode(request.args.get('mode')) + + correlation_names = sanitise_correlation_names(request.args.get('correlation_names')) + correlation_objects = sanitise_correlation_objects(request.args.get('correlation_objects')) + + dict_object = {"object_type": object_type, "correlation_id": correlation_id} + dict_object["max_nodes"] = max_nodes + dict_object["mode"] = mode + dict_object["correlation_names"] = correlation_names + dict_object["correlation_names_str"] = ",".join(correlation_names) + dict_object["correlation_objects"] = correlation_objects + dict_object["correlation_objects_str"] = ",".join(correlation_objects) + dict_object["metadata"] = Correlate_object.get_object_metadata(object_type, correlation_id, type_id=type_id) + if type_id: + dict_object["metadata"]['type_id'] = type_id + return render_template("show_correlation.html", dict_object=dict_object) + +@correlation.route('/correlation/graph_node_json') +@login_required +@login_analyst +def graph_node_json(): # # TODO: use post + correlation_id = request.args.get('correlation_id') + type_id = request.args.get('type_id') + object_type = request.args.get('object_type') + max_nodes = 
sanitise_nb_max_nodes(request.args.get('max_nodes')) + + correlation_names = sanitise_correlation_names(request.args.get('correlation_names')) + correlation_objects = sanitise_correlation_objects(request.args.get('correlation_objects')) + + mode = sanitise_graph_mode(request.args.get('mode')) + + res = Correlate_object.get_graph_node_object_correlation(object_type, correlation_id, mode, correlation_names, correlation_objects, requested_correl_type=type_id, max_nodes=max_nodes) + return jsonify(res) diff --git a/var/www/templates/correlation/show_correlation.html b/var/www/templates/correlation/show_correlation.html index 352c9b11..1cf70f67 100644 --- a/var/www/templates/correlation/show_correlation.html +++ b/var/www/templates/correlation/show_correlation.html @@ -85,7 +85,44 @@
- +
+
+

{{ dict_object["correlation_id"] }} :

+
    +
  • +
    +
    + + + + + + {% if dict_object["metadata"]['first_seen'] %} + + + {% endif %} + + + + + + + {% if dict_object["metadata"]['first_seen'] %} + + + {% endif %} + + +
    Object typetypeFirst_seenLast_seen
    {{ dict_object["object_type"] }}  {{ dict_object["metadata"]["type_id"] }}{{ dict_object["metadata"]['first_seen'] }}{{ dict_object["metadata"]['last_seen'] }}
    +
    +
    +
    +
    +
    +
  • +
+
+
@@ -103,7 +140,7 @@
-
+
Graph
@@ -112,11 +149,63 @@  Resize Graph - {% if correlation_type=='pgpdump' %} - {% include 'decoded/show_helper_pgpdump.html' %} - {% elif correlation_type=='cryptocurrency' %} - {% include 'decoded/show_helper_cryptocurrency.html' %} - {% endif %} +
    +
  • Select Correlation
  • +
  • + +
    + + + + +
    + + +
    +
    + + +
    +
    + + +
    +
    + + +
    +
    + + +
    + +
  • +
  • +
    + Union   +
    + + +
    +
    +
  • +
  • + +
    + + +
    + +
    + +
    + + + +
  • +
+ + {% include 'decoded/show_helper_pgpdump.html' %}
@@ -133,7 +222,7 @@ var all_graph = {}; $(document).ready(function(){ $("#page-Decoded").addClass("active"); - all_graph.node_graph = create_graph("{{ url_for('correlation.test') }}"); + all_graph.node_graph = create_graph("{{ url_for('correlation.graph_node_json') }}?correlation_id={{ dict_object["correlation_id"] }}&object_type={{ dict_object["object_type"] }}&mode={{ dict_object["mode"] }}&correlation_names={{ dict_object["correlation_names_str"] }}&correlation_objects={{ dict_object["correlation_objects_str"] }}&max_nodes={{dict_object["max_nodes"]}}{% if 'type_id' in dict_object["metadata"] %}&type_id={{ dict_object["metadata"]["type_id"] }}{% endif %}"); all_graph.onResize(); });