chg: [Correlation domain] add correlation graph by domain (union + intersection)

pull/421/head
Terrtia 2019-11-12 17:08:52 +01:00
parent 35b63f7bf4
commit 504e45a43b
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
6 changed files with 696 additions and 25 deletions

250
bin/lib/Correlate_object.py Executable file
View File

@ -0,0 +1,250 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import uuid
import redis
from flask import url_for
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Decoded
import Domain
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Pgp
import Cryptocurrency
config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
def get_correlation_node_icon(correlation_name, correlation_type=None, value=None):
    '''
    Used in UI Graph.
    Return the display style (font awesome icon, color, radius) of a graph node.

    :param correlation_name: correlation name
        ('pgp', 'cryptocurrency', 'decoded', 'domain' or 'paste')
    :type correlation_name: str
    :param correlation_type: correlation type (e.g. 'key', 'bitcoin').
        Ignored for 'decoded': the type is derived from the stored estimated type.
    :type correlation_type: str, optional
    :param value: correlation value, needed for 'decoded', 'domain' and 'paste'
    :type value: str, optional

    :return: a dictionary {icon_class, icon_text, node_color, node_radius}
    :rtype: dict
    '''
    icon_class = 'fas'
    icon_text = ''
    node_color = "#332288"
    node_radius = 6

    if correlation_name == "pgp":
        node_color = '#44AA99'
        if correlation_type == 'key':
            icon_text = '\uf084'
        elif correlation_type == 'name':
            icon_text = '\uf507'
        elif correlation_type == 'mail':
            icon_text = '\uf1fa'
        else:
            # glyph code point of the "times" icon: every other branch emits a
            # code point, the literal word 'times' would be rendered as text
            icon_text = '\uf00d'

    elif correlation_name == 'cryptocurrency':
        node_color = '#DDCC77'
        if correlation_type == 'bitcoin':
            icon_class = 'fab'
            icon_text = '\uf15a'
        elif correlation_type == 'monero':
            icon_class = 'fab'
            icon_text = '\uf3d0'
        elif correlation_type == 'ethereum':
            icon_class = 'fab'
            icon_text = '\uf42e'
        else:
            icon_text = '\uf51e'

    elif correlation_name == 'decoded':
        node_color = '#88CCEE'
        # the estimated type may be missing (None): fall back to the generic icon
        estimated_type = Decoded.get_decoded_item_type(value) or ''
        correlation_type = estimated_type.split('/')[0]
        if correlation_type == 'application':
            icon_text = '\uf15b'
        elif correlation_type == 'audio':
            icon_text = '\uf1c7'
        elif correlation_type == 'image':
            icon_text = '\uf1c5'
        elif correlation_type == 'text':
            icon_text = '\uf15c'
        else:
            icon_text = '\uf249'

    elif correlation_name == 'domain':
        node_radius = 5
        node_color = '#CC6677'
        if Domain.get_domain_type(value) == 'onion':
            icon_text = '\uf06e'
        else:
            icon_class = 'fab'
            icon_text = '\uf13b'

    elif correlation_name == 'paste':
        # Item is not imported at module level in this file; import it lazily.
        # NOTE(review): assumes the Item module is reachable through the
        # 'lib/' or 'packages/' paths appended to sys.path - confirm.
        import Item
        node_radius = 5
        if Item.is_crawled(value):
            node_color = 'red'
        else:
            node_color = '#332288'

    return {"icon_class": icon_class, "icon_text": icon_text, "node_color": node_color, "node_radius": node_radius}
def get_item_url(correlation_name, value, correlation_type=None):
    '''
    Return the UI url of a correlation object, or '#' for an unknown
    correlation name.

    Warning: use only in flask (relies on url_for)

    :param correlation_name: 'pgp', 'cryptocurrency', 'decoded', 'domain' or 'paste'
    :param value: correlation value
    :param correlation_type: correlation type, only used by 'pgp' and 'cryptocurrency'
    '''
    if correlation_name == "pgp":
        return url_for('hashDecoded.show_pgpdump', type_id=correlation_type, key_id=value)
    if correlation_name == 'cryptocurrency':
        return url_for('hashDecoded.show_cryptocurrency', type_id=correlation_type, key_id=value)
    if correlation_name == 'decoded':
        return url_for('hashDecoded.showHash', hash=value)
    if correlation_name == 'domain':
        return url_for('crawler_splash.showDomain', domain=value)
    if correlation_name == 'paste':
        return url_for('showsavedpastes.showsavedpaste', paste=value)
    # unknown correlation name: no dedicated page
    return '#'
# # TODO: refactor
# # get object description, return dict, used by correlation
# def get_object_desc(object_type, item_value, correlation_name, correlation_type=None):
# if object_type=="domain":
# return Domain.get_object_desc(item_value)
# if object_type=="correlation":
# return Domain.get_object_desc(item_value)
# {"name": self.correlation_name, "type": correlation_type, "id": correlation_value, "object": correl_object}
#
#
# # # TODO: sanitize dict_correlation_to_check
# def get_object_correlation(object, object_value, mode, nb_max_elem=400, dict_correlation_to_check=[], depth_limit=1):
# '''
# Return all correlation of a given item id.
#
# :param l_items_to_correlate: list of dict
# :type l_items_to_correlate: list
# :param mode: correlation mode
# mode == intersection, union
# union: show all related objects
# intersection: show only direct correlation
# :type mode: str
# :param nb_max_elem: max nb of nodes
# :type nb_max_elem: int, optional
#
#
# '''
# dict_item_desc = {}
# dict_correlation = object.get_correlation(value, dict_correlation_to_check)
def create_graph_links(links_set):
    '''
    Convert (source, target) pairs into the list of link dicts
    ({"source": ..., "target": ...}) returned to the graph UI.

    :param links_set: iterable of (source node id, target node id) pairs
    :return: a list of dict, one per link
    :rtype: list
    '''
    return [{"source": edge[0], "target": edge[1]} for edge in links_set]
def create_graph_nodes(nodes_set, root_node_id):
    '''
    Convert node ids into the list of node dicts returned to the graph UI.

    Each node dict contains: id, style (see get_correlation_node_icon),
    text (the correlation value) and url (see get_item_url).
    The root node is highlighted (orange, larger radius).

    :param nodes_set: iterable of node ids, formatted as
        'correlation_name;correlation_type;value' (see create_node_id)
    :param root_node_id: node id of the graph root
    :return: a list of dict, one per node
    :rtype: list
    '''
    graph_nodes_list = []
    for node_id in nodes_set:
        # maxsplit=2: a node id has exactly three fields and the value
        # (last field) may itself contain ';'
        correlation_name, correlation_type, value = node_id.split(';', 2)
        dict_node = {"id": node_id}
        dict_node['style'] = get_correlation_node_icon(correlation_name, correlation_type, value)
        dict_node['text'] = value
        if node_id == root_node_id:
            dict_node["style"]["node_color"] = 'orange'
            dict_node["style"]["node_radius"] = 7
        dict_node['url'] = get_item_url(correlation_name, value, correlation_type)
        graph_nodes_list.append(dict_node)
    return graph_nodes_list
def create_node_id(correlation_name, value, correlation_type=''):
    '''
    Build a graph node id: 'correlation_name;correlation_type;value'.

    :param correlation_name: correlation name
    :param value: correlation value
    :param correlation_type: correlation type (empty when not applicable)
    :rtype: str
    '''
    id_fields = (correlation_name, correlation_type, value)
    return '{};{};{}'.format(*id_fields)
def _add_shared_domain_links(nodes, links, root_node_id, correl_node_id, domain, related_domains, mode):
    '''
    Link every other domain sharing a correlation to that correlation node.

    nodes and links are updated in place. In "inter" mode the root domain is
    linked to the correlation only when at least one other domain shares it.
    '''
    for related_domain in related_domains:
        # filter root domain
        if related_domain == domain:
            continue
        related_node_id = create_node_id('domain', related_domain)
        nodes.add(related_node_id)
        nodes.add(correl_node_id)
        links.add((related_node_id, correl_node_id))
        if mode == "inter":
            links.add((root_node_id, correl_node_id))


def get_graph_node_domain_correlation(domain, mode, max_nodes=400):
    '''
    Build the correlation graph of a domain.

    :param domain: root domain
    :type domain: str
    :param mode: correlation mode
        union: show all objects correlated with the root domain
        inter: show only the objects shared with at least one other domain
    :type mode: str
    :param max_nodes: max nb of nodes (currently not enforced)
    :type max_nodes: int, optional

    :return: a dictionary {"nodes": list of node dict, "links": list of link dict}
    :rtype: dict
    '''
    links = set()
    nodes = set()

    root_node_id = create_node_id('domain', domain)
    nodes.add(root_node_id)

    domain_correlation = Domain.get_domain_all_correlation(domain)
    for correl in domain_correlation:
        if correl in ('pgp', 'cryptocurrency'):
            for correl_type in domain_correlation[correl]:
                for correl_val in domain_correlation[correl][correl_type]:
                    correl_node_id = create_node_id(correl, correl_val, correl_type)

                    if mode == "union":
                        nodes.add(correl_node_id)
                        links.add((root_node_id, correl_node_id))

                    # get PGP correlation
                    if correl == 'pgp':
                        res = Pgp.pgp.get_correlation_obj_domain(correl_val, correlation_type=correl_type) # change function for item ?
                    # get Cryptocurrency correlation
                    else:
                        res = Cryptocurrency.cryptocurrency.get_correlation_obj_domain(correl_val, correlation_type=correl_type)

                    if res:
                        _add_shared_domain_links(nodes, links, root_node_id, correl_node_id, domain, res, mode)

        elif correl == 'decoded':
            for correl_val in domain_correlation[correl]:
                correl_node_id = create_node_id(correl, correl_val)

                if mode == "union":
                    nodes.add(correl_node_id)
                    links.add((root_node_id, correl_node_id))

                res = Decoded.get_decoded_domain_item(correl_val)
                if res:
                    _add_shared_domain_links(nodes, links, root_node_id, correl_node_id, domain, res, mode)

    return {"nodes": create_graph_nodes(nodes, root_node_id), "links": create_graph_links(links)}
######## API EXPOSED ########
######## ########

View File

@ -16,6 +16,14 @@ config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
def get_decoded_item_type(sha1_string):
    '''
    Return the estimated type of a given decoded item
    (field 'estimated_type' of its metadata hash).

    :param sha1_string: sha1 of the decoded item
    '''
    metadata_key = 'metadata_hash:{}'.format(sha1_string)
    return r_serv_metadata.hget(metadata_key, 'estimated_type')
def _get_decoded_items_list(sha1_string):
    '''
    Return every member of the sorted set 'nb_seen_hash:<sha1_string>'.

    :param sha1_string: sha1 of the decoded item
    '''
    seen_key = 'nb_seen_hash:{}'.format(sha1_string)
    return r_serv_metadata.zrange(seen_key, 0, -1)
@ -43,6 +51,18 @@ def get_domain_decoded_item(domain):
else:
return []
def get_decoded_domain_item(sha1_string):
    '''
    Return all domains of a given decoded item.

    :param sha1_string: sha1 of the decoded item
    :return: a list of domains, empty if none is known
    :rtype: list
    '''
    domains = r_serv_metadata.smembers('domain_hash:{}'.format(sha1_string))
    if not domains:
        return []
    return list(domains)
def save_domain_decoded(domain, sha1_string):
r_serv_metadata.sadd('hash_domain:{}'.format(domain), sha1_string) # domain - hash map
r_serv_metadata.sadd('domain_hash:{}'.format(sha1_string), domain) # hash - domain ma

View File

@ -208,6 +208,7 @@ def get_domain_decoded(domain):
'''
return Decoded.get_domain_decoded_item(domain)
def get_domain_all_correlation(domain, correlation_type=None, get_nb=False):
'''
Retun all correlation of a given domain.
@ -230,6 +231,7 @@ def get_domain_all_correlation(domain, correlation_type=None, get_nb=False):
domain_correl['decoded'] = res
return domain_correl
# TODO: handle port
def get_domain_history(domain, domain_type, port): # TODO: add date_range: from to + nb_elem
'''
@ -377,7 +379,7 @@ class Domain(object):
def get_domain_correlation(self):
    '''
    Return all correlations of this domain.

    Delegates to get_domain_all_correlation with get_nb=True.
    '''
    return get_domain_all_correlation(self.domain, get_nb=True)

View File

@ -108,24 +108,6 @@ class Correlation(object):
else:
return []
def _get_correlation_obj_domain(self, field_name, correlation_type):
    '''
    Return all domains that contain this correlation.

    :param field_name: correlation value
    :type field_name: str
    :param correlation_type: correlation type
    :type correlation_type: str

    :return: a list of domains, empty if none is known
    :rtype: list
    '''
    redis_key = 'set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)
    domains = r_serv_metadata.smembers(redis_key)
    return list(domains) if domains else []
def get_domain_correlation_dict(self, domain, correlation_type=None, get_nb=False):
'''
Return all correlation of a given domain.
@ -147,6 +129,44 @@ class Correlation(object):
dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl])
return dict_correlation
def _get_correlation_obj_domain(self, field_name, correlation_type):
    '''
    Return all domains that contain this correlation.

    :param field_name: correlation value
    :type field_name: str
    :param correlation_type: correlation type
    :type correlation_type: str

    :return: a list of domains, empty if none is known
    :rtype: list
    '''
    redis_key = 'set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name)
    domains = r_serv_metadata.smembers(redis_key)
    return list(domains) if domains else []
def get_correlation_obj_domain(self, field_name, correlation_type=None):
    '''
    Return the domains correlated with a given correlation value.

    The requested correlation types are checked in order and the domains of
    the first type with a non-empty result are returned.

    :param field_name: correlation value
    :param correlation_type: correlation types to check
        (passed through sanythise_correlation_types)
    :type correlation_type: list, optional

    :return: a list of domains, empty if no type matches
    :rtype: list
    '''
    requested_types = self.sanythise_correlation_types(correlation_type)
    # lazy: stop querying as soon as one type returns domains
    lookups = (self._get_correlation_obj_domain(field_name, one_type) for one_type in requested_types)
    return next((domains for domains in lookups if domains), [])
def _get_item_correlation_obj(self, item_id, correlation_type):
'''
Return correlation of a given item id.
@ -192,6 +212,8 @@ class Correlation(object):
r_serv_metadata.sadd('domain_{}_{}:{}'.format(self.correlation_name, correlation_type, domain), correlation_value)
r_serv_metadata.sadd('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, correlation_value), domain)
######## API EXPOSED ########

View File

@ -6,16 +6,16 @@ import sys
import gzip
import redis
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Decoded
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Date
import Tag
from Cryptocurrency import cryptocurrency
import Cryptocurrency
from Pgp import pgp
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Decoded
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
r_cache = config_loader.get_redis_conn("Redis_Cache")
@ -137,7 +137,7 @@ def get_item_cryptocurrency(item_id, currencies_type=None, get_nb=False):
:param currencies_type: list of cryptocurrencies type
:type currencies_type: list, optional
'''
return cryptocurrency.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb)
return Cryptocurrency.cryptocurrency.get_item_correlation_dict(item_id, correlation_type=currencies_type, get_nb=get_nb)
def get_item_pgp(item_id, currencies_type=None, get_nb=False):
'''

View File

@ -0,0 +1,377 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>AIL - framework</title>
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
<!-- Core CSS -->
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<!-- JS -->
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
<script language="javascript" src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
<style>
line.link {
stroke: #666;
}
line.link:hover{
stroke: red;
stroke-width: 2px
}
.node {
pointer-events: all;
}
circle {
stroke: none;
}
.graph_text_node {
font: 8px sans-serif;
pointer-events: none;
}
.graph_node_icon {
pointer-events: none;
}
.node text {
font: 8px sans-serif;
pointer-events: auto;
}
div.tooltip {
position: absolute;
text-align: center;
padding: 2px;
font: 12px sans-serif;
background: #ebf4fb;
border: 2px solid #b7ddf2;
border-radius: 8px;
pointer-events: none;
color: #000000;
}
.graph_panel {
padding: unset;
}
.line_graph {
  fill: none;
  stroke: steelblue;
  stroke-linejoin: round;
  stroke-linecap: round;
  /* single declaration: the former "stroke-width: 2px" was overridden by this one */
  stroke-width: 1.5;
  /*attr('stroke', '#bcbd22').*/
}
</style>
</head>
<body>
{% include 'nav_bar.html' %}
<div class="container-fluid">
<div class="row">
{% include 'decoded/menu_sidebar.html' %}
<div class="col-12 col-lg-10" id="core_content">
<div class="row">
<div class="col-xl-10">
<div class="card mb-3">
<div class="card-header">
<i class="fas fa-project-diagram"></i> Graph
</div>
<div class="card-body graph_panel">
<div id="graph">
</div>
</div>
</div>
</div>
<div class="col-xl-2">
<div class="card my-3">
<div class="card-header">
<i class="fas fa-project-diagram"></i> Graph
</div>
<div class="card-body text-center px-0 py-0">
<button class="btn btn-primary my-4" onclick="resize_graph();">
<i class="fas fa-sync"></i>&nbsp;Resize Graph
</button>
{% if correlation_type=='pgpdump' %}
{% include 'decoded/show_helper_pgpdump.html' %}
{% elif correlation_type=='cryptocurrency' %}
{% include 'decoded/show_helper_cryptocurrency.html' %}
{% endif %}
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<script>
// Namespace holding the graph state and its resize callback.
var all_graph = {};

// Once the DOM is ready: highlight the menu entry, load the graph and fit it.
$(function () {
  $("#page-Decoded").addClass("active");
  all_graph.node_graph = create_graph("{{ url_for('correlation.test') }}");
  all_graph.onResize();
});

// Keep the graph fitted to its container when the window size changes.
$(window).on("resize", function () {
  all_graph.onResize();
});
// Show or hide the navigation menu and give the freed columns to the main content.
function toggle_sidebar(){
  var nav_menu = $('#nav_menu');
  var side_menu = $('#side_menu');
  var core_content = $('#core_content');

  if (nav_menu.is(':visible')) {
    nav_menu.hide();
    side_menu.removeClass('border-right col-lg-2');
    core_content.removeClass('col-lg-10');
  } else {
    nav_menu.show();
    side_menu.addClass('border-right col-lg-2');
    core_content.addClass('col-lg-10');
  }
}
</script>
<script>
// Reset the view: center on (200, 200) and zoom to scale 2.
function resize_graph() {
  var center_x = 200;
  var center_y = 200;
  var zoom_level = 2;
  zoom.translateTo(svg_node, center_x, center_y);
  zoom.scaleTo(svg_node, zoom_level);
}
</script>
<script>
// Initial size of the SVG element (all_graph.onResize later updates its width/height attributes).
var width = 400;
var height = 400;

// Selection of the link lines: assigned in create_graph(), read by the hover handlers.
var link;

// Zoom behavior used by the programmatic translateTo / scaleTo calls.
var zoom = d3.zoom()
  .scaleExtent([0.2, 10])
  .on("zoom", zoomed);

//var transform = d3.zoomIdentity;

var color = d3.scaleOrdinal(d3.schemeCategory10);

// Tooltip element, invisible until a node is hovered.
var div = d3.select("body").append("div")
  .attr("class", "tooltip")
  .style("opacity", 0);

// Force layout: link, charge and centering forces.
var simulation = d3.forceSimulation()
  .force("link", d3.forceLink().id(function (d) { return d.id; }))
  .force("charge", d3.forceManyBody())
  .force("center", d3.forceCenter(width / 2, height / 2));
  //.on("tick", ticked);

// NOTE(review): the svg is bound to a second, separate zoom behavior whose
// scaleExtent ([1, 8]) differs from `zoom` above ([.2, 10]) - confirm this is intended.
var svg_node = d3.select("#graph").append("svg")
  .attr("id", "graph_div")
  .attr("width", width)
  .attr("height", height)
  .call(d3.zoom().scaleExtent([1, 8]).on("zoom", zoomed))
  .on("dblclick.zoom", null);

// Group receiving the zoom transform; links and nodes are appended to it.
var container_graph = svg_node.append("g");
  //.attr("transform", "translate(40,0)")
  //.attr("transform", "scale(2)");
// Fetch the graph description from `url` (JSON object with `nodes` and `links`
// arrays) and render it inside `container_graph` as a force-directed graph.
// Rendering happens asynchronously; the function itself returns nothing.
function create_graph(url){
  d3.json(url)
    .then(function(data){
      // one line per link, stored in the global `link` so that the hover
      // handlers (mouseovered / mouseouted) can restyle the links
      link = container_graph.append("g")
        .selectAll("line")
        .data(data.links)
        .enter().append("line")
        .attr("class", "link");
        //.attr("stroke-width", function(d) { return Math.sqrt(d.value); })
      // one <g> per node, with mouse and drag handlers
      // NOTE(review): the selector is ".node" but the created groups get the
      // class "nodes" - confirm whether the mismatch is intended
      var node = container_graph.selectAll(".node")
        .data(data.nodes)
        .enter().append("g")
        .attr("class", "nodes")
        .on("dblclick", doubleclick)
        .on("click", click)
        .on("mouseover", mouseovered)
        .on("mouseout", mouseouted)
        .call(d3.drag()
          .on("start", drag_start)
          .on("drag", dragged)
          .on("end", drag_end));
      // node circle: radius and color come from the style sent by the server
      node.append("circle")
        .attr("r", function(d) {
          return (d.style.node_radius);})
        .attr("fill", function(d) {
          return d.style.node_color;});
      // node icon: glyph text rendered with the icon font class of the node style
      node.append('text')
        .attr('text-anchor', 'middle')
        .attr('dominant-baseline', 'central')
        .attr("class", function(d) {
          return "graph_node_icon " + d.style.icon_class
        })
        .attr('font-size', '8px' )
        .attr('pointer-events', 'none')
        .text(function(d) {
          //if(d.hash){
          return d.style.icon_text
          //}
        });
      // initial view: same values as resize_graph()
      zoom.translateTo(svg_node, 200, 200);
      zoom.scaleTo(svg_node, 2);
      /* node.append("text")
      .attr("dy", 3)
      .attr("dx", 7)
      .attr("class", "graph_text_node")
      //.style("text-anchor", function(d) { return d.children ? "end" : "start"; })
      .text(function(d) { return d.id; });*/
      // start the layout: nodes first, then the links of the "link" force
      simulation
        .nodes(data.nodes)
        .on("tick", ticked);
      simulation.force("link")
        .links(data.links);
      // on every simulation tick, move the lines and the node groups to the
      // positions computed by the layout
      function ticked() {
        link
          .attr("x1", function(d) { return d.source.x; })
          .attr("y1", function(d) { return d.source.y; })
          .attr("x2", function(d) { return d.target.x; })
          .attr("y2", function(d) { return d.target.y; });
        /*node
        .attr("cx", function(d) { return d.x; })
        .attr("cy", function(d) { return d.y; });*/
        node.attr("transform", function(d) { return "translate(" + d.x + "," + d.y + ")"; });
      }
    });
}
// Zoom listener: apply the current zoom transform to the graph container.
function zoomed() {
  var current_transform = d3.event.transform;
  container_graph.attr("transform", current_transform);
}
// Double click on a node: open the page of the object in a new tab.
function doubleclick (d) {
  var target_url = d.url;
  window.open(target_url, '_blank');
}
// Click on a node: placeholder handler, only logs the event.
function click (d) {
  var message = 'clicked';
  console.log(message)
}
// Drag start: reheat the simulation and pin the node at its current position.
function drag_start(d) {
  if (!d3.event.active) {
    simulation.alphaTarget(0.3).restart();
  }
  d.fx = d.x;
  d.fy = d.y;
}
// Dragging: the pinned node follows the pointer.
function dragged(d) {
  var drag_event = d3.event;
  d.fx = drag_event.x;
  d.fy = drag_event.y;
}
// Drag end: let the simulation cool down; the node stays pinned where it was dropped.
function drag_end(d) {
  if (!d3.event.active) {
    simulation.alphaTarget(0);
  }
  d.fx = d.x;
  d.fy = d.y;
}
// Node hover: show the tooltip and grey out the links not attached to the node.
function mouseovered(d) {
  // tooltip
  var is_hash = (d.hash == true);
  var content;
  if (is_hash) {
    content = "<b><span id='tooltip-id-name'></span></b><br/>"+
      "<br/>"+
      "<i>First seen</i>: <span id='tooltip-id-first_seen'></span><br/>"+
      "<i>Last seen</i>: <span id='tooltip-id-last_seen'></span><br/>"+
      "<i>nb_seen</i>: <span id='tooltip-id-nb_seen'></span>";
  } else {
    content = "<b><span id='tooltip-id-name'></span></b><br/>";
  }

  div.transition()
    .duration(200)
    .style("opacity", .9);
  div.html(content)
    .style("left", (d3.event.pageX) + "px")
    .style("top", (d3.event.pageY - 28) + "px");

  // fill the placeholders through .text() so that the values are escaped
  if (is_hash) {
    $("#tooltip-id-name").text(d.id);
    $("#tooltip-id-first_seen").text(d.first_seen);
    $("#tooltip-id-last_seen").text(d.last_seen);
    $("#tooltip-id-nb_seen").text(d.nb_seen_in_paste);
  } else {
    $("#tooltip-id-name").text(d.text);
  }

  //links
  /*link.style("stroke-opacity", function(o) {
    return o.source === d || o.target === d ? 1 : opacity;
  });*/
  link.style("stroke", function (o) {
    var is_attached = (o.source === d || o.target === d);
    return is_attached ? "#666" : "#ddd";
  });
}
// Node hover end: restore the default link color and fade the tooltip out.
function mouseouted() {
  var fade_out = div.transition().duration(500);
  fade_out.style("opacity", 0);
  link.style("stroke", "#666");
}
// Fit the graph svg to the width of its parent, keeping a 2:1 aspect ratio.
all_graph.onResize = function () {
  var aspect = 1000 / 500;
  var graph_svg = $("#graph_div");
  var targetWidth = graph_svg.parent().width();
  graph_svg.attr("width", targetWidth);
  graph_svg.attr("height", targetWidth / aspect);
}
window.all_graph = all_graph;
</script>
</body>
</html>