chg: [screenshot correlation + v2.6] add screenshot-domain correlation + v2.6 update

pull/453/head
Terrtia 2019-12-17 15:13:36 +01:00
parent a3939b77f9
commit 056bad7a49
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
13 changed files with 363 additions and 5 deletions

View File

@ -12,6 +12,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Decoded
import Domain
import Screenshot
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Pgp
@ -26,7 +27,7 @@ def get_all_correlation_names():
'''
Return a list of all available correlations
'''
return ['pgp', 'cryptocurrency', 'decoded']
return ['pgp', 'cryptocurrency', 'decoded', 'screenshot']
def get_all_correlation_objects():
'''
@ -45,6 +46,8 @@ def exist_object(object_type, correlation_id, type_id=None):
return Pgp.pgp._exist_corelation_field(type_id, correlation_id)
elif object_type == 'cryptocurrency':
return Cryptocurrency.cryptocurrency._exist_corelation_field(type_id, correlation_id)
elif object_type == 'screenshot':
return Screenshot.exist_screenshot(correlation_id)
else:
return False
@ -59,6 +62,8 @@ def get_object_metadata(object_type, correlation_id, type_id=None):
return Pgp.pgp.get_metadata(type_id, correlation_id)
elif object_type == 'cryptocurrency':
return Cryptocurrency.cryptocurrency.get_metadata(type_id, correlation_id)
elif object_type == 'screenshot':
return Screenshot.get_metadata(correlation_id)
def get_object_correlation(object_type, value, correlation_names, correlation_objects, requested_correl_type=None):
if object_type == 'domain':
@ -71,7 +76,8 @@ def get_object_correlation(object_type, value, correlation_names, correlation_ob
return Pgp.pgp.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects)
elif object_type == 'cryptocurrency':
return Cryptocurrency.cryptocurrency.get_correlation_all_object(requested_correl_type, value, correlation_objects=correlation_objects)
elif object_type == 'screenshot':
return Screenshot.get_screenshot_correlated_object(value, correlation_objects)
return {}
def get_correlation_node_icon(correlation_name, correlation_type=None, value=None):
@ -130,6 +136,10 @@ def get_correlation_node_icon(correlation_name, correlation_type=None, value=Non
else:
icon_text = '\uf249'
elif correlation_name == 'screenshot':
node_color = '#E1F5DF'
icon_text = '\uf03e'
elif correlation_name == 'domain':
node_radius = 5
node_color = '#3DA760'
@ -162,6 +172,9 @@ def get_item_url(correlation_name, value, correlation_type=None):
elif correlation_name == 'decoded':
endpoint = 'correlation.show_correlation'
url = url_for(endpoint, object_type="decoded", correlation_id=value)
elif correlation_name == 'screenshot':
endpoint = 'correlation.show_correlation'
url = url_for(endpoint, object_type="screenshot", correlation_id=value)
elif correlation_name == 'domain':
endpoint = 'crawler_splash.showDomain'
url = url_for(endpoint, domain=value)
@ -241,7 +254,7 @@ def get_graph_node_object_correlation(object_type, root_value, mode, correlation
if mode=="inter":
nodes.add(correl_node_id)
links.add((root_node_id, correl_node_id))
if correl in ('decoded', 'domain', 'paste'):
if correl in ('decoded', 'screenshot', 'domain', 'paste'):
for correl_val in root_correlation[correl]:
correl_node_id = create_node_id(correl, correl_val)
@ -254,7 +267,7 @@ def get_graph_node_object_correlation(object_type, root_value, mode, correlation
res = get_object_correlation(correl, correl_val, correlation_names, correlation_objects)
if res:
for corr_obj in res:
if corr_obj in ('decoded', 'domain', 'paste'):
if corr_obj in ('decoded', 'domain', 'paste', 'screenshot'):
for correl_key_val in res[corr_obj]:
#filter root value
if correl_key_val == root_value:

View File

@ -26,6 +26,7 @@ cryptocurrency = Cryptocurrency.cryptocurrency
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Correlate_object
import Screenshot
config_loader = ConfigLoader.ConfigLoader()
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
@ -432,6 +433,14 @@ def get_domain_decoded(domain):
'''
return Decoded.get_domain_decoded_item(domain)
def get_domain_screenshot(domain):
'''
Retun all decoded item of a given domain.
:param domain: crawled domain
'''
return Screenshot.get_domain_screenshot(domain)
def get_domain_all_correlation(domain, correlation_names=[], get_nb=False):
'''
@ -453,6 +462,8 @@ def get_domain_all_correlation(domain, correlation_names=[], get_nb=False):
res = get_domain_pgp(domain, get_nb=get_nb)
elif correlation_name=='decoded':
res = get_domain_decoded(domain)
elif correlation_name=='screenshot':
res = get_domain_screenshot(domain)
else:
res = None
# add correllation to dict

104
bin/lib/Screenshot.py Executable file
View File

@ -0,0 +1,104 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item
import Date
import ConfigLoader
config_loader = ConfigLoader.ConfigLoader()
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot')
config_loader = None
# get screenshot relative path
def get_screenshot_rel_path(sha256_string, add_extension=False):
screenshot_path = os.path.join(sha256_string[0:2], sha256_string[2:4], sha256_string[4:6], sha256_string[6:8], sha256_string[8:10], sha256_string[10:12], sha256_string[12:])
if add_extension:
screenshot_path = screenshot_path + '.png'
return screenshot_path
def exist_screenshot(sha256_string):
screenshot_path = os.path.join(SCREENSHOT_FOLDER, get_screenshot_rel_path(sha256_string, add_extension=True))
return os.path.isfile(screenshot_path)
def get_metadata(sha256_string):
metadata_dict = {}
metadata_dict['sha256'] = sha256_string
return metadata_dict
def get_screenshot_items_list(sha256_string):
res = r_serv_onion.smembers('screenshot:{}'.format(sha256_string))
if res:
return list(res)
else:
return []
def get_item_screenshot_list(item_id):
'''
Retun all decoded item of a given item id.
:param item_id: item id
'''
screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot')
if screenshot:
return [screenshot]
else:
return []
def get_domain_screenshot(domain):
'''
Retun all screenshot of a given domain.
:param domain: crawled domain
'''
res = r_serv_onion.smembers('domain_screenshot:{}'.format(domain))
if res:
return list(res)
else:
return []
def get_screenshot_domain(sha256_string):
'''
Retun all domain of a given screenshot.
:param sha256_string: sha256_string
'''
res = r_serv_onion.smembers('screenshot_domain:{}'.format(sha256_string))
if res:
return list(res)
else:
return []
def get_screenshot_correlated_object(sha256_string, correlation_objects=[]):
'''
Retun all correlation of a given sha256.
:param sha1_string: sha256
:type sha1_string: str
:return: a dict of all correlation for a given sha256
:rtype: dict
'''
if correlation_objects is None:
correlation_objects = Correlation.get_all_correlation_objects()
decoded_correlation = {}
for correlation_object in correlation_objects:
if correlation_object == 'paste':
res = get_screenshot_items_list(sha256_string)
elif correlation_object == 'domain':
res = get_screenshot_domain(sha256_string)
else:
res = None
if res:
decoded_correlation[correlation_object] = res
return decoded_correlation

View File

@ -17,6 +17,7 @@ sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Correlate_object
import Decoded
import Screenshot
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
@ -168,6 +169,14 @@ def get_item_decoded(item_id):
'''
return Decoded.get_item_decoded(item_id)
def get_item_all_screenshot(item_id):
'''
Return all screenshot of a given item.
:param item_id: item id
'''
return Screenshot.get_item_screenshot_list(item_id)
def get_item_all_correlation(item_id, correlation_names=[], get_nb=False):
'''
Retun all correlation of a given item id.
@ -188,6 +197,8 @@ def get_item_all_correlation(item_id, correlation_names=[], get_nb=False):
res = get_item_pgp(item_id, get_nb=get_nb)
elif correlation_name=='decoded':
res = get_item_decoded(item_id)
elif correlation_name=='screenshot':
res = get_item_all_screenshot(item_id)
else:
res = None
# add correllation to dict
@ -256,6 +267,11 @@ def is_item_in_domain(domain, item_id):
def get_item_domain(item_id):
return item_id[19:-36]
def get_domain(item_id):
item_id = item_id.split('/')
item_id = item_id[-1]
return item_id[:-36]
def get_item_children(item_id):
return list(r_serv_metadata.smembers('paste_children:{}'.format(item_id)))

View File

@ -201,6 +201,9 @@ class TorSplashCrawler():
self.r_serv_metadata.hset('paste_metadata:{}'.format(relative_filename_paste), 'screenshot', hash)
# add sha256 metadata
self.r_serv_onion.sadd('screenshot:{}'.format(hash), relative_filename_paste)
# domain map
r_serv_onion.sadd('domain_screenshot:{}'.format(domain[0]), hash)
r_serv_onion.sadd('screenshot_domain:{}'.format(hash), domain[0])
if 'har' in response.data:
dirname = os.path.dirname(filename_har)

View File

@ -74,3 +74,23 @@ if __name__ == "__main__":
r_serv.delete('ail:current_background_update')
r_serv.delete('update:nb_elem_to_convert')
r_serv.delete('update:nb_elem_converted')
if r_serv.sismember('ail:to_update', 'v2.6'):
new_version = 'v2.6'
r_serv.delete('ail:update_error')
r_serv.delete('ail:current_background_script_stat')
r_serv.set('ail:update_in_progress', new_version)
r_serv.set('ail:current_background_update', new_version)
r_serv.set('ail:current_background_script', 'screenshot update')
update_file = os.path.join(os.environ['AIL_HOME'], 'update', new_version, 'Update_screenshots.py')
process = subprocess.run(['python' ,update_file])
update_progress = r_serv_db.get('ail:current_background_script_stat')
if update_progress:
if int(update_progress) == 100:
r_serv.delete('ail:update_in_progress')
r_serv.delete('ail:current_background_script')
r_serv.delete('ail:current_background_script_stat')
r_serv.delete('ail:current_background_update')
r_serv_db.srem('ail:to_update', new_version)

34
update/v2.6/Update.py Executable file
View File

@ -0,0 +1,34 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import re
import sys
import time
import redis
import datetime
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
new_version = 'v2.6'
if __name__ == '__main__':
start_deb = time.time()
config_loader = ConfigLoader.ConfigLoader()
r_serv = config_loader.get_redis_conn("ARDB_DB")
config_loader = None
#Set current update_in_progress
r_serv.set('ail:update_in_progress', new_version)
r_serv.set('ail:current_background_update', new_version)
r_serv.sadd('ail:to_update', new_version)
#Set current ail version
r_serv.set('ail:version', new_version)
#Set current ail version
r_serv.hset('ail:update_date', new_version, datetime.datetime.now().strftime("%Y%m%d"))

39
update/v2.6/Update.sh Executable file
View File

@ -0,0 +1,39 @@
#!/bin/bash
[ -z "$AIL_HOME" ] && echo "Needs the env var AIL_HOME. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_REDIS" ] && echo "Needs the env var AIL_REDIS. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_ARDB" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_BIN" ] && echo "Needs the env var AIL_ARDB. Run the script from the virtual environment." && exit 1;
[ -z "$AIL_FLASK" ] && echo "Needs the env var AIL_FLASK. Run the script from the virtual environment." && exit 1;
export PATH=$AIL_HOME:$PATH
export PATH=$AIL_REDIS:$PATH
export PATH=$AIL_ARDB:$PATH
export PATH=$AIL_BIN:$PATH
export PATH=$AIL_FLASK:$PATH
GREEN="\\033[1;32m"
DEFAULT="\\033[0;39m"
echo -e $GREEN"Shutting down AIL ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
bash ${AIL_BIN}/LAUNCH.sh -lav &
wait
echo ""
echo ""
echo -e $GREEN"Updating AIL VERSION ..."$DEFAULT
echo ""
python ${AIL_HOME}/update/v2.6/Update.py
wait
echo ""
echo ""
echo ""
echo -e $GREEN"Shutting down ARDB ..."$DEFAULT
bash ${AIL_BIN}/LAUNCH.sh -ks
wait
exit 0

View File

@ -0,0 +1,95 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import time
import redis
import datetime
from hashlib import sha256
from pyfaup.faup import Faup
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
def get_all_item(screenshot_sha256):
return r_serv_onion.smembers('screenshot:{}'.format(screenshot_sha256))
def sanitize_domain(domain):
faup.decode(domain)
domain_sanitized = faup.get()
return domain_sanitized['domain'].lower()
def update_db(screenshot_sha256):
screenshot_items = get_all_item(screenshot_sha256)
if screenshot_items:
for item_id in screenshot_items:
item_id = item_id.replace(PASTES_FOLDER+'/', '', 1) # remove root path
domain = Item.get_domain(item_id)
domain_sanitized = sanitize_domain(domain)
if domain != domain_sanitized:
r_serv_onion.sadd('incorrect_domain', domain)
domain = domain_sanitized
#print(item_id)
#print(domain)
r_serv_onion.sadd('domain_screenshot:{}'.format(domain), screenshot_sha256)
r_serv_onion.sadd('screenshot_domain:{}'.format(screenshot_sha256), domain)
else:
pass
# broken screenshot
r_serv_onion.sadd('broken_screenshot', screenshot_sha256)
if __name__ == '__main__':
start_deb = time.time()
faup = Faup()
config_loader = ConfigLoader.ConfigLoader()
PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes"))
SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "crawled_screenshot"), 'screenshot')
r_serv_db = config_loader.get_redis_conn("ARDB_DB")
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
config_loader = None
r_serv_db.set('ail:update_in_progress', 'v2.6')
r_serv_db.set('ail:current_background_update', 'v2.6')
r_serv_db.set('ail:current_background_script_stat', 20)
r_serv_db.set('ail:current_background_script', 'screenshot update')
nb = 0
if os.path.isdir(SCREENSHOT_FOLDER):
for root, dirs, files in os.walk(SCREENSHOT_FOLDER, topdown=False):
#print(dirs)
for name in files:
nb = nb + 1
screenshot_sha256 = os.path.join(root, name)
screenshot_sha256 = screenshot_sha256[:-4] # remove .png
screenshot_sha256 = screenshot_sha256.replace(SCREENSHOT_FOLDER, '', 1)
screenshot_sha256 = screenshot_sha256.replace('/', '')
update_db(screenshot_sha256)
#print('Screenshot updated: {}'.format(nb))
r_serv_db.set('ail:current_background_script', 'screenshot updated: {}'.format(nb))
r_serv_db.set('ail:current_background_script_stat', 100)
end = time.time()
print('ALL screenshot updated: {} in {} s'.format(nb, end - start_deb))
r_serv_db.delete('ail:update_in_progress')
r_serv_db.delete('ail:current_background_script')
r_serv_db.delete('ail:current_background_script_stat')
r_serv_db.delete('ail:current_background_update')
r_serv_db.srem('ail:to_update', 'v2.6')

View File

@ -147,6 +147,9 @@ def show_correlation():
correl_option = request.form.get('DecodedCheck')
if correl_option:
correlation_names.append('decoded')
correl_option = request.form.get('ScreenshotCheck')
if correl_option:
correlation_names.append('screenshot')
# correlation_objects
correl_option = request.form.get('DomainCheck')
if correl_option:

View File

@ -88,7 +88,9 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
dict_update_description = {'v1.5':{'nb_background_update': 5, 'update_warning_message': 'An Update is running on the background. Some informations like Tags, screenshot can be',
'update_warning_message_notice_me': 'missing from the UI.'},
'v2.4':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be',
'update_warning_message_notice_me': 'missing from the UI.'}
'update_warning_message_notice_me': 'missing from the UI.'},
'v2.6':{'nb_background_update': 1, 'update_warning_message': 'An Update is running on the background. Some informations like Domain Tags/Correlation can be',
'update_warning_message_notice_me': 'missing from the UI.'}
}
UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted')

View File

@ -7,6 +7,9 @@
<th class="">
Decoded:
</th>
<th class="">
Screenshot:
</th>
<th class="">
Pgp:
</th>
@ -96,6 +99,17 @@
other types of file
</div>
</td>
<td>
<div class="my-1">
<svg height="26" width="26">
<g class="nodes">
<circle cx="13" cy="13" r="13" fill="#E1F5DF"></circle>
<text x="13" y="13" text-anchor="middle" dominant-baseline="central" class="graph_node_icon fas" font-size="16px">&#xf03e;</text>
</g>
</svg>
screenshot
</div>
</td>
<td>
<div class="my-1">
<svg height="26" width="26">

View File

@ -144,6 +144,10 @@
<input class="form-check-input" type="checkbox" value="True" id="DecodedCheck" name="DecodedCheck" {%if "decoded" in dict_object["correlation_names"]%}checked{%endif%}>
<label class="form-check-label" for="DecodedCheck">Decoded</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="ScreenshotCheck" name="ScreenshotCheck" {%if "screenshot" in dict_object["correlation_names"]%}checked{%endif%}>
<label class="form-check-label" for="ScreenshotCheck">Screenshot</label>
</div>
<div class="form-check">
<input class="form-check-input" type="checkbox" value="True" id="PgpCheck" name="PgpCheck" {%if "pgp" in dict_object["correlation_names"]%}checked{%endif%}>
<label class="form-check-label" for="PgpCheck">PGP</label>