chg: [Showpaste] add screenshot + improve onion db

pull/260/head
Terrtia 2018-08-16 17:24:39 +02:00
parent 7652089433
commit ed559d9f4a
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
7 changed files with 186 additions and 38 deletions

.gitignore

@@ -32,6 +32,7 @@ var/www/submitted
 # Local config
 bin/packages/config.cfg
 configs/keys
+files
 # installed files
 nltk_data/


@@ -8,6 +8,7 @@ import redis
 import datetime
 import time
 import subprocess
+import requests
 
 sys.path.append(os.environ['AIL_BIN'])
 from Helper import Process
@@ -17,31 +18,40 @@ from pubsublogger import publisher
 def signal_handler(sig, frame):
     sys.exit(0)
 
-def crawl_onion(url, domain):
-    date = datetime.datetime.now().strftime("%Y%m%d")
-    if not r_onion.sismember('onion_up:'+date , domain):
+def crawl_onion(url, domain, date):
+    if not r_onion.sismember('onion_up:'+date , domain) and not r_onion.sismember('onion_down:'+date , domain):
+    #if not r_onion.sismember('full_onion_up', domain) and not r_onion.sismember('onion_down:'+date , domain):
         super_father = r_serv_metadata.hget('paste_metadata:'+paste, 'super_father')
         if super_father is None:
             super_father=paste
-        process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', url, domain, paste, super_father],
-                                   stdout=subprocess.PIPE)
-        while process.poll() is None:
-            time.sleep(1)
-        if process.returncode == 0:
-            if r_serv_metadata.exists('paste_children:'+paste):
-                msg = 'infoleak:automatic-detection="onion";{}'.format(paste)
-                p.populate_set_out(msg, 'Tags')
-            print(process.stdout.read())
-            r_onion.sadd('onion_up:'+date , domain)
-            r_onion.sadd('onion_up_link:'+date , url)
+        try:
+            r = requests.get(splash_url , timeout=0.010)
+        except Exception:
+            ## FIXME: # TODO: relaunch docker
+            exit(0)
+        if r.status_code == 200:
+            process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', url, domain, paste, super_father],
+                                       stdout=subprocess.PIPE)
+            while process.poll() is None:
+                time.sleep(1)
+            if process.returncode == 0:
+                if r_serv_metadata.exists('paste_children:'+paste):
+                    msg = 'infoleak:automatic-detection="onion";{}'.format(paste)
+                    p.populate_set_out(msg, 'Tags')
+                print(process.stdout.read())
+            else:
+                r_onion.sadd('onion_down:'+date , domain)
+                r_onion.sadd('onion_down_link:'+date , url)
+                print(process.stdout.read())
         else:
-            r_onion.sadd('onion_down:'+date , domain)
-            r_onion.sadd('onion_down_link:'+date , url)
-            print(process.stdout.read())
+            ## FIXME: # TODO: relaunch docker
+            exit(0)
 
 if __name__ == '__main__':
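The reworked crawl_onion() probes the Splash HTTP endpoint before spawning tor_crawler.py, so a dead container no longer burns a crawl attempt. A minimal standalone sketch of that pattern, assuming a local splash_url and a more forgiving 1-second timeout (the commit uses a module-level splash_url and timeout=0.010, i.e. 10 ms); splash_is_up and run_tor_crawler are illustrative names only, in Crawler.py the check and the subprocess call live inline in crawl_onion():

    import subprocess
    import time

    import requests

    def splash_is_up(splash_url='http://127.0.0.1:8050', timeout=1.0):
        # Any HTTP 200 from Splash counts as "alive"; a connection error means the docker is down.
        try:
            return requests.get(splash_url, timeout=timeout).status_code == 200
        except requests.exceptions.RequestException:
            return False

    def run_tor_crawler(url, domain, paste, super_father):
        # Same launch-and-poll loop as the commit: wait for the child crawler to exit.
        process = subprocess.Popen(["python", './torcrawler/tor_crawler.py', url, domain, paste, super_father],
                                   stdout=subprocess.PIPE)
        while process.poll() is None:
            time.sleep(1)
        print(process.stdout.read())
        return process.returncode == 0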
@@ -102,15 +112,51 @@ if __name__ == '__main__':
             domain_url = 'http://{}'.format(domain)
 
-            print('------------------START ONIOM CRAWLER------------------')
+            print('------------------START ONION CRAWLER------------------')
             print('url: {}'.format(url))
             print('domain: {}'.format(domain))
             print('domain_url: {}'.format(domain_url))
 
-            crawl_onion(url, domain)
-            if url != domain_url:
-                crawl_onion(domain_url, domain)
+            if not r_onion.sismember('banned_onion', domain):
+                date = datetime.datetime.now().strftime("%Y%m%d")
+                crawl_onion(url, domain, date)
+                if url != domain_url:
+                    crawl_onion(domain_url, domain, date)
+
+                # save dowm onion
+                if not r_onion.sismember('onion_up:'+date , domain):
+                    r_onion.sadd('onion_down:'+date , domain)
+                    r_onion.sadd('onion_down_link:'+date , url)
+                    r_onion.hincrby('onion_link_down', url, 1)
+                    if not r_onion.exists('onion_metadata:{}'.format(domain)):
+                        r_onion.hset('onion_metadata:{}'.format(domain), 'first_seen', date)
+                    r_onion.hset('onion_metadata:{}'.format(domain), 'last_seen', date)
+                else:
+                    r_onion.hincrby('onion_link_up', url, 1)
+
+                # last check
+                r_onion.hset('onion_metadata:{}'.format(domain), 'last_check', date)
+
+                # check external onions links (full_scrawl)
+                external_domains = set()
+                for link in r_onion.smembers('domain_onion_external_links:{}'.format(domain)):
+                    print(link)
+                    external_domain = re.findall(url_regex, link)
+                    print(external_domain)
+                    if len(external_domain) > 0:
+                        external_domain = external_domain[0][4]
+                    else:
+                        continue
+                    print(external_domain)
+                    # # TODO: add i2p
+                    if '.onion' in external_domain and external_domain != domain:
+                        external_domains.add(external_domain)
+                if len(external_domains) >= 10:
+                    r_onion.sadd('onion_potential_source', domain)
+                r_onion.delete('domain_onion_external_links:{}'.format(domain))
+                print(r_onion.smembers('domain_onion_external_links:{}'.format(domain)))
 
         else:
             continue
     else:
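The external-link pass above flags a hidden service as a potential source (for example a link list) once it references 10 or more distinct other .onion domains. A self-contained sketch of that heuristic, with a plain set standing in for the domain_onion_external_links ARDB set and urlparse standing in for the url_regex group lookup; is_potential_source is a made-up helper name for illustration:

    from urllib.parse import urlparse

    def is_potential_source(domain, external_links, threshold=10):
        # Count distinct .onion hosts other than the crawled domain itself.
        external_domains = set()
        for link in external_links:
            host = urlparse(link).netloc
            if host.endswith('.onion') and host != domain:
                external_domains.add(host)
        return len(external_domains) >= threshold

    # A page linking to 12 distinct onions would be flagged:
    links = ['http://example{}.onion/index.html'.format(i) for i in range(12)]
    print(is_potential_source('mydomain.onion', links))   # True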


@@ -94,6 +94,7 @@ class Paste(object):
         var = self.p_path.split('/')
         self.p_date = Date(var[-4], var[-3], var[-2])
+        self.p_rel_path = os.path.join(var[-4], var[-3], var[-2], self.p_name)
         self.p_source = var[-5]
         self.supposed_url = 'https://{}/{}'.format(self.p_source.replace('_pro', ''), var[-1].split('.gz')[0])
 
@@ -291,6 +292,9 @@ class Paste(object):
         else:
             return '[]'
 
+    def get_p_rel_path(self):
+        return self.p_rel_path
+
     def save_all_attributes_redis(self, key=None):
         """
         Saving all the attributes in a "Redis-like" Database (Redis, LevelDB)


@@ -10,6 +10,10 @@ import datetime
 import base64
 import redis
 
+from scrapy.spidermiddlewares.httperror import HttpError
+from twisted.internet.error import DNSLookupError
+from twisted.internet.error import TimeoutError
+
 from scrapy import Spider
 from scrapy.linkextractors import LinkExtractor
 from scrapy.crawler import CrawlerProcess, Crawler
 
@@ -79,6 +83,8 @@ class TorSplashCrawler():
                 db=self.p.config.getint("ARDB_Onion", "db"),
                 decode_responses=True)
 
+            self.crawler_path = os.path.join(self.p.config.get("Directories", "crawled"), date )
+
             self.crawled_paste_filemame = os.path.join(os.environ['AIL_HOME'], self.p.config.get("Directories", "pastes"),
                                             self.p.config.get("Directories", "crawled"), date )
 
@@ -89,7 +95,7 @@ class TorSplashCrawler():
                 self.start_urls,
                 self.parse,
                 endpoint='render.json',
-                meta={'parent': self.original_paste},
+                meta={'father': self.original_paste},
                 args={  'html': 1,
                         'wait': 10,
                         'render_all': 1,
@@ -106,44 +112,47 @@ class TorSplashCrawler():
             UUID = self.domains[0]+str(uuid.uuid4())
             filename_paste = os.path.join(self.crawled_paste_filemame, UUID)
+            relative_filename_paste = os.path.join(self.crawler_path, UUID)
             filename_screenshot = os.path.join(self.crawled_screenshot, UUID +'.png')
 
             # save new paste on disk
             if self.save_crawled_paste(filename_paste, response.data['html']):
 
+                self.r_serv_onion.sadd('onion_up:'+self.full_date , self.domains[0])
+                self.r_serv_onion.sadd('full_onion_up', self.domains[0])
+
                 # create onion metadata
-                if not self.r_serv_onion.exists('onion_metadata:{}'.format(self.domain[0])):
-                    self.r_serv_onion.hset('onion_metadata:{}'.format(self.domain[0]), 'first_seen', self.full_date)
-                self.r_serv_onion.hset('onion_metadata:{}'.format(self.domain[0]), 'last_seen', self.full_date)
+                if not self.r_serv_onion.exists('onion_metadata:{}'.format(self.domains[0])):
+                    self.r_serv_onion.hset('onion_metadata:{}'.format(self.domains[0]), 'first_seen', self.full_date)
+                self.r_serv_onion.hset('onion_metadata:{}'.format(self.domains[0]), 'last_seen', self.full_date)
 
                 # add onion screenshot history
-                self.r_serv_onion.sadd('onion_history:{}'.format(self.domain[0]), self.full_date)
+                self.r_serv_onion.sadd('onion_history:{}'.format(self.domains[0]), self.full_date)
 
                 #create paste metadata
                 self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'super_father', self.super_father)
-                self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'father', response.meta['parent'])
+                self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'father', response.meta['father'])
                 self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'domain', self.domains[0])
                 self.r_serv_metadata.hset('paste_metadata:'+filename_paste, 'real_link', response.url)
-                self.r_serv_metadata.sadd('paste_children:'+response.meta['parent'], filename_paste)
+                self.r_serv_metadata.sadd('paste_children:'+response.meta['father'], filename_paste)
 
                 dirname = os.path.dirname(filename_screenshot)
                 if not os.path.exists(dirname):
                     os.makedirs(dirname)
 
-                print(sys.getsizeof(response.data['png']))
-                print(sys.getsizeof(response.data['html']))
-                with open(filename_screenshot, 'wb') as f:
-                    f.write(base64.standard_b64decode(response.data['png'].encode()))
+                size_screenshot = (len(response.data['png'])*3) /4
+                print(size_screenshot)
+                print(self.domains[0])
+                if size_screenshot < 5000000: #bytes
+                    with open(filename_screenshot, 'wb') as f:
+                        f.write(base64.standard_b64decode(response.data['png'].encode()))
 
                 # save external links in set
                 lext = LinkExtractor(deny_domains=self.domains, unique=True)
                 for link in lext.extract_links(response):
-                    self.r_serv_metadata.sadd('paste_crawler:filename_paste', link)
+                    self.r_serv_onion.sadd('domain_onion_external_links:{}'.format(self.domains[0]), link.url)
+                    self.r_serv_metadata.sadd('paste_onion_external_links:{}'.format(filename_paste), link.url)
 
                 #le = LinkExtractor(unique=True)
                 le = LinkExtractor(allow_domains=self.domains, unique=True)
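size_screenshot estimates the decoded size of the Splash PNG straight from its base64 string: every 4 base64 characters encode 3 bytes, so len(b64) * 3 / 4 approximates the on-disk size (slightly high when padding is counted), and screenshots over roughly 5 MB are skipped. A quick check of the arithmetic, with a random 3 MB payload as a stand-in for a real screenshot:

    import base64
    import os

    png = os.urandom(3000000)                        # pretend 3 MB screenshot
    b64 = base64.standard_b64encode(png).decode()
    size_estimate = (len(b64) * 3) / 4               # same formula as the commit
    print(len(b64), size_estimate)                   # 4000000 3000000.0 -> under the 5000000-byte cap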
@@ -154,12 +163,38 @@ class TorSplashCrawler():
                         link.url,
                         self.parse,
                         endpoint='render.json',
-                        meta={'parent': UUID},
+                        meta={'father': relative_filename_paste},
                         args={  'html': 1,
                                 'png': 1,
                                 'render_all': 1,
                                 'wait': 10}
+                        #errback=self.errback_catcher
                     )
 
+    '''
+    def errback_catcher(self, failure):
+        # catch all errback failures,
+        self.logger.error(repr(failure))
+        #if isinstance(failure.value, HttpError):
+        if failure.check(HttpError):
+            # you can get the response
+            response = failure.value.response
+            print('HttpError')
+            self.logger.error('HttpError on %s', response.url)
+        #elif isinstance(failure.value, DNSLookupError):
+        elif failure.check(DNSLookupError):
+            # this is the original request
+            request = failure.request
+            print(DNSLookupError)
+            self.logger.error('DNSLookupError on %s', request.url)
+        #elif isinstance(failure.value, TimeoutError):
+        elif failure.check(TimeoutError):
+            request = failure.request
+            print(TimeoutError)
+            self.logger.error('TimeoutError on %s', request.url)
+    '''
+
     def save_crawled_paste(self, filename, content):


@@ -96,6 +96,12 @@ r_serv_statistics = redis.StrictRedis(
     db=cfg.getint("ARDB_Statistics", "db"),
     decode_responses=True)
 
+r_serv_onion = redis.StrictRedis(
+    host=cfg.get("ARDB_Onion", "host"),
+    port=cfg.getint("ARDB_Onion", "port"),
+    db=cfg.getint("ARDB_Onion", "db"),
+    decode_responses=True)
+
 sys.path.append('../../configs/keys')
 # MISP #
 
@@ -144,4 +150,6 @@ bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
 UPLOAD_FOLDER = os.path.join(os.environ['AIL_FLASK'], 'submitted')
 
+SCREENSHOT_FOLDER = os.path.join(os.environ['AIL_HOME'], cfg.get("Directories", "crawled_screenshot"))
+
 max_dashboard_logs = int(cfg.get("Flask", "max_dashboard_logs"))


@@ -5,9 +5,10 @@
 Flask functions and routes for the trending modules page
 '''
 import redis
+import os
 import json
 import flask
-from flask import Flask, render_template, jsonify, request, Blueprint, make_response, Response
+from flask import Flask, render_template, jsonify, request, Blueprint, make_response, Response, send_from_directory
 import difflib
 import ssdeep
 
@@ -22,12 +23,14 @@ r_serv_pasteName = Flask_config.r_serv_pasteName
 r_serv_metadata = Flask_config.r_serv_metadata
 r_serv_tags = Flask_config.r_serv_tags
 r_serv_statistics = Flask_config.r_serv_statistics
+r_serv_onion = Flask_config.r_serv_onion
 max_preview_char = Flask_config.max_preview_char
 max_preview_modal = Flask_config.max_preview_modal
 DiffMaxLineLength = Flask_config.DiffMaxLineLength
 bootstrap_label = Flask_config.bootstrap_label
 misp_event_url = Flask_config.misp_event_url
 hive_case_url = Flask_config.hive_case_url
+SCREENSHOT_FOLDER = Flask_config.SCREENSHOT_FOLDER
 
 showsavedpastes = Blueprint('showsavedpastes', __name__, template_folder='templates')
 
@@ -130,6 +133,16 @@ def showpaste(content_range):
         list_tags.append( (tag, automatic, tag_status_tp, tag_status_fp) )
 
+    crawler_metadata = {}
+    if 'infoleak:submission="crawler"' in l_tags:
+        crawler_metadata['get_metadata'] = True
+        crawler_metadata['paste_father'] = r_serv_metadata.hget('paste_metadata:'+requested_path, 'father')
+        crawler_metadata['real_link'] = r_serv_metadata.hget('paste_metadata:'+requested_path,'real_link')
+        crawler_metadata['external_links'] =r_serv_metadata.scard('paste_onion_external_links:'+requested_path)
+        crawler_metadata['screenshot'] = paste.get_p_rel_path()
+    else:
+        crawler_metadata['get_metadata'] = False
+
     if Flask_config.pymisp is False:
         misp = False
     else:
 
@@ -157,6 +170,7 @@ def showpaste(content_range):
             hive_url = hive_case_url.replace('id_here', hive_case)
 
     return render_template("show_saved_paste.html", date=p_date, bootstrap_label=bootstrap_label, active_taxonomies=active_taxonomies, active_galaxies=active_galaxies, list_tags=list_tags, source=p_source, encoding=p_encoding, language=p_language, size=p_size, mime=p_mime, lineinfo=p_lineinfo, content=p_content, initsize=len(p_content), duplicate_list = p_duplicate_list, simil_list = p_simil_list, hashtype_list = p_hashtype_list, date_list=p_date_list,
+                            crawler_metadata=crawler_metadata,
                             misp=misp, hive=hive, misp_eventid=misp_eventid, misp_url=misp_url, hive_caseid=hive_caseid, hive_url=hive_url)
 
 # ============ ROUTES ============
 
@@ -202,5 +216,9 @@ def showDiff():
     the_html = htmlD.make_file(lines1, lines2)
     return the_html
 
+@showsavedpastes.route('/screenshot/<path:filename>')
+def screenshot(filename):
+    return send_from_directory(SCREENSHOT_FOLDER, filename+'.png', as_attachment=True)
+
 # ========= REGISTRATION =========
 app.register_blueprint(showsavedpastes)
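The new /screenshot/<path:filename> route serves the crawled PNG out of SCREENSHOT_FOLDER, with the paste's relative path (get_p_rel_path()) doubling as the image name. A minimal standalone sketch of the same send_from_directory pattern; the folder location and the example path are assumptions for illustration only:

    import os
    from flask import Flask, send_from_directory

    app = Flask(__name__)
    SCREENSHOT_FOLDER = os.path.join(os.getcwd(), 'crawled_screenshot')   # assumed location

    @app.route('/screenshot/<path:filename>')
    def screenshot(filename):
        # <path:filename> keeps the slashes of the relative paste path, e.g. 2018/08/16/<domain><uuid>
        return send_from_directory(SCREENSHOT_FOLDER, filename + '.png', as_attachment=True)

    if __name__ == '__main__':
        # GET /screenshot/2018/08/16/foo.onionabcd -> crawled_screenshot/2018/08/16/foo.onionabcd.png
        app.run()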


@@ -373,6 +373,42 @@
         </tbody>
     </table>
     {% endif %}
 
+    {% if crawler_metadata['get_metadata'] %}
+    <div class="row">
+      <div class="col-md-7">
+        <img src="{{ url_for('showsavedpastes.screenshot', filename=crawler_metadata['screenshot']) }}" onError="this.onerror=null;this.src='{{ url_for('static', filename='image/AIL.png') }}';" style="width:100%;" />
+      </div>
+      <div class="col-md-5">
+        <div class="row">
+          <div class="panel panel-default">
+            <div class="panel-heading">
+              <i id="flash-tld" class="glyphicon glyphicon-flash " flash-tld=""></i> Graph
+            </div>
+            <table class="table table-hover table-striped">
+              <tbody>
+                <tr>
+                  <td>Father</td>
+                  <td>{{ crawler_metadata['paste_father'] }}</td>
+                </tr>
+                <tr>
+                  <td>Source link</td>
+                  <td>{{ crawler_metadata['real_link'] }}</td>
+                </tr>
+                <tr>
+                  <td>External links</td>
+                  <td>{{ crawler_metadata['external_links'] }}</td>
+                </tr>
+              </tbody>
+            </table>
+          </div>
+        </div>
+      </div>
+    </div>
+    {% endif %}
+
     <h3> Content: </h3>
     <a href="{{ url_for('showsavedpastes.showsavedrawpaste') }}?paste={{ request.args.get('paste') }}" id='raw_paste' > [Raw content] </a>
     <p data-initsize="{{ initsize }}"> <pre id="paste-holder">{{ content }}</pre></p>