new: Add integration with SaneJS

pull/27/head
Raphaël Vinot 2018-07-19 18:18:22 +02:00
parent 5202630278
commit 3c9b28b704
3 changed files with 53 additions and 0 deletions

View File

@ -19,6 +19,10 @@ import time
from zipfile import ZipFile, ZIP_DEFLATED
from io import BytesIO
import base64
import socket
from urllib.parse import urlparse
import requests
app = Flask(__name__)
@ -37,6 +41,27 @@ SPLASH = 'http://127.0.0.1:8050'
HAR_DIR.mkdir(parents=True, exist_ok=True)
SANE_JS = 'http://127.0.0.1:5007'
def is_open(ip, port):
    """Return True if a TCP connection to ``ip``:``port`` can be established.

    ip: hostname or IPv4 address to probe.
    port: port number; anything accepted by ``int()`` (e.g. ``5007`` or ``"5007"``).

    Any failure (refused/timed-out connection, unresolvable host, missing or
    invalid port) yields False instead of raising.
    """
    # The context manager guarantees the probe socket is closed on every path;
    # previously the descriptor was leaked on both success and failure.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(2)  # do not hang on filtered/unreachable hosts
        try:
            s.connect((ip, int(port)))
            s.shutdown(socket.SHUT_RDWR)
        except (OSError, TypeError, ValueError, OverflowError):
            # OSError: refused / timeout / DNS failure.
            # TypeError / ValueError / OverflowError: port or host is None,
            # non-numeric or out of range.
            return False
        return True
# Probe the SaneJS endpoint once, at import time, and remember the result.
# The flag is always bound: when SANE_JS is empty/None the integration is
# simply disabled instead of leaving has_sane_js undefined (which made
# sane_js_query raise NameError).
has_sane_js = False
if SANE_JS:
    parsed = urlparse(SANE_JS)
    # A URL without an explicit port has parsed.port == None; fall back to the
    # default port of its scheme so such URLs are still probed.
    sane_js_port = parsed.port or (443 if parsed.scheme == 'https' else 80)
    has_sane_js = is_open(parsed.hostname, sane_js_port)
def cleanup_old_tmpfiles():
for tmpfile in pathlib.Path(tempfile.gettempdir()).glob('lookyloo*'):
@ -62,6 +87,13 @@ def load_tree(report_dir):
return ct.to_json(), ct.start_time.isoformat(), ct.user_agent, ct.root_url
def sane_js_query(sha512, details=False, timeout=5):
    """Ask the SaneJS service about a resource hash.

    sha512: hash of the resource body to look up.
    details: forwarded as the 'details' field of the JSON request.
    timeout: seconds to wait for SaneJS before giving up.

    Returns the decoded JSON response, or an empty dict when SaneJS is
    disabled, unreachable, too slow, or answers with something that is not
    JSON. Callers must therefore treat every key as optional.
    """
    if not has_sane_js:
        return {}
    try:
        # Without a timeout a stalled SaneJS would block the web request forever.
        r = requests.post(SANE_JS, json={"sha512": sha512, 'details': details},
                          timeout=timeout)
        return r.json()
    except (requests.RequestException, ValueError):
        # SaneJS is an optional enrichment: degrade to "no information"
        # (same result as when it is disabled) rather than failing the page.
        return {}
@app.route('/scrape', methods=['GET', 'POST'])
def scrape():
if request.form.get('url'):
@ -128,6 +160,11 @@ def hostnode_details(node_uuid):
hostnode = ct.root_hartree.get_host_node_by_uuid(node_uuid)
urls = []
for url in hostnode.urls:
    # Enrich the URL node with SaneJS details when its body hash is known there.
    if hasattr(url, 'body_hash'):
        sane_js_r = sane_js_query(url.body_hash, details=True)
        # sane_js_query returns {} when SaneJS is unavailable, so neither key
        # is guaranteed to be present; indexing directly raised KeyError.
        if sane_js_r.get('exists') and 'details' in sane_js_r:
            url.add_feature('sane_js_details', sane_js_r['details'])
    urls.append(url.to_json())
return json.dumps(urls)

View File

@ -267,6 +267,7 @@ function icon(icons, key, icon_path){
.attr('x', function(d) { return d.data.total_width ? d.data.total_width + 1 : 0 })
.attr("xlink:href", icon_path).call(getBB);
content.filter(function(d){
if (typeof d.data[key] === 'boolean') {
return false;
@ -285,6 +286,7 @@ function icon(icons, key, icon_path){
.attr('x', function(d) { return d.data.total_width ? d.data.total_width + 1 : 0 })
.attr('width', function(d) { return d.to_print.toString().length + 'em'; })
.text(function(d) { return d.to_print; }).call(getBB);
};
function icon_list(parent_svg, relative_x_pos, relative_y_pos) {
@ -308,6 +310,18 @@ function icon_list(parent_svg, relative_x_pos, relative_y_pos) {
icon(icons, 'response_cookie', "/static/cookie_received.png");
icon(icons, 'redirect', "/static/redirect.png");
icon(icons, 'redirect_to_nothing', "/static/cookie_in_url.png");
// Label nodes that carry SaneJS details with the name of the matched library.
// The filter keeps only nodes whose data has a truthy `sane_js_details`, and
// as a side effect stores the `libname` of the first entry on the datum so
// the text callback below can read it.
// NOTE(review): assumes `sane_js_details` is a non-empty array of objects
// with a `libname` key — confirm against what the backend sends.
icons.filter(function(d){
if (d.data.sane_js_details) {
d.libname = d.data.sane_js_details[0]['libname'];
// Returns the details themselves; only their truthiness matters here.
return d.data.sane_js_details;
}
return false;
}).append('text')
// Placed 5 units right of the width accumulated so far, or at 0 if none is set.
.attr('x', function(d) { return d.data.total_width ? d.data.total_width + 5 : 0 })
.attr('y', 15)
.style("font-size", "15px")
.text(function(d) { return 'Library name: ' + d.libname }).call(getBB);
}
function text_entry(parent_svg, relative_x_pos, relative_y_pos, onclick_callback) {
@ -331,6 +345,7 @@ function text_entry(parent_svg, relative_x_pos, relative_y_pos, onclick_callback
d.data.total_width = 0; // reset total_width
to_display = d.data.name
if (d.data.urls_count) {
// Only on Hostname node.
to_display += ' (' + d.data.urls_count + ')';
};
return to_display;

View File

@ -3,6 +3,7 @@ flask
flask-bootstrap
ete3
beautifulsoup4
requests
# Backend libs
git+https://github.com/viper-framework/har2tree.git