mirror of https://github.com/CIRCL/lookyloo
Merge remote-tracking branch 'upstream/master'
commit
51ed616b48
|
@ -8,7 +8,7 @@ WORKDIR lookyloo
|
|||
|
||||
RUN pip3 install -r requirements.txt
|
||||
RUN pip3 install -e .
|
||||
RUN wget https://d3js.org/d3.v4.min.js -O lookyloo/static/d3.v4.min.js
|
||||
RUN wget https://d3js.org/d3.v5.min.js -O lookyloo/static/d3.v5.min.js
|
||||
RUN wget https://cdn.rawgit.com/eligrey/FileSaver.js/5733e40e5af936eb3f48554cf6a8a7075d71d18a/FileSaver.js -O lookyloo/static/FileSaver.js
|
||||
|
||||
RUN sed -i "s/SPLASH = 'http:\/\/127.0.0.1:8050'/SPLASH = 'http:\/\/splash:8050'/g" lookyloo/__init__.py
|
||||
|
|
|
@ -48,7 +48,7 @@ sudo docker run -p 8050:8050 -p 5023:5023 scrapinghub/splash --disable-ui --disa
|
|||
```bash
|
||||
pip install -r requirements.txt
|
||||
pip install -e .
|
||||
wget https://d3js.org/d3.v4.min.js -O lookyloo/static/d3.v4.min.js
|
||||
wget https://d3js.org/d3.v5.min.js -O lookyloo/static/d3.v5.min.js
|
||||
wget https://cdn.rawgit.com/eligrey/FileSaver.js/5733e40e5af936eb3f48554cf6a8a7075d71d18a/FileSaver.js -O lookyloo/static/FileSaver.js
|
||||
```
|
||||
# Run the app locally
|
||||
|
|
|
@ -9,3 +9,5 @@ chmod-socket = 660
|
|||
vacuum = true
|
||||
|
||||
die-on-term = true
|
||||
|
||||
wsgi-disable-file-wrapper = true
|
||||
|
|
|
@ -9,13 +9,12 @@ from scrapysplashwrapper import crawl
|
|||
from flask import Flask, render_template, request, session, send_file
|
||||
from flask_bootstrap import Bootstrap
|
||||
|
||||
from glob import glob
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
import pickle
|
||||
import tempfile
|
||||
import pathlib
|
||||
import time
|
||||
|
||||
from zipfile import ZipFile, ZIP_DEFLATED
|
||||
from io import BytesIO
|
||||
|
@ -30,26 +29,24 @@ if app.secret_key == 'changeme':
|
|||
|
||||
Bootstrap(app)
|
||||
app.config['BOOTSTRAP_SERVE_LOCAL'] = True
|
||||
app.config['SESSION_COOKIE_NAME'] = 'lookyloo'
|
||||
app.debug = True
|
||||
|
||||
HAR_DIR = 'scraped'
|
||||
HAR_DIR = pathlib.Path('scraped')
|
||||
SPLASH = 'http://127.0.0.1:8050'
|
||||
|
||||
pathlib.Path(HAR_DIR).mkdir(parents=True, exist_ok=True)
|
||||
HAR_DIR.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
|
||||
@app.before_request
|
||||
def session_management():
|
||||
# make the session last indefinitely until it is cleared
|
||||
session.permanent = True
|
||||
def cleanup_old_tmpfiles(max_age=36000):
    """Delete stale ``lookyloo*`` files from the system temporary directory.

    A file is considered stale when its last access time is more than
    ``max_age`` seconds in the past.

    Args:
        max_age: maximum age in seconds before a file is removed.
            Defaults to 36000 (10 hours).
    """
    now = time.time()
    for tmpfile in pathlib.Path(tempfile.gettempdir()).glob('lookyloo*'):
        try:
            # Only regular files are removed: unlink() raises on a directory
            # that happens to match the pattern.
            if not tmpfile.is_file():
                continue
            if now - tmpfile.stat().st_atime > max_age:
                tmpfile.unlink()
        except FileNotFoundError:
            # The file disappeared between the listing and the stat/unlink
            # (e.g. removed by a concurrent request); nothing left to do.
            continue
|
||||
|
||||
|
||||
def load_tree(report_dir):
|
||||
if session.get('tree'):
|
||||
# TODO delete file
|
||||
pass
|
||||
session.clear()
|
||||
har_files = sorted(glob(os.path.join(HAR_DIR, report_dir, '*.har')))
|
||||
har_files = sorted(report_dir.glob('*.har'))
|
||||
ct = CrawledTree(har_files)
|
||||
ct.find_parents()
|
||||
ct.join_trees()
|
||||
|
@ -65,7 +62,7 @@ def scrape():
|
|||
if request.form.get('url'):
|
||||
url = request.form.get('url')
|
||||
if not url.startswith('http'):
|
||||
url = 'http://{}'.format(url)
|
||||
url = f'http://{url}'
|
||||
depth = request.form.get('depth')
|
||||
if depth is None:
|
||||
depth = 1
|
||||
|
@ -74,17 +71,20 @@ def scrape():
|
|||
# broken
|
||||
pass
|
||||
width = len(str(len(items)))
|
||||
dirpath = os.path.join(HAR_DIR, datetime.now().isoformat())
|
||||
os.makedirs(dirpath)
|
||||
dirpath = HAR_DIR / datetime.now().isoformat()
|
||||
dirpath.mkdir()
|
||||
for i, item in enumerate(items):
|
||||
harfile = item['har']
|
||||
png = base64.b64decode(item['png'])
|
||||
child_frames = item['childFrames']
|
||||
with open(os.path.join(dirpath, '{0:0{width}}.har'.format(i, width=width)), 'w') as f:
|
||||
html = item['html']
|
||||
with (dirpath / '{0:0{width}}.har'.format(i, width=width)).open('w') as f:
|
||||
json.dump(harfile, f)
|
||||
with open(os.path.join(dirpath, '{0:0{width}}.png'.format(i, width=width)), 'wb') as f:
|
||||
with (dirpath / '{0:0{width}}.png'.format(i, width=width)).open('wb') as f:
|
||||
f.write(png)
|
||||
with open(os.path.join(dirpath, '{0:0{width}}.frames.json'.format(i, width=width)), 'w') as f:
|
||||
with (dirpath / '{0:0{width}}.html'.format(i, width=width)).open('w') as f:
|
||||
f.write(html)
|
||||
with (dirpath / '{0:0{width}}.frames.json'.format(i, width=width)).open('w') as f:
|
||||
json.dump(child_frames, f)
|
||||
return tree(0)
|
||||
return render_template('scrape.html')
|
||||
|
@ -92,10 +92,28 @@ def scrape():
|
|||
|
||||
def get_report_dirs():
    """Remove empty report directories and list what remains in ``HAR_DIR``.

    Returns:
        A list of ``pathlib.Path`` entries of ``HAR_DIR`` sorted in reverse
        order. Report directories are named after an ISO timestamp when they
        are created, so this is presumably newest first.
    """
    # Cleanup HAR_DIR of failed runs.
    for report_dir in HAR_DIR.iterdir():
        # iterdir() returns a generator, which is always truthy: any() is
        # required to actually test whether the directory is empty.
        if report_dir.is_dir() and not any(report_dir.iterdir()):
            report_dir.rmdir()
    return sorted(HAR_DIR.iterdir(), reverse=True)
|
||||
|
||||
|
||||
@app.route('/tree/hostname/<node_uuid>/text', methods=['GET'])
def hostnode_details_text(node_uuid):
    """Send the URLs of one host node as a downloadable Markdown file.

    The crawled tree is unpickled from the path stored in ``session["tree"]``,
    the host node identified by ``node_uuid`` is looked up in it, and the
    ``name`` of each of its URLs is written on its own line below a
    ``# URLs`` heading.
    """
    # NOTE(review): pickle.load executes arbitrary code on a crafted file;
    # presumably this path is only ever written by the server - confirm.
    with open(session["tree"], 'rb') as tree_file:
        crawled_tree = pickle.load(tree_file)
    host = crawled_tree.root_hartree.get_host_node_by_uuid(node_uuid)
    url_names = [entry.name for entry in host.urls]
    listing = '\n'.join(url_names)
    content = '''# URLs

{}
'''.format(listing)
    to_return = BytesIO(content.encode())
    to_return.seek(0)
    return send_file(to_return, mimetype='text/markdown',
                     as_attachment=True, attachment_filename='file.md')
|
||||
|
||||
|
||||
@app.route('/tree/hostname/<node_uuid>', methods=['GET'])
|
||||
|
@ -114,16 +132,18 @@ def urlnode_details(node_uuid):
|
|||
with open(session["tree"], 'rb') as f:
|
||||
ct = pickle.load(f)
|
||||
urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
|
||||
|
||||
to_return = BytesIO()
|
||||
got_content = False
|
||||
if hasattr(urlnode, 'body'):
|
||||
with ZipFile(to_return, 'a', ZIP_DEFLATED, False) as zfile:
|
||||
zfile.writestr(urlnode.filename, urlnode.body.getvalue())
|
||||
to_return.seek(0)
|
||||
# return send_file(urlnode.body, mimetype='application/zip',
|
||||
# as_attachment=True, attachment_filename='file.zip')
|
||||
with open('foo.bin', 'wb') as f:
|
||||
f.write(to_return.getvalue())
|
||||
body_content = urlnode.body.getvalue()
|
||||
if body_content:
|
||||
got_content = True
|
||||
with ZipFile(to_return, 'w', ZIP_DEFLATED) as zfile:
|
||||
zfile.writestr(urlnode.filename, urlnode.body.getvalue())
|
||||
if not got_content:
|
||||
with ZipFile(to_return, 'w', ZIP_DEFLATED) as zfile:
|
||||
zfile.writestr('file.txt', b'Response body empty')
|
||||
to_return.seek(0)
|
||||
return send_file(to_return, mimetype='application/zip',
|
||||
as_attachment=True, attachment_filename='file.zip')
|
||||
|
||||
|
@ -138,12 +158,14 @@ def tree(tree_id):
|
|||
|
||||
@app.route('/', methods=['GET'])
|
||||
def index():
|
||||
cleanup_old_tmpfiles()
|
||||
session.clear()
|
||||
i = 0
|
||||
titles = []
|
||||
if not os.path.exists(HAR_DIR):
|
||||
os.makedirs(HAR_DIR)
|
||||
if not HAR_DIR.exists():
|
||||
HAR_DIR.mkdir(parents=True)
|
||||
for report_dir in get_report_dirs():
|
||||
har_files = sorted(glob(os.path.join(HAR_DIR, report_dir, '*.har')))
|
||||
har_files = sorted(report_dir.glob('*.har'))
|
||||
if not har_files:
|
||||
continue
|
||||
with open(har_files[0], 'r') as f:
|
||||
|
|
|
@ -6,12 +6,19 @@ var margin = {top: 20, right: 200, bottom: 30, left: 90},
|
|||
height = 10000 - margin.top - margin.bottom;
|
||||
|
||||
var node_width = 0;
|
||||
var max_overlay_width = 1500;
|
||||
var node_height = 45;
|
||||
|
||||
var main_svg = d3.select("body").append("svg")
|
||||
.attr("width", width + margin.right + margin.left)
|
||||
.attr("height", height + margin.top + margin.bottom)
|
||||
|
||||
main_svg.append("clipPath")
|
||||
.attr("id", "textOverlay")
|
||||
.append("rect")
|
||||
.attr('width', max_overlay_width - 25)
|
||||
.attr('height', node_height);
|
||||
|
||||
// Add background pattern
|
||||
var pattern = main_svg.append("defs").append('pattern')
|
||||
.attr('id', 'backstripes')
|
||||
|
@ -29,11 +36,7 @@ var background = main_svg.append('rect')
|
|||
.attr('y', 0)
|
||||
.attr('width', width)
|
||||
.attr('height', height)
|
||||
.style('fill', "url(#backstripes)")
|
||||
.on('click', function(d) {
|
||||
// Remove the
|
||||
main_svg.selectAll('.overlay').remove()
|
||||
});
|
||||
.style('fill', "url(#backstripes)");
|
||||
|
||||
// append the svg object to the body of the page
|
||||
// appends a 'group' element to 'svg'
|
||||
|
@ -78,27 +81,11 @@ function getBB(selection) {
|
|||
})
|
||||
};
|
||||
|
||||
function str2bytes (str) {
|
||||
var bytes = new Uint8Array(str.length);
|
||||
for (var i=0; i<str.length; i++) {
|
||||
bytes[i] = str.charCodeAt(i);
|
||||
}
|
||||
return bytes;
|
||||
}
|
||||
|
||||
// Download the response body of the clicked URL node as a zip archive.
// `d` is the d3 datum bound to the node; `d.data.uuid` identifies the URL
// node on the server side.
function urlnode_click(d) {
    // Absolute path, like the "/tree/hostname/..." requests issued by
    // hostnode_click, so the request does not depend on the URL the page
    // was served from.
    // NOTE(review): assumes the route is mounted at /tree/url/<uuid> - confirm.
    var url = "/tree/url/" + d.data.uuid;
    d3.blob(url, {credentials: 'same-origin'})
        .then(function(data) {
            saveAs(data, 'file.zip');
        })
        .catch(function(error) {
            // d3.blob rejects on network failure or a non-2xx response;
            // report it instead of leaving the rejection unhandled.
            console.error('Unable to download ' + url, error);
        });
};
|
||||
|
||||
d3.selection.prototype.moveToFront = function() {
|
||||
|
@ -107,6 +94,9 @@ d3.selection.prototype.moveToFront = function() {
|
|||
});
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
// What happen when clicking on a domain (load a modal display)
|
||||
function hostnode_click(d) {
|
||||
// Move the node to the front (end of the list)
|
||||
|
@ -118,14 +108,17 @@ function hostnode_click(d) {
|
|||
.attr('class', 'overlay');
|
||||
|
||||
cur_node.append('line')
|
||||
.attr('class', 'overlay')
|
||||
.style("stroke", "black");
|
||||
.attr('id', 'overlay_link')
|
||||
.style("opacity", "0.95")
|
||||
.attr("stroke-width", "2")
|
||||
.style("stroke", "gray");
|
||||
|
||||
var top_margin = 15;
|
||||
var overlay_header_height = 50;
|
||||
var left_margin = 30;
|
||||
|
||||
overlay_hostname
|
||||
.datum({x: 0, y: 0})
|
||||
.datum({x: 0, y: 0, overlay_uuid: d.data.uuid})
|
||||
.attr('id', 'overlay_' + d.data.uuid)
|
||||
.attr("transform", "translate(" + 0 + "," + 0 + ")")
|
||||
.call(d3.drag().on("drag", function(d, i) {
|
||||
|
@ -135,9 +128,9 @@ function hostnode_click(d) {
|
|||
d.y += d3.event.dy
|
||||
d3.select(this)
|
||||
.attr("transform", "translate(" + d.x + "," + d.y + ")");
|
||||
cur_node.select('line')
|
||||
.attr("x2", d.x + top_margin)
|
||||
.attr("y2", d.y + left_margin);
|
||||
cur_node.select('#overlay_link')
|
||||
.attr("x2", d.x + left_margin + 3)
|
||||
.attr("y2", d.y + top_margin + 7);
|
||||
}));
|
||||
|
||||
overlay_hostname.append('rect')
|
||||
|
@ -154,26 +147,94 @@ function hostnode_click(d) {
|
|||
|
||||
// Modal display
|
||||
var url = "/tree/hostname/" + d.data.uuid;
|
||||
d3.json(url, function(error, urls) {
|
||||
d3.json(url, {credentials: 'same-origin'}).then(function(urls) {
|
||||
overlay_hostname
|
||||
.append('circle')
|
||||
.attr('id', 'overlay_circle_' + d.data.uuid)
|
||||
.attr('height', overlay_header_height)
|
||||
.attr('cx', left_margin + 10)
|
||||
.attr('cy', top_margin + 15)
|
||||
.attr('r', 12);
|
||||
|
||||
overlay_hostname
|
||||
.append('text')
|
||||
.attr('id', 'overlay_close_' + d.data.uuid)
|
||||
.attr('height', overlay_header_height)
|
||||
.attr('x', left_margin + 500)
|
||||
.attr('y', top_margin + 25)
|
||||
.style("font-size", overlay_header_height - 20)
|
||||
.text('\u2716')
|
||||
.attr('cursor', 'pointer')
|
||||
.on("click", function() {
|
||||
main_svg.selectAll('#overlay_' + d.data.uuid).remove();
|
||||
cur_node.select('#overlay_link').remove();
|
||||
}
|
||||
);
|
||||
|
||||
overlay_hostname.append('line')
|
||||
.attr('id', 'overlay_separator_header' + d.data.uuid)
|
||||
.style("stroke", "gray")
|
||||
.style('stroke-width', 2)
|
||||
.attr('x1', 15)
|
||||
.attr('y1', overlay_header_height)
|
||||
.attr('x2', 500)
|
||||
.attr('y2', overlay_header_height);
|
||||
|
||||
var interval_entries = 40;
|
||||
if (error) throw error;
|
||||
urls.forEach(function(url, index, array) {
|
||||
var jdata = JSON.parse(url)
|
||||
overlay_hostname.datum({'data': jdata});
|
||||
var text_node = text_entry(overlay_hostname, left_margin, top_margin + (interval_entries * index), urlnode_click);
|
||||
var text_node = text_entry(overlay_hostname, left_margin, top_margin + overlay_header_height + (interval_entries * index), urlnode_click);
|
||||
height_text = text_node.node().getBBox().height;
|
||||
icon_list(overlay_hostname, left_margin + 5, top_margin + height_text + (interval_entries * index));
|
||||
icon_list(overlay_hostname, left_margin + 5, top_margin + height_text + overlay_header_height + (interval_entries * index));
|
||||
});
|
||||
overlay_hostname.append('line')
|
||||
.attr('id', 'overlay_separator_footer' + d.data.uuid)
|
||||
.style("stroke", "gray")
|
||||
.style('stroke-width', 2)
|
||||
.attr('x1', 15)
|
||||
.attr('y1', overlay_hostname.node().getBBox().height + 15)
|
||||
.attr('x2', 500)
|
||||
.attr('y2', overlay_hostname.node().getBBox().height);
|
||||
|
||||
overlay_hostname
|
||||
.append('text')
|
||||
.attr('id', 'overlay_download_' + d.data.uuid)
|
||||
.attr('height', overlay_header_height - 10)
|
||||
.attr('x', left_margin)
|
||||
.attr('y', overlay_hostname.node().getBBox().height + 40)
|
||||
.style("font-size", overlay_header_height - 30)
|
||||
.text('Download URLs as text')
|
||||
.attr('cursor', 'pointer')
|
||||
.on("click", function() {
|
||||
var url = "/tree/hostname/" + d.data.uuid + '/text';
|
||||
d3.blob(url, {credentials: 'same-origin'}).then(function(data) {
|
||||
saveAs(data, 'file.md');
|
||||
});
|
||||
});
|
||||
|
||||
overlay_bbox = overlay_hostname.node().getBBox();
|
||||
overlay_hostname.select('rect')
|
||||
.attr('width', overlay_bbox.width + left_margin)
|
||||
.attr('height', overlay_bbox.height + top_margin);
|
||||
.attr('width', function() {
|
||||
optimal_size = overlay_bbox.width + left_margin
|
||||
return optimal_size < max_overlay_width ? optimal_size : max_overlay_width;
|
||||
})
|
||||
.attr('height', overlay_bbox.height + 10);
|
||||
|
||||
cur_node.select('line')
|
||||
.attr("x1", cur_node.x)
|
||||
.attr("y1", cur_node.y)
|
||||
.attr("x2", top_margin)
|
||||
.attr("y2", left_margin);
|
||||
overlay_hostname.select('#overlay_close_' + d.data.uuid)
|
||||
.attr('x', overlay_hostname.select('rect').node().getBBox().width - left_margin);
|
||||
|
||||
overlay_hostname.select('#overlay_separator_header' + d.data.uuid)
|
||||
.attr('x2', overlay_hostname.select('rect').node().getBBox().width + 14);
|
||||
overlay_hostname.select('#overlay_separator_footer' + d.data.uuid)
|
||||
.attr('x2', overlay_hostname.select('rect').node().getBBox().width + 14);
|
||||
|
||||
|
||||
cur_node.select('#overlay_link')
|
||||
.attr("x1", 10)
|
||||
.attr("y1", 0)
|
||||
.attr("x2", left_margin + 3)
|
||||
.attr("y2", top_margin + 7);
|
||||
});
|
||||
};
|
||||
|
||||
|
@ -243,7 +304,7 @@ function text_entry(parent_svg, relative_x_pos, relative_y_pos, onclick_callback
|
|||
// Avoid hiding the content after the circle
|
||||
var nodeContent = parent_svg
|
||||
.append('svg')
|
||||
.attr('height',node_height)
|
||||
.attr('height', node_height)
|
||||
.attr('x', relative_x_pos)
|
||||
.attr('y', relative_y_pos);
|
||||
|
||||
|
@ -254,6 +315,8 @@ function text_entry(parent_svg, relative_x_pos, relative_y_pos, onclick_callback
|
|||
.style("font-size", "16px")
|
||||
.attr("stroke-width", ".2px")
|
||||
.style("opacity", .9)
|
||||
.attr('cursor', 'pointer')
|
||||
.attr("clip-path", "url(#textOverlay)")
|
||||
.text(function(d) {
|
||||
d.data.total_width = 0; // reset total_width
|
||||
to_display = d.data.name
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
{% block scripts %}
|
||||
{{ super() }}
|
||||
<script src='{{ url_for('static', filename='FileSaver.js') }}'></script>
|
||||
<script src='{{ url_for('static', filename='d3.v4.min.js') }}'></script>
|
||||
<script src='{{ url_for('static', filename='d3.v5.min.js') }}'></script>
|
||||
{% endblock %}
|
||||
|
||||
{% block head %}
|
||||
|
|
|
@ -27,8 +27,33 @@
|
|||
<img src="{{ url_for('static', filename='redirect.png') }}"
|
||||
alt="Redirect" height="20" width="20"> Redirect</br>
|
||||
|
||||
<img src="{{ url_for('static', filename='cookie_in_url.png') }}"
|
||||
alt="Cookie in URL" height="20" width="20"> Cookie in URL</br>
|
||||
<img src="{{ url_for('static', filename='font.png') }}"
|
||||
alt="Font" height="20" width="20"> Font</br>
|
||||
|
||||
<img src="{{ url_for('static', filename='html.png') }}"
|
||||
alt="HTML" height="20" width="20"> HTML</br>
|
||||
|
||||
<img src="{{ url_for('static', filename='json.png') }}"
|
||||
alt="JSON" height="20" width="20"> JSON</br>
|
||||
|
||||
<img src="{{ url_for('static', filename='css.png') }}"
|
||||
alt="CSS" height="20" width="20"> CSS</br>
|
||||
|
||||
<img src="{{ url_for('static', filename='exe.png') }}"
|
||||
alt="EXE" height="20" width="20"> EXE</br>
|
||||
|
||||
<img src="{{ url_for('static', filename='img.png') }}"
|
||||
alt="Image" height="20" width="20"> Image</br>
|
||||
|
||||
<img src="{{ url_for('static', filename='video.png') }}"
|
||||
alt="Video" height="20" width="20"> Video</br>
|
||||
|
||||
<img src="{{ url_for('static', filename='ifr.png') }}"
|
||||
alt="iFrame" height="20" width="20"> iFrame</br>
|
||||
|
||||
<img src="{{ url_for('static', filename='wtf.png') }}"
|
||||
alt="Content type not set/unknown" height="20" width="20"> Content type not set/unknown</br>
|
||||
|
||||
</div>
|
||||
<div id=tree-details><center><b>Tree details</b></center></br>
|
||||
<b>Root URL</b>: {{ root_url }}</br>
|
||||
|
|
Loading…
Reference in New Issue