new: Capture a URL on the rendered page, keep the session (WiP)

pull/184/head
Raphaël Vinot 2021-03-19 17:51:25 +01:00
parent cd7b050cb0
commit 2a55461286
5 changed files with 137 additions and 1 deletions

View File

@ -547,6 +547,8 @@ class Lookyloo():
if isinstance(value, bool):
# Yes, empty string because that's False.
query[key] = 1 if value else ''
if isinstance(value, list):
query[key] = json.dumps(value)
p.hmset(perma_uuid, query) # type: ignore
p.sadd('to_capture', perma_uuid)
p.execute()
@ -560,6 +562,8 @@ class Lookyloo():
to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)
self.redis.delete(uuid)
to_capture['perma_uuid'] = uuid
if 'cookies' in to_capture:
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
if self.capture(**to_capture): # type: ignore
self.logger.info(f'Processed {to_capture["url"]}')
return True
@ -692,6 +696,11 @@ class Lookyloo():
'''Get all the files related to this capture.'''
return self._get_raw(capture_uuid)
def get_urls_rendered_page(self, capture_uuid: str):
    '''Get the sorted URLs present in the rendered page that were not requested in this capture.'''
    hartree = self.get_crawled_tree(capture_uuid).root_hartree
    urls_in_page = set(hartree.rendered_node.urls_in_rendered_page)
    # Drop every URL that is also a key of the requests seen in the tree.
    not_requested = urls_in_page.difference(hartree.all_url_requests.keys())
    return sorted(not_requested)
def capture(self, url: str, cookies_pseudofile: Optional[Union[BufferedIOBase, str]]=None,
depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
referer: str='', perma_uuid: Optional[str]=None, os: Optional[str]=None,
@ -819,6 +828,13 @@ class Lookyloo():
break
return details, body_content
def get_latest_url_capture(self, url: str) -> Optional[CaptureCache]:
    '''Return the first entry of the sorted capture cache for this URL, None if it was never captured.'''
    uuids = self.indexing.get_captures_url(url)
    ordered = self.sorted_capture_cache(uuids)
    return ordered[0] if ordered else None
def get_url_occurrences(self, url: str, limit: int=20) -> List[Dict]:
'''Get the most recent captures and URL nodes where the URL has been seen.'''
captures = self.sorted_capture_cache(self.indexing.get_captures_url(url))

View File

@ -22,7 +22,7 @@ from werkzeug.security import generate_password_hash, check_password_hash
from pymisp import MISPEvent
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config, get_taxonomies
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config, get_taxonomies, load_cookies
from lookyloo.lookyloo import Lookyloo, Indexing
from lookyloo.exceptions import NoValidHarFile, MissingUUID
from .proxied import ReverseProxied
@ -404,6 +404,30 @@ def export(tree_uuid: str):
as_attachment=True, attachment_filename='capture.zip')
@app.route('/tree/<string:tree_uuid>/urls_rendered_page', methods=['GET'])
def urls_rendered_page(tree_uuid: str):
    '''Render the list of URLs returned by lookyloo.get_urls_rendered_page for this capture.'''
    return render_template('urls_rendered.html',
                           base_tree_uuid=tree_uuid,
                           urls=lookyloo.get_urls_rendered_page(tree_uuid))
@app.route('/bulk_captures/<string:base_tree_uuid>', methods=['POST'])
def bulk_captures(base_tree_uuid: str):
    '''Enqueue a capture for every URL selected in the rendered page of a capture.

    The form posts, in the `url` fields, the 1-based positions of the selected
    entries in the list returned by lookyloo.get_urls_rendered_page for the
    base capture. Each selected URL is enqueued with the cookies of the base
    capture and the root URL of its tree as referer.

    Positions that are not integers, or that fall outside of the list, are
    ignored: they used to trigger a server error or, for 0 and negative
    values, to silently pick an entry from the end of the list.

    Renders bulk_captures.html with the (new capture UUID, URL) pairs.
    '''
    posted_positions = request.form.getlist('url')
    urls = lookyloo.get_urls_rendered_page(base_tree_uuid)
    ct = lookyloo.get_crawled_tree(base_tree_uuid)

    # The positions come from the client: validate them before indexing.
    selected_urls = []
    for posted_position in posted_positions:
        try:
            position = int(posted_position)
        except ValueError:
            continue
        if 1 <= position <= len(urls):
            selected_urls.append(urls[position - 1])

    enqueued = []
    if selected_urls:
        # Every new capture reuses the same cookies: load them only once.
        # enqueue_capture gets a fresh dict each time, the list is only read.
        cookies = load_cookies(lookyloo.get_cookies(base_tree_uuid))
        for url in selected_urls:
            capture = {'url': url,
                       'cookies': cookies,
                       'referer': ct.root_url
                       }
            new_capture_uuid = lookyloo.enqueue_capture(capture)
            enqueued.append((new_capture_uuid, url))
    return render_template('bulk_captures.html', uuid=base_tree_uuid, bulk_captures=enqueued)
@app.route('/tree/<string:tree_uuid>/hide', methods=['GET'])
@flask_login.login_required
def hide_capture(tree_uuid: str):
@ -717,6 +741,8 @@ def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/urls_in_rendered_content', methods=['GET'])
def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str):
# Note: we could simplify it with lookyloo.get_urls_rendered_page, but if at some point
# we have multiple pages rendered in one tree, it will be a problem.
ct = lookyloo.get_crawled_tree(tree_uuid)
urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
if not urlnode.rendered_html:

View File

@ -0,0 +1,52 @@
{% extends "main.html" %}

{% from 'bootstrap/utils.html' import render_messages %}

{# Lists the captures that were just enqueued.
   Expects `bulk_captures`: an iterable of (capture uuid, URL) pairs. #}

{% block title %}Captures{% endblock %}

{% block scripts %}
{{ super() }}
<script src='{{ url_for('static', filename='datatables.min.js') }}'></script>
<script type="text/javascript">
  // Paging is disabled (all the rows are displayed), so no page length is configured.
  $('#table').DataTable( {
    "order": [[ 0, "desc" ]],
    "searching": false,
    "paging": false
  });
</script>
{% endblock %}

{% block styles %}
{{ super() }}
<link rel="stylesheet" href="{{ url_for('static', filename='datatables.min.css') }}">
{% endblock %}

{% block content %}
  {# <center> is obsolete in HTML5, the Bootstrap utility class does the same job. #}
  <div class="text-center">
    <h4>Ongoing captures</h4>
    <button onclick="window.history.back();" class="btn btn-info" type="button">Go Back</button>
  </div>
  <div>The captures below are queued. It will take a few minutes before the links are working.</div>
  <div class="table-responsive">
    <table id="table" class="table" style="width:96%">
      <thead>
        <tr>
          <th>URL</th>
          <th>Link</th>
        </tr>
      </thead>
      <tbody>
      {% for uuid, captured_url in bulk_captures %}
        <tr>
          <td>
            {{ captured_url }}
          </td>
          <td><a href="{{ url_for('tree', tree_uuid=uuid) }}">Show capture</a></td>
        </tr>
      {% endfor %}
      </tbody>
    </table>
  </div>
{% endblock %}

View File

@ -72,6 +72,13 @@
modal.find('.modal-body').load(button.data("remote"));
});
</script>
<script>
  // Fill the body of the modal with the content of the page referenced by
  // the `data-remote` attribute of the element that opened it.
  $('#urlsInPageModal').on('show.bs.modal', function(event) {
    var remote = $(event.relatedTarget).data("remote");
    $(this).find('.modal-body').load(remote);
  });
</script>
<script>
{% if urlnode_uuid %}
@ -186,6 +193,10 @@
<a href="#emailModal" data-toggle="modal" data-target="#emailModal" role="button">Notify by mail</a>
</li>
{% endif %}
<li>
<a href="#urlsInPageModal" data-remote="{{ url_for('urls_rendered_page', tree_uuid=tree_uuid) }}"
data-toggle="modal" data-target="#urlsInPageModal" role="button">View URLs in rendered page</a>
</li>
<li>
<a href="https://www.lookyloo.eu/docs/main/usage.html#_investigate_a_capture" role="button">Documentation and usage</a>
</li>
@ -455,4 +466,23 @@
</div>
</div>
{% endif %}
{# Modal displaying the URLs found in the rendered page. Its body is replaced
   when the modal opens; the text below is only a placeholder. #}
<div class="modal fade" id="urlsInPageModal" tabindex="-1" role="dialog"
     aria-labelledby="urlsInPageModalLabel" aria-hidden="true">
  <div class="modal-dialog modal-xl" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="urlsInPageModalLabel">URLs in the rendered page</h5>
        <button type="button" class="close" data-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <div class="modal-body">
        ... loading URLs in rendered page ...
      </div>
      <div class="modal-footer">
        <button type="button" class="btn btn-secondary" data-dismiss="modal">Close</button>
      </div>
    </div>
  </div>
</div>
{% endblock content %}

View File

@ -0,0 +1,12 @@
<div>
  {# Fragment loaded in the body of the "URLs in the rendered page" modal.
     Each checkbox posts the 1-based position of its URL in `urls`; the
     bulk_captures view maps the positions back to the URLs. #}
  {% if urls %}
  <h4>Select below the URLs you want to capture.</h4>
  <form role="form" action="{{ url_for('bulk_captures', base_tree_uuid=base_tree_uuid) }}" method="post" enctype="multipart/form-data">
    {% for url in urls %}
    <div class="form-check">
      <input class="form-check-input" type="checkbox" name="url" id="url_{{loop.index}}" value="{{loop.index}}">
      <label class="form-check-label" for="url_{{loop.index}}">{{url}}</label>
    </div>
    {% endfor %}
    <button type="submit" class="btn btn-info" id="btn-capture-urls">Capture selected URLs</button>
  </form>
  {% else %}
  {# Nothing to select: do not display a form with a lone submit button. #}
  <h4>No additional URLs found in the rendered page.</h4>
  {% endif %}
</div>