mirror of https://github.com/CIRCL/lookyloo
new: Capture an URL on the rendered page, keep the session (WiP)
parent
cd7b050cb0
commit
2a55461286
lookyloo
website/web
|
@ -547,6 +547,8 @@ class Lookyloo():
|
|||
if isinstance(value, bool):
|
||||
# Yes, empty string because that's False.
|
||||
query[key] = 1 if value else ''
|
||||
if isinstance(value, list):
|
||||
query[key] = json.dumps(value)
|
||||
p.hmset(perma_uuid, query) # type: ignore
|
||||
p.sadd('to_capture', perma_uuid)
|
||||
p.execute()
|
||||
|
@ -560,6 +562,8 @@ class Lookyloo():
|
|||
to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)
|
||||
self.redis.delete(uuid)
|
||||
to_capture['perma_uuid'] = uuid
|
||||
if 'cookies' in to_capture:
|
||||
to_capture['cookies_pseudofile'] = to_capture.pop('cookies')
|
||||
if self.capture(**to_capture): # type: ignore
|
||||
self.logger.info(f'Processed {to_capture["url"]}')
|
||||
return True
|
||||
|
@ -692,6 +696,11 @@ class Lookyloo():
|
|||
'''Get all the files related to this capture.'''
|
||||
return self._get_raw(capture_uuid)
|
||||
|
||||
def get_urls_rendered_page(self, capture_uuid: str):
    '''List the URLs present in the rendered page of a capture that were not requested during it.

    The result is sorted and contains each URL only once.
    '''
    har_tree = self.get_crawled_tree(capture_uuid).root_hartree
    urls_in_page = set(har_tree.rendered_node.urls_in_rendered_page)
    # Drop every URL that already has a request in the tree.
    not_requested = urls_in_page.difference(har_tree.all_url_requests.keys())
    return sorted(not_requested)
|
||||
|
||||
def capture(self, url: str, cookies_pseudofile: Optional[Union[BufferedIOBase, str]]=None,
|
||||
depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
|
||||
referer: str='', perma_uuid: Optional[str]=None, os: Optional[str]=None,
|
||||
|
@ -819,6 +828,13 @@ class Lookyloo():
|
|||
break
|
||||
return details, body_content
|
||||
|
||||
def get_latest_url_capture(self, url: str) -> Optional[CaptureCache]:
    '''Get the most recent capture with this URL, or None if the URL was never captured.

    The captures indexed for the URL are passed through sorted_capture_cache and
    the first entry of the result is returned.
    '''
    capture_uuids = self.indexing.get_captures_url(url)
    sorted_captures = self.sorted_capture_cache(capture_uuids)
    return sorted_captures[0] if sorted_captures else None
|
||||
|
||||
def get_url_occurrences(self, url: str, limit: int=20) -> List[Dict]:
|
||||
'''Get the most recent captures and URL nodes where the URL has been seen.'''
|
||||
captures = self.sorted_capture_cache(self.indexing.get_captures_url(url))
|
||||
|
|
|
@ -22,7 +22,7 @@ from werkzeug.security import generate_password_hash, check_password_hash
|
|||
|
||||
from pymisp import MISPEvent
|
||||
|
||||
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config, get_taxonomies
|
||||
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config, get_taxonomies, load_cookies
|
||||
from lookyloo.lookyloo import Lookyloo, Indexing
|
||||
from lookyloo.exceptions import NoValidHarFile, MissingUUID
|
||||
from .proxied import ReverseProxied
|
||||
|
@ -404,6 +404,30 @@ def export(tree_uuid: str):
|
|||
as_attachment=True, attachment_filename='capture.zip')
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/urls_rendered_page', methods=['GET'])
def urls_rendered_page(tree_uuid: str):
    '''Render the form listing the URLs found in the rendered page of a capture.'''
    return render_template('urls_rendered.html',
                           base_tree_uuid=tree_uuid,
                           urls=lookyloo.get_urls_rendered_page(tree_uuid))
|
||||
|
||||
|
||||
@app.route('/bulk_captures/<string:base_tree_uuid>', methods=['POST'])
def bulk_captures(base_tree_uuid: str):
    '''Queue a capture for each URL selected in the rendered page of an existing capture.

    The form posts, in its 'url' fields, the 1-based positions of the selected URLs
    in the list returned by lookyloo.get_urls_rendered_page. Every new capture reuses
    the cookies of the base capture and uses its root URL as referer.

    Renders bulk_captures.html with the (new capture UUID, URL) pairs that were queued.
    '''
    urls = lookyloo.get_urls_rendered_page(base_tree_uuid)
    ct = lookyloo.get_crawled_tree(base_tree_uuid)

    # The positions come from the client: ignore anything that is not an integer
    # within the list. Without the range check, 0 or a negative value would
    # silently pick a URL from the end of the list.
    positions = []
    for selected_id in request.form.getlist('url'):
        try:
            position = int(selected_id)
        except ValueError:
            continue
        if 1 <= position <= len(urls):
            positions.append(position)

    queued_captures = []
    if positions:
        # The cookies are the same for every new capture: load them once
        # instead of once per selected URL.
        cookies = load_cookies(lookyloo.get_cookies(base_tree_uuid))
        for position in positions:
            url = urls[position - 1]
            capture = {'url': url,
                       'cookies': cookies,
                       'referer': ct.root_url
                       }
            new_capture_uuid = lookyloo.enqueue_capture(capture)
            queued_captures.append((new_capture_uuid, url))

    return render_template('bulk_captures.html', uuid=base_tree_uuid, bulk_captures=queued_captures)
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/hide', methods=['GET'])
|
||||
@flask_login.login_required
|
||||
def hide_capture(tree_uuid: str):
|
||||
|
@ -717,6 +741,8 @@ def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
|
|||
|
||||
@app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/urls_in_rendered_content', methods=['GET'])
|
||||
def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str):
|
||||
# Note: we could simplify it with lookyloo.get_urls_rendered_page, but if at some point,
|
||||
# we have multiple pages rendered on one tree, it will be a problem.
|
||||
ct = lookyloo.get_crawled_tree(tree_uuid)
|
||||
urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
|
||||
if not urlnode.rendered_html:
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
{% extends "main.html" %}
{#
  Confirmation page shown after submitting the bulk capture form.
  Expects `bulk_captures`: an iterable of (capture UUID, URL) pairs, one per queued capture.
#}

{% from 'bootstrap/utils.html' import render_messages %}

{% block title %}Captures{% endblock %}

{% block scripts %}
{{ super() }}
<script src='{{ url_for('static', filename='datatables.min.js') }}'></script>
<script type="text/javascript">
  {# Paging is disabled, so every queued capture is listed on a single page. #}
  $('#table').DataTable( {
    "order": [[ 0, "desc" ]],
    "searching": false,
    "paging": false
  });
</script>
{% endblock %}

{% block styles %}
{{ super() }}
<link rel="stylesheet" href="{{ url_for('static', filename='datatables.min.css') }}">
{% endblock %}


{% block content %}
  {# Bootstrap's text-center replaces the obsolete <center> element. #}
  <div class="text-center">
    <h4>Ongoing captures</h4>
    <button onclick="window.history.back();" class="btn btn-info" type="button">Go Back</button>
  </div>
  <div>The captures below are queued, it will take a few minutes before the links are working</div>
  <div class="table-responsive">
    <table id="table" class="table" style="width:96%">
      <thead>
        <tr>
          <th>URL</th>
          <th>Link</th>
        </tr>
      </thead>
      <tbody>
        {% for uuid, captured_url in bulk_captures %}
        <tr>
          <td>
            {{ captured_url }}
          </td>
          <td><a href="{{ url_for('tree', tree_uuid=uuid) }}">Show capture</a></td>
        </tr>
        {% endfor %}
      </tbody>
    </table>
  </div>
{% endblock %}
|
|
@ -72,6 +72,13 @@
|
|||
modal.find('.modal-body').load(button.data("remote"));
|
||||
});
|
||||
</script>
|
||||
<script>
|
||||
$('#urlsInPageModal').on('show.bs.modal', function(e) {
|
||||
var button = $(e.relatedTarget);
|
||||
var modal = $(this);
|
||||
modal.find('.modal-body').load(button.data("remote"));
|
||||
});
|
||||
</script>
|
||||
|
||||
<script>
|
||||
{% if urlnode_uuid %}
|
||||
|
@ -186,6 +193,10 @@
|
|||
<a href="#emailModal" data-toggle="modal" data-target="#emailModal" role="button">Notify by mail</a>
|
||||
</li>
|
||||
{% endif %}
|
||||
<li>
|
||||
<a href="#urlsInPageModal" data-remote="{{ url_for('urls_rendered_page', tree_uuid=tree_uuid) }}"
|
||||
data-toggle="modal" data-target="#urlsInPageModal" role="button">View URLs in rendered page</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://www.lookyloo.eu/docs/main/usage.html#_investigate_a_capture" role="button">Documentation and usage</a>
|
||||
</li>
|
||||
|
@ -455,4 +466,23 @@
|
|||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
{# Modal whose body is filled on demand with the URLs found in the rendered page. #}
{# aria-labelledby ties the dialog to its title so assistive technologies announce it. #}
<div class="modal fade" id="urlsInPageModal" tabindex="-1" role="dialog" aria-labelledby="urlsInPageModalLabel">
  <div class="modal-dialog modal-xl" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="urlsInPageModalLabel">URLs in the rendered page</h5>
        <button type="button" class="close" data-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">&times;</span>
        </button>
      </div>
      <div class="modal-body">
        ... loading URLs in rendered page ...
      </div>
      <div class="modal-footer">
        <button type="button" class="btn btn-secondary" data-dismiss="modal">Close</button>
      </div>
    </div>
  </div>
</div>
{% endblock content %}
|
||||
{% endblock content %}
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
{#
  Form listing the URLs found in the rendered page of a capture.
  Expects `urls` (the URLs to list) and `base_tree_uuid` (UUID of the capture they come from).
  Each checkbox submits the 1-based position of its URL in `urls`, not the URL itself.
#}
<div>
  <h4>Select below the URLs you want to capture.</h4>
  <form role="form" action="{{ url_for('bulk_captures', base_tree_uuid=base_tree_uuid) }}" method="post" enctype="multipart/form-data">
    {% for url in urls %}
    <div class="form-check">
      <input class="form-check-input" type="checkbox" name="url" id="url_{{loop.index}}" value="{{loop.index}}">
      <label class="form-check-label" for="url_{{loop.index}}">{{url}}</label>
    </div>
    {% endfor %}
    <button type="submit" class="btn btn-info" id="btn-capture-urls">Capture selected URLs</button>
  </form>
</div>
|
Loading…
Reference in New Issue