mirror of https://github.com/CIRCL/lookyloo
new: Add captures categorization
parent
bd2287ade3
commit
2802cfd46c
|
@ -17,6 +17,7 @@
|
|||
"use_user_agents_users": false,
|
||||
"enable_default_blur_screenshot": false,
|
||||
"enable_context_by_users": false,
|
||||
"enable_categorization": false,
|
||||
"enable_mail_notification": false,
|
||||
"email": {
|
||||
"from": "Lookyloo <lookyloo@myorg.local>",
|
||||
|
|
|
@ -19,6 +19,8 @@ from redis import Redis
|
|||
from redis.exceptions import ConnectionError
|
||||
from publicsuffix2 import PublicSuffixList, fetch # type: ignore
|
||||
from bs4 import BeautifulSoup # type: ignore
|
||||
from pytaxonomies import Taxonomies
|
||||
|
||||
try:
|
||||
import cloudscraper # type: ignore
|
||||
HAS_CF = True
|
||||
|
@ -55,6 +57,11 @@ def get_resources_hashes(har2tree_container: Union[CrawledTree, HostNode, URLNod
|
|||
return all_ressources_hashes
|
||||
|
||||
|
||||
@lru_cache(maxsize=64)
def get_taxonomies():
    """Return the Taxonomies object, building it on the first call only.

    The function takes no argument, so the cache holds a single instance
    that is shared by every caller.
    """
    taxonomies = Taxonomies()
    return taxonomies
|
||||
|
||||
|
||||
@lru_cache(64)
|
||||
def get_public_suffix_list():
|
||||
"""Initialize Public Suffix List"""
|
||||
|
|
|
@ -30,7 +30,7 @@ from werkzeug.useragents import UserAgent
|
|||
from .exceptions import NoValidHarFile, MissingUUID
|
||||
from .helpers import (get_homedir, get_socket_path, load_cookies, get_config,
|
||||
safe_create_dir, get_email_template, load_pickle_tree,
|
||||
remove_pickle_tree, get_resources_hashes)
|
||||
remove_pickle_tree, get_resources_hashes, get_taxonomies)
|
||||
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
|
||||
from .context import Context
|
||||
from .indexing import Indexing
|
||||
|
@ -43,6 +43,7 @@ class Lookyloo():
|
|||
self.logger.setLevel(get_config('generic', 'loglevel'))
|
||||
self.indexing = Indexing()
|
||||
self.is_public_instance = get_config('generic', 'public_instance')
|
||||
self.taxonomies = get_taxonomies()
|
||||
|
||||
self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
|
||||
self.scrape_dir: Path = get_homedir() / 'scraped'
|
||||
|
@ -269,6 +270,53 @@ class Lookyloo():
|
|||
return {}
|
||||
return ct.root_hartree.stats
|
||||
|
||||
def categories_capture(self, capture_uuid: str):
    """Return the categories currently attached to a capture.

    The categories are read from the ``categories`` file of the capture
    directory, one machinetag per line. Blank lines are skipped so an empty
    string is never handed to the taxonomies lookup.

    :param capture_uuid: UUID of the capture to inspect.
    :return: Dict mapping each stored machinetag to the value returned by
        ``self.taxonomies.revert_machinetag`` for it. Empty when the capture
        has no ``categories`` file.
    :raises MissingUUID: If no capture directory is found for the UUID.
    """
    capture_dir = self.lookup_capture_dir(capture_uuid)
    if not capture_dir:
        raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')

    categories_file = capture_dir / 'categories'
    current_categories = []
    # get existing categories if possible
    if categories_file.exists():
        with categories_file.open() as f:
            stripped_lines = (line.strip() for line in f)
            # A blank line (e.g. after a manual edit of the file) is not a machinetag.
            current_categories = [tag for tag in stripped_lines if tag]
    return {e: self.taxonomies.revert_machinetag(e) for e in current_categories}
|
||||
|
||||
def categorize_capture(self, capture_uuid: str, category: str):
    """Attach a category (machinetag) to a capture.

    Does nothing when ``enable_categorization`` is off in the generic config.
    The full set of categories is written back to the ``categories`` file of
    the capture directory, one machinetag per line.

    :param capture_uuid: UUID of the capture to categorize.
    :param category: Machinetag to add.
    :raises MissingUUID: If no capture directory is found for the UUID.
    """
    if not get_config('generic', 'enable_categorization'):
        return
    # Make sure the category is mappable to a taxonomy.
    # NOTE(review): presumably revert_machinetag raises on an unknown tag - confirm.
    self.taxonomies.revert_machinetag(category)

    capture_dir = self.lookup_capture_dir(capture_uuid)
    if not capture_dir:
        raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')

    categories_file = capture_dir / 'categories'
    # Start from the categories already stored, if any.
    known_tags = set()
    if categories_file.exists():
        with categories_file.open() as existing:
            known_tags = {row.strip() for row in existing}
    known_tags.add(category)

    with categories_file.open('w') as out:
        out.writelines(f'{t}\n' for t in known_tags)
|
||||
|
||||
def uncategorize_capture(self, capture_uuid: str, category: str):
    """Detach a category (machinetag) from a capture.

    Does nothing when ``enable_categorization`` is off in the generic config.
    Removing a category that is not attached to the capture is a no-op: the
    ``categories`` file is neither created nor rewritten in that case.

    :param capture_uuid: UUID of the capture to update.
    :param category: Machinetag to remove.
    :raises MissingUUID: If no capture directory is found for the UUID.
    """
    if not get_config('generic', 'enable_categorization'):
        return
    capture_dir = self.lookup_capture_dir(capture_uuid)
    if not capture_dir:
        raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')

    categories_file = capture_dir / 'categories'
    if not categories_file.exists():
        # Nothing was ever stored for this capture, so there is nothing to remove.
        return
    with categories_file.open() as f:
        current_categories = {line.strip() for line in f}

    if category not in current_categories:
        # set.remove() would raise KeyError here (repeated click, empty category);
        # the requested end state is already reached, leave the file as it is.
        return
    current_categories.remove(category)
    with categories_file.open('w') as f:
        f.writelines(f'{t}\n' for t in current_categories)
|
||||
|
||||
def trigger_modules(self, capture_uuid: str, force: bool=False) -> None:
|
||||
capture_dir = self.lookup_capture_dir(capture_uuid)
|
||||
if not capture_dir:
|
||||
|
|
|
@ -14,7 +14,7 @@ from flask import Flask, render_template, request, send_file, redirect, url_for,
|
|||
from flask_bootstrap import Bootstrap # type: ignore
|
||||
from flask_httpauth import HTTPDigestAuth # type: ignore
|
||||
|
||||
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config
|
||||
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config, get_taxonomies
|
||||
from lookyloo.lookyloo import Lookyloo, Indexing
|
||||
from lookyloo.exceptions import NoValidHarFile, MissingUUID
|
||||
from .proxied import ReverseProxied
|
||||
|
@ -172,6 +172,36 @@ def trigger_modules(tree_uuid: str, force: int):
|
|||
return redirect(url_for('modules', tree_uuid=tree_uuid))
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/categories_capture/', defaults={'query': ''})
@app.route('/tree/<string:tree_uuid>/categories_capture/<string:query>', methods=['GET'])
def categories_capture(tree_uuid: str, query: str):
    """Render the categories of a capture and, optionally, taxonomy search results.

    ``matching_categories`` is passed to the template as ``None`` when no
    query was given, as an empty dict when the search found nothing, and
    otherwise as a dict mapping each matching machinetag to the value
    returned by ``revert_machinetag`` for it.
    """
    current_categories = lookyloo.categories_capture(tree_uuid)
    if not query:
        matching_categories = None
    else:
        taxonomies = get_taxonomies()
        found = taxonomies.search(query)
        if found:
            matching_categories = {tag: taxonomies.revert_machinetag(tag) for tag in found}
        else:
            matching_categories = {}
    return render_template('categories_capture.html', tree_uuid=tree_uuid,
                           current_categories=current_categories,
                           matching_categories=matching_categories)
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/uncategorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/uncategorize/<string:category>', methods=['GET'])
def uncategorize_capture(tree_uuid: str, category: str):
    """Remove a category from a capture and return a JSON confirmation."""
    lookyloo.uncategorize_capture(tree_uuid, category)
    # This endpoint removes the category: the message must say so.
    return jsonify({'response': f'{category} successfully removed from {tree_uuid}'})
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/categorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/categorize/<string:category>', methods=['GET'])
def categorize_capture(tree_uuid: str, category: str):
    """Add a category to a capture and return a JSON confirmation."""
    lookyloo.categorize_capture(tree_uuid, category)
    # This endpoint adds the category: the message must say so.
    return jsonify({'response': f'{category} successfully added to {tree_uuid}'})
|
||||
|
||||
|
||||
@app.route('/tree/<string:tree_uuid>/stats', methods=['GET'])
|
||||
def stats(tree_uuid: str):
|
||||
stats = lookyloo.get_statistics(tree_uuid)
|
||||
|
@ -311,11 +341,16 @@ def tree(tree_uuid: str, urlnode_uuid: Optional[str]=None):
|
|||
enable_context_by_users = True
|
||||
else:
|
||||
enable_context_by_users = False
|
||||
if get_config('generic', 'enable_categorization'):
|
||||
enable_categorization = True
|
||||
else:
|
||||
enable_categorization = False
|
||||
tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(tree_uuid)
|
||||
return render_template('tree.html', tree_json=tree_json, start_time=start_time,
|
||||
user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid,
|
||||
meta=meta, enable_mail_notification=enable_mail_notification,
|
||||
enable_context_by_users=enable_context_by_users,
|
||||
enable_categorization=enable_categorization,
|
||||
blur_screenshot=blur_screenshot,
|
||||
urlnode_uuid=urlnode_uuid, has_redirects=True if cache['redirects'] else False)
|
||||
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
{% from "macros.html" import taxonomy_table %}

{# Content of the categories modal: the categories already set, then the search results. #}
<div>

  {% if not current_categories %}
  <center><h4>The capture isn't categorized yet</h4></center>
  {% else %}
  <center><h4>Current categories for the capture</h4></center>
  {{ taxonomy_table(tree_uuid, current_categories, 0) }}
  {% endif %}

  {# matching_categories: none means no search was made, empty means the search found nothing. #}
  {% if matching_categories is none %}
  <p></p>
  {% elif not matching_categories %}
  <center><h4>No categories matching your query</h4></center>
  {% else %}
  <center><h4>Categories matching your query</h4></center>
  {{ taxonomy_table(tree_uuid, matching_categories, 1) }}
  {% endif %}

</div>
|
|
@ -1,3 +1,65 @@
|
|||
{% macro taxonomy_table(tree_uuid, categories_info, add_category) %}
{#
  Render a table of taxonomy entries with one button per row.

  tree_uuid:       UUID of the capture the buttons act on.
  categories_info: dict of machinetag -> sequence of 2 or 3 items; the first item provides
                   .name, the second .description / .predicate and the optional third
                   .description / .expanded.
  add_category:    truthy to render "Categorize" buttons, falsy to render "Uncategorize" ones.
#}
<div class="table-responsive">
  <table id="table" class="table">
    <thead>
      <tr>
        <th>Name</th>
        <th>Description</th>
        <th>Machinetag</th>
        {% if add_category %}
        <th>Click to add category</th>
        {% else %}
        <th>Click to remove category</th>
        {% endif %}
      </tr>
    </thead>
    <tbody>
      {% for mt, val in categories_info.items() %}
      <tr>
        <td><a href="https://www.misp-project.org/taxonomies.html#_{{ val[0].name }}">{{ val[0].name }}</a></td>
        <td>
          {% if val|length == 3 %}
            {% if val[2].description %}
              {{ val[2].description }}
            {% elif val[2].expanded %}
              {{ val[2].expanded }}
            {% endif %}
          {% elif val[1].description %}
            {{ val[1].description }}
          {% else %}
            {{ val[1].predicate }}
          {% endif %}
        </td>
        <td>{{ mt }}</td>
        <td>
          <button type="button" class="btn btn-link {% if add_category %}categorize_capture{% else %}uncategorize_capture{% endif %}" value="{{ mt }}">
            {% if add_category %}
              Categorize capture.
            {% else %}
              Uncategorize capture.
            {% endif %}
          </button>
        </td>
      </tr>
      {% endfor %}
    </tbody>
  </table>
</div>
<script>
  // This script is emitted once per macro call, so it can run several times against the
  // same buttons: drop any handler installed by a previous run before binding a new one,
  // otherwise a single click sends the request more than once.
  $('.categorize_capture').off('click.categorize').on('click.categorize', function(e){
    var button = $(this);
    // The machinetag goes in the URL path and can contain reserved characters: encode it.
    $.get("{{ url_for('categorize_capture', tree_uuid=tree_uuid) }}" + encodeURIComponent(button.val()))
      .always(function() {
        // Reload only once the request is finished, so the refreshed view is up-to-date.
        $('.modal-body').load("{{ url_for('categories_capture', tree_uuid=tree_uuid) }}");
      });
  });
  $('.uncategorize_capture').off('click.uncategorize').on('click.uncategorize', function(e){
    var button = $(this);
    $.get("{{ url_for('uncategorize_capture', tree_uuid=tree_uuid) }}" + encodeURIComponent(button.val()))
      .always(function() {
        $('.modal-body').load("{{ url_for('categories_capture', tree_uuid=tree_uuid) }}");
      });
  });
</script>
{% endmacro %}
|
||||
|
||||
|
||||
{% macro known_content_details(details) %}
|
||||
<div>
|
||||
{% if details is string %}
|
||||
|
|
|
@ -9,13 +9,35 @@
|
|||
{{ super() }}
|
||||
<script src='{{ url_for('static', filename='d3.v6.min.js') }}'></script>
|
||||
<script src='{{ url_for('static', filename='tree.js') }}'></script>
|
||||
|
||||
<script>
|
||||
$('#modulesModal').on('show.bs.modal', function(e) {
|
||||
var button = $(e.relatedTarget);
|
||||
var modal = $(this);
|
||||
modal.find('.modal-body').load(button.data("remote"));
|
||||
});
|
||||
$('.modulesForceRefresh').on('click',function(){
|
||||
$('.modal-body').load("{{ url_for('trigger_modules', tree_uuid=tree_uuid, force=True) }}",function(){
|
||||
$('#modulesModal').modal({show:true});
|
||||
});
|
||||
});
|
||||
</script>
|
||||
|
||||
<script>
|
||||
$('#categoriesModal').on('show.bs.modal', function(e) {
|
||||
var button = $(e.relatedTarget);
|
||||
var modal = $(this);
|
||||
modal.find('.modal-body').load(button.data("remote"));
|
||||
});
|
||||
$('#searchCategories').submit(function(event){
|
||||
var query = $("#query").val();
|
||||
$('.modal-body').load("{{ url_for('categories_capture', tree_uuid=tree_uuid) }}" + query, function() {
|
||||
$('#categoriesModal').modal({show:true});
|
||||
});
|
||||
event.preventDefault();
|
||||
});
|
||||
</script>
|
||||
|
||||
<script>
|
||||
$('#statsModal').on('show.bs.modal', function(e) {
|
||||
var button = $(e.relatedTarget);
|
||||
|
@ -23,13 +45,7 @@
|
|||
modal.find('.modal-body').load(button.data("remote"));
|
||||
});
|
||||
</script>
|
||||
<script>
|
||||
$('.modulesForceRefresh').on('click',function(){
|
||||
$('.modal-body').load("{{ url_for('trigger_modules', tree_uuid=tree_uuid, force=True) }}",function(){
|
||||
$('#modulesModal').modal({show:true});
|
||||
});
|
||||
});
|
||||
</script>
|
||||
|
||||
{% if urlnode_uuid %}
|
||||
<script>
|
||||
history.scrollRestoration = "manual";
|
||||
|
@ -82,6 +98,12 @@
|
|||
<a href="#modulesModal" data-remote="{{ url_for('trigger_modules', tree_uuid=tree_uuid, force=False) }}"
|
||||
data-toggle="modal" data-target="#modulesModal" role="button">Show third party reports</a>
|
||||
</li>
|
||||
{% if enable_categorization %}
|
||||
<li>
|
||||
<a href="#categoriesModal" data-remote="{{ url_for('categories_capture', tree_uuid=tree_uuid) }}"
|
||||
data-toggle="modal" data-target="#categoriesModal" role="button">Manage categories of the capture</a>
|
||||
</li>
|
||||
{% endif %}
|
||||
<li>
|
||||
<a href="#statsModal" data-remote="{{ url_for('stats', tree_uuid=tree_uuid) }}"
|
||||
data-toggle="modal" data-target="#statsModal" role="button">Show Statistics</a>
|
||||
|
@ -228,7 +250,7 @@
|
|||
<div class="modal-dialog modal-xl" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h5 class="modal-title" id="modulesModalLabel">Statistics</h5>
|
||||
<h5 class="modal-title" id="statsModalLabel">Statistics</h5>
|
||||
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
|
||||
<span aria-hidden="true">×</span>
|
||||
</button>
|
||||
|
@ -247,7 +269,7 @@
|
|||
<div class="modal-dialog modal-xl" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h5 class="modal-title" id="modulesModalLabel">Screenshot</h5>
|
||||
<h5 class="modal-title" id="screenshotModalLabel">Screenshot</h5>
|
||||
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
|
||||
<span aria-hidden="true">×</span>
|
||||
</button>
|
||||
|
@ -290,6 +312,32 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
<div class="modal fade" id="categoriesModal" tabindex="-1" role="dialog">
|
||||
<div class="modal-dialog modal-xl" role="document">
|
||||
<div class="modal-content">
|
||||
<div class="modal-header">
|
||||
<h5 class="modal-title" id="categoriesModalLabel">Categorize the capture</h5>
|
||||
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
|
||||
<span aria-hidden="true">×</span>
|
||||
</button>
|
||||
</div>
|
||||
<div class="modal-body">
|
||||
... loading the categorization options ...
|
||||
</div>
|
||||
<p>
|
||||
<form id=searchCategories>
|
||||
<label for="query">Category to search</label>
|
||||
<input type="text" class="form-control" name="query" id="query" placeholder="Query">
|
||||
<button type="submit" class="btn btn-success">Search</button>
|
||||
</form>
|
||||
</p>
|
||||
<div class="modal-footer">
|
||||
<button type="button" class="btn btn-secondary" data-dismiss="modal">Close</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="modal fade" id="emailModal" tabindex="-1" role="dialog">
|
||||
<div class="modal-dialog modal-xl" role="document">
|
||||
<form role="form" action="{{ tree_uuid }}/send_mail" method=post enctype=multipart/form-data>
|
||||
|
|
Loading…
Reference in New Issue