new: Add captures categorization

pull/116/head
Raphaël Vinot 2020-10-28 18:49:15 +01:00
parent bd2287ade3
commit 2802cfd46c
7 changed files with 233 additions and 11 deletions

View File

@ -17,6 +17,7 @@
"use_user_agents_users": false,
"enable_default_blur_screenshot": false,
"enable_context_by_users": false,
"enable_categorization": false,
"enable_mail_notification": false,
"email": {
"from": "Lookyloo <lookyloo@myorg.local>",

View File

@ -19,6 +19,8 @@ from redis import Redis
from redis.exceptions import ConnectionError
from publicsuffix2 import PublicSuffixList, fetch # type: ignore
from bs4 import BeautifulSoup # type: ignore
from pytaxonomies import Taxonomies
try:
import cloudscraper # type: ignore
HAS_CF = True
@ -55,6 +57,11 @@ def get_resources_hashes(har2tree_container: Union[CrawledTree, HostNode, URLNod
return all_ressources_hashes
@lru_cache(64)
def get_taxonomies():
    """Initialize the taxonomies (pytaxonomies ``Taxonomies`` object).

    The function takes no argument, so the cache holds a single entry: the
    object is built on the first call and the same instance is handed back
    on every later call.
    """
    taxonomies = Taxonomies()
    return taxonomies
@lru_cache(64)
def get_public_suffix_list():
"""Initialize Public Suffix List"""

View File

@ -30,7 +30,7 @@ from werkzeug.useragents import UserAgent
from .exceptions import NoValidHarFile, MissingUUID
from .helpers import (get_homedir, get_socket_path, load_cookies, get_config,
safe_create_dir, get_email_template, load_pickle_tree,
remove_pickle_tree, get_resources_hashes)
remove_pickle_tree, get_resources_hashes, get_taxonomies)
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
from .context import Context
from .indexing import Indexing
@ -43,6 +43,7 @@ class Lookyloo():
self.logger.setLevel(get_config('generic', 'loglevel'))
self.indexing = Indexing()
self.is_public_instance = get_config('generic', 'public_instance')
self.taxonomies = get_taxonomies()
self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
self.scrape_dir: Path = get_homedir() / 'scraped'
@ -269,6 +270,53 @@ class Lookyloo():
return {}
return ct.root_hartree.stats
def categories_capture(self, capture_uuid: str):
    """Return the categories currently attached to a capture.

    The categories are stored one machinetag per line in the ``categories``
    file of the capture directory. Each machinetag is mapped to whatever
    ``self.taxonomies.revert_machinetag`` returns for it.

    :param capture_uuid: UUID of the capture to look up.
    :return: dict keyed by machinetag, in file order; empty when the capture
             has no ``categories`` file.
    :raises MissingUUID: when no capture directory is known for the UUID.
    """
    capture_dir = self.lookup_capture_dir(capture_uuid)
    if not capture_dir:
        raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
    categories_file = capture_dir / 'categories'
    # get existing categories if possible
    if not categories_file.exists():
        return {}
    with categories_file.open() as f:
        # Skip blank lines (e.g. left by a manual edit): an empty string is
        # not a machinetag and must not be handed to revert_machinetag.
        current_categories = [line.strip() for line in f if line.strip()]
    return {e: self.taxonomies.revert_machinetag(e) for e in current_categories}
def categorize_capture(self, capture_uuid: str, category: str):
    """Attach a category (machinetag) to a capture.

    Does nothing when ``enable_categorization`` is falsy in the generic
    config. Otherwise the machinetag is added to the ``categories`` file of
    the capture directory (one machinetag per line).

    :param capture_uuid: UUID of the capture to categorize.
    :param category: machinetag to attach.
    :raises MissingUUID: when no capture directory is known for the UUID.

    NOTE(review): validation relies on ``revert_machinetag`` raising for a
    machinetag it cannot map -- confirm against pytaxonomies.
    """
    if not get_config('generic', 'enable_categorization'):
        return
    # Make sure the category is mappable to a taxonomy.
    self.taxonomies.revert_machinetag(category)
    capture_dir = self.lookup_capture_dir(capture_uuid)
    if not capture_dir:
        raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
    categories_file = capture_dir / 'categories'
    # get existing categories if possible
    if categories_file.exists():
        with categories_file.open() as f:
            # Blank lines are not machinetags, drop them while loading.
            current_categories = {line.strip() for line in f if line.strip()}
    else:
        current_categories = set()
    if category in current_categories:
        # Already categorized: no need to rewrite the file.
        return
    current_categories.add(category)
    with categories_file.open('w') as f:
        # Sets are unordered: sort so the file content (and the listing read
        # back from it) keeps a stable order from one rewrite to the next.
        f.writelines(f'{t}\n' for t in sorted(current_categories))
def uncategorize_capture(self, capture_uuid: str, category: str):
    """Detach a category (machinetag) from a capture.

    Does nothing when ``enable_categorization`` is falsy in the generic
    config, or when the capture does not carry that category. Otherwise the
    machinetag is removed from the ``categories`` file of the capture
    directory (one machinetag per line).

    :param capture_uuid: UUID of the capture to update.
    :param category: machinetag to detach.
    :raises MissingUUID: when no capture directory is known for the UUID.
    """
    if not get_config('generic', 'enable_categorization'):
        return
    capture_dir = self.lookup_capture_dir(capture_uuid)
    if not capture_dir:
        raise MissingUUID(f'Unable to find UUID {capture_uuid} in the cache')
    categories_file = capture_dir / 'categories'
    # get existing categories if possible
    if not categories_file.exists():
        # Nothing was ever categorized: nothing to remove, and no reason to
        # create an empty file.
        return
    with categories_file.open() as f:
        # Blank lines are not machinetags, drop them while loading.
        current_categories = {line.strip() for line in f if line.strip()}
    if category not in current_categories:
        # Removing an absent category (repeated request, empty category) is a
        # no-op rather than a KeyError.
        return
    current_categories.remove(category)
    with categories_file.open('w') as f:
        # Sets are unordered: sort so the file content (and the listing read
        # back from it) keeps a stable order from one rewrite to the next.
        f.writelines(f'{t}\n' for t in sorted(current_categories))
def trigger_modules(self, capture_uuid: str, force: bool=False) -> None:
capture_dir = self.lookup_capture_dir(capture_uuid)
if not capture_dir:

View File

@ -14,7 +14,7 @@ from flask import Flask, render_template, request, send_file, redirect, url_for,
from flask_bootstrap import Bootstrap # type: ignore
from flask_httpauth import HTTPDigestAuth # type: ignore
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents, get_config, get_taxonomies
from lookyloo.lookyloo import Lookyloo, Indexing
from lookyloo.exceptions import NoValidHarFile, MissingUUID
from .proxied import ReverseProxied
@ -172,6 +172,36 @@ def trigger_modules(tree_uuid: str, force: int):
return redirect(url_for('modules', tree_uuid=tree_uuid))
@app.route('/tree/<string:tree_uuid>/categories_capture/', defaults={'query': ''})
@app.route('/tree/<string:tree_uuid>/categories_capture/<string:query>', methods=['GET'])
def categories_capture(tree_uuid: str, query: str):
    """Render the categories panel of a capture.

    Always lists the categories currently attached to the capture. When a
    search query is given, also lists the taxonomy entries matching it.

    :param tree_uuid: UUID of the capture.
    :param query: search string; empty when no search was requested.
    """
    attached = lookyloo.categories_capture(tree_uuid)
    if not query:
        # None (as opposed to an empty dict) tells the template that no
        # search was made at all.
        matches = None
    else:
        taxonomies = get_taxonomies()
        found = taxonomies.search(query)
        if found:
            matches = {tag: taxonomies.revert_machinetag(tag) for tag in found}
        else:
            matches = {}
    return render_template('categories_capture.html', tree_uuid=tree_uuid,
                           current_categories=attached,
                           matching_categories=matches)
@app.route('/tree/<string:tree_uuid>/uncategorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/uncategorize/<string:category>', methods=['GET'])
def uncategorize_capture(tree_uuid: str, category: str):
    """Remove a category from a capture and report it as JSON.

    :param tree_uuid: UUID of the capture.
    :param category: machinetag to remove; empty when the URL carries none.
    """
    lookyloo.uncategorize_capture(tree_uuid, category)
    # This endpoint removes the category: the message previously said "added".
    return jsonify({'response': f'{category} successfully removed from {tree_uuid}'})
@app.route('/tree/<string:tree_uuid>/categorize/', defaults={'category': ''})
@app.route('/tree/<string:tree_uuid>/categorize/<string:category>', methods=['GET'])
def categorize_capture(tree_uuid: str, category: str):
    """Add a category to a capture and report it as JSON.

    :param tree_uuid: UUID of the capture.
    :param category: machinetag to add; empty when the URL carries none.
    """
    lookyloo.categorize_capture(tree_uuid, category)
    # This endpoint adds the category: the message previously said "removed".
    return jsonify({'response': f'{category} successfully added to {tree_uuid}'})
@app.route('/tree/<string:tree_uuid>/stats', methods=['GET'])
def stats(tree_uuid: str):
stats = lookyloo.get_statistics(tree_uuid)
@ -311,11 +341,16 @@ def tree(tree_uuid: str, urlnode_uuid: Optional[str]=None):
enable_context_by_users = True
else:
enable_context_by_users = False
if get_config('generic', 'enable_categorization'):
enable_categorization = True
else:
enable_categorization = False
tree_json, start_time, user_agent, root_url, meta = lookyloo.load_tree(tree_uuid)
return render_template('tree.html', tree_json=tree_json, start_time=start_time,
user_agent=user_agent, root_url=root_url, tree_uuid=tree_uuid,
meta=meta, enable_mail_notification=enable_mail_notification,
enable_context_by_users=enable_context_by_users,
enable_categorization=enable_categorization,
blur_screenshot=blur_screenshot,
urlnode_uuid=urlnode_uuid, has_redirects=True if cache['redirects'] else False)

View File

@ -0,0 +1,21 @@
{# Categories panel of a capture: the categories already attached to it, followed by the result of the category search, if one was made. #}
{% from "macros.html" import taxonomy_table %}
<div>
{% if current_categories %}
<center><h4>Current categories for the capture</h4></center>
{# Last argument 0: the rows get a "remove category" button. #}
{{ taxonomy_table(tree_uuid, current_categories, 0) }}
{% else %}
<center><h4>The capture isn't categorized yet</h4></center>
{% endif%}
{# matching_categories is none when no search was made, and empty when the search returned nothing. #}
{% if matching_categories is none %}
<p></p>
{% elif matching_categories %}
<center><h4>Categories matching your query</h4></center>
{# Last argument 1: the rows get an "add category" button. #}
{{ taxonomy_table(tree_uuid, matching_categories, 1) }}
{% else %}
<center><h4>No categories matching your query</h4></center>
{% endif%}
</div>

View File

@ -1,3 +1,65 @@
{% macro taxonomy_table(tree_uuid, categories_info, add_category) %}
{#
  Render a table of taxonomy entries with one action button per row.

  tree_uuid:       UUID of the capture, used to build the action URLs.
  categories_info: mapping machinetag -> sequence `val`; the template reads
                   val[0].name, val[1].description / val[1].predicate and,
                   when `val` has three items, val[2].description /
                   val[2].expanded (presumably taxonomy, predicate and entry
                   as returned by revert_machinetag; confirm).
  add_category:    truthy -> the button adds the category to the capture,
                   falsy  -> the button removes it.
#}
<div class="table-responsive">
  <table id="table" class="table">
    <thead>
      <tr>
        <th>Name</th>
        <th>Description</th>
        <th>Machinetag</th>
        {% if add_category %}
        <th>Click to add category</th>
        {% else %}
        <th>Click to remove category</th>
        {% endif %}
      </tr>
    </thead>
    <tbody>
      {% for mt, val in categories_info.items() %}
      <tr>
        <td><a href="https://www.misp-project.org/taxonomies.html#_{{ val[0].name }}">{{ val[0].name }}</a></td>
        <td>
          {% if val|length == 3 %}
            {% if val[2].description %}
              {{ val[2].description }}
            {% elif val[2].expanded %}
              {{ val[2].expanded }}
            {%endif%}
          {% elif val[1].description %}
            {{ val[1].description }}
          {% else %}
            {{ val[1].predicate }}
          {%endif%}
        </td>
        <td>{{ mt }}</td>
        <td>
          <button type="button" class="btn btn-link {% if add_category %}categorize_capture{% else %}uncategorize_capture{% endif %}" value="{{ mt }}">
            {% if add_category %}
              Categorize capture.
            {% else %}
              Uncategorize capture.
            {% endif %}
          </button>
        </td>
      </tr>
      {% endfor %}
    </tbody>
  </table>
</div>
<script>
  (function() {
    function bindCategoryAction(selector, actionUrl) {
      // This script is emitted once per macro call and each copy selects
      // every matching button: drop a handler bound by another copy first so
      // a click sends a single request.
      $(selector).off('click.taxonomy_table').on('click.taxonomy_table', function(e) {
        var button = $(this);
        // Refresh the panel the button lives in; fall back to the previous
        // behaviour (every modal body) when it is not inside one.
        var container = button.closest('.modal-body');
        if (!container.length) {
          container = $('.modal-body');
        }
        // Machinetags can contain characters that are special in a URL
        // (quotes, '#', '?'), so encode the value. Reload the list only once
        // the request has completed, otherwise it may show the old state.
        $.get(actionUrl + encodeURIComponent(button.val())).always(function() {
          container.load("{{ url_for('categories_capture', tree_uuid=tree_uuid) }}");
        });
      });
    }
    bindCategoryAction('.categorize_capture', "{{ url_for('categorize_capture', tree_uuid=tree_uuid) }}");
    bindCategoryAction('.uncategorize_capture', "{{ url_for('uncategorize_capture', tree_uuid=tree_uuid) }}");
  })();
</script>
{% endmacro %}
{% macro known_content_details(details) %}
<div>
{% if details is string %}

View File

@ -9,13 +9,35 @@
{{ super() }}
<script src='{{ url_for('static', filename='d3.v6.min.js') }}'></script>
<script src='{{ url_for('static', filename='tree.js') }}'></script>
<script>
// When the modules modal opens, fill its body from the URL stored in the
// data-remote attribute of the element that triggered it.
$('#modulesModal').on('show.bs.modal', function(e) {
var button = $(e.relatedTarget);
var modal = $(this);
modal.find('.modal-body').load(button.data("remote"));
});
// Re-run the modules with force=True, then show the modal once the response is loaded.
// NOTE(review): '.modal-body' matches the body of every modal on the page,
// not only the one of #modulesModal; confirm this is intended.
$('.modulesForceRefresh').on('click',function(){
$('.modal-body').load("{{ url_for('trigger_modules', tree_uuid=tree_uuid, force=True) }}",function(){
$('#modulesModal').modal({show:true});
});
});
</script>
<script>
  // When the categories modal opens, fill its body from the URL stored in the
  // data-remote attribute of the element that triggered it.
  $('#categoriesModal').on('show.bs.modal', function(e) {
    var button = $(e.relatedTarget);
    var modal = $(this);
    modal.find('.modal-body').load(button.data("remote"));
  });
  // Search the taxonomies and show the result in the categories modal.
  $('#searchCategories').submit(function(event){
    // Cancel the native submission first, so the page is not reloaded even if
    // something below throws.
    event.preventDefault();
    var query = $("#query").val();
    // The query is free text: encode it so characters such as '?', '#' or
    // spaces do not break the URL. Only the body of the categories modal is
    // replaced, the other modals are left alone.
    $('#categoriesModal .modal-body').load("{{ url_for('categories_capture', tree_uuid=tree_uuid) }}" + encodeURIComponent(query), function() {
      $('#categoriesModal').modal({show:true});
    });
  });
</script>
<script>
$('#statsModal').on('show.bs.modal', function(e) {
var button = $(e.relatedTarget);
@ -23,13 +45,7 @@
modal.find('.modal-body').load(button.data("remote"));
});
</script>
<script>
$('.modulesForceRefresh').on('click',function(){
$('.modal-body').load("{{ url_for('trigger_modules', tree_uuid=tree_uuid, force=True) }}",function(){
$('#modulesModal').modal({show:true});
});
});
</script>
{% if urlnode_uuid %}
<script>
history.scrollRestoration = "manual";
@ -82,6 +98,12 @@
<a href="#modulesModal" data-remote="{{ url_for('trigger_modules', tree_uuid=tree_uuid, force=False) }}"
data-toggle="modal" data-target="#modulesModal" role="button">Show third party reports</a>
</li>
{% if enable_categorization %}
<li>
<a href="#categoriesModal" data-remote="{{ url_for('categories_capture', tree_uuid=tree_uuid) }}"
data-toggle="modal" data-target="#categoriesModal" role="button">Manage categories of the capture</a>
</li>
{% endif %}
<li>
<a href="#statsModal" data-remote="{{ url_for('stats', tree_uuid=tree_uuid) }}"
data-toggle="modal" data-target="#statsModal" role="button">Show Statistics</a>
@ -228,7 +250,7 @@
<div class="modal-dialog modal-xl" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="modulesModalLabel">Statistics</h5>
<h5 class="modal-title" id="statsModalLabel">Statistics</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">×</span>
</button>
@ -247,7 +269,7 @@
<div class="modal-dialog modal-xl" role="document">
<div class="modal-content">
<div class="modal-header">
<h5 class="modal-title" id="modulesModalLabel">Screenshot</h5>
<h5 class="modal-title" id="screenshotModalLabel">Screenshot</h5>
<button type="button" class="close" data-dismiss="modal" aria-label="Close">
<span aria-hidden="true">×</span>
</button>
@ -290,6 +312,32 @@
</div>
</div>
<!-- Modal used to list, search, add and remove the categories of the capture. -->
<div class="modal fade" id="categoriesModal" tabindex="-1" role="dialog" aria-labelledby="categoriesModalLabel">
  <div class="modal-dialog modal-xl" role="document">
    <div class="modal-content">
      <div class="modal-header">
        <h5 class="modal-title" id="categoriesModalLabel">Categorize the capture</h5>
        <button type="button" class="close" data-dismiss="modal" aria-label="Close">
          <span aria-hidden="true">×</span>
        </button>
      </div>
      <div class="modal-body">
        ... loading the categorization options ...
      </div>
      <!-- The search form stays outside .modal-body because that element is
           replaced every time the categories are (re)loaded. A <div> wrapper
           is used because a <form> is not allowed inside a <p>. -->
      <div>
        <form id="searchCategories">
          <label for="query">Category to search</label>
          <input type="text" class="form-control" name="query" id="query" placeholder="Query">
          <button type="submit" class="btn btn-success">Search</button>
        </form>
      </div>
      <div class="modal-footer">
        <button type="button" class="btn btn-secondary" data-dismiss="modal">Close</button>
      </div>
    </div>
  </div>
</div>
<div class="modal fade" id="emailModal" tabindex="-1" role="dialog">
<div class="modal-dialog modal-xl" role="document">
<form role="form" action="{{ tree_uuid }}/send_mail" method=post enctype=multipart/form-data>