mirror of https://github.com/CIRCL/AIL-framework
chg: [titles] add title IDs and contents search
parent
94961f2eba
commit
d4152462f5
|
@ -105,19 +105,14 @@ def create_favicon(content, url=None): # TODO URL ????
|
|||
favicon.create(content)
|
||||
|
||||
|
||||
# TODO ADD SEARCH FUNCTION
|
||||
|
||||
class Favicons(AbstractDaterangeObjects):
|
||||
"""
|
||||
Favicons Objects
|
||||
"""
|
||||
def __init__(self):
|
||||
super().__init__('favicon')
|
||||
super().__init__('favicon', Favicon)
|
||||
|
||||
def get_metas(self, obj_ids, options=set()):
|
||||
return self._get_metas(Favicon, obj_ids, options=options)
|
||||
|
||||
def sanitize_name_to_search(self, name_to_search):
|
||||
def sanitize_id_to_search(self, name_to_search):
|
||||
return name_to_search # TODO
|
||||
|
||||
|
||||
|
|
|
@ -7,6 +7,8 @@ import sys
|
|||
from hashlib import sha256
|
||||
from flask import url_for
|
||||
|
||||
# import warnings
|
||||
# warnings.filterwarnings("ignore", category=DeprecationWarning)
|
||||
from pymisp import MISPObject
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
@ -100,21 +102,24 @@ class Titles(AbstractDaterangeObjects):
|
|||
Titles Objects
|
||||
"""
|
||||
def __init__(self):
|
||||
super().__init__('title')
|
||||
super().__init__('title', Title)
|
||||
|
||||
def get_metas(self, obj_ids, options=set()):
|
||||
return self._get_metas(Title, obj_ids, options=options)
|
||||
|
||||
def sanitize_name_to_search(self, name_to_search):
|
||||
def sanitize_id_to_search(self, name_to_search):
|
||||
return name_to_search
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# from lib import crawlers
|
||||
# from lib.objects import Items
|
||||
# for item in Items.get_all_items_objects(filters={'sources': ['crawled']}):
|
||||
# title_content = crawlers.extract_title_from_html(item.get_content())
|
||||
# if title_content:
|
||||
# print(item.id, title_content)
|
||||
# title = create_title(title_content)
|
||||
# title.add(item.get_date(), item.id)
|
||||
# # from lib import crawlers
|
||||
# # from lib.objects import Items
|
||||
# # for item in Items.get_all_items_objects(filters={'sources': ['crawled']}):
|
||||
# # title_content = crawlers.extract_title_from_html(item.get_content())
|
||||
# # if title_content:
|
||||
# # print(item.id, title_content)
|
||||
# # title = create_title(title_content)
|
||||
# # title.add(item.get_date(), item.id)
|
||||
# titles = Titles()
|
||||
# # for r in titles.get_ids_iterator():
|
||||
# # print(r)
|
||||
# r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False)
|
||||
# print(r)
|
||||
|
||||
|
|
|
@ -163,16 +163,21 @@ class AbstractDaterangeObjects(ABC):
|
|||
Abstract Daterange Objects
|
||||
"""
|
||||
|
||||
def __init__(self, obj_type):
|
||||
def __init__(self, obj_type, obj_class):
|
||||
""" Abstract for Daterange Objects
|
||||
|
||||
:param obj_type: object type (item, ...)
|
||||
:param obj_class: object python class (Item, ...)
|
||||
"""
|
||||
self.type = obj_type
|
||||
self.obj_class = obj_class
|
||||
|
||||
def get_all(self):
|
||||
def get_ids(self):
|
||||
return r_object.smembers(f'{self.type}:all')
|
||||
|
||||
# def get_ids_iterator(self):
|
||||
# return r_object.sscan_iter(r_object, f'{self.type}:all')
|
||||
|
||||
def get_by_date(self, date):
|
||||
return r_object.zrange(f'{self.type}:date:{date}', 0, -1)
|
||||
|
||||
|
@ -185,35 +190,61 @@ class AbstractDaterangeObjects(ABC):
|
|||
obj_ids = obj_ids | set(self.get_by_date(date))
|
||||
return obj_ids
|
||||
|
||||
def get_metas(self, obj_ids, options=None):
    """Build the metadata dictionary of every requested object.

    Concrete implementation shared by all daterange object types: each id
    is wrapped in ``self.obj_class`` and the result of its ``get_meta()``
    call is collected.

    :param obj_ids: iterable of object ids
    :param options: optional set of option names, forwarded unchanged to
                    ``get_meta()``; an empty set is used when omitted
    :return: dict mapping each object id to its metadata
    """
    # None sentinel instead of a mutable ``set()`` default, which would be
    # shared between every call of this method.
    if options is None:
        options = set()
    dict_obj = {}
    for obj_id in obj_ids:
        obj = self.obj_class(obj_id)
        dict_obj[obj_id] = obj.get_meta(options=options)
    return dict_obj
|
||||
|
||||
@abstractmethod
def sanitize_id_to_search(self, id_to_search):
    """Hook used to clean an id search term before it is used as a regex.

    Declared abstract, so every concrete subclass has to provide its own
    version. This base implementation is the identity and can be reused
    through ``super()``.

    :param id_to_search: raw search term
    :return: the term to compile as a regular expression
    """
    return id_to_search
|
||||
|
||||
def search_by_id(self, name_to_search, r_pos=False, case_sensitive=True):
    """Search the object ids of this type with a regular expression.

    :param name_to_search: regex pattern looked for in every object id
    :param r_pos: if True, store the offsets of the first match under the
                  'hl-start' and 'hl-end' keys of each result
    :param case_sensitive: if False, compile the pattern with re.IGNORECASE
    :return: dict ``{obj_id: {}}`` (or ``{obj_id: {'hl-start': .., 'hl-end': ..}}``
             when ``r_pos`` is set); empty dict when there is nothing to search
    :raises re.error: if the sanitized term is not a valid regular expression
    """
    objs = {}
    r_name = self.sanitize_id_to_search(name_to_search)
    # Check the sanitized value too: an empty/None result must not reach
    # re.compile(). A dict returned by the sanitizer is not searched either
    # (presumably an error payload -- confirm against the subclasses).
    if not name_to_search or not r_name or isinstance(r_name, dict):
        return objs

    flags = 0 if case_sensitive else re.IGNORECASE
    r_name = re.compile(r_name, flags=flags)
    for obj_id in self.get_ids():  # TODO REPLACE ME WITH AN ITERATOR
        res = r_name.search(obj_id)
        if res:
            objs[obj_id] = {}
            if r_pos:
                objs[obj_id]['hl-start'] = res.start()
                objs[obj_id]['hl-end'] = res.end()
    return objs
|
||||
|
||||
def sanitize_content_to_search(self, content_to_search):
    """Hook used to clean a content search term before it is used as a regex.

    The default implementation returns the term unchanged; subclasses may
    override it.

    :param content_to_search: raw search term
    :return: the term to compile as a regular expression
    """
    return content_to_search
|
||||
|
||||
def search_by_content(self, content_to_search, r_pos=False, case_sensitive=True):
    """Search the content of every object of this type with a regular expression.

    :param content_to_search: regex pattern looked for in each object content
    :param r_pos: if True, store the offsets of the first match ('hl-start',
                  'hl-end') and the matched object's content ('content')
                  in each result
    :param case_sensitive: if False, compile the pattern with re.IGNORECASE
    :return: dict ``{obj_id: {...}}`` of the matching objects; empty dict
             when there is nothing to search
    :raises re.error: if the sanitized term is not a valid regular expression
    """
    objs = {}
    r_search = self.sanitize_content_to_search(content_to_search)
    # A dict returned by the sanitizer is not searched (presumably an error
    # payload -- confirm against the subclasses).
    if not r_search or isinstance(r_search, dict):
        return objs

    flags = 0 if case_sensitive else re.IGNORECASE
    r_search = re.compile(r_search, flags=flags)
    for obj_id in self.get_ids():  # TODO REPLACE ME WITH AN ITERATOR
        content = self.obj_class(obj_id).get_content()
        # An object without content cannot match; skipping it also avoids
        # the TypeError re raises on None.
        if content is None:
            continue
        res = r_search.search(content)
        if res:
            objs[obj_id] = {}
            if r_pos:  # TODO ADD CONTENT ????
                objs[obj_id]['hl-start'] = res.start()
                objs[obj_id]['hl-end'] = res.end()
                objs[obj_id]['content'] = content
    return objs
|
||||
|
||||
def api_get_chart_nb_by_daterange(self, date_from, date_to):
|
||||
|
@ -226,5 +257,4 @@ class AbstractDaterangeObjects(ABC):
|
|||
|
||||
def api_get_meta_by_daterange(self, date_from, date_to):
|
||||
date = Date.sanitise_date_range(date_from, date_to)
|
||||
return self.get_metas(self.get_by_daterange(date['date_from'], date['date_to']), options={'sparkline'})
|
||||
|
||||
return self.get_metas(self.get_by_daterange(date['date_from'], date['date_to']), options={'sparkline'})
|
|
@ -5,6 +5,7 @@
|
|||
Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ...
|
||||
'''
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
@ -27,8 +28,11 @@ objects_title = Blueprint('objects_title', __name__, template_folder=os.path.joi
|
|||
# ============ VARIABLES ============
|
||||
bootstrap_label = ['primary', 'success', 'danger', 'warning', 'info']
|
||||
|
||||
|
||||
# ============ FUNCTIONS ============
|
||||
def create_json_response(data, status_code):
    """Serialize ``data`` as pretty-printed JSON and pair it with a status code.

    :param data: JSON-serializable payload
    :param status_code: HTTP status code returned next to the response
    :return: tuple ``(Response, status_code)``
    """
    payload = json.dumps(data, indent=2, sort_keys=True)
    response = Response(payload, mimetype='application/json')
    return response, status_code
|
||||
|
||||
# ============= ROUTES ==============
|
||||
@objects_title.route("/objects/title", methods=['GET'])
|
||||
@login_required
|
||||
@login_read_only
|
||||
|
@ -72,15 +76,30 @@ def objects_title_range_json():
|
|||
@login_required
|
||||
@login_read_only
|
||||
def objects_title_search():
    """Search titles by id or by content and render the result page.

    Form fields read from the request:
      - ``to_search``: id, or regex pattern to search
      - ``search_type``: 'id' (default) or 'content'
      - ``case_sensitive``: any non-empty value enables case-sensitive matching

    :return: a redirect to the title page when a 64-character id matches an
             existing title, the rendered ``search_title_result.html``
             template otherwise, or a JSON error with status 400 for an
             unknown search type
    """
    to_search = request.form.get('to_search')
    type_to_search = request.form.get('search_type', 'id')
    case_sensitive = bool(request.form.get('case_sensitive'))
    titles = Titles.Titles()

    if type_to_search == 'id':
        # The length test must apply to the searched term, not to the search
        # type (always 'id' in this branch). A 64-character term is handled
        # as a complete title id (presumably a SHA-256 hex digest -- confirm)
        # and looked up directly.
        if to_search and len(to_search) == 64:
            title = Titles.Title(to_search)
            if not title.exists():
                abort(404)
            return redirect(title.get_link(flask_context=True))
        search_result = titles.search_by_id(to_search, r_pos=True, case_sensitive=case_sensitive)
    elif type_to_search == 'content':
        search_result = titles.search_by_content(to_search, r_pos=True, case_sensitive=case_sensitive)
    else:
        return create_json_response({'error': 'Unknown search type'}, 400)

    if search_result:
        dict_objects = titles.get_metas(search_result.keys(), options={'sparkline'})
    else:
        dict_objects = {}

    return render_template("search_title_result.html", dict_objects=dict_objects, search_result=search_result,
                           to_search=to_search, case_sensitive=case_sensitive, type_to_search=type_to_search)
|
||||
|
|
|
@ -75,17 +75,8 @@
|
|||
<div class="col-xl-10">
|
||||
<div class="mt-1" id="barchart_type"></div>
|
||||
|
||||
<div class="card border-secondary my-2">
|
||||
<div class="card-body text-dark">
|
||||
<h5 class="card-title">Search Title by name:</h5>
|
||||
<form action="{{ url_for('objects_title.objects_title_search') }}" id="search_subtype_onj" method='post'>
|
||||
<div class="input-group mb-1">
|
||||
<input type="text" class="form-control col-8" name="object_id" value="" placeholder="Title ID" required>
|
||||
<button class="btn btn-primary input-group-addon search-obj col-2"><i class="fas fa-search"></i></button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
{% include 'title/block_titles_search.html' %}
|
||||
|
||||
</div>
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
<div class="card border-secondary my-2">
|
||||
<div class="card-body text-dark">
|
||||
<h5 class="card-title">Titles Search:</h5>
|
||||
<form action="{{ url_for('objects_title.objects_title_search') }}" id="search_subtype_onj" method='post'>
|
||||
<div class="input-group mb-1">
|
||||
<select class="custom-select col-2" name="search_type" value="{% if type_to_search %}{{ type_to_search }}{% else %}content{% endif %}" required>
|
||||
<option value="content">Content Search</option>
|
||||
<option value="id" {% if type_to_search %}{% if type_to_search == "id" %}selected{% endif %}{% endif %}>ID Search</option>
|
||||
</select>
|
||||
<input type="text" class="form-control col-8" name="to_search" value="{% if to_search %}{{ to_search }}{% endif %}" placeholder="ID or content to Search" required>
|
||||
<button class="btn btn-primary input-group-addon search-obj col-2"><i class="fas fa-search"></i></button>
|
||||
</div>
|
||||
<div class="custom-control custom-switch mt-1">
|
||||
<input class="custom-control-input" type="checkbox" name="case_sensitive" id="case_sensitive" {% if type_to_search %}{% if case_sensitive %}value="True" checked{% else %}value="False"{% endif %}{% else %}value="True" checked{% endif %}>
|
||||
<label class="custom-control-label" for="case_sensitive">Case Sensitive</label>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
|
@ -0,0 +1,113 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<title>Titles - AIL</title>
|
||||
<link rel="icon" href="{{ url_for('static', filename='image/ail-icon.png') }}">
|
||||
|
||||
<!-- Core CSS -->
|
||||
<link href="{{ url_for('static', filename='css/bootstrap4.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/daterangepicker.min.css') }}" rel="stylesheet">
|
||||
<link href="{{ url_for('static', filename='css/ail-project.css') }}" rel="stylesheet">
|
||||
|
||||
<!-- JS -->
|
||||
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/bootstrap4.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/jquery.dataTables.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/dataTables.bootstrap.min.js')}}"></script>
|
||||
<script src="{{ url_for('static', filename='js/moment.min.js') }}"></script>
|
||||
<script src="{{ url_for('static', filename='js/jquery.daterangepicker.min.js') }}"></script>
|
||||
<script src="{{ url_for('static', filename='js/d3.min.js') }}"></script>
|
||||
<script src="{{ url_for('static', filename='js/d3/sparklines.js')}}"></script>
|
||||
|
||||
</head>
|
||||
<body>
|
||||
|
||||
{% include 'nav_bar.html' %}
|
||||
|
||||
<div class="container-fluid">
|
||||
<div class="row">
|
||||
|
||||
{% include 'sidebars/sidebar_objects.html' %}
|
||||
|
||||
<div class="col-12 col-lg-10" id="core_content">
|
||||
|
||||
{% include 'title/block_titles_search.html' %}
|
||||
|
||||
|
||||
|
||||
<table id="table_objects" class="table table-striped table-bordered">
|
||||
<thead class="bg-dark text-white">
|
||||
<tr>
|
||||
<th></th>
|
||||
<th>First Seen</th>
|
||||
<th>Last Seen</th>
|
||||
<th>Total</th>
|
||||
<th>Last days</th>
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody style="font-size: 15px;">
|
||||
{% for obj_id in dict_objects %}
|
||||
<tr>
|
||||
<td>
|
||||
<a target="_blank" href="{{ url_for('correlation.show_correlation') }}?type=title&id={{ obj_id }}">
|
||||
{% if type_to_search == 'content' %}
|
||||
<span>{{ dict_objects[obj_id]['content'][:search_result[obj_id]['hl-start']] }}</span><span class="hg-text">{{dict_objects[obj_id]['content'][search_result[obj_id]['hl-start']:search_result[obj_id]['hl-end']]}}</span>{{ dict_objects[obj_id]['content'][search_result[obj_id]['hl-end']:] }}
|
||||
{% else %}
|
||||
<span>{{ dict_objects[obj_id]['content'] }}</span>
|
||||
{% endif %}
|
||||
</a>
|
||||
</td>
|
||||
<td>{{ dict_objects[obj_id]['first_seen'] }}</td>
|
||||
<td>{{ dict_objects[obj_id]['last_seen'] }}</td>
|
||||
<td>{{ dict_objects[obj_id]['nb_seen'] }}</td>
|
||||
<td id="sparklines_{{ obj_id }}" style="text-align:center;"></td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
var chart = {};
|
||||
$(document).ready(function(){
|
||||
$("#page-Decoded").addClass("active");
|
||||
$("#nav_title").addClass("active");
|
||||
|
||||
$('#table_objects').DataTable({
|
||||
"aLengthMenu": [[5, 10, 15, -1], [5, 10, 15, "All"]],
|
||||
"iDisplayLength": 10,
|
||||
"order": [[ 3, "desc" ]]
|
||||
});
|
||||
});
|
||||
|
||||
function toggle_sidebar(){
|
||||
if($('#nav_menu').is(':visible')){
|
||||
$('#nav_menu').hide();
|
||||
$('#side_menu').removeClass('border-right')
|
||||
$('#side_menu').removeClass('col-lg-2')
|
||||
$('#core_content').removeClass('col-lg-10')
|
||||
}else{
|
||||
$('#nav_menu').show();
|
||||
$('#side_menu').addClass('border-right')
|
||||
$('#side_menu').addClass('col-lg-2')
|
||||
$('#core_content').addClass('col-lg-10')
|
||||
}
|
||||
}
|
||||
</script>
|
||||
|
||||
<script>
|
||||
{% for obj_id in dict_objects %}
|
||||
sparkline("sparklines_{{ obj_id }}", {{ dict_objects[obj_id]['sparkline'] }}, {});
|
||||
{% endfor %}
|
||||
</script>
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
Loading…
Reference in New Issue