mirror of https://github.com/CIRCL/AIL-framework
chg: [titles] add yara tracker on title + tags domains if unsafe title tags
parent
045aab6f34
commit
f44c5509da
|
@ -22,6 +22,7 @@ from lib.objects.Domains import Domain
|
||||||
from lib.objects.Items import Item
|
from lib.objects.Items import Item
|
||||||
from lib.objects import Screenshots
|
from lib.objects import Screenshots
|
||||||
from lib.objects import Titles
|
from lib.objects import Titles
|
||||||
|
from trackers.Tracker_Yara import Tracker_Yara
|
||||||
|
|
||||||
logging.config.dictConfig(ail_logger.get_config(name='crawlers'))
|
logging.config.dictConfig(ail_logger.get_config(name='crawlers'))
|
||||||
|
|
||||||
|
@ -35,6 +36,8 @@ class Crawler(AbstractModule):
|
||||||
# Waiting time in seconds between two processed messages
|
# Waiting time in seconds between two processed messages
|
||||||
self.pending_seconds = 1
|
self.pending_seconds = 1
|
||||||
|
|
||||||
|
self.tracker_yara = Tracker_Yara(queue=False)
|
||||||
|
|
||||||
config_loader = ConfigLoader()
|
config_loader = ConfigLoader()
|
||||||
|
|
||||||
self.default_har = config_loader.get_config_boolean('Crawler', 'default_har')
|
self.default_har = config_loader.get_config_boolean('Crawler', 'default_har')
|
||||||
|
@ -283,6 +286,12 @@ class Crawler(AbstractModule):
|
||||||
if title_content:
|
if title_content:
|
||||||
title = Titles.create_title(title_content)
|
title = Titles.create_title(title_content)
|
||||||
title.add(item.get_date(), item_id)
|
title.add(item.get_date(), item_id)
|
||||||
|
# Tracker
|
||||||
|
self.tracker_yara.compute(title.get_id(), obj_type=title.get_type())
|
||||||
|
if not title.is_tags_safe():
|
||||||
|
unsafe_tag = 'dark-web:topic="pornography-child-exploitation"'
|
||||||
|
self.domain.add_tag(unsafe_tag)
|
||||||
|
item.add_tag(unsafe_tag)
|
||||||
|
|
||||||
# SCREENSHOT
|
# SCREENSHOT
|
||||||
if self.screenshot:
|
if self.screenshot:
|
||||||
|
|
|
@ -923,7 +923,7 @@ def api_add_tracker(dict_input, user_id):
|
||||||
# Filters # TODO MOVE ME
|
# Filters # TODO MOVE ME
|
||||||
filters = dict_input.get('filters', {})
|
filters = dict_input.get('filters', {})
|
||||||
if filters:
|
if filters:
|
||||||
if filters.keys() == {'decoded', 'item', 'pgp'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||||
filters = {}
|
filters = {}
|
||||||
for obj_type in filters:
|
for obj_type in filters:
|
||||||
if obj_type not in get_objects_tracked():
|
if obj_type not in get_objects_tracked():
|
||||||
|
@ -998,7 +998,7 @@ def api_edit_tracker(dict_input, user_id):
|
||||||
# Filters # TODO MOVE ME
|
# Filters # TODO MOVE ME
|
||||||
filters = dict_input.get('filters', {})
|
filters = dict_input.get('filters', {})
|
||||||
if filters:
|
if filters:
|
||||||
if filters.keys() == {'decoded', 'item', 'pgp'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
if filters.keys() == {'decoded', 'item', 'pgp', 'title'} and set(filters['pgp'].get('subtypes', [])) == {'mail', 'name'}:
|
||||||
if not filters['decoded'] and not filters['item']:
|
if not filters['decoded'] and not filters['item']:
|
||||||
filters = {}
|
filters = {}
|
||||||
for obj_type in filters:
|
for obj_type in filters:
|
||||||
|
|
|
@ -50,7 +50,7 @@ def get_object_all_subtypes(obj_type):
|
||||||
return []
|
return []
|
||||||
|
|
||||||
def get_objects_tracked():
|
def get_objects_tracked():
|
||||||
return ['decoded', 'item', 'pgp']
|
return ['decoded', 'item', 'pgp', 'title']
|
||||||
|
|
||||||
def get_objects_retro_hunted():
|
def get_objects_retro_hunted():
|
||||||
return ['decoded', 'item']
|
return ['decoded', 'item']
|
||||||
|
|
|
@ -45,6 +45,8 @@ class Title(AbstractDaterangeObject):
|
||||||
def get_content(self, r_type='str'):
|
def get_content(self, r_type='str'):
|
||||||
if r_type == 'str':
|
if r_type == 'str':
|
||||||
return self._get_field('content')
|
return self._get_field('content')
|
||||||
|
elif r_type == 'bytes':
|
||||||
|
return self._get_field('content').encode()
|
||||||
|
|
||||||
def get_link(self, flask_context=False):
|
def get_link(self, flask_context=False):
|
||||||
if flask_context:
|
if flask_context:
|
||||||
|
@ -122,4 +124,3 @@ class Titles(AbstractDaterangeObjects):
|
||||||
# # print(r)
|
# # print(r)
|
||||||
# r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False)
|
# r = titles.search_by_id('f7d57B', r_pos=True, case_sensitive=False)
|
||||||
# print(r)
|
# print(r)
|
||||||
|
|
||||||
|
|
|
@ -132,6 +132,10 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="custom-control custom-switch mt-1">
|
||||||
|
<input class="custom-control-input" type="checkbox" name="title_obj" id="title_obj" checked="">
|
||||||
|
<label class="custom-control-label" for="title_obj"><i class="fas fa-lock-open"></i> Title <i class="fas fa-heading text-info" data-toggle="tooltip" data-placement="right" title="Title that has been extracted from an HTML page"></i></label>
|
||||||
|
</div>
|
||||||
|
|
||||||
{# <div class="custom-control custom-switch mt-1">#}
|
{# <div class="custom-control custom-switch mt-1">#}
|
||||||
{# <input class="custom-control-input" type="checkbox" name="level" id="screenshot_obj" checked="">#}
|
{# <input class="custom-control-input" type="checkbox" name="level" id="screenshot_obj" checked="">#}
|
||||||
|
|
Loading…
Reference in New Issue