mirror of https://github.com/CIRCL/AIL-framework
chg: [crawler] add unsafe tag if domain contains an unsafe screenshot
parent
37c71b8438
commit
6b60041db2
|
@ -15,6 +15,7 @@ from modules.abstract_module import AbstractModule
|
||||||
from lib import crawlers
|
from lib import crawlers
|
||||||
from lib.ConfigLoader import ConfigLoader
|
from lib.ConfigLoader import ConfigLoader
|
||||||
from lib.objects.Domains import Domain
|
from lib.objects.Domains import Domain
|
||||||
|
from lib.objects.Items import Item
|
||||||
from lib.objects import Screenshots
|
from lib.objects import Screenshots
|
||||||
|
|
||||||
|
|
||||||
|
@ -53,6 +54,9 @@ class Crawler(AbstractModule):
|
||||||
self.items_dir = None
|
self.items_dir = None
|
||||||
self.domain = None
|
self.domain = None
|
||||||
|
|
||||||
|
# TODO Replace with warning list ???
|
||||||
|
self.placeholder_screenshots = {'27e14ace10b0f96acd2bd919aaa98a964597532c35b6409dff6cc8eec8214748'}
|
||||||
|
|
||||||
# Send module state to logs
|
# Send module state to logs
|
||||||
self.redis_logger.info('Crawler initialized')
|
self.redis_logger.info('Crawler initialized')
|
||||||
|
|
||||||
|
@ -248,8 +252,13 @@ class Crawler(AbstractModule):
|
||||||
if 'png' in entries and entries['png']:
|
if 'png' in entries and entries['png']:
|
||||||
screenshot = Screenshots.create_screenshot(entries['png'], b64=False)
|
screenshot = Screenshots.create_screenshot(entries['png'], b64=False)
|
||||||
if screenshot:
|
if screenshot:
|
||||||
# Remove Errors pages # TODO Replace with warning list ???
|
if not screenshot.is_tags_safe():
|
||||||
if screenshot.id not in ['27e14ace10b0f96acd2bd919aaa98a964597532c35b6409dff6cc8eec8214748']:
|
unsafe_tag = 'dark-web:topic="pornography-child-exploitation"'
|
||||||
|
self.domain.add_tag(unsafe_tag)
|
||||||
|
item = Item(item_id)
|
||||||
|
item.add_tag(unsafe_tag)
|
||||||
|
# Remove Placeholder pages # TODO Replace with warning list ???
|
||||||
|
if screenshot.id not in self.placeholder_screenshots:
|
||||||
# Create Correlations
|
# Create Correlations
|
||||||
screenshot.add_correlation('item', '', item_id)
|
screenshot.add_correlation('item', '', item_id)
|
||||||
screenshot.add_correlation('domain', '', self.domain.id)
|
screenshot.add_correlation('domain', '', self.domain.id)
|
||||||
|
|
Loading…
Reference in New Issue