mirror of https://github.com/CIRCL/AIL-framework
fix: [crawler] filter lookup parent + domain daterange
parent
c8b1c67a08
commit
83e11082b5
|
@ -78,6 +78,7 @@ class Crawler(AbstractModule):
|
|||
self.items_dir = None
|
||||
self.original_domain = None
|
||||
self.domain = None
|
||||
self.parent = None
|
||||
|
||||
# TODO Replace with warning list ???
|
||||
self.placeholder_screenshots = {'07244254f73e822bd4a95d916d8b27f2246b02c428adc29082d09550c6ed6e1a' # blank
|
||||
|
@ -243,6 +244,7 @@ class Crawler(AbstractModule):
|
|||
return None
|
||||
|
||||
self.domain = Domain(domain)
|
||||
self.parent = self.domain.get_parent()
|
||||
self.original_domain = Domain(domain)
|
||||
|
||||
epoch = int(time.time())
|
||||
|
@ -263,7 +265,9 @@ class Crawler(AbstractModule):
|
|||
# Save Capture
|
||||
self.save_capture_response(parent_id, entries)
|
||||
|
||||
self.domain.update_daterange(self.date.replace('/', ''))
|
||||
if self.parent != 'lookup':
|
||||
# Update domain first/last seen
|
||||
self.domain.update_daterange(self.date.replace('/', ''))
|
||||
# Origin + History + tags
|
||||
if self.root_item:
|
||||
self.domain.set_last_origin(parent_id)
|
||||
|
@ -271,6 +275,7 @@ class Crawler(AbstractModule):
|
|||
# Tags
|
||||
for tag in task.get_tags():
|
||||
self.domain.add_tag(tag)
|
||||
# Crawler stats
|
||||
self.domain.add_history(epoch, root_item=self.root_item)
|
||||
|
||||
if self.domain != self.original_domain:
|
||||
|
|
|
@ -86,7 +86,7 @@ def get_default_correlation_objects():
|
|||
return AIL_OBJECTS_CORRELATIONS_DEFAULT
|
||||
|
||||
def get_obj_queued():
    """Return the object type names that are handled through the queues.

    A fresh list is built on every call, so callers may mutate the result.
    """
    # TODO: should 'screenshot' be part of this list ???
    return ['item', 'image', 'message', 'ocr', 'qrcode']
|
||||
|
||||
def get_objects_tracked():
    """Return the list of object type names that are tracked."""
    tracked_types = [
        'decoded',
        'item',
        'pgp',
        'message',
        'ocr',
        'qrcode',
        'title',
    ]
    return tracked_types
|
||||
|
|
|
@ -195,6 +195,9 @@ def get_processed_end_objs():
|
|||
def get_processed_end_obj():
    """Pop one member from the ``objs:processed`` Redis set and return it.

    The value is whatever ``SPOP`` yields on the ``r_obj_process`` connection.
    """
    processed_key = 'objs:processed'
    return r_obj_process.spop(processed_key)
|
||||
|
||||
def is_obj_in_process(obj_gid):
    """Tell whether *obj_gid* is a member of the ``objs:process`` Redis set.

    Returns the raw ``SISMEMBER`` result from the ``r_obj_process`` connection.
    """
    in_process_key = 'objs:process'
    return r_obj_process.sismember(in_process_key, obj_gid)
|
||||
|
||||
def get_processed_objs_by_type(obj_type):
    """Return every member of the ``objs:process:<obj_type>`` sorted set.

    The full range (index 0 to -1) is fetched with ``ZRANGE``.
    """
    type_key = f'objs:process:{obj_type}'
    members = r_obj_process.zrange(type_key, 0, -1)
    return members
|
||||
|
||||
|
|
|
@ -67,7 +67,7 @@ faup = Faup()
|
|||
# is safe ???
|
||||
# TODO FILTER URL ???
|
||||
|
||||
def api_get_onion_lookup(domain):
|
||||
def api_get_onion_lookup(domain): # TODO check if object process done ???
|
||||
domain = domain.lower()
|
||||
url_unpack = unpack_url(domain)
|
||||
domain = url_unpack['domain']
|
||||
|
@ -78,6 +78,11 @@ def api_get_onion_lookup(domain):
|
|||
if is_crawler_activated():
|
||||
create_task(domain, parent='lookup', priority=0, har=D_HAR, screenshot=D_SCREENSHOT)
|
||||
return {'error': 'domain not found', 'domain': domain}, 404
|
||||
if not dom.was_up():
|
||||
return {'error': 'domain not found', 'domain': domain}, 404
|
||||
# else
|
||||
## TODO check if object process done -> return result if more than one history
|
||||
# #-> check item history
|
||||
meta = dom.get_meta(options={'languages'})
|
||||
meta['first_seen'] = meta['first_seen'].replace('/', '-')
|
||||
meta['last_seen'] = meta['last_check'].replace('/', '-')
|
||||
|
|
|
@ -19,6 +19,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from lib import ail_logger
|
||||
from lib.ail_queues import is_obj_in_process
|
||||
from lib import Tag
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib import Duplicate
|
||||
|
@ -92,6 +93,15 @@ class AbstractObject(ABC):
|
|||
else:
|
||||
return r_object.hset(f'meta:{self.type}:{self.get_subtype(r_str=True)}:{self.id}', field, value)
|
||||
|
||||
## Queues ##
|
||||
|
||||
# is_in_queue , is_in_module
|
||||
|
||||
def is_being_processed(self):
    """Return the ``is_obj_in_process`` result for this object's global id."""
    global_id = self.get_global_id()
    return is_obj_in_process(global_id)
|
||||
|
||||
# -Queues- #
|
||||
|
||||
## Tags ##
|
||||
def get_tags(self, r_list=False):
|
||||
tags = Tag.get_object_tags(self.type, self.id, self.get_subtype(r_str=True))
|
||||
|
|
Loading…
Reference in New Issue