mirror of https://github.com/CIRCL/AIL-framework
fix: [crawler] fix capture start time
parent
5fc9b1403f
commit
235539ea42
|
@@ -121,7 +121,9 @@ class Crawler(AbstractModule):
|
||||||
if crawlers.get_nb_crawler_captures() < crawlers.get_crawler_max_captures():
|
if crawlers.get_nb_crawler_captures() < crawlers.get_crawler_max_captures():
|
||||||
task_row = crawlers.add_task_to_lacus_queue()
|
task_row = crawlers.add_task_to_lacus_queue()
|
||||||
if task_row:
|
if task_row:
|
||||||
task_uuid, priority = task_row
|
task, priority = task_row
|
||||||
|
task.start()
|
||||||
|
task_uuid = task.uuid
|
||||||
try:
|
try:
|
||||||
self.enqueue_capture(task_uuid, priority)
|
self.enqueue_capture(task_uuid, priority)
|
||||||
except ConnectionError:
|
except ConnectionError:
|
||||||
|
@@ -195,10 +197,17 @@ class Crawler(AbstractModule):
|
||||||
print(task.uuid, capture_uuid, 'launched')
|
print(task.uuid, capture_uuid, 'launched')
|
||||||
|
|
||||||
if self.ail_to_push_discovery:
|
if self.ail_to_push_discovery:
|
||||||
|
|
||||||
if task.get_depth() == 1 and priority < 10 and task.get_domain().endswith('.onion'):
|
if task.get_depth() == 1 and priority < 10 and task.get_domain().endswith('.onion'):
|
||||||
har = task.get_har()
|
har = task.get_har()
|
||||||
screenshot = task.get_screenshot()
|
screenshot = task.get_screenshot()
|
||||||
self.ail_to_push_discovery.add_crawler_capture(task_uuid, capture_uuid, url, har=har,
|
# parent_id = task.get_parent()
|
||||||
|
# if parent_id != 'manual' and parent_id != 'auto':
|
||||||
|
# parent = parent_id[19:-36]
|
||||||
|
# else:
|
||||||
|
# parent = 'AIL_capture'
|
||||||
|
|
||||||
|
self.ail_to_push_discovery.add_crawler_capture(task_uuid, capture_uuid, url, har=har, # parent=parent,
|
||||||
screenshot=screenshot, depth_limit=1, proxy='force_tor')
|
screenshot=screenshot, depth_limit=1, proxy='force_tor')
|
||||||
print(task.uuid, capture_uuid, 'Added to ail_to_push_discovery')
|
print(task.uuid, capture_uuid, 'Added to ail_to_push_discovery')
|
||||||
return capture_uuid
|
return capture_uuid
|
||||||
|
|
|
@@ -1642,8 +1642,7 @@ def add_task_to_lacus_queue():
|
||||||
return None
|
return None
|
||||||
task_uuid, priority = task_uuid[0]
|
task_uuid, priority = task_uuid[0]
|
||||||
task = CrawlerTask(task_uuid)
|
task = CrawlerTask(task_uuid)
|
||||||
task.start()
|
return task, priority
|
||||||
return task.uuid, priority
|
|
||||||
|
|
||||||
# PRIORITY: discovery = 0/10, feeder = 10, manual = 50, auto = 40, test = 100
|
# PRIORITY: discovery = 0/10, feeder = 10, manual = 50, auto = 40, test = 100
|
||||||
def create_task(url, depth=1, har=True, screenshot=True, header=None, cookiejar=None, proxy=None,
|
def create_task(url, depth=1, har=True, screenshot=True, header=None, cookiejar=None, proxy=None,
|
||||||
|
|
Loading…
Reference in New Issue