mirror of https://github.com/CIRCL/AIL-framework
fix: [crawler] fix incomplete response
parent
f8fd037bd2
commit
e9539e640b
|
@ -186,7 +186,7 @@ class Crawler(AbstractModule):
|
|||
parent_id = task.get_parent()
|
||||
|
||||
entries = self.lacus.get_capture(capture.uuid)
|
||||
print(entries['status'])
|
||||
print(entries.get('status'))
|
||||
self.har = task.get_har()
|
||||
self.screenshot = task.get_screenshot()
|
||||
# DEBUG
|
||||
|
@ -218,12 +218,12 @@ class Crawler(AbstractModule):
|
|||
if 'error' in entries:
|
||||
# TODO IMPROVE ERROR MESSAGE
|
||||
self.logger.warning(str(entries['error']))
|
||||
print(entries['error'])
|
||||
print(entries.get('error'))
|
||||
if entries.get('html'):
|
||||
print('retrieved content')
|
||||
# print(entries.get('html'))
|
||||
|
||||
if 'last_redirected_url' in entries and entries['last_redirected_url']:
|
||||
if 'last_redirected_url' in entries and entries.get('last_redirected_url'):
|
||||
last_url = entries['last_redirected_url']
|
||||
unpacked_last_url = crawlers.unpack_url(last_url)
|
||||
current_domain = unpacked_last_url['domain']
|
||||
|
@ -238,7 +238,7 @@ class Crawler(AbstractModule):
|
|||
else:
|
||||
last_url = f'http://{self.domain.id}'
|
||||
|
||||
if 'html' in entries and entries['html']:
|
||||
if 'html' in entries and entries.get('html'):
|
||||
item_id = crawlers.create_item_id(self.items_dir, self.domain.id)
|
||||
print(item_id)
|
||||
gzip64encoded = crawlers.get_gzipped_b64_item(item_id, entries['html'])
|
||||
|
@ -264,7 +264,7 @@ class Crawler(AbstractModule):
|
|||
|
||||
# SCREENSHOT
|
||||
if self.screenshot:
|
||||
if 'png' in entries and entries['png']:
|
||||
if 'png' in entries and entries.get('png'):
|
||||
screenshot = Screenshots.create_screenshot(entries['png'], b64=False)
|
||||
if screenshot:
|
||||
if not screenshot.is_tags_safe():
|
||||
|
@ -278,7 +278,7 @@ class Crawler(AbstractModule):
|
|||
screenshot.add_correlation('domain', '', self.domain.id)
|
||||
# HAR
|
||||
if self.har:
|
||||
if 'har' in entries and entries['har']:
|
||||
if 'har' in entries and entries.get('har'):
|
||||
har_id = crawlers.create_har_id(self.date, item_id)
|
||||
crawlers.save_har(har_id, entries['har'])
|
||||
for cookie_name in crawlers.extract_cookies_names_from_har(entries['har']):
|
||||
|
|
Loading…
Reference in New Issue