fix: [favicon] crawler favicon

dev
terrtia 2024-02-21 14:34:20 +01:00
parent c219febd71
commit 81c4dde7b0
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
1 changed file with 9 additions and 0 deletions

View File

@@ -20,6 +20,7 @@ from lib.ConfigLoader import ConfigLoader
from lib.objects import CookiesNames from lib.objects import CookiesNames
from lib.objects import Etags from lib.objects import Etags
from lib.objects.Domains import Domain from lib.objects.Domains import Domain
from lib.objects import Favicons
from lib.objects.Items import Item from lib.objects.Items import Item
from lib.objects import Screenshots from lib.objects import Screenshots
from lib.objects import Titles from lib.objects import Titles
@@ -198,6 +199,7 @@ class Crawler(AbstractModule):
user_agent=task.get_user_agent(), user_agent=task.get_user_agent(),
proxy=task.get_proxy(), proxy=task.get_proxy(),
cookies=task.get_cookies(), cookies=task.get_cookies(),
with_favicon=True,
force=force, force=force,
general_timeout_in_sec=90) # TODO increase timeout if onion ???? general_timeout_in_sec=90) # TODO increase timeout if onion ????
@@ -245,6 +247,7 @@ class Crawler(AbstractModule):
parent_id = task.get_parent() parent_id = task.get_parent()
entries = self.lacus.get_capture(capture.uuid) entries = self.lacus.get_capture(capture.uuid)
print(entries.get('status')) print(entries.get('status'))
self.har = task.get_har() self.har = task.get_har()
self.screenshot = task.get_screenshot() self.screenshot = task.get_screenshot()
@@ -369,6 +372,12 @@ class Crawler(AbstractModule):
etag.add(self.date.replace('/', ''), self.domain) etag.add(self.date.replace('/', ''), self.domain)
crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', '')) crawlers.extract_hhhash(entries['har'], self.domain.id, self.date.replace('/', ''))
# FAVICON
if entries.get('potential_favicons'):
for favicon in entries['potential_favicons']:
fav = Favicons.create(favicon)
fav.add(item.get_date(), item)
# Next Children # Next Children
entries_children = entries.get('children') entries_children = entries.get('children')
if entries_children: if entries_children: