chg: Add missing bits

lacus
Raphaël Vinot 2022-09-16 12:20:34 +02:00
parent a3342b5569
commit be3bec1252
2 changed files with 61 additions and 55 deletions

View File

@ -13,7 +13,7 @@ from redis.asyncio import Redis
from redis import Redis as RedisSync
from lookyloo.default import AbstractManager, get_config, get_socket_path, safe_create_dir
from lookyloo.helpers import get_captures_dir, UserAgents, CaptureStatus
from lookyloo.helpers import get_captures_dir, CaptureStatus
from lookyloo.modules import FOX
@ -28,7 +28,6 @@ class AsyncCapture(AbstractManager):
self.script_name = 'async_capture'
self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
self.capture_dir: Path = get_captures_dir()
self.user_agents = UserAgents()
self.redis_sync: RedisSync = RedisSync(unix_socket_path=get_socket_path('cache'))
self.lacus = LacusCore(self.redis_sync)
@ -59,7 +58,7 @@ class AsyncCapture(AbstractManager):
# By default, the captures are not on the index, unless the user mark them as listed
listing = True if (b'listing' in to_capture and to_capture[b'listing'].lower() in [b'true', b'1']) else False
await self.lacus.capture(uuid)
status, result = await self.lacus.capture(uuid)
while True:
entries = self.lacus.get_capture(uuid, decode=True)
@ -78,67 +77,63 @@ class AsyncCapture(AbstractManager):
self.logger.warning(f'{entries["status"]} is not a valid status')
break
if not entries:
# broken
self.logger.critical(f'Something went terribly wrong when capturing {uuid}.')
else:
now = datetime.now()
dirpath = self.capture_dir / str(now.year) / f'{now.month:02}' / now.isoformat()
safe_create_dir(dirpath)
now = datetime.now()
dirpath = self.capture_dir / str(now.year) / f'{now.month:02}' / now.isoformat()
safe_create_dir(dirpath)
if b'os' in to_capture or b'browser' in to_capture:
meta: Dict[str, str] = {}
if b'os' in to_capture:
meta['os'] = to_capture[b'os'].decode()
if b'browser' in to_capture:
meta['browser'] = to_capture[b'browser'].decode()
with (dirpath / 'meta').open('w') as _meta:
json.dump(meta, _meta)
if b'os' in to_capture or b'browser' in to_capture:
meta: Dict[str, str] = {}
if b'os' in to_capture:
meta['os'] = to_capture[b'os'].decode()
if b'browser' in to_capture:
meta['browser'] = to_capture[b'browser'].decode()
with (dirpath / 'meta').open('w') as _meta:
json.dump(meta, _meta)
# Write UUID
with (dirpath / 'uuid').open('w') as _uuid:
_uuid.write(uuid)
# Write UUID
with (dirpath / 'uuid').open('w') as _uuid:
_uuid.write(uuid)
# Write no_index marker (optional)
if not listing:
(dirpath / 'no_index').touch()
# Write no_index marker (optional)
if not listing:
(dirpath / 'no_index').touch()
# Write parent UUID (optional)
if b'parent' in to_capture:
with (dirpath / 'parent').open('w') as _parent:
_parent.write(to_capture[b'parent'].decode())
# Write parent UUID (optional)
if b'parent' in to_capture:
with (dirpath / 'parent').open('w') as _parent:
_parent.write(to_capture[b'parent'].decode())
if 'downloaded_filename' in entries and entries['downloaded_filename']:
with (dirpath / '0.data.filename').open('w') as _downloaded_filename:
_downloaded_filename.write(entries['downloaded_filename'])
if 'downloaded_filename' in entries and entries['downloaded_filename']:
with (dirpath / '0.data.filename').open('w') as _downloaded_filename:
_downloaded_filename.write(entries['downloaded_filename'])
if 'downloaded_file' in entries and entries['downloaded_file']:
with (dirpath / '0.data').open('wb') as _downloaded_file:
_downloaded_file.write(entries['downloaded_file'])
if 'downloaded_file' in entries and entries['downloaded_file']:
with (dirpath / '0.data').open('wb') as _downloaded_file:
_downloaded_file.write(entries['downloaded_file'])
if 'error' in entries:
with (dirpath / 'error.txt').open('w') as _error:
json.dump(entries['error'], _error)
if 'error' in entries:
with (dirpath / 'error.txt').open('w') as _error:
json.dump(entries['error'], _error)
with (dirpath / '0.har').open('w') as _har:
json.dump(entries['har'], _har)
with (dirpath / '0.har').open('w') as _har:
json.dump(entries['har'], _har)
if 'png' in entries and entries['png']:
with (dirpath / '0.png').open('wb') as _img:
_img.write(entries['png'])
if 'png' in entries and entries['png']:
with (dirpath / '0.png').open('wb') as _img:
_img.write(entries['png'])
if 'html' in entries and entries['html']:
with (dirpath / '0.html').open('w') as _html:
_html.write(entries['html'])
if 'html' in entries and entries['html']:
with (dirpath / '0.html').open('w') as _html:
_html.write(entries['html'])
if 'last_redirected_url' in entries and entries['last_redirected_url']:
with (dirpath / '0.last_redirect.txt').open('w') as _redir:
_redir.write(entries['last_redirected_url'])
if 'last_redirected_url' in entries and entries['last_redirected_url']:
with (dirpath / '0.last_redirect.txt').open('w') as _redir:
_redir.write(entries['last_redirected_url'])
if 'cookies' in entries and entries['cookies']:
with (dirpath / '0.cookies.json').open('w') as _cookies:
json.dump(entries['cookies'], _cookies)
await self.redis.hset('lookup_dirs', uuid, str(dirpath))
if 'cookies' in entries and entries['cookies']:
with (dirpath / '0.cookies.json').open('w') as _cookies:
json.dump(entries['cookies'], _cookies)
await self.redis.hset('lookup_dirs', uuid, str(dirpath))
async with self.redis.pipeline() as lazy_cleanup:
if queue and await self.redis.zscore('queues', queue):

View File

@ -33,7 +33,7 @@ from .exceptions import (MissingCaptureDirectory,
MissingUUID, TreeNeedsRebuild, NoValidHarFile)
from .helpers import (CaptureStatus, get_captures_dir, get_email_template,
get_resources_hashes, get_taxonomies,
uniq_domains, ParsedUserAgent)
uniq_domains, ParsedUserAgent, load_cookies, UserAgents)
from .indexing import Indexing
from .modules import (MISP, PhishingInitiative, UniversalWhois,
UrlScan, VirusTotal, Phishtank, Hashlookup,
@ -46,6 +46,7 @@ class Lookyloo():
self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.logger.setLevel(get_config('generic', 'loglevel'))
self.indexing = Indexing()
self.user_agents = UserAgents()
self.is_public_instance = get_config('generic', 'public_instance')
self.public_domain = get_config('generic', 'public_domain')
self.taxonomies = get_taxonomies()
@ -424,6 +425,16 @@ class Lookyloo():
headers += f'\nDNT: {query.pop("dnt")}'
headers = headers.strip()
# NOTE: Lookyloo can get the cookies in somewhat weird formats, mornalizing them
cookies = load_cookies(query.pop('cookies', None))
# NOTE: Make sure we have a useragent
user_agent = query.pop('user_agent', None)
if not user_agent:
# Catch case where the UA is broken on the UI, and the async submission.
self.user_agents.user_agents # triggers an update of the default UAs
capture_ua = user_agent if user_agent else self.user_agents.default['useragent']
perma_uuid = self.lacus.enqueue(
url=query.pop('url', None),
document_name=query.pop('document_name', None),
@ -431,10 +442,10 @@ class Lookyloo():
depth=query.pop('depth', 0),
browser=query.pop('browser', None),
device_name=query.pop('device_name', None),
user_agent=query.pop('user_agent', None),
user_agent=capture_ua,
proxy=query.pop('proxy', None),
general_timeout_in_sec=query.pop('general_timeout_in_sec', None),
cookies=query.pop('cookies', None),
cookies=cookies if cookies else None,
headers=headers if headers else None,
http_credentials=query.pop('http_credentials', None),
viewport=query.pop('viewport', None),