mirror of https://github.com/CIRCL/lookyloo
chg: Add missing bits
parent dcbae777a1
commit 623813167e

@@ -13,7 +13,7 @@ from redis.asyncio import Redis
 from redis import Redis as RedisSync

 from lookyloo.default import AbstractManager, get_config, get_socket_path, safe_create_dir
-from lookyloo.helpers import get_captures_dir, UserAgents, CaptureStatus
+from lookyloo.helpers import get_captures_dir, CaptureStatus

 from lookyloo.modules import FOX

@@ -28,7 +28,6 @@ class AsyncCapture(AbstractManager):
         self.script_name = 'async_capture'
         self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
         self.capture_dir: Path = get_captures_dir()
-        self.user_agents = UserAgents()
         self.redis_sync: RedisSync = RedisSync(unix_socket_path=get_socket_path('cache'))
         self.lacus = LacusCore(self.redis_sync)

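The constructor change drops the local UserAgents() helper (it is re-created in the Lookyloo class, see the later hunks) and keeps the LacusCore wiring on a synchronous Redis connection. A minimal sketch of that wiring outside the class, assuming the lacuscore import path and using a placeholder socket path:

# Sketch of the constructor wiring above, outside the class.
# The lacuscore import path and the socket path are assumptions;
# Redis(unix_socket_path=...) and LacusCore(redis_sync) mirror the hunk.
from redis import Redis
from lacuscore import LacusCore

redis_sync = Redis(unix_socket_path='cache.sock')  # placeholder path
lacus = LacusCore(redis_sync)
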
@@ -59,7 +58,7 @@ class AsyncCapture(AbstractManager):
         # By default, the captures are not on the index, unless the user mark them as listed
         listing = True if (b'listing' in to_capture and to_capture[b'listing'].lower() in [b'true', b'1']) else False

-        await self.lacus.capture(uuid)
+        status, result = await self.lacus.capture(uuid)

         while True:
             entries = self.lacus.get_capture(uuid, decode=True)

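This hunk now unpacks the return value of lacus.capture() while keeping the existing polling loop on get_capture(). A hedged sketch of the resulting capture-then-poll pattern, with the status check reduced to a placeholder (the real code compares against CaptureStatus values):

# Capture-then-poll pattern from the hunk above. The call shapes follow the
# diff; DONE is a placeholder for the real CaptureStatus comparison.
import asyncio

DONE = 1  # placeholder "capture finished" status value

async def run_one_capture(lacus, uuid: str) -> dict:
    status, result = await lacus.capture(uuid)  # return shape as in the new line above
    while True:
        entries = lacus.get_capture(uuid, decode=True)
        if entries.get('status') == DONE:
            return entries
        await asyncio.sleep(1)
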
@@ -78,67 +77,63 @@ class AsyncCapture(AbstractManager):
                 self.logger.warning(f'{entries["status"]} is not a valid status')
                 break

-        if not entries:
-            # broken
-            self.logger.critical(f'Something went terribly wrong when capturing {uuid}.')
-        else:
-            now = datetime.now()
-            dirpath = self.capture_dir / str(now.year) / f'{now.month:02}' / now.isoformat()
-            safe_create_dir(dirpath)
+        now = datetime.now()
+        dirpath = self.capture_dir / str(now.year) / f'{now.month:02}' / now.isoformat()
+        safe_create_dir(dirpath)

         if b'os' in to_capture or b'browser' in to_capture:
             meta: Dict[str, str] = {}
             if b'os' in to_capture:
                 meta['os'] = to_capture[b'os'].decode()
             if b'browser' in to_capture:
                 meta['browser'] = to_capture[b'browser'].decode()
             with (dirpath / 'meta').open('w') as _meta:
                 json.dump(meta, _meta)

         # Write UUID
         with (dirpath / 'uuid').open('w') as _uuid:
             _uuid.write(uuid)

         # Write no_index marker (optional)
         if not listing:
             (dirpath / 'no_index').touch()

         # Write parent UUID (optional)
         if b'parent' in to_capture:
             with (dirpath / 'parent').open('w') as _parent:
                 _parent.write(to_capture[b'parent'].decode())

         if 'downloaded_filename' in entries and entries['downloaded_filename']:
             with (dirpath / '0.data.filename').open('w') as _downloaded_filename:
                 _downloaded_filename.write(entries['downloaded_filename'])

         if 'downloaded_file' in entries and entries['downloaded_file']:
             with (dirpath / '0.data').open('wb') as _downloaded_file:
                 _downloaded_file.write(entries['downloaded_file'])

         if 'error' in entries:
             with (dirpath / 'error.txt').open('w') as _error:
                 json.dump(entries['error'], _error)

         with (dirpath / '0.har').open('w') as _har:
             json.dump(entries['har'], _har)

         if 'png' in entries and entries['png']:
             with (dirpath / '0.png').open('wb') as _img:
                 _img.write(entries['png'])

         if 'html' in entries and entries['html']:
             with (dirpath / '0.html').open('w') as _html:
                 _html.write(entries['html'])

         if 'last_redirected_url' in entries and entries['last_redirected_url']:
             with (dirpath / '0.last_redirect.txt').open('w') as _redir:
                 _redir.write(entries['last_redirected_url'])

         if 'cookies' in entries and entries['cookies']:
             with (dirpath / '0.cookies.json').open('w') as _cookies:
                 json.dump(entries['cookies'], _cookies)
         await self.redis.hset('lookup_dirs', uuid, str(dirpath))

         async with self.redis.pipeline() as lazy_cleanup:
             if queue and await self.redis.zscore('queues', queue):

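With the broken error branch removed, every finished capture now gets a dated directory and the artifacts are written straight from entries. A short sketch of reading such a directory back, using only file names that appear in this hunk (the directory path is a placeholder):

# Reading back a capture directory written by the code above.
# Only file names from the hunk are used; the path is a placeholder.
import json
from pathlib import Path

capture_dir = Path('/path/to/a/capture')  # placeholder
uuid = (capture_dir / 'uuid').read_text()
har = json.loads((capture_dir / '0.har').read_text())
listed = not (capture_dir / 'no_index').exists()  # no_index keeps it off the public index
screenshot = (capture_dir / '0.png').read_bytes() if (capture_dir / '0.png').exists() else None

The remaining hunks are on the Lookyloo class side, where the submission query is prepared before being handed to lacus.enqueue().
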
@@ -33,7 +33,7 @@ from .exceptions import (MissingCaptureDirectory,
                           MissingUUID, TreeNeedsRebuild, NoValidHarFile)
 from .helpers import (CaptureStatus, get_captures_dir, get_email_template,
                       get_resources_hashes, get_taxonomies,
-                      uniq_domains, ParsedUserAgent)
+                      uniq_domains, ParsedUserAgent, load_cookies, UserAgents)
 from .indexing import Indexing
 from .modules import (MISP, PhishingInitiative, UniversalWhois,
                       UrlScan, VirusTotal, Phishtank, Hashlookup,

|
@ -46,6 +46,7 @@ class Lookyloo():
|
||||||
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
self.logger = logging.getLogger(f'{self.__class__.__name__}')
|
||||||
self.logger.setLevel(get_config('generic', 'loglevel'))
|
self.logger.setLevel(get_config('generic', 'loglevel'))
|
||||||
self.indexing = Indexing()
|
self.indexing = Indexing()
|
||||||
|
self.user_agents = UserAgents()
|
||||||
self.is_public_instance = get_config('generic', 'public_instance')
|
self.is_public_instance = get_config('generic', 'public_instance')
|
||||||
self.public_domain = get_config('generic', 'public_domain')
|
self.public_domain = get_config('generic', 'public_domain')
|
||||||
self.taxonomies = get_taxonomies()
|
self.taxonomies = get_taxonomies()
|
||||||
|
@@ -424,6 +425,16 @@ class Lookyloo():
             headers += f'\nDNT: {query.pop("dnt")}'
         headers = headers.strip()

+        # NOTE: Lookyloo can get the cookies in somewhat weird formats, normalizing them
+        cookies = load_cookies(query.pop('cookies', None))
+
+        # NOTE: Make sure we have a useragent
+        user_agent = query.pop('user_agent', None)
+        if not user_agent:
+            # Catch case where the UA is broken on the UI, and the async submission.
+            self.user_agents.user_agents  # triggers an update of the default UAs
+        capture_ua = user_agent if user_agent else self.user_agents.default['useragent']
+
         perma_uuid = self.lacus.enqueue(
             url=query.pop('url', None),
             document_name=query.pop('document_name', None),

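The new block normalizes whatever cookie format was submitted via load_cookies() and guarantees a user agent, falling back to the instance default when none was provided. A standalone sketch of that fallback (the {'useragent': ...} dict shape mirrors self.user_agents.default in the hunk; the default string below is purely illustrative):

# User-agent fallback from the hunk above, as a standalone function.
from typing import Optional

def pick_capture_ua(submitted: Optional[str], default_ua: dict) -> str:
    # Prefer the submitted UA; otherwise use the instance-wide default.
    return submitted if submitted else default_ua['useragent']

print(pick_capture_ua(None, {'useragent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0'}))
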
@@ -431,10 +442,10 @@ class Lookyloo():
             depth=query.pop('depth', 0),
             browser=query.pop('browser', None),
             device_name=query.pop('device_name', None),
-            user_agent=query.pop('user_agent', None),
+            user_agent=capture_ua,
             proxy=query.pop('proxy', None),
             general_timeout_in_sec=query.pop('general_timeout_in_sec', None),
-            cookies=query.pop('cookies', None),
+            cookies=cookies if cookies else None,
             headers=headers if headers else None,
             http_credentials=query.pop('http_credentials', None),
             viewport=query.pop('viewport', None),

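Together with the previous hunk, enqueue() now receives the normalized cookies and the resolved user agent instead of the raw query values. An illustrative submission query using only keys that are popped in this code path; the values, and how the dict reaches the method, are assumptions:

# Illustrative submission query; keys come from the diff, values are made up.
query = {
    'url': 'https://www.example.com',
    'user_agent': None,  # nothing submitted -> the new fallback supplies the default UA
    'cookies': '[{"name": "session", "value": "abc", "domain": "example.com"}]',  # assumed format accepted by load_cookies()
    'dnt': '1',          # folded into the headers string just above this hunk
}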