mirror of https://github.com/CIRCL/lookyloo
new: Support lacus unreachable by caching locally
+ initialize lacus globally for consistency

branch: pull/545/head
parent dd0426537a
commit 9677c4d120
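In short, this commit makes capture submission resilient to an unreachable Lacus backend: if enqueue fails, Lookyloo generates the capture UUID locally, keeps the full capture settings in redis, and flags the entry 'not_queued'; the background Processing job then retries the enqueue until Lacus is reachable again. Below is a minimal, self-contained sketch of that lifecycle — plain dicts stand in for redis, and FlakyLacus is a hypothetical stand-in for the real PyLacus/LacusCore clients:

import uuid

class FlakyLacus:
    '''Hypothetical stand-in for PyLacus/LacusCore: fails while "down".'''
    def __init__(self):
        self.up = False

    def enqueue(self, **settings) -> str:
        if not self.up:
            raise ConnectionError('lacus is unreachable')
        return settings['uuid']

lacus = FlakyLacus()
to_capture = {}  # stands in for the 'to_capture' entries in redis

# Submission path: cache locally when the backend is down.
settings = {'url': 'https://example.com', 'listing': 1}
try:
    perma_uuid = lacus.enqueue(uuid=str(uuid.uuid4()), **settings)
except Exception:
    perma_uuid = str(uuid.uuid4())  # UUID created locally instead
    to_capture[perma_uuid] = {**settings, 'not_queued': 1}

# Background retry path: re-enqueue flagged captures once lacus is back.
lacus.up = True
for cap_uuid, cached in list(to_capture.items()):
    if cached.get('not_queued'):
        try:
            lacus.enqueue(uuid=cap_uuid,
                          **{k: v for k, v in cached.items() if k != 'not_queued'})
        except Exception:
            break  # still down, retry on the next run
        else:
            cached.pop('not_queued')  # queued for real now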
--- a/bin/async_capture.py
+++ b/bin/async_capture.py
@@ -11,10 +11,10 @@ from pathlib import Path
 from typing import Dict, Optional, Union
 
 from lacuscore import LacusCore, CaptureStatus as CaptureStatusCore, CaptureResponse as CaptureResponseCore
-from pylacus import PyLacus, CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
-from redis import Redis
+from pylacus import CaptureStatus as CaptureStatusPy, CaptureResponse as CaptureResponsePy
 
-from lookyloo.default import AbstractManager, get_config, get_socket_path, safe_create_dir
+from lookyloo.lookyloo import Lookyloo
+from lookyloo.default import AbstractManager, get_config, safe_create_dir
 from lookyloo.helpers import get_captures_dir
 
 from lookyloo.modules import FOX
@@ -30,25 +30,7 @@ class AsyncCapture(AbstractManager):
         self.script_name = 'async_capture'
         self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
         self.capture_dir: Path = get_captures_dir()
-        self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
-
-        self.lacus: Union[PyLacus, LacusCore]
-        has_remote_lacus = False
-        if get_config('generic', 'remote_lacus'):
-            remote_lacus_config = get_config('generic', 'remote_lacus')
-            if remote_lacus_config.get('enable'):
-                self.logger.info("Remote lacus enabled, trying to set it up...")
-                remote_lacus_url = remote_lacus_config.get('url')
-                self.lacus = PyLacus(remote_lacus_url)
-                if self.lacus.is_up:
-                    has_remote_lacus = True
-                    self.logger.info(f"Remote lacus enabled to {remote_lacus_url}.")
-                else:
-                    self.logger.warning(f"Unable to setup remote lacus to {remote_lacus_url}.")
-
-        if not has_remote_lacus:
-            self.lacus = LacusCore(self.redis, get_config('generic', 'tor_proxy'),
-                                   get_config('generic', 'only_global_lookups'))
+        self.lookyloo = Lookyloo()
 
         self.captures: Dict[asyncio.Task, float] = {}
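AsyncCapture previously built its own Redis connection and its own PyLacus/LacusCore client (the block deleted above); it now instantiates Lookyloo() once and reaches both through self.lookyloo.redis and self.lookyloo.lacus, so the client-selection logic lives in a single place. A minimal sketch of the delegation pattern, all names hypothetical:

class Backend:
    '''Stands in for the lacus client.'''

class App:
    '''Stands in for Lookyloo: the only place that builds clients.'''
    def __init__(self):
        self.backend = Backend()

class CaptureWorker:
    '''Stands in for AsyncCapture: delegates instead of rebuilding.'''
    def __init__(self):
        self.app = App()           # like: self.lookyloo = Lookyloo()

    def process(self):
        return self.app.backend    # like: self.lookyloo.lacus

print(type(CaptureWorker().process()).__name__)  # Backend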
@@ -65,21 +47,20 @@ class AsyncCapture(AbstractManager):
         self.set_running()
         uuid: Optional[str] = None
         entries: Union[CaptureResponseCore, CaptureResponsePy]
-        if isinstance(self.lacus, LacusCore):
-            if uuid := await self.lacus.consume_queue():
-                entries = self.lacus.get_capture(uuid, decode=True)
+        if isinstance(self.lookyloo.lacus, LacusCore):
+            if uuid := await self.lookyloo.lacus.consume_queue():
+                entries = self.lookyloo.lacus.get_capture(uuid, decode=True)
                 if entries['status'] != CaptureStatusCore.DONE:
                     self.logger.warning(f'The capture {uuid} is reported as not done ({entries["status"]}) when it should.')
-                    self.redis.zrem('to_capture', uuid)
-                    self.redis.delete(uuid)
+                    self.lookyloo.redis.zrem('to_capture', uuid)
+                    self.lookyloo.redis.delete(uuid)
         else:
             # Find a capture that is done
             try:
-                for uuid_b in self.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf'):
-                    uuid = uuid_b.decode()
+                for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf'):
                     if not uuid:
                         break
-                    entries = self.lacus.get_capture(uuid)
+                    entries = self.lookyloo.lacus.get_capture(uuid)
                     if entries['status'] == CaptureStatusPy.DONE:
                         log = f'Got the capture for {uuid} from Lacus'
                         if runtime := entries.get('runtime'):
@@ -92,33 +73,34 @@ class AsyncCapture(AbstractManager):
             except Exception as e:
                 self.logger.critical(f'Error when getting captures from lacus, will retry later: {e}')
                 uuid = None
+                await asyncio.sleep(10)
 
         if uuid is None:
             self.unset_running()
             return
 
-        self.redis.sadd('ongoing', uuid)
-        queue: Optional[bytes] = self.redis.getdel(f'{uuid}_mgmt')
+        self.lookyloo.redis.sadd('ongoing', uuid)
+        queue: Optional[str] = self.lookyloo.redis.getdel(f'{uuid}_mgmt')
 
-        to_capture: Dict[bytes, bytes] = self.redis.hgetall(uuid)
+        to_capture: Dict[str, str] = self.lookyloo.redis.hgetall(uuid)
 
         if get_config('generic', 'default_public'):
             # By default, the captures are on the index, unless the user marks them as un-listed
-            listing = False if (b'listing' in to_capture and to_capture[b'listing'].lower() in [b'false', b'0', b'']) else True
+            listing = False if ('listing' in to_capture and to_capture['listing'].lower() in ['false', '0', '']) else True
         else:
             # By default, the captures are not on the index, unless the user marks them as listed
-            listing = True if (b'listing' in to_capture and to_capture[b'listing'].lower() in [b'true', b'1']) else False
+            listing = True if ('listing' in to_capture and to_capture['listing'].lower() in ['true', '1']) else False
 
         now = datetime.now()
         dirpath = self.capture_dir / str(now.year) / f'{now.month:02}' / now.isoformat()
         safe_create_dir(dirpath)
 
-        if b'os' in to_capture or b'browser' in to_capture:
+        if 'os' in to_capture or 'browser' in to_capture:
             meta: Dict[str, str] = {}
-            if b'os' in to_capture:
-                meta['os'] = to_capture[b'os'].decode()
-            if b'browser' in to_capture:
-                meta['browser'] = to_capture[b'browser'].decode()
+            if 'os' in to_capture:
+                meta['os'] = to_capture['os']
+            if 'browser' in to_capture:
+                meta['browser'] = to_capture['browser']
             with (dirpath / 'meta').open('w') as _meta:
                 json.dump(meta, _meta)
 
@@ -131,9 +113,9 @@ class AsyncCapture(AbstractManager):
             (dirpath / 'no_index').touch()
 
         # Write parent UUID (optional)
-        if b'parent' in to_capture:
+        if 'parent' in to_capture:
             with (dirpath / 'parent').open('w') as _parent:
-                _parent.write(to_capture[b'parent'].decode())
+                _parent.write(to_capture['parent'])
 
         if 'downloaded_filename' in entries and entries['downloaded_filename']:
             with (dirpath / '0.data.filename').open('w') as _downloaded_filename:
@@ -167,9 +149,9 @@ class AsyncCapture(AbstractManager):
             with (dirpath / '0.cookies.json').open('w') as _cookies:
                 json.dump(entries['cookies'], _cookies)
 
-        lazy_cleanup = self.redis.pipeline()
+        lazy_cleanup = self.lookyloo.redis.pipeline()
         lazy_cleanup.hset('lookup_dirs', uuid, str(dirpath))
-        if queue and self.redis.zscore('queues', queue):
+        if queue and self.lookyloo.redis.zscore('queues', queue):
             lazy_cleanup.zincrby('queues', -1, queue)
         lazy_cleanup.zrem('to_capture', uuid)
         lazy_cleanup.srem('ongoing', uuid)
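The switch from self.redis to self.lookyloo.redis also changes return types: Lookyloo's connection pool is created with decode_responses=True, so hash keys and values come back as str instead of bytes, which is why every b'...' literal and .decode() call disappears in the hunks above. A minimal sketch of the difference, assuming a Redis server reachable on localhost (the printed values are illustrative):

from redis import Redis

raw = Redis('localhost')                          # returns bytes
txt = Redis('localhost', decode_responses=True)   # returns str

raw.hset('capture', mapping={'listing': 1, 'os': 'linux'})
print(raw.hgetall('capture'))   # {b'listing': b'1', b'os': b'linux'}
print(txt.hgetall('capture'))   # {'listing': '1', 'os': 'linux'}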
--- a/bin/background_processing.py
+++ b/bin/background_processing.py
@@ -6,9 +6,8 @@ from collections import Counter
 from datetime import date, timedelta
 from typing import Any, Dict
 
-from redis import Redis
-
-from lookyloo.default import AbstractManager, get_config, get_homedir, get_socket_path, safe_create_dir
+from lookyloo.lookyloo import Lookyloo
+from lookyloo.default import AbstractManager, get_config, get_homedir, safe_create_dir
 from lookyloo.helpers import ParsedUserAgent, serialize_to_json
 
 logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
@@ -19,13 +18,15 @@ class Processing(AbstractManager):
 
     def __init__(self, loglevel: int=logging.INFO):
         super().__init__(loglevel)
-        self.script_name = 'archiver'
+        self.script_name = 'processing'
+        self.lookyloo = Lookyloo()
 
         self.use_own_ua = get_config('generic', 'use_user_agents_users')
 
     def _to_run_forever(self):
         if self.use_own_ua:
             self._build_ua_file()
+        self._retry_failed_enqueue()
 
     def _build_ua_file(self):
         '''Build a file in a format compatible with the capture page'''
@@ -34,11 +35,10 @@ class Processing(AbstractManager):
         safe_create_dir(self_generated_ua_file_path)
         self_generated_ua_file = self_generated_ua_file_path / f'{yesterday.isoformat()}.json'
         if self_generated_ua_file.exists():
-            self.logger.info(f'User-agent file for {yesterday} already exists.')
+            self.logger.debug(f'User-agent file for {yesterday} already exists.')
             return
         self.logger.info(f'Generating user-agent file for {yesterday}')
-        redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True)
-        entries = redis.zrevrange(f'user_agents|{yesterday.isoformat()}', 0, -1)
+        entries = self.lookyloo.redis.zrevrange(f'user_agents|{yesterday.isoformat()}', 0, -1)
         if not entries:
             self.logger.info(f'No User-agent file for {yesterday} to generate.')
             return
@@ -67,13 +67,49 @@ class Processing(AbstractManager):
             json.dump(to_store, f, indent=2, default=serialize_to_json)
 
         # Remove the UA / IP mapping.
-        redis.delete(f'user_agents|{yesterday.isoformat()}')
+        self.lookyloo.redis.delete(f'user_agents|{yesterday.isoformat()}')
         self.logger.info(f'User-agent file for {yesterday} generated.')
 
+    def _retry_failed_enqueue(self):
+        '''If enqueuing failed, the settings are kept in redis under the capture UUID in the 'to_capture' queue, flagged as 'not_queued'; retry to enqueue them.'''
+        for uuid in self.lookyloo.redis.zrevrangebyscore('to_capture', 'Inf', '-Inf'):
+            if self.lookyloo.redis.hexists(uuid, 'not_queued'):
+                self.logger.info(f'Found a non-queued capture ({uuid}), retrying now.')
+                # This capture couldn't be queued and we created the uuid locally
+                query = self.lookyloo.redis.hgetall(uuid)
+                try:
+                    self.lookyloo.lacus.enqueue(
+                        url=query.get('url', None),
+                        document_name=query.get('document_name', None),
+                        document=query.get('document', None),
+                        # depth=query.get('depth', 0),
+                        browser=query.get('browser', None),
+                        device_name=query.get('device_name', None),
+                        user_agent=query.get('user_agent', None),
+                        proxy=query.get('proxy', None),
+                        general_timeout_in_sec=query.get('general_timeout_in_sec', None),
+                        cookies=query.get('cookies', None),
+                        headers=query.get('headers', None),
+                        http_credentials=query.get('http_credentials', None),
+                        viewport=query.get('viewport', None),
+                        referer=query.get('referer', None),
+                        rendered_hostname_only=query.get('rendered_hostname_only', True),
+                        # force=query.get('force', False),
+                        # recapture_interval=query.get('recapture_interval', 300),
+                        priority=query.get('priority', None),
+                        uuid=uuid
+                    )
+                except Exception as e:
+                    self.logger.warning(f'Still unable to enqueue capture: {e}')
+                    break
+                else:
+                    self.lookyloo.redis.hdel(uuid, 'not_queued')
+                    self.logger.info(f'{uuid} enqueued.')
+
 
 def main():
     p = Processing()
-    p.run(sleep_in_sec=3600 * 24)
+    p.run(sleep_in_sec=30)
 
 
 if __name__ == '__main__':
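_retry_failed_enqueue walks the 'to_capture' queue and re-submits anything flagged 'not_queued'. On the first failure it breaks out of the loop (Lacus is presumably still down, so trying the rest would be pointless), and only the else branch — reached when enqueue did not raise — clears the flag. The commit also fixes the copy-pasted script_name ('archiver' → 'processing') and drops the run interval from 24h to 30s so retries happen promptly. A small self-contained reminder of Python's try/except/else flow, which the retry loop relies on:

def flaky(fail: bool) -> None:
    if fail:
        raise ConnectionError('backend unreachable')

for attempt, fail in enumerate([True, False]):
    try:
        flaky(fail)
    except ConnectionError as e:
        print(f'attempt {attempt}: kept for later ({e})')
    else:
        # Runs only when the try block raised nothing: safe to clear the flag.
        print(f'attempt {attempt}: enqueued, flag cleared')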
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -9,10 +9,12 @@ import smtplib
 from collections import defaultdict
 from datetime import date, datetime
 from email.message import EmailMessage
+from functools import cached_property
 from io import BytesIO
 from pathlib import Path
 from typing import (Any, Dict, Iterable, List, MutableMapping, Optional, Set,
                     Tuple, Union)
+from uuid import uuid4
 from zipfile import ZipFile
 
 from defang import defang  # type: ignore
@@ -108,27 +110,36 @@ class Lookyloo():
         self._captures_index = CapturesIndex(self.redis, self.context)
         self.logger.info('Index initialized.')
 
+        # init lacus
+        self.lacus
+
+    @property
+    def redis(self):
+        return Redis(connection_pool=self.redis_pool)
+
+    @cached_property
+    def lacus(self):
         has_remote_lacus = False
-        self.lacus: Union[PyLacus, LacusCore]
+        self._lacus: Union[PyLacus, LacusCore]
         if get_config('generic', 'remote_lacus'):
             remote_lacus_config = get_config('generic', 'remote_lacus')
             if remote_lacus_config.get('enable'):
                 self.logger.info("Remote lacus enabled, trying to set it up...")
                 remote_lacus_url = remote_lacus_config.get('url')
-                self.lacus = PyLacus(remote_lacus_url)
-                if self.lacus.is_up:
+                self._lacus = PyLacus(remote_lacus_url)
+                if self._lacus.is_up:
                     has_remote_lacus = True
                     self.logger.info(f"Remote lacus enabled to {remote_lacus_url}.")
                 else:
                     self.logger.warning(f"Unable to setup remote lacus to {remote_lacus_url}.")
+                    raise LookylooException('Remote lacus is enabled but unreachable.')
 
         if not has_remote_lacus:
-            self.lacus = LacusCore(self.redis, get_config('generic', 'tor_proxy'),
-                                   get_config('generic', 'only_global_lookups'))
-
-    @property
-    def redis(self):
-        return Redis(connection_pool=self.redis_pool)
+            # We need a redis connector that doesn't decode.
+            redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
+            self._lacus = LacusCore(redis, get_config('generic', 'tor_proxy'),
+                                    get_config('generic', 'only_global_lookups'))
+        return self._lacus
 
     def add_context(self, capture_uuid: str, /, urlnode_uuid: str, *, ressource_hash: str,
                     legitimate: bool, malicious: bool, details: Dict[str, Dict[str, str]]):
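Moving the setup into a @cached_property means the PyLacus-vs-LacusCore choice is made once per Lookyloo instance, on first access; the bare self.lacus statement in __init__ forces that first access, so a remote lacus that is enabled but unreachable now fails loudly at startup (via LookylooException) instead of at the first capture. A minimal sketch of the pattern, with hypothetical stand-ins for the two clients:

from functools import cached_property

class RemoteClient:
    '''Hypothetical stand-in for PyLacus.'''
    def __init__(self, up: bool):
        self.is_up = up

class LocalClient:
    '''Hypothetical stand-in for LacusCore.'''

class Service:
    def __init__(self, remote_enabled: bool):
        self.remote_enabled = remote_enabled
        self.client  # touch it: fail at startup, not at first use

    @cached_property
    def client(self):
        # Computed on first access, then cached on the instance.
        if self.remote_enabled:
            remote = RemoteClient(up=False)  # pretend the remote is down
            if remote.is_up:
                return remote
            raise RuntimeError('Remote backend is enabled but unreachable.')
        return LocalClient()

print(type(Service(remote_enabled=False).client).__name__)  # LocalClient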
@@ -375,7 +386,15 @@ class Lookyloo():
         elif self.redis.sismember('ongoing', capture_uuid):
             # Post-processing on lookyloo's side
             return CaptureStatusCore.ONGOING
-        lacus_status = self.lacus.get_capture_status(capture_uuid)
+        try:
+            lacus_status = self.lacus.get_capture_status(capture_uuid)
+        except Exception as e:
+            self.logger.warning(f'Unable to get the status for {capture_uuid} from lacus: {e}')
+            if self.redis.zscore('to_capture', capture_uuid) is not None:
+                return CaptureStatusCore.QUEUED
+            else:
+                return CaptureStatusCore.UNKNOWN
+
         if (lacus_status == CaptureStatusCore.UNKNOWN
                 and self.redis.zscore('to_capture', capture_uuid) is not None):
             # If we do the query before lacus picks it up, we would tell the user that the UUID doesn't exist.
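With a remote lacus, get_capture_status is a network call that can fail; rather than crashing the status endpoint, the new code degrades to what redis alone can tell us. Roughly, as a sketch of the decision rather than the actual method:

def fallback_status(in_to_capture_queue: bool) -> str:
    # The settings are still cached locally: the capture is queued (possibly
    # flagged 'not_queued' and waiting for a retry), so report QUEUED;
    # otherwise we genuinely don't know.
    return 'QUEUED' if in_to_capture_queue else 'UNKNOWN'

print(fallback_status(True), fallback_status(False))  # QUEUED UNKNOWN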
@@ -473,41 +492,57 @@ class Lookyloo():
         query = self._prepare_lacus_query(query)
 
-        priority = get_priority(source, user, authenticated)
-        perma_uuid = self.lacus.enqueue(
-            url=query.pop('url', None),
-            document_name=query.pop('document_name', None),
-            document=query.pop('document', None),
-            # depth=query.pop('depth', 0),
-            browser=query.pop('browser', None),
-            device_name=query.pop('device_name', None),
-            user_agent=query.pop('user_agent', None),
-            proxy=query.pop('proxy', None),
-            general_timeout_in_sec=query.pop('general_timeout_in_sec', None),
-            cookies=query.pop('cookies', None),
-            headers=query.pop('headers', None),
-            http_credentials=query.pop('http_credentials', None),
-            viewport=query.pop('viewport', None),
-            referer=query.pop('referer', None),
-            rendered_hostname_only=query.pop('rendered_hostname_only', True),
-            # force=query.pop('force', False),
-            # recapture_interval=query.pop('recapture_interval', 300),
-            priority=priority
-        )
-
-        if (not self.redis.hexists('lookup_dirs', perma_uuid)  # already captured
-                and self.redis.zscore('to_capture', perma_uuid) is None):  # capture ongoing
-            if priority < -10:
-                # Someone is probably abusing the system with useless URLs, remove them from the index
-                query['listing'] = 0
-
-            p = self.redis.pipeline()
-            p.zadd('to_capture', {perma_uuid: priority})
-            if query:
-                p.hset(perma_uuid, mapping=query)  # This will add the remaining entries that are lookyloo specific
-            p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
-            p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
-            p.execute()
-
+        query['priority'] = get_priority(source, user, authenticated)
+        if query['priority'] < -10:
+            # Someone is probably abusing the system with useless URLs, remove them from the index
+            query['listing'] = 0
+        try:
+            perma_uuid = self.lacus.enqueue(
+                url=query.get('url', None),
+                document_name=query.get('document_name', None),
+                document=query.get('document', None),
+                # depth=query.get('depth', 0),
+                browser=query.get('browser', None),
+                device_name=query.get('device_name', None),
+                user_agent=query.get('user_agent', None),
+                proxy=query.get('proxy', None),
+                general_timeout_in_sec=query.get('general_timeout_in_sec', None),
+                cookies=query.get('cookies', None),
+                headers=query.get('headers', None),
+                http_credentials=query.get('http_credentials', None),
+                viewport=query.get('viewport', None),
+                referer=query.get('referer', None),
+                rendered_hostname_only=query.get('rendered_hostname_only', True),
+                # force=query.get('force', False),
+                # recapture_interval=query.get('recapture_interval', 300),
+                priority=query.get('priority', 0)
+            )
+        except Exception as e:
+            self.logger.critical(f'Unable to enqueue capture: {e}')
+            perma_uuid = str(uuid4())
+            query['not_queued'] = 1
+        finally:
+            if (not self.redis.hexists('lookup_dirs', perma_uuid)  # already captured
+                    and self.redis.zscore('to_capture', perma_uuid) is None):  # capture ongoing
+
+                # Make the settings redis compatible
+                mapping_capture: Dict[str, Union[bytes, float, int, str]] = {}
+                for key, value in query.items():
+                    if isinstance(value, bool):
+                        mapping_capture[key] = 1 if value else 0
+                    elif isinstance(value, (list, dict)):
+                        if value:
+                            mapping_capture[key] = json.dumps(value)
+                    elif value is not None:
+                        mapping_capture[key] = value
+
+                p = self.redis.pipeline()
+                p.zadd('to_capture', {perma_uuid: query['priority']})
+                p.hset(perma_uuid, mapping=mapping_capture)
+                p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
+                p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
+                p.execute()
+
         return perma_uuid
 
     def send_mail(self, capture_uuid: str, /, email: str='', comment: str='') -> None:
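Two things changed around enqueue: query.pop(...) became query.get(...) so the settings dict survives intact (it has to be cached verbatim in redis if enqueuing fails), and since redis hashes only hold flat scalar values, booleans and non-empty lists/dicts are converted first, inside the finally block that runs whether or not enqueue succeeded. The conversion is small enough to test standalone; this mirrors the loop in the diff above:

import json
from typing import Dict, Union

def to_redis_mapping(query: dict) -> Dict[str, Union[bytes, float, int, str]]:
    mapping: Dict[str, Union[bytes, float, int, str]] = {}
    for key, value in query.items():
        if isinstance(value, bool):
            mapping[key] = 1 if value else 0      # bools become 0/1
        elif isinstance(value, (list, dict)):
            if value:
                mapping[key] = json.dumps(value)  # non-empty containers as JSON
        elif value is not None:
            mapping[key] = value                  # None entries are dropped
    return mapping

print(to_redis_mapping({'url': 'https://example.com', 'listing': True,
                        'cookies': [], 'referer': None, 'priority': 0}))
# {'url': 'https://example.com', 'listing': 1, 'priority': 0}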
--- a/poetry.lock
+++ b/poetry.lock
@@ -571,7 +571,7 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-
 
 [[package]]
 name = "lacuscore"
-version = "1.1.2"
+version = "1.1.3"
 description = "Core of Lacus, usable as a module"
 category = "main"
 optional = false
@@ -901,7 +901,7 @@ docs = ["Sphinx (>=5.1.1,<6.0.0)"]
 
 [[package]]
 name = "pylacus"
-version = "1.1.0"
+version = "1.1.1"
 description = "Python CLI and module for lacus"
 category = "main"
 optional = false
@@ -1045,7 +1045,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
 
 [[package]]
 name = "pytz"
-version = "2022.5"
+version = "2022.6"
 description = "World timezone definitions, modern and historical"
 category = "main"
 optional = false
@@ -1476,7 +1476,7 @@ misp = ["python-magic", "pydeep2"]
 [metadata]
 lock-version = "1.1"
 python-versions = ">=3.8,<3.12"
-content-hash = "f274187b6e2cc2fd68d671405f24ddbadf6a362e68cea4eb5093fdce7ae9c55e"
+content-hash = "5adbb7f2a6e81f21a199fc3f9bd1e4594dcc6a1dece1282f892edc5e95c4ba06"
 
 [metadata.files]
 aiohttp = [
@@ -1931,8 +1931,8 @@ jsonschema = [
     {file = "jsonschema-4.16.0.tar.gz", hash = "sha256:165059f076eff6971bae5b742fc029a7b4ef3f9bcf04c14e4776a7605de14b23"},
 ]
 lacuscore = [
-    {file = "lacuscore-1.1.2-py3-none-any.whl", hash = "sha256:876d3ccb743bb4d43421d1670762af2f54c6b82dfdec9b5a4c37109dbabd02c6"},
-    {file = "lacuscore-1.1.2.tar.gz", hash = "sha256:ed83c8f4cb31e24ec0e39ce85fcd9dd675c0ae96bf9aaededf7c21469be6b1ad"},
+    {file = "lacuscore-1.1.3-py3-none-any.whl", hash = "sha256:8e9cd36083723423b6ecc2bac2da4cd97a35a92e8b3bab8907d583323e732a97"},
+    {file = "lacuscore-1.1.3.tar.gz", hash = "sha256:e02ea9fa594ce32ee5094917c08d8e20cdc0b2bf5dd939cbacf6b02103c4028e"},
 ]
 lief = [
     {file = "lief-0.12.2-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:cdadaab4b9ec756e1d1f0324acd6e280ae849d251e66f836da455df592deaf9e"},
@@ -2358,8 +2358,8 @@ pyhashlookup = [
     {file = "pyhashlookup-1.2.1.tar.gz", hash = "sha256:eb514cc1a5559a013a8882e101849fa52a37641f2a7d9dc21c0d266b37607aa5"},
 ]
 pylacus = [
-    {file = "pylacus-1.1.0-py3-none-any.whl", hash = "sha256:880ce273fe35a554a35d6812e899e69b2f360fa5041822b136fbaf8f336a44d3"},
-    {file = "pylacus-1.1.0.tar.gz", hash = "sha256:90ca603bf58d19197f4776fffc58b9b98f34d12916457a026ff2cddbaa42fb6f"},
+    {file = "pylacus-1.1.1-py3-none-any.whl", hash = "sha256:61eab358c20b0fbb1915a91f82d5dc8ee3b8f22de7e23dfe2206af937e4d3728"},
+    {file = "pylacus-1.1.1.tar.gz", hash = "sha256:4eca75827ba977fb6dbeef587cf2a08324e4e4cdbc30a8762c695099be27a9dc"},
 ]
 pylookyloo = [
     {file = "pylookyloo-1.16.0-py3-none-any.whl", hash = "sha256:2c39e26eae61144e6bb986fbcb58604e9804b4d6b2fe1ff844d8b429db2628d2"},
@@ -2422,8 +2422,8 @@ python-magic = [
     {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"},
 ]
 pytz = [
-    {file = "pytz-2022.5-py2.py3-none-any.whl", hash = "sha256:335ab46900b1465e714b4fda4963d87363264eb662aab5e65da039c25f1f5b22"},
-    {file = "pytz-2022.5.tar.gz", hash = "sha256:c4d88f472f54d615e9cd582a5004d1e5f624854a6a27a6211591c251f22a6914"},
+    {file = "pytz-2022.6-py2.py3-none-any.whl", hash = "sha256:222439474e9c98fced559f1709d89e6c9cbf8d79c794ff3eb9f8800064291427"},
+    {file = "pytz-2022.6.tar.gz", hash = "sha256:e89512406b793ca39f5971bc999cc538ce125c0e51c27941bef4568b460095e2"},
 ]
 pytz-deprecation-shim = [
     {file = "pytz_deprecation_shim-0.1.0.post0-py2.py3-none-any.whl", hash = "sha256:8314c9692a636c8eb3bda879b9f119e350e93223ae83e70e80c31675a0fdc1a6"},
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -67,8 +67,8 @@ passivetotal = "^2.5.9"
 werkzeug = "2.1.2"
 filetype = "^1.1.0"
 pypandora = "^1.2.0"
-lacuscore = "^1.1.2"
-pylacus = "^1.1.0"
+lacuscore = "^1.1.3"
+pylacus = "^1.1.1"
 
 [tool.poetry.extras]
 misp = ['python-magic', 'pydeep2']
--- a/tools/monitoring.py
+++ b/tools/monitoring.py
@@ -67,6 +67,8 @@ class Monitoring():
         to_return = []
         for uuid, rank in captures_uuid:
             capture_params = self.redis_cache.hgetall(uuid)
+            if 'document' in capture_params:
+                capture_params.pop('document')
             if capture_params:
                 to_return.append((uuid, rank, capture_params))
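Since capture settings can now sit in redis for a while, the monitoring tool strips the 'document' entry (the submitted file content, which can be large) before displaying the queue. Equivalent standalone snippet:

capture_params = {'url': 'https://example.com', 'document': '<large blob>'}
if 'document' in capture_params:
    capture_params.pop('document')
print(capture_params)  # {'url': 'https://example.com'}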