mirror of https://github.com/CIRCL/lookyloo
chg: Bump lacuscore
parent
9189888a0d
commit
d38b612c37
|
@ -6,14 +6,13 @@ import logging
|
|||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from typing import Dict, Optional, Set
|
||||
|
||||
from lacuscore import LacusCore
|
||||
from redis.asyncio import Redis
|
||||
from redis import Redis as RedisSync
|
||||
from lacuscore import LacusCore, CaptureStatus
|
||||
from redis import Redis
|
||||
|
||||
from lookyloo.default import AbstractManager, get_config, get_socket_path, safe_create_dir
|
||||
from lookyloo.helpers import get_captures_dir, CaptureStatus
|
||||
from lookyloo.helpers import get_captures_dir
|
||||
|
||||
from lookyloo.modules import FOX
|
||||
|
||||
|
@ -28,8 +27,9 @@ class AsyncCapture(AbstractManager):
|
|||
self.script_name = 'async_capture'
|
||||
self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
|
||||
self.capture_dir: Path = get_captures_dir()
|
||||
self.redis_sync: RedisSync = RedisSync(unix_socket_path=get_socket_path('cache'))
|
||||
self.lacus = LacusCore(self.redis_sync)
|
||||
self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
|
||||
self.lacus = LacusCore(self.redis)
|
||||
self.captures: Set[asyncio.Task] = set()
|
||||
|
||||
self.fox = FOX(get_config('modules', 'FOX'))
|
||||
if not self.fox.available:
|
||||
|
@ -41,15 +41,13 @@ class AsyncCapture(AbstractManager):
|
|||
|
||||
async def process_capture_queue(self) -> None:
|
||||
'''Process a query from the capture queue'''
|
||||
value: List[Tuple[bytes, float]] = await self.redis.zpopmax('to_capture')
|
||||
if not value or not value[0]:
|
||||
# The queue was consumed by another process.
|
||||
uuid = await self.lacus.consume_queue()
|
||||
if not uuid:
|
||||
return
|
||||
uuid: str = value[0][0].decode()
|
||||
queue: Optional[bytes] = await self.redis.getdel(f'{uuid}_mgmt')
|
||||
await self.redis.sadd('ongoing', uuid)
|
||||
self.redis.sadd('ongoing', uuid)
|
||||
queue: Optional[bytes] = self.redis.getdel(f'{uuid}_mgmt')
|
||||
|
||||
to_capture: Dict[bytes, bytes] = await self.redis.hgetall(uuid)
|
||||
to_capture: Dict[bytes, bytes] = self.redis.hgetall(uuid)
|
||||
|
||||
if get_config('generic', 'default_public'):
|
||||
# By default, the captures are on the index, unless the user marks them as unlisted
|
||||
|
@ -58,19 +56,17 @@ class AsyncCapture(AbstractManager):
|
|||
# By default, the captures are not on the index, unless the user marks them as listed
|
||||
listing = True if (b'listing' in to_capture and to_capture[b'listing'].lower() in [b'true', b'1']) else False
|
||||
|
||||
status, result = await self.lacus.capture(uuid)
|
||||
|
||||
while True:
|
||||
entries = self.lacus.get_capture(uuid, decode=True)
|
||||
if entries['status'] == CaptureStatus.DONE.value:
|
||||
if entries['status'] == CaptureStatus.DONE:
|
||||
break
|
||||
elif entries['status'] == CaptureStatus.UNKNOWN.value:
|
||||
elif entries['status'] == CaptureStatus.UNKNOWN:
|
||||
self.logger.warning(f'Unable to find {uuid}.')
|
||||
break
|
||||
elif entries['status'] == CaptureStatus.QUEUED.value:
|
||||
elif entries['status'] == CaptureStatus.QUEUED:
|
||||
self.logger.info(f'{uuid} is in the queue.')
|
||||
await asyncio.sleep(5)
|
||||
elif entries['status'] == CaptureStatus.ONGOING.value:
|
||||
elif entries['status'] == CaptureStatus.ONGOING:
|
||||
self.logger.info(f'{uuid} is ongoing.')
|
||||
await asyncio.sleep(5)
|
||||
else:
|
||||
|
@ -135,23 +131,22 @@ class AsyncCapture(AbstractManager):
|
|||
with (dirpath / '0.cookies.json').open('w') as _cookies:
|
||||
json.dump(entries['cookies'], _cookies)
|
||||
|
||||
async with self.redis.pipeline() as lazy_cleanup:
|
||||
await lazy_cleanup.hset('lookup_dirs', uuid, str(dirpath))
|
||||
if queue and await self.redis.zscore('queues', queue):
|
||||
await lazy_cleanup.zincrby('queues', -1, queue)
|
||||
await lazy_cleanup.srem('ongoing', uuid)
|
||||
await lazy_cleanup.delete(uuid)
|
||||
with self.redis.pipeline() as lazy_cleanup:
|
||||
lazy_cleanup.hset('lookup_dirs', uuid, str(dirpath))
|
||||
if queue and self.redis.zscore('queues', queue):
|
||||
lazy_cleanup.zincrby('queues', -1, queue)
|
||||
lazy_cleanup.srem('ongoing', uuid)
|
||||
lazy_cleanup.delete(uuid)
|
||||
# make sure to expire the key if nothing was processed for a while (= queues empty)
|
||||
await lazy_cleanup.expire('queues', 600)
|
||||
await lazy_cleanup.execute()
|
||||
lazy_cleanup.expire('queues', 600)
|
||||
lazy_cleanup.execute()
|
||||
|
||||
async def _to_run_forever_async(self):
|
||||
self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
|
||||
while await self.redis.exists('to_capture'):
|
||||
await self.process_capture_queue()
|
||||
if self.shutdown_requested():
|
||||
break
|
||||
await self.redis.close()
|
||||
capture = asyncio.create_task(self.process_capture_queue())
|
||||
capture.add_done_callback(self.captures.discard)
|
||||
self.captures.add(capture)
|
||||
while len(self.captures) >= get_config('generic', 'async_capture_processes'):
|
||||
await asyncio.sleep(1)
|
||||
|
||||
|
||||
def main():
|
||||
|
|
|
@ -4,7 +4,6 @@ import json
|
|||
import logging
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
from enum import IntEnum, unique
|
||||
from functools import lru_cache
|
||||
from importlib.metadata import version
|
||||
from io import BufferedIOBase
|
||||
|
@ -26,14 +25,6 @@ from .default import get_homedir, safe_create_dir, get_config
|
|||
logger = logging.getLogger('Lookyloo - Helpers')
|
||||
|
||||
|
||||
@unique
|
||||
class CaptureStatus(IntEnum):
|
||||
UNKNOWN = -1
|
||||
QUEUED = 0
|
||||
DONE = 1
|
||||
ONGOING = 2
|
||||
|
||||
|
||||
# This method is used in json.dump or json.dumps calls as the default parameter:
|
||||
# json.dumps(..., default=serialize_to_json)
|
||||
def serialize_to_json(obj: Union[Set]) -> Union[List]:
|
||||
|
|
|
@ -19,7 +19,7 @@ from zipfile import ZipFile
|
|||
|
||||
from defang import defang # type: ignore
|
||||
from har2tree import CrawledTree, HostNode, URLNode
|
||||
from lacuscore import LacusCore
|
||||
from lacuscore import LacusCore, CaptureStatus
|
||||
from PIL import Image, UnidentifiedImageError
|
||||
from playwrightcapture import get_devices
|
||||
from pymisp import MISPAttribute, MISPEvent, MISPObject
|
||||
|
@ -31,7 +31,7 @@ from .context import Context
|
|||
from .default import LookylooException, get_homedir, get_config, get_socket_path
|
||||
from .exceptions import (MissingCaptureDirectory,
|
||||
MissingUUID, TreeNeedsRebuild, NoValidHarFile)
|
||||
from .helpers import (CaptureStatus, get_captures_dir, get_email_template,
|
||||
from .helpers import (get_captures_dir, get_email_template,
|
||||
get_resources_hashes, get_taxonomies,
|
||||
uniq_domains, ParsedUserAgent, load_cookies, UserAgents)
|
||||
from .indexing import Indexing
|
||||
|
@ -101,7 +101,8 @@ class Lookyloo():
|
|||
self._captures_index = CapturesIndex(self.redis, self.context)
|
||||
self.logger.info('Index initialized.')
|
||||
|
||||
self.lacus = LacusCore(self.redis, get_config('generic', 'tor_proxy'))
|
||||
self.lacus = LacusCore(self.redis, get_config('generic', 'tor_proxy'),
|
||||
get_config('generic', 'only_global_lookups'))
|
||||
|
||||
@property
|
||||
def redis(self):
|
||||
|
@ -347,13 +348,12 @@ class Lookyloo():
|
|||
|
||||
def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
|
||||
'''Returns the status (queued, ongoing, done, or UUID unknown)'''
|
||||
if self.redis.zrank('to_capture', capture_uuid) is not None:
|
||||
return CaptureStatus.QUEUED
|
||||
elif self.redis.hexists('lookup_dirs', capture_uuid):
|
||||
if self.redis.hexists('lookup_dirs', capture_uuid):
|
||||
return CaptureStatus.DONE
|
||||
elif self.redis.sismember('ongoing', capture_uuid):
|
||||
# Post-processing on lookyloo's side
|
||||
return CaptureStatus.ONGOING
|
||||
return CaptureStatus.UNKNOWN
|
||||
return self.lacus.get_capture_status(capture_uuid)
|
||||
|
||||
def try_error_status(self, capture_uuid: str, /) -> Optional[str]:
|
||||
'''If it is not possible to do the capture, we store the error for a short amount of time'''
|
||||
|
@ -461,7 +461,6 @@ class Lookyloo():
|
|||
# Someone is probably abusing the system with useless URLs, remove them from the index
|
||||
query['listing'] = 0
|
||||
p.hset(perma_uuid, mapping=query) # This will add the remaining entries that are lookyloo specific
|
||||
p.zadd('to_capture', {perma_uuid: priority})
|
||||
p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
|
||||
p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
|
||||
p.execute()
|
||||
|
|
|
@ -140,7 +140,7 @@ python-versions = "*"
|
|||
|
||||
[[package]]
|
||||
name = "certifi"
|
||||
version = "2022.6.15.2"
|
||||
version = "2022.9.14"
|
||||
description = "Python package for providing Mozilla's CA Bundle."
|
||||
category = "main"
|
||||
optional = false
|
||||
|
@ -436,7 +436,7 @@ python-versions = ">=3.6"
|
|||
|
||||
[[package]]
|
||||
name = "idna"
|
||||
version = "3.3"
|
||||
version = "3.4"
|
||||
description = "Internationalized Domain Names in Applications (IDNA)"
|
||||
category = "main"
|
||||
optional = false
|
||||
|
@ -563,6 +563,21 @@ pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2"
|
|||
format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
|
||||
format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
|
||||
|
||||
[[package]]
|
||||
name = "lacuscore"
|
||||
version = "0.2.0"
|
||||
description = "Core of Lacus, usable as a module"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.8,<4.0"
|
||||
|
||||
[package.dependencies]
|
||||
playwrightcapture = ">=1.15.2,<2.0.0"
|
||||
requests = ">=2.28.1,<3.0.0"
|
||||
|
||||
[package.extras]
|
||||
docs = ["Sphinx (>=5.1.1,<6.0.0)"]
|
||||
|
||||
[[package]]
|
||||
name = "lief"
|
||||
version = "0.12.1"
|
||||
|
@ -741,7 +756,7 @@ websockets = "10.1"
|
|||
|
||||
[[package]]
|
||||
name = "playwrightcapture"
|
||||
version = "1.15.1"
|
||||
version = "1.15.2"
|
||||
description = "A simple library to capture websites using playwright"
|
||||
category = "main"
|
||||
optional = false
|
||||
|
@ -1437,7 +1452,7 @@ misp = ["python-magic", "pydeep2"]
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = ">=3.8,<3.11"
|
||||
content-hash = "46db3fd177fb45d82947eb86e58fd45d5c7396b712852b76e22d0ad9901abc3a"
|
||||
content-hash = "90f66d42b27d094218cd3400cde9ca71c9b21ab0107a63b9fe3cffb4758ba47e"
|
||||
|
||||
[metadata.files]
|
||||
aiohttp = [
|
||||
|
@ -1600,8 +1615,8 @@ cchardet = [
|
|||
{file = "cchardet-2.1.7.tar.gz", hash = "sha256:c428b6336545053c2589f6caf24ea32276c6664cb86db817e03a94c60afa0eaf"},
|
||||
]
|
||||
certifi = [
|
||||
{file = "certifi-2022.6.15.2-py3-none-any.whl", hash = "sha256:0aa1a42fbd57645fabeb6290a7687c21755b0344ecaeaa05f4e9f6207ae2e9a8"},
|
||||
{file = "certifi-2022.6.15.2.tar.gz", hash = "sha256:aa08c101214127b9b0472ca6338315113c9487d45376fd3e669201b477c71003"},
|
||||
{file = "certifi-2022.9.14-py3-none-any.whl", hash = "sha256:e232343de1ab72c2aa521b625c80f699e356830fd0e2c620b465b304b17b0516"},
|
||||
{file = "certifi-2022.9.14.tar.gz", hash = "sha256:36973885b9542e6bd01dea287b2b4b3b21236307c56324fcc3f1160f2d655ed5"},
|
||||
]
|
||||
chardet = [
|
||||
{file = "chardet-5.0.0-py3-none-any.whl", hash = "sha256:d3e64f022d254183001eccc5db4040520c0f23b1a3f33d6413e099eb7f126557"},
|
||||
|
@ -1846,8 +1861,8 @@ hiredis = [
|
|||
{file = "hiredis-2.0.0.tar.gz", hash = "sha256:81d6d8e39695f2c37954d1011c0480ef7cf444d4e3ae24bc5e89ee5de360139a"},
|
||||
]
|
||||
idna = [
|
||||
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
|
||||
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
|
||||
{file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
|
||||
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
|
||||
]
|
||||
importlib-metadata = [
|
||||
{file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"},
|
||||
|
@ -1877,6 +1892,10 @@ jsonschema = [
|
|||
{file = "jsonschema-4.16.0-py3-none-any.whl", hash = "sha256:9e74b8f9738d6a946d70705dc692b74b5429cd0960d58e79ffecfc43b2221eb9"},
|
||||
{file = "jsonschema-4.16.0.tar.gz", hash = "sha256:165059f076eff6971bae5b742fc029a7b4ef3f9bcf04c14e4776a7605de14b23"},
|
||||
]
|
||||
lacuscore = [
|
||||
{file = "lacuscore-0.2.0-py3-none-any.whl", hash = "sha256:3ab0bb52f82a834dc24f9fbeefd39b9dd7694953f73b1e0621e9e876ea827a4c"},
|
||||
{file = "lacuscore-0.2.0.tar.gz", hash = "sha256:9b7f54b4ce9deba3c8b6f7566e523de1523ba560095a0567da0ffe45baa4417b"},
|
||||
]
|
||||
lief = [
|
||||
{file = "lief-0.12.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:4fbbc9d520de87ac22210c62d22a9b088e5460f9a028741311e6f68ef8877ddd"},
|
||||
{file = "lief-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:443e4494df448ea1a021976258c7a6aca27d81b0612783fa3a84fab196fb9fcb"},
|
||||
|
@ -2235,8 +2254,8 @@ playwright = [
|
|||
{file = "playwright-1.25.2-py3-none-win_amd64.whl", hash = "sha256:68ae739f82b78717123eb9d1b28b4619f0b368b88ef73c633681e267680697cd"},
|
||||
]
|
||||
playwrightcapture = [
|
||||
{file = "PlaywrightCapture-1.15.1.tar.gz", hash = "sha256:af8efda02e0cf7df32dd4d5d3b72bd04fae0a0e22521195989205fe40f9dfb59"},
|
||||
{file = "playwrightcapture-1.15.1-py3-none-any.whl", hash = "sha256:7ea84dc4590ad2bd0d26dc2e6019ca6873c676f9b901eb13dbfef69c916e5e5c"},
|
||||
{file = "PlaywrightCapture-1.15.2.tar.gz", hash = "sha256:a8a00dd779b7bf0dee18fbe3c19314de3c7dd2387a42f26c0784474b8b2e485f"},
|
||||
{file = "playwrightcapture-1.15.2-py3-none-any.whl", hash = "sha256:297aaf265a2646bf9e58632f2322dd5b89bd1874491f1dd0f275eaebe34ebc11"},
|
||||
]
|
||||
prompt-toolkit = [
|
||||
{file = "prompt_toolkit-3.0.31-py3-none-any.whl", hash = "sha256:9696f386133df0fc8ca5af4895afe5d78f5fcfe5258111c2a79a1c3e41ffa96d"},
|
||||
|
|
|
@ -63,11 +63,12 @@ lief = "^0.12.1"
|
|||
ua-parser = "^0.16.1"
|
||||
Flask-Login = "^0.6.2"
|
||||
har2tree = "^1.15.4"
|
||||
playwrightcapture = "^1.15.1"
|
||||
playwrightcapture = "^1.15.2"
|
||||
passivetotal = "^2.5.9"
|
||||
werkzeug = "2.1.2"
|
||||
filetype = "^1.1.0"
|
||||
pypandora = "^1.1.2"
|
||||
lacuscore = "^0.2.0"
|
||||
|
||||
[tool.poetry.extras]
|
||||
misp = ['python-magic', 'pydeep2']
|
||||
|
@ -76,7 +77,7 @@ misp = ['python-magic', 'pydeep2']
|
|||
mypy = "^0.971"
|
||||
ipython = "^8.5.0"
|
||||
types-redis = "^4.3.20"
|
||||
types-requests = "^2.28.9"
|
||||
types-requests = "^2.28.10"
|
||||
types-Flask = "^1.1.6"
|
||||
types-pkg-resources = "^0.1.3"
|
||||
types-Deprecated = "^1.2.9"
|
||||
|
|
|
@ -21,13 +21,13 @@ from flask import (Flask, Response, flash, jsonify, redirect, render_template,
|
|||
from flask_bootstrap import Bootstrap5 # type: ignore
|
||||
from flask_cors import CORS # type: ignore
|
||||
from flask_restx import Api # type: ignore
|
||||
from lacuscore import CaptureStatus
|
||||
from pymisp import MISPEvent, MISPServerError
|
||||
from werkzeug.security import check_password_hash
|
||||
|
||||
from lookyloo.default import get_config
|
||||
from lookyloo.exceptions import MissingUUID, NoValidHarFile
|
||||
from lookyloo.helpers import (CaptureStatus, get_taxonomies,
|
||||
UserAgents, load_cookies)
|
||||
from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies
|
||||
from lookyloo.lookyloo import Indexing, Lookyloo
|
||||
|
||||
from .genericapi import api as generic_api
|
||||
|
|
Loading…
Reference in New Issue