chg: Bump lacuscore

lacus
Raphaël Vinot 2022-09-20 14:49:58 +02:00
parent 1157428d88
commit 3c8fcb3700
6 changed files with 70 additions and 65 deletions

View File

@ -6,14 +6,13 @@ import logging
from datetime import datetime from datetime import datetime
from pathlib import Path from pathlib import Path
from typing import Dict, List, Optional, Tuple from typing import Dict, Optional, Set
from lacuscore import LacusCore from lacuscore import LacusCore, CaptureStatus
from redis.asyncio import Redis from redis import Redis
from redis import Redis as RedisSync
from lookyloo.default import AbstractManager, get_config, get_socket_path, safe_create_dir from lookyloo.default import AbstractManager, get_config, get_socket_path, safe_create_dir
from lookyloo.helpers import get_captures_dir, CaptureStatus from lookyloo.helpers import get_captures_dir
from lookyloo.modules import FOX from lookyloo.modules import FOX
@ -28,8 +27,9 @@ class AsyncCapture(AbstractManager):
self.script_name = 'async_capture' self.script_name = 'async_capture'
self.only_global_lookups: bool = get_config('generic', 'only_global_lookups') self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
self.capture_dir: Path = get_captures_dir() self.capture_dir: Path = get_captures_dir()
self.redis_sync: RedisSync = RedisSync(unix_socket_path=get_socket_path('cache')) self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
self.lacus = LacusCore(self.redis_sync) self.lacus = LacusCore(self.redis)
self.captures: Set[asyncio.Task] = set()
self.fox = FOX(get_config('modules', 'FOX')) self.fox = FOX(get_config('modules', 'FOX'))
if not self.fox.available: if not self.fox.available:
@ -41,15 +41,13 @@ class AsyncCapture(AbstractManager):
async def process_capture_queue(self) -> None: async def process_capture_queue(self) -> None:
'''Process a query from the capture queue''' '''Process a query from the capture queue'''
value: List[Tuple[bytes, float]] = await self.redis.zpopmax('to_capture') uuid = await self.lacus.consume_queue()
if not value or not value[0]: if not uuid:
# The queue was consumed by an other process.
return return
uuid: str = value[0][0].decode() self.redis.sadd('ongoing', uuid)
queue: Optional[bytes] = await self.redis.getdel(f'{uuid}_mgmt') queue: Optional[bytes] = self.redis.getdel(f'{uuid}_mgmt')
await self.redis.sadd('ongoing', uuid)
to_capture: Dict[bytes, bytes] = await self.redis.hgetall(uuid) to_capture: Dict[bytes, bytes] = self.redis.hgetall(uuid)
if get_config('generic', 'default_public'): if get_config('generic', 'default_public'):
# By default, the captures are on the index, unless the user mark them as un-listed # By default, the captures are on the index, unless the user mark them as un-listed
@ -58,19 +56,17 @@ class AsyncCapture(AbstractManager):
# By default, the captures are not on the index, unless the user mark them as listed # By default, the captures are not on the index, unless the user mark them as listed
listing = True if (b'listing' in to_capture and to_capture[b'listing'].lower() in [b'true', b'1']) else False listing = True if (b'listing' in to_capture and to_capture[b'listing'].lower() in [b'true', b'1']) else False
status, result = await self.lacus.capture(uuid)
while True: while True:
entries = self.lacus.get_capture(uuid, decode=True) entries = self.lacus.get_capture(uuid, decode=True)
if entries['status'] == CaptureStatus.DONE.value: if entries['status'] == CaptureStatus.DONE:
break break
elif entries['status'] == CaptureStatus.UNKNOWN.value: elif entries['status'] == CaptureStatus.UNKNOWN:
self.logger.warning(f'Unable to find {uuid}.') self.logger.warning(f'Unable to find {uuid}.')
break break
elif entries['status'] == CaptureStatus.QUEUED.value: elif entries['status'] == CaptureStatus.QUEUED:
self.logger.info(f'{uuid} is in the queue.') self.logger.info(f'{uuid} is in the queue.')
await asyncio.sleep(5) await asyncio.sleep(5)
elif entries['status'] == CaptureStatus.ONGOING.value: elif entries['status'] == CaptureStatus.ONGOING:
self.logger.info(f'{uuid} is ongoing.') self.logger.info(f'{uuid} is ongoing.')
await asyncio.sleep(5) await asyncio.sleep(5)
else: else:
@ -135,23 +131,22 @@ class AsyncCapture(AbstractManager):
with (dirpath / '0.cookies.json').open('w') as _cookies: with (dirpath / '0.cookies.json').open('w') as _cookies:
json.dump(entries['cookies'], _cookies) json.dump(entries['cookies'], _cookies)
async with self.redis.pipeline() as lazy_cleanup: with self.redis.pipeline() as lazy_cleanup:
await lazy_cleanup.hset('lookup_dirs', uuid, str(dirpath)) lazy_cleanup.hset('lookup_dirs', uuid, str(dirpath))
if queue and await self.redis.zscore('queues', queue): if queue and self.redis.zscore('queues', queue):
await lazy_cleanup.zincrby('queues', -1, queue) lazy_cleanup.zincrby('queues', -1, queue)
await lazy_cleanup.srem('ongoing', uuid) lazy_cleanup.srem('ongoing', uuid)
await lazy_cleanup.delete(uuid) lazy_cleanup.delete(uuid)
# make sure to expire the key if nothing was processed for a while (= queues empty) # make sure to expire the key if nothing was processed for a while (= queues empty)
await lazy_cleanup.expire('queues', 600) lazy_cleanup.expire('queues', 600)
await lazy_cleanup.execute() lazy_cleanup.execute()
async def _to_run_forever_async(self): async def _to_run_forever_async(self):
self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache')) capture = asyncio.create_task(self.process_capture_queue())
while await self.redis.exists('to_capture'): capture.add_done_callback(self.captures.discard)
await self.process_capture_queue() self.captures.add(capture)
if self.shutdown_requested(): while len(self.captures) >= get_config('generic', 'async_capture_processes'):
break await asyncio.sleep(1)
await self.redis.close()
def main(): def main():

View File

@ -4,7 +4,6 @@ import json
import logging import logging
from datetime import datetime, timedelta from datetime import datetime, timedelta
from enum import IntEnum, unique
from functools import lru_cache from functools import lru_cache
from importlib.metadata import version from importlib.metadata import version
from io import BufferedIOBase from io import BufferedIOBase
@ -26,14 +25,6 @@ from .default import get_homedir, safe_create_dir, get_config
logger = logging.getLogger('Lookyloo - Helpers') logger = logging.getLogger('Lookyloo - Helpers')
@unique
class CaptureStatus(IntEnum):
UNKNOWN = -1
QUEUED = 0
DONE = 1
ONGOING = 2
# This method is used in json.dump or json.dumps calls as the default parameter: # This method is used in json.dump or json.dumps calls as the default parameter:
# json.dumps(..., default=dump_to_json) # json.dumps(..., default=dump_to_json)
def serialize_to_json(obj: Union[Set]) -> Union[List]: def serialize_to_json(obj: Union[Set]) -> Union[List]:

View File

@ -19,7 +19,7 @@ from zipfile import ZipFile
from defang import defang # type: ignore from defang import defang # type: ignore
from har2tree import CrawledTree, HostNode, URLNode from har2tree import CrawledTree, HostNode, URLNode
from lacuscore import LacusCore from lacuscore import LacusCore, CaptureStatus
from PIL import Image, UnidentifiedImageError from PIL import Image, UnidentifiedImageError
from playwrightcapture import get_devices from playwrightcapture import get_devices
from pymisp import MISPAttribute, MISPEvent, MISPObject from pymisp import MISPAttribute, MISPEvent, MISPObject
@ -31,7 +31,7 @@ from .context import Context
from .default import LookylooException, get_homedir, get_config, get_socket_path from .default import LookylooException, get_homedir, get_config, get_socket_path
from .exceptions import (MissingCaptureDirectory, from .exceptions import (MissingCaptureDirectory,
MissingUUID, TreeNeedsRebuild, NoValidHarFile) MissingUUID, TreeNeedsRebuild, NoValidHarFile)
from .helpers import (CaptureStatus, get_captures_dir, get_email_template, from .helpers import (get_captures_dir, get_email_template,
get_resources_hashes, get_taxonomies, get_resources_hashes, get_taxonomies,
uniq_domains, ParsedUserAgent, load_cookies, UserAgents) uniq_domains, ParsedUserAgent, load_cookies, UserAgents)
from .indexing import Indexing from .indexing import Indexing
@ -101,7 +101,8 @@ class Lookyloo():
self._captures_index = CapturesIndex(self.redis, self.context) self._captures_index = CapturesIndex(self.redis, self.context)
self.logger.info('Index initialized.') self.logger.info('Index initialized.')
self.lacus = LacusCore(self.redis, get_config('generic', 'tor_proxy')) self.lacus = LacusCore(self.redis, get_config('generic', 'tor_proxy'),
get_config('generic', 'only_global_lookups'))
@property @property
def redis(self): def redis(self):
@ -347,13 +348,12 @@ class Lookyloo():
def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus: def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
'''Returns the status (queued, ongoing, done, or UUID unknown)''' '''Returns the status (queued, ongoing, done, or UUID unknown)'''
if self.redis.zrank('to_capture', capture_uuid) is not None: if self.redis.hexists('lookup_dirs', capture_uuid):
return CaptureStatus.QUEUED
elif self.redis.hexists('lookup_dirs', capture_uuid):
return CaptureStatus.DONE return CaptureStatus.DONE
elif self.redis.sismember('ongoing', capture_uuid): elif self.redis.sismember('ongoing', capture_uuid):
# Post-processing on lookyloo's side
return CaptureStatus.ONGOING return CaptureStatus.ONGOING
return CaptureStatus.UNKNOWN return self.lacus.get_capture_status(capture_uuid)
def try_error_status(self, capture_uuid: str, /) -> Optional[str]: def try_error_status(self, capture_uuid: str, /) -> Optional[str]:
'''If it is not possible to do the capture, we store the error for a short amount of time''' '''If it is not possible to do the capture, we store the error for a short amount of time'''
@ -461,7 +461,6 @@ class Lookyloo():
# Someone is probably abusing the system with useless URLs, remove them from the index # Someone is probably abusing the system with useless URLs, remove them from the index
query['listing'] = 0 query['listing'] = 0
p.hset(perma_uuid, mapping=query) # This will add the remaining entries that are lookyloo specific p.hset(perma_uuid, mapping=query) # This will add the remaining entries that are lookyloo specific
p.zadd('to_capture', {perma_uuid: priority})
p.zincrby('queues', 1, f'{source}|{authenticated}|{user}') p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}') p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
p.execute() p.execute()

39
poetry.lock generated
View File

@ -140,7 +140,7 @@ python-versions = "*"
[[package]] [[package]]
name = "certifi" name = "certifi"
version = "2022.6.15.2" version = "2022.9.14"
description = "Python package for providing Mozilla's CA Bundle." description = "Python package for providing Mozilla's CA Bundle."
category = "main" category = "main"
optional = false optional = false
@ -436,7 +436,7 @@ python-versions = ">=3.6"
[[package]] [[package]]
name = "idna" name = "idna"
version = "3.3" version = "3.4"
description = "Internationalized Domain Names in Applications (IDNA)" description = "Internationalized Domain Names in Applications (IDNA)"
category = "main" category = "main"
optional = false optional = false
@ -563,6 +563,21 @@ pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2"
format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"] format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"] format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
[[package]]
name = "lacuscore"
version = "0.2.0"
description = "Core of Lacus, usable as a module"
category = "main"
optional = false
python-versions = ">=3.8,<4.0"
[package.dependencies]
playwrightcapture = ">=1.15.2,<2.0.0"
requests = ">=2.28.1,<3.0.0"
[package.extras]
docs = ["Sphinx (>=5.1.1,<6.0.0)"]
[[package]] [[package]]
name = "lief" name = "lief"
version = "0.12.1" version = "0.12.1"
@ -741,7 +756,7 @@ websockets = "10.1"
[[package]] [[package]]
name = "playwrightcapture" name = "playwrightcapture"
version = "1.15.1" version = "1.15.2"
description = "A simple library to capture websites using playwright" description = "A simple library to capture websites using playwright"
category = "main" category = "main"
optional = false optional = false
@ -1437,7 +1452,7 @@ misp = ["python-magic", "pydeep2"]
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = ">=3.8,<3.11" python-versions = ">=3.8,<3.11"
content-hash = "46db3fd177fb45d82947eb86e58fd45d5c7396b712852b76e22d0ad9901abc3a" content-hash = "90f66d42b27d094218cd3400cde9ca71c9b21ab0107a63b9fe3cffb4758ba47e"
[metadata.files] [metadata.files]
aiohttp = [ aiohttp = [
@ -1600,8 +1615,8 @@ cchardet = [
{file = "cchardet-2.1.7.tar.gz", hash = "sha256:c428b6336545053c2589f6caf24ea32276c6664cb86db817e03a94c60afa0eaf"}, {file = "cchardet-2.1.7.tar.gz", hash = "sha256:c428b6336545053c2589f6caf24ea32276c6664cb86db817e03a94c60afa0eaf"},
] ]
certifi = [ certifi = [
{file = "certifi-2022.6.15.2-py3-none-any.whl", hash = "sha256:0aa1a42fbd57645fabeb6290a7687c21755b0344ecaeaa05f4e9f6207ae2e9a8"}, {file = "certifi-2022.9.14-py3-none-any.whl", hash = "sha256:e232343de1ab72c2aa521b625c80f699e356830fd0e2c620b465b304b17b0516"},
{file = "certifi-2022.6.15.2.tar.gz", hash = "sha256:aa08c101214127b9b0472ca6338315113c9487d45376fd3e669201b477c71003"}, {file = "certifi-2022.9.14.tar.gz", hash = "sha256:36973885b9542e6bd01dea287b2b4b3b21236307c56324fcc3f1160f2d655ed5"},
] ]
chardet = [ chardet = [
{file = "chardet-5.0.0-py3-none-any.whl", hash = "sha256:d3e64f022d254183001eccc5db4040520c0f23b1a3f33d6413e099eb7f126557"}, {file = "chardet-5.0.0-py3-none-any.whl", hash = "sha256:d3e64f022d254183001eccc5db4040520c0f23b1a3f33d6413e099eb7f126557"},
@ -1846,8 +1861,8 @@ hiredis = [
{file = "hiredis-2.0.0.tar.gz", hash = "sha256:81d6d8e39695f2c37954d1011c0480ef7cf444d4e3ae24bc5e89ee5de360139a"}, {file = "hiredis-2.0.0.tar.gz", hash = "sha256:81d6d8e39695f2c37954d1011c0480ef7cf444d4e3ae24bc5e89ee5de360139a"},
] ]
idna = [ idna = [
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"}, {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"}, {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
] ]
importlib-metadata = [ importlib-metadata = [
{file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"}, {file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"},
@ -1877,6 +1892,10 @@ jsonschema = [
{file = "jsonschema-4.16.0-py3-none-any.whl", hash = "sha256:9e74b8f9738d6a946d70705dc692b74b5429cd0960d58e79ffecfc43b2221eb9"}, {file = "jsonschema-4.16.0-py3-none-any.whl", hash = "sha256:9e74b8f9738d6a946d70705dc692b74b5429cd0960d58e79ffecfc43b2221eb9"},
{file = "jsonschema-4.16.0.tar.gz", hash = "sha256:165059f076eff6971bae5b742fc029a7b4ef3f9bcf04c14e4776a7605de14b23"}, {file = "jsonschema-4.16.0.tar.gz", hash = "sha256:165059f076eff6971bae5b742fc029a7b4ef3f9bcf04c14e4776a7605de14b23"},
] ]
lacuscore = [
{file = "lacuscore-0.2.0-py3-none-any.whl", hash = "sha256:3ab0bb52f82a834dc24f9fbeefd39b9dd7694953f73b1e0621e9e876ea827a4c"},
{file = "lacuscore-0.2.0.tar.gz", hash = "sha256:9b7f54b4ce9deba3c8b6f7566e523de1523ba560095a0567da0ffe45baa4417b"},
]
lief = [ lief = [
{file = "lief-0.12.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:4fbbc9d520de87ac22210c62d22a9b088e5460f9a028741311e6f68ef8877ddd"}, {file = "lief-0.12.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:4fbbc9d520de87ac22210c62d22a9b088e5460f9a028741311e6f68ef8877ddd"},
{file = "lief-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:443e4494df448ea1a021976258c7a6aca27d81b0612783fa3a84fab196fb9fcb"}, {file = "lief-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:443e4494df448ea1a021976258c7a6aca27d81b0612783fa3a84fab196fb9fcb"},
@ -2235,8 +2254,8 @@ playwright = [
{file = "playwright-1.25.2-py3-none-win_amd64.whl", hash = "sha256:68ae739f82b78717123eb9d1b28b4619f0b368b88ef73c633681e267680697cd"}, {file = "playwright-1.25.2-py3-none-win_amd64.whl", hash = "sha256:68ae739f82b78717123eb9d1b28b4619f0b368b88ef73c633681e267680697cd"},
] ]
playwrightcapture = [ playwrightcapture = [
{file = "PlaywrightCapture-1.15.1.tar.gz", hash = "sha256:af8efda02e0cf7df32dd4d5d3b72bd04fae0a0e22521195989205fe40f9dfb59"}, {file = "PlaywrightCapture-1.15.2.tar.gz", hash = "sha256:a8a00dd779b7bf0dee18fbe3c19314de3c7dd2387a42f26c0784474b8b2e485f"},
{file = "playwrightcapture-1.15.1-py3-none-any.whl", hash = "sha256:7ea84dc4590ad2bd0d26dc2e6019ca6873c676f9b901eb13dbfef69c916e5e5c"}, {file = "playwrightcapture-1.15.2-py3-none-any.whl", hash = "sha256:297aaf265a2646bf9e58632f2322dd5b89bd1874491f1dd0f275eaebe34ebc11"},
] ]
prompt-toolkit = [ prompt-toolkit = [
{file = "prompt_toolkit-3.0.31-py3-none-any.whl", hash = "sha256:9696f386133df0fc8ca5af4895afe5d78f5fcfe5258111c2a79a1c3e41ffa96d"}, {file = "prompt_toolkit-3.0.31-py3-none-any.whl", hash = "sha256:9696f386133df0fc8ca5af4895afe5d78f5fcfe5258111c2a79a1c3e41ffa96d"},

View File

@ -63,11 +63,12 @@ lief = "^0.12.1"
ua-parser = "^0.16.1" ua-parser = "^0.16.1"
Flask-Login = "^0.6.2" Flask-Login = "^0.6.2"
har2tree = "^1.15.4" har2tree = "^1.15.4"
playwrightcapture = "^1.15.1" playwrightcapture = "^1.15.2"
passivetotal = "^2.5.9" passivetotal = "^2.5.9"
werkzeug = "2.1.2" werkzeug = "2.1.2"
filetype = "^1.1.0" filetype = "^1.1.0"
pypandora = "^1.1.2" pypandora = "^1.1.2"
lacuscore = "^0.2.0"
[tool.poetry.extras] [tool.poetry.extras]
misp = ['python-magic', 'pydeep2'] misp = ['python-magic', 'pydeep2']
@ -76,7 +77,7 @@ misp = ['python-magic', 'pydeep2']
mypy = "^0.971" mypy = "^0.971"
ipython = "^8.5.0" ipython = "^8.5.0"
types-redis = "^4.3.20" types-redis = "^4.3.20"
types-requests = "^2.28.9" types-requests = "^2.28.10"
types-Flask = "^1.1.6" types-Flask = "^1.1.6"
types-pkg-resources = "^0.1.3" types-pkg-resources = "^0.1.3"
types-Deprecated = "^1.2.9" types-Deprecated = "^1.2.9"

View File

@ -21,13 +21,13 @@ from flask import (Flask, Response, flash, jsonify, redirect, render_template,
from flask_bootstrap import Bootstrap5 # type: ignore from flask_bootstrap import Bootstrap5 # type: ignore
from flask_cors import CORS # type: ignore from flask_cors import CORS # type: ignore
from flask_restx import Api # type: ignore from flask_restx import Api # type: ignore
from lacuscore import CaptureStatus
from pymisp import MISPEvent, MISPServerError from pymisp import MISPEvent, MISPServerError
from werkzeug.security import check_password_hash from werkzeug.security import check_password_hash
from lookyloo.default import get_config from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.helpers import (CaptureStatus, get_taxonomies, from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies
UserAgents, load_cookies)
from lookyloo.lookyloo import Indexing, Lookyloo from lookyloo.lookyloo import Indexing, Lookyloo
from .genericapi import api as generic_api from .genericapi import api as generic_api