chg: Bump lacuscore

pull/523/head
Raphaël Vinot 2022-09-20 14:49:58 +02:00
parent 9189888a0d
commit d38b612c37
6 changed files with 70 additions and 65 deletions

View File

@@ -6,14 +6,13 @@ import logging
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from typing import Dict, Optional, Set
from lacuscore import LacusCore
from redis.asyncio import Redis
from redis import Redis as RedisSync
from lacuscore import LacusCore, CaptureStatus
from redis import Redis
from lookyloo.default import AbstractManager, get_config, get_socket_path, safe_create_dir
from lookyloo.helpers import get_captures_dir, CaptureStatus
from lookyloo.helpers import get_captures_dir
from lookyloo.modules import FOX
@@ -28,8 +27,9 @@ class AsyncCapture(AbstractManager):
self.script_name = 'async_capture'
self.only_global_lookups: bool = get_config('generic', 'only_global_lookups')
self.capture_dir: Path = get_captures_dir()
self.redis_sync: RedisSync = RedisSync(unix_socket_path=get_socket_path('cache'))
self.lacus = LacusCore(self.redis_sync)
self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
self.lacus = LacusCore(self.redis)
self.captures: Set[asyncio.Task] = set()
self.fox = FOX(get_config('modules', 'FOX'))
if not self.fox.available:
@@ -41,15 +41,13 @@ class AsyncCapture(AbstractManager):
async def process_capture_queue(self) -> None:
'''Process a query from the capture queue'''
value: List[Tuple[bytes, float]] = await self.redis.zpopmax('to_capture')
if not value or not value[0]:
# The queue was consumed by another process.
uuid = await self.lacus.consume_queue()
if not uuid:
return
uuid: str = value[0][0].decode()
queue: Optional[bytes] = await self.redis.getdel(f'{uuid}_mgmt')
await self.redis.sadd('ongoing', uuid)
self.redis.sadd('ongoing', uuid)
queue: Optional[bytes] = self.redis.getdel(f'{uuid}_mgmt')
to_capture: Dict[bytes, bytes] = await self.redis.hgetall(uuid)
to_capture: Dict[bytes, bytes] = self.redis.hgetall(uuid)
if get_config('generic', 'default_public'):
# By default, the captures are on the index, unless the user marks them as unlisted
@@ -58,19 +56,17 @@
# By default, the captures are not on the index, unless the user marks them as listed
listing = True if (b'listing' in to_capture and to_capture[b'listing'].lower() in [b'true', b'1']) else False
status, result = await self.lacus.capture(uuid)
while True:
entries = self.lacus.get_capture(uuid, decode=True)
if entries['status'] == CaptureStatus.DONE.value:
if entries['status'] == CaptureStatus.DONE:
break
elif entries['status'] == CaptureStatus.UNKNOWN.value:
elif entries['status'] == CaptureStatus.UNKNOWN:
self.logger.warning(f'Unable to find {uuid}.')
break
elif entries['status'] == CaptureStatus.QUEUED.value:
elif entries['status'] == CaptureStatus.QUEUED:
self.logger.info(f'{uuid} is in the queue.')
await asyncio.sleep(5)
elif entries['status'] == CaptureStatus.ONGOING.value:
elif entries['status'] == CaptureStatus.ONGOING:
self.logger.info(f'{uuid} is ongoing.')
await asyncio.sleep(5)
else:
@@ -135,23 +131,22 @@
with (dirpath / '0.cookies.json').open('w') as _cookies:
json.dump(entries['cookies'], _cookies)
async with self.redis.pipeline() as lazy_cleanup:
await lazy_cleanup.hset('lookup_dirs', uuid, str(dirpath))
if queue and await self.redis.zscore('queues', queue):
await lazy_cleanup.zincrby('queues', -1, queue)
await lazy_cleanup.srem('ongoing', uuid)
await lazy_cleanup.delete(uuid)
with self.redis.pipeline() as lazy_cleanup:
lazy_cleanup.hset('lookup_dirs', uuid, str(dirpath))
if queue and self.redis.zscore('queues', queue):
lazy_cleanup.zincrby('queues', -1, queue)
lazy_cleanup.srem('ongoing', uuid)
lazy_cleanup.delete(uuid)
# make sure to expire the key if nothing was processed for a while (= queues empty)
await lazy_cleanup.expire('queues', 600)
await lazy_cleanup.execute()
lazy_cleanup.expire('queues', 600)
lazy_cleanup.execute()
async def _to_run_forever_async(self):
self.redis: Redis = Redis(unix_socket_path=get_socket_path('cache'))
while await self.redis.exists('to_capture'):
await self.process_capture_queue()
if self.shutdown_requested():
break
await self.redis.close()
capture = asyncio.create_task(self.process_capture_queue())
capture.add_done_callback(self.captures.discard)
self.captures.add(capture)
while len(self.captures) >= get_config('generic', 'async_capture_processes'):
await asyncio.sleep(1)
def main():
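Taken together, the async_capture changes above drop lookyloo's own 'to_capture' Redis zset and let lacuscore drive the queue. Below is a minimal sketch of the resulting flow, not the actual implementation: the helper names (process_one_capture, tick) are illustrative, and the lacuscore calls (consume_queue(), get_capture(uuid, decode=True), CaptureStatus) are assumed to behave exactly as this diff uses them.

import asyncio
from typing import Set

from lacuscore import LacusCore, CaptureStatus

async def process_one_capture(lacus: LacusCore) -> None:
    # consume_queue() picks the next enqueued capture, runs it, and returns its UUID
    # (falsy when the queue is empty).
    uuid = await lacus.consume_queue()
    if not uuid:
        return
    while True:
        # get_capture() is synchronous; the 'status' entry compares directly against
        # CaptureStatus members (no more .value).
        entries = lacus.get_capture(uuid, decode=True)
        if entries['status'] in (CaptureStatus.DONE, CaptureStatus.UNKNOWN):
            break
        # QUEUED or ONGOING: check again in a few seconds.
        await asyncio.sleep(5)
    # ... write the capture results to disk and clean up the per-capture Redis keys.

async def tick(lacus: LacusCore, captures: Set[asyncio.Task], max_parallel: int) -> None:
    # One pass of _to_run_forever_async(): spawn a worker task, keep a strong reference
    # in `captures`, let the task discard itself on completion, and back off while the
    # number of in-flight captures is at the configured limit.
    task = asyncio.create_task(process_one_capture(lacus))
    task.add_done_callback(captures.discard)
    captures.add(task)
    while len(captures) >= max_parallel:
        await asyncio.sleep(1)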

View File

@@ -4,7 +4,6 @@ import json
import logging
from datetime import datetime, timedelta
from enum import IntEnum, unique
from functools import lru_cache
from importlib.metadata import version
from io import BufferedIOBase
@@ -26,14 +25,6 @@ from .default import get_homedir, safe_create_dir, get_config
logger = logging.getLogger('Lookyloo - Helpers')
@unique
class CaptureStatus(IntEnum):
UNKNOWN = -1
QUEUED = 0
DONE = 1
ONGOING = 2
# This method is used in json.dump or json.dumps calls as the default parameter:
# json.dumps(..., default=serialize_to_json)
def serialize_to_json(obj: Union[Set]) -> Union[List]:
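The CaptureStatus enum deleted from helpers above is not gone: the rest of this diff imports it from lacuscore instead. A small reference sketch, assuming lacuscore's version is still an IntEnum with the same members and values (UNKNOWN = -1, QUEUED = 0, DONE = 1, ONGOING = 2):

from lacuscore import CaptureStatus

# Callers switch from comparing against the integer value to comparing against the member.
# Assuming an IntEnum, both still evaluate to True, but the member form is clearer:
status = CaptureStatus.DONE
print(status == CaptureStatus.DONE)        # new style, used in async_capture above
print(status == CaptureStatus.DONE.value)  # old style; IntEnum members equal their int value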

View File

@@ -19,7 +19,7 @@ from zipfile import ZipFile
from defang import defang # type: ignore
from har2tree import CrawledTree, HostNode, URLNode
from lacuscore import LacusCore
from lacuscore import LacusCore, CaptureStatus
from PIL import Image, UnidentifiedImageError
from playwrightcapture import get_devices
from pymisp import MISPAttribute, MISPEvent, MISPObject
@@ -31,7 +31,7 @@ from .context import Context
from .default import LookylooException, get_homedir, get_config, get_socket_path
from .exceptions import (MissingCaptureDirectory,
MissingUUID, TreeNeedsRebuild, NoValidHarFile)
from .helpers import (CaptureStatus, get_captures_dir, get_email_template,
from .helpers import (get_captures_dir, get_email_template,
get_resources_hashes, get_taxonomies,
uniq_domains, ParsedUserAgent, load_cookies, UserAgents)
from .indexing import Indexing
@@ -101,7 +101,8 @@ class Lookyloo():
self._captures_index = CapturesIndex(self.redis, self.context)
self.logger.info('Index initialized.')
self.lacus = LacusCore(self.redis, get_config('generic', 'tor_proxy'))
self.lacus = LacusCore(self.redis, get_config('generic', 'tor_proxy'),
get_config('generic', 'only_global_lookups'))
@property
def redis(self):
@@ -347,13 +348,12 @@ class Lookyloo():
def get_capture_status(self, capture_uuid: str, /) -> CaptureStatus:
'''Returns the status (queued, ongoing, done, or UUID unknown)'''
if self.redis.zrank('to_capture', capture_uuid) is not None:
return CaptureStatus.QUEUED
elif self.redis.hexists('lookup_dirs', capture_uuid):
if self.redis.hexists('lookup_dirs', capture_uuid):
return CaptureStatus.DONE
elif self.redis.sismember('ongoing', capture_uuid):
# Post-processing on lookyloo's side
return CaptureStatus.ONGOING
return CaptureStatus.UNKNOWN
return self.lacus.get_capture_status(capture_uuid)
def try_error_status(self, capture_uuid: str, /) -> Optional[str]:
'''If it is not possible to do the capture, we store the error for a short amount of time'''
@@ -461,7 +461,6 @@ class Lookyloo():
# Someone is probably abusing the system with useless URLs, remove them from the index
query['listing'] = 0
p.hset(perma_uuid, mapping=query) # This will add the remaining entries that are lookyloo specific
p.zadd('to_capture', {perma_uuid: priority})
p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
p.execute()
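The get_capture_status() hunk above is the other side of the same migration: lookyloo no longer has its own 'to_capture' queue to inspect, so anything it cannot answer from its own bookkeeping is delegated to lacus. A consolidated sketch of the new logic, written as a standalone function for illustration and using a synchronous redis client as the diff does:

from lacuscore import LacusCore, CaptureStatus
from redis import Redis

def get_capture_status(redis: Redis, lacus: LacusCore, capture_uuid: str) -> CaptureStatus:
    # Finished captures are indexed by lookyloo itself.
    if redis.hexists('lookup_dirs', capture_uuid):
        return CaptureStatus.DONE
    # Captures still being post-processed on lookyloo's side.
    if redis.sismember('ongoing', capture_uuid):
        return CaptureStatus.ONGOING
    # Queued, currently capturing, or unknown: lacuscore is now the authority.
    return lacus.get_capture_status(capture_uuid)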

poetry.lock generated
View File

@@ -140,7 +140,7 @@ python-versions = "*"
[[package]]
name = "certifi"
version = "2022.6.15.2"
version = "2022.9.14"
description = "Python package for providing Mozilla's CA Bundle."
category = "main"
optional = false
@@ -436,7 +436,7 @@ python-versions = ">=3.6"
[[package]]
name = "idna"
version = "3.3"
version = "3.4"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "main"
optional = false
@@ -563,6 +563,21 @@ pyrsistent = ">=0.14.0,<0.17.0 || >0.17.0,<0.17.1 || >0.17.1,<0.17.2 || >0.17.2"
format = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3987", "uri-template", "webcolors (>=1.11)"]
format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339-validator", "rfc3986-validator (>0.1.0)", "uri-template", "webcolors (>=1.11)"]
[[package]]
name = "lacuscore"
version = "0.2.0"
description = "Core of Lacus, usable as a module"
category = "main"
optional = false
python-versions = ">=3.8,<4.0"
[package.dependencies]
playwrightcapture = ">=1.15.2,<2.0.0"
requests = ">=2.28.1,<3.0.0"
[package.extras]
docs = ["Sphinx (>=5.1.1,<6.0.0)"]
[[package]]
name = "lief"
version = "0.12.1"
@@ -741,7 +756,7 @@ websockets = "10.1"
[[package]]
name = "playwrightcapture"
version = "1.15.1"
version = "1.15.2"
description = "A simple library to capture websites using playwright"
category = "main"
optional = false
@@ -1437,7 +1452,7 @@ misp = ["python-magic", "pydeep2"]
[metadata]
lock-version = "1.1"
python-versions = ">=3.8,<3.11"
content-hash = "46db3fd177fb45d82947eb86e58fd45d5c7396b712852b76e22d0ad9901abc3a"
content-hash = "90f66d42b27d094218cd3400cde9ca71c9b21ab0107a63b9fe3cffb4758ba47e"
[metadata.files]
aiohttp = [
@@ -1600,8 +1615,8 @@ cchardet = [
{file = "cchardet-2.1.7.tar.gz", hash = "sha256:c428b6336545053c2589f6caf24ea32276c6664cb86db817e03a94c60afa0eaf"},
]
certifi = [
{file = "certifi-2022.6.15.2-py3-none-any.whl", hash = "sha256:0aa1a42fbd57645fabeb6290a7687c21755b0344ecaeaa05f4e9f6207ae2e9a8"},
{file = "certifi-2022.6.15.2.tar.gz", hash = "sha256:aa08c101214127b9b0472ca6338315113c9487d45376fd3e669201b477c71003"},
{file = "certifi-2022.9.14-py3-none-any.whl", hash = "sha256:e232343de1ab72c2aa521b625c80f699e356830fd0e2c620b465b304b17b0516"},
{file = "certifi-2022.9.14.tar.gz", hash = "sha256:36973885b9542e6bd01dea287b2b4b3b21236307c56324fcc3f1160f2d655ed5"},
]
chardet = [
{file = "chardet-5.0.0-py3-none-any.whl", hash = "sha256:d3e64f022d254183001eccc5db4040520c0f23b1a3f33d6413e099eb7f126557"},
@@ -1846,8 +1861,8 @@ hiredis = [
{file = "hiredis-2.0.0.tar.gz", hash = "sha256:81d6d8e39695f2c37954d1011c0480ef7cf444d4e3ae24bc5e89ee5de360139a"},
]
idna = [
{file = "idna-3.3-py3-none-any.whl", hash = "sha256:84d9dd047ffa80596e0f246e2eab0b391788b0503584e8945f2368256d2735ff"},
{file = "idna-3.3.tar.gz", hash = "sha256:9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"},
{file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"},
{file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"},
]
importlib-metadata = [
{file = "importlib_metadata-4.12.0-py3-none-any.whl", hash = "sha256:7401a975809ea1fdc658c3aa4f78cc2195a0e019c5cbc4c06122884e9ae80c23"},
@@ -1877,6 +1892,10 @@ jsonschema = [
{file = "jsonschema-4.16.0-py3-none-any.whl", hash = "sha256:9e74b8f9738d6a946d70705dc692b74b5429cd0960d58e79ffecfc43b2221eb9"},
{file = "jsonschema-4.16.0.tar.gz", hash = "sha256:165059f076eff6971bae5b742fc029a7b4ef3f9bcf04c14e4776a7605de14b23"},
]
lacuscore = [
{file = "lacuscore-0.2.0-py3-none-any.whl", hash = "sha256:3ab0bb52f82a834dc24f9fbeefd39b9dd7694953f73b1e0621e9e876ea827a4c"},
{file = "lacuscore-0.2.0.tar.gz", hash = "sha256:9b7f54b4ce9deba3c8b6f7566e523de1523ba560095a0567da0ffe45baa4417b"},
]
lief = [
{file = "lief-0.12.1-cp310-cp310-macosx_10_14_x86_64.whl", hash = "sha256:4fbbc9d520de87ac22210c62d22a9b088e5460f9a028741311e6f68ef8877ddd"},
{file = "lief-0.12.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:443e4494df448ea1a021976258c7a6aca27d81b0612783fa3a84fab196fb9fcb"},
@@ -2235,8 +2254,8 @@ playwright = [
{file = "playwright-1.25.2-py3-none-win_amd64.whl", hash = "sha256:68ae739f82b78717123eb9d1b28b4619f0b368b88ef73c633681e267680697cd"},
]
playwrightcapture = [
{file = "PlaywrightCapture-1.15.1.tar.gz", hash = "sha256:af8efda02e0cf7df32dd4d5d3b72bd04fae0a0e22521195989205fe40f9dfb59"},
{file = "playwrightcapture-1.15.1-py3-none-any.whl", hash = "sha256:7ea84dc4590ad2bd0d26dc2e6019ca6873c676f9b901eb13dbfef69c916e5e5c"},
{file = "PlaywrightCapture-1.15.2.tar.gz", hash = "sha256:a8a00dd779b7bf0dee18fbe3c19314de3c7dd2387a42f26c0784474b8b2e485f"},
{file = "playwrightcapture-1.15.2-py3-none-any.whl", hash = "sha256:297aaf265a2646bf9e58632f2322dd5b89bd1874491f1dd0f275eaebe34ebc11"},
]
prompt-toolkit = [
{file = "prompt_toolkit-3.0.31-py3-none-any.whl", hash = "sha256:9696f386133df0fc8ca5af4895afe5d78f5fcfe5258111c2a79a1c3e41ffa96d"},

View File

@@ -63,11 +63,12 @@ lief = "^0.12.1"
ua-parser = "^0.16.1"
Flask-Login = "^0.6.2"
har2tree = "^1.15.4"
playwrightcapture = "^1.15.1"
playwrightcapture = "^1.15.2"
passivetotal = "^2.5.9"
werkzeug = "2.1.2"
filetype = "^1.1.0"
pypandora = "^1.1.2"
lacuscore = "^0.2.0"
[tool.poetry.extras]
misp = ['python-magic', 'pydeep2']
@@ -76,7 +77,7 @@ misp = ['python-magic', 'pydeep2']
mypy = "^0.971"
ipython = "^8.5.0"
types-redis = "^4.3.20"
types-requests = "^2.28.9"
types-requests = "^2.28.10"
types-Flask = "^1.1.6"
types-pkg-resources = "^0.1.3"
types-Deprecated = "^1.2.9"

View File

@@ -21,13 +21,13 @@ from flask import (Flask, Response, flash, jsonify, redirect, render_template,
from flask_bootstrap import Bootstrap5 # type: ignore
from flask_cors import CORS # type: ignore
from flask_restx import Api # type: ignore
from lacuscore import CaptureStatus
from pymisp import MISPEvent, MISPServerError
from werkzeug.security import check_password_hash
from lookyloo.default import get_config
from lookyloo.exceptions import MissingUUID, NoValidHarFile
from lookyloo.helpers import (CaptureStatus, get_taxonomies,
UserAgents, load_cookies)
from lookyloo.helpers import get_taxonomies, UserAgents, load_cookies
from lookyloo.lookyloo import Indexing, Lookyloo
from .genericapi import api as generic_api