mirror of https://github.com/CIRCL/lookyloo
chg: Compress HARs by default, update codebase accordingly
parent
15a140471c
commit
447229ced3
|
@ -38,7 +38,9 @@ class Archiver(AbstractManager):
|
|||
self._archive()
|
||||
self._update_all_capture_indexes()
|
||||
self._load_indexes()
|
||||
self._compress_hars()
|
||||
# The HARs are supposedly all compressed so this call shouldn't be required
|
||||
# unless you're processing old captures for the first time.
|
||||
# self._compress_hars()
|
||||
|
||||
def _update_index(self, root_dir: Path) -> None:
|
||||
current_index: Dict[str, str] = {}
|
||||
|
@ -188,12 +190,23 @@ class Archiver(AbstractManager):
|
|||
p.delete(str(capture_path))
|
||||
(capture_path / 'tree.pickle').unlink(missing_ok=True)
|
||||
(capture_path / 'tree.pickle.gz').unlink(missing_ok=True)
|
||||
# If the HAR isn't compressed yet, gzip it before moving the capture
|
||||
for har in capture_path.glob('*.har'):
|
||||
with har.open('rb') as f_in:
|
||||
with gzip.open(f'{har}.gz', 'wb') as f_out:
|
||||
shutil.copyfileobj(f_in, f_out)
|
||||
har.unlink()
|
||||
shutil.move(str(capture_path), str(dest_dir))
|
||||
p.execute()
|
||||
|
||||
self.logger.info('Archiving done.')
|
||||
|
||||
def _compress_hars(self):
|
||||
"""This method is very slow (it checks every single capture for non-compressed HARs)
|
||||
The new approach is to compress the har of every capture by default so this shouldn't be
|
||||
needed anymore. Keeping it here just for reference, or to process old archives that contain
|
||||
non-gzipped HARs.
|
||||
"""
|
||||
self.logger.info('Compressing archived captures')
|
||||
for index in self.archived_captures_dir.glob('*/*/index'):
|
||||
if self.shutdown_requested():
|
||||
|
@ -201,9 +214,7 @@ class Archiver(AbstractManager):
|
|||
break
|
||||
with index.open('r') as _f:
|
||||
for uuid, dirname in csv.reader(_f):
|
||||
for har in (index.parent / dirname).rglob('*.har'):
|
||||
if not har.exists():
|
||||
continue
|
||||
for har in (index.parent / dirname).glob('*.har'):
|
||||
with har.open('rb') as f_in:
|
||||
with gzip.open(f'{har}.gz', 'wb') as f_out:
|
||||
shutil.copyfileobj(f_in, f_out)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
import base64
|
||||
import copy
|
||||
import gzip
|
||||
import json
|
||||
import logging
|
||||
import operator
|
||||
|
@ -1499,8 +1500,8 @@ class Lookyloo():
|
|||
json.dump(error, _error)
|
||||
|
||||
if har:
|
||||
with (dirpath / '0.har').open('w') as _har:
|
||||
json.dump(har, _har)
|
||||
with gzip.open(dirpath / '0.har.gz', 'wt') as f_out:
|
||||
f_out.write(json.dumps(har))
|
||||
|
||||
if png:
|
||||
with (dirpath / '0.png').open('wb') as _img:
|
||||
|
|
|
@ -166,13 +166,13 @@ test = ["astroid", "pytest"]
|
|||
|
||||
[[package]]
|
||||
name = "async-timeout"
|
||||
version = "4.0.2"
|
||||
version = "4.0.3"
|
||||
description = "Timeout context manager for asyncio programs"
|
||||
optional = false
|
||||
python-versions = ">=3.6"
|
||||
python-versions = ">=3.7"
|
||||
files = [
|
||||
{file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"},
|
||||
{file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"},
|
||||
{file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
|
||||
{file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -908,13 +908,13 @@ tornado = ["tornado (>=0.2)"]
|
|||
|
||||
[[package]]
|
||||
name = "har2tree"
|
||||
version = "1.21.6"
|
||||
version = "1.21.7"
|
||||
description = "HTTP Archive (HAR) to ETE Toolkit generator"
|
||||
optional = false
|
||||
python-versions = ">=3.8,<3.12"
|
||||
files = [
|
||||
{file = "har2tree-1.21.6-py3-none-any.whl", hash = "sha256:22e152634d1307a3a096a46581f218b71ca8d7f0aee558eff0aa5303c2130cf2"},
|
||||
{file = "har2tree-1.21.6.tar.gz", hash = "sha256:09e73fbbee97bab0da4e209332d1a017ae844a88d4d77896927a575f83a62926"},
|
||||
{file = "har2tree-1.21.7-py3-none-any.whl", hash = "sha256:ffe14f2d21c53bd95839e67682aff4ecf4bb624bfda0aa23857df6dfe5aa70fa"},
|
||||
{file = "har2tree-1.21.7.tar.gz", hash = "sha256:b03cd8c2ee7e954060c1109c08844c376db9d7fd0e2e2499bbdc5d9fe6879b7f"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
@ -926,7 +926,7 @@ numpy = [
|
|||
{version = "1.23.3", markers = "python_version < \"3.10\""},
|
||||
{version = ">=1.23.4,<2.0.0", markers = "python_version >= \"3.10\""},
|
||||
]
|
||||
publicsuffixlist = ">=0.10.0.20230804,<0.11.0.0"
|
||||
publicsuffixlist = ">=0.10.0.20230811,<0.11.0.0"
|
||||
w3lib = ">=2.1.2,<3.0.0"
|
||||
|
||||
[package.extras]
|
||||
|
@ -1243,13 +1243,13 @@ referencing = ">=0.28.0"
|
|||
|
||||
[[package]]
|
||||
name = "lacuscore"
|
||||
version = "1.6.5"
|
||||
version = "1.6.6"
|
||||
description = "Core of Lacus, usable as a module"
|
||||
optional = false
|
||||
python-versions = ">=3.8,<4.0"
|
||||
files = [
|
||||
{file = "lacuscore-1.6.5-py3-none-any.whl", hash = "sha256:373cde9898938521514d08e3a53e6b8d5c71dac59eaad1a067c33086fddbfd4f"},
|
||||
{file = "lacuscore-1.6.5.tar.gz", hash = "sha256:bc6da53bf58819e995db8f2ed33be032bcc9bac04e06c2f2d752cb30c6965e3a"},
|
||||
{file = "lacuscore-1.6.6-py3-none-any.whl", hash = "sha256:456914de415fba8bbaeef32446ff74703015f55ad3d79691d97515e8eb14dc66"},
|
||||
{file = "lacuscore-1.6.6.tar.gz", hash = "sha256:279087601cca7730c061e4188c27cc7f72246157bde8ecf5dc345aea69e1009e"},
|
||||
]
|
||||
|
||||
[package.dependencies]
|
||||
|
@ -1926,13 +1926,13 @@ files = [
|
|||
|
||||
[[package]]
|
||||
name = "publicsuffixlist"
|
||||
version = "0.10.0.20230806"
|
||||
version = "0.10.0.20230811"
|
||||
description = "publicsuffixlist implement"
|
||||
optional = false
|
||||
python-versions = ">=2.6"
|
||||
files = [
|
||||
{file = "publicsuffixlist-0.10.0.20230806-py2.py3-none-any.whl", hash = "sha256:c05dbd256d049d3fb94405e7e4a5215cffb39e7329471137e04b320d22cbf141"},
|
||||
{file = "publicsuffixlist-0.10.0.20230806.tar.gz", hash = "sha256:8c30ea7a0019386d144ca3db8751f757ee46acc194ea6d9619eb175041491c96"},
|
||||
{file = "publicsuffixlist-0.10.0.20230811-py2.py3-none-any.whl", hash = "sha256:cd84fcabb7d5bbca45ac3a1ea876cd8fac4d093705a827b20afbe34f5b2e70e6"},
|
||||
{file = "publicsuffixlist-0.10.0.20230811.tar.gz", hash = "sha256:0ba20a5fa7b9fe5c6dc787d978c6be212e53c962a1a417a2a5948c9d28a4c549"},
|
||||
]
|
||||
|
||||
[package.extras]
|
||||
|
@ -3154,4 +3154,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
|
|||
[metadata]
|
||||
lock-version = "2.0"
|
||||
python-versions = ">=3.8,<3.12"
|
||||
content-hash = "056a4bf984207efa7930a8af07e93d4fd1ce7751288b0047d64df1918ad92cb1"
|
||||
content-hash = "3d380da51d6c302c872b3bec918b8c3581946d43e91ba047f0984e9325293cdb"
|
||||
|
|
|
@ -60,15 +60,15 @@ pyhashlookup = "^1.2.1"
|
|||
lief = "^0.13.2"
|
||||
ua-parser = "^0.18.0"
|
||||
Flask-Login = "^0.6.2"
|
||||
har2tree = "^1.21.6"
|
||||
har2tree = "^1.21.7"
|
||||
passivetotal = "^2.5.9"
|
||||
werkzeug = "^2.3.6"
|
||||
filetype = "^1.2.0"
|
||||
pypandora = "^1.5.0"
|
||||
lacuscore = "^1.6.5"
|
||||
lacuscore = "^1.6.6"
|
||||
pylacus = "^1.6.1"
|
||||
pyipasnhistory = "^2.1.2"
|
||||
publicsuffixlist = "^0.10.0.20230806"
|
||||
publicsuffixlist = "^0.10.0.20230811"
|
||||
pyfaup = "^1.2"
|
||||
chardet = "^5.2.0"
|
||||
pysecuritytxt = "^1.1.1"
|
||||
|
|
Loading…
Reference in New Issue