mirror of https://github.com/CIRCL/lookyloo
chg: Compress HARs by default, update codebase accordingly
parent
15a140471c
commit
447229ced3
|
@ -38,7 +38,9 @@ class Archiver(AbstractManager):
|
||||||
self._archive()
|
self._archive()
|
||||||
self._update_all_capture_indexes()
|
self._update_all_capture_indexes()
|
||||||
self._load_indexes()
|
self._load_indexes()
|
||||||
self._compress_hars()
|
# The HARs are supposedly all compressed so this call shouldn't be required
|
||||||
|
# unless you're processing old captures for the first time.
|
||||||
|
# self._compress_hars()
|
||||||
|
|
||||||
def _update_index(self, root_dir: Path) -> None:
|
def _update_index(self, root_dir: Path) -> None:
|
||||||
current_index: Dict[str, str] = {}
|
current_index: Dict[str, str] = {}
|
||||||
|
@ -188,12 +190,23 @@ class Archiver(AbstractManager):
|
||||||
p.delete(str(capture_path))
|
p.delete(str(capture_path))
|
||||||
(capture_path / 'tree.pickle').unlink(missing_ok=True)
|
(capture_path / 'tree.pickle').unlink(missing_ok=True)
|
||||||
(capture_path / 'tree.pickle.gz').unlink(missing_ok=True)
|
(capture_path / 'tree.pickle.gz').unlink(missing_ok=True)
|
||||||
|
# If the HAR isn't archived yet, archive it before copy
|
||||||
|
for har in capture_path.glob('*.har'):
|
||||||
|
with har.open('rb') as f_in:
|
||||||
|
with gzip.open(f'{har}.gz', 'wb') as f_out:
|
||||||
|
shutil.copyfileobj(f_in, f_out)
|
||||||
|
har.unlink()
|
||||||
shutil.move(str(capture_path), str(dest_dir))
|
shutil.move(str(capture_path), str(dest_dir))
|
||||||
p.execute()
|
p.execute()
|
||||||
|
|
||||||
self.logger.info('Archiving done.')
|
self.logger.info('Archiving done.')
|
||||||
|
|
||||||
def _compress_hars(self):
|
def _compress_hars(self):
|
||||||
|
"""This method is very slow (it checks every single capture for non-compressed HARs)
|
||||||
|
The new approach is to compress the har of every capture by default so this shouldn't be
|
||||||
|
needed anymore. Keeping it here just for reference, or to process old archives that contain
|
||||||
|
non-gzipped HARs.
|
||||||
|
"""
|
||||||
self.logger.info('Compressing archived captures')
|
self.logger.info('Compressing archived captures')
|
||||||
for index in self.archived_captures_dir.glob('*/*/index'):
|
for index in self.archived_captures_dir.glob('*/*/index'):
|
||||||
if self.shutdown_requested():
|
if self.shutdown_requested():
|
||||||
|
@ -201,9 +214,7 @@ class Archiver(AbstractManager):
|
||||||
break
|
break
|
||||||
with index.open('r') as _f:
|
with index.open('r') as _f:
|
||||||
for uuid, dirname in csv.reader(_f):
|
for uuid, dirname in csv.reader(_f):
|
||||||
for har in (index.parent / dirname).rglob('*.har'):
|
for har in (index.parent / dirname).glob('*.har'):
|
||||||
if not har.exists():
|
|
||||||
continue
|
|
||||||
with har.open('rb') as f_in:
|
with har.open('rb') as f_in:
|
||||||
with gzip.open(f'{har}.gz', 'wb') as f_out:
|
with gzip.open(f'{har}.gz', 'wb') as f_out:
|
||||||
shutil.copyfileobj(f_in, f_out)
|
shutil.copyfileobj(f_in, f_out)
|
||||||
|
|
|
@ -2,6 +2,7 @@
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import copy
|
import copy
|
||||||
|
import gzip
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import operator
|
import operator
|
||||||
|
@ -1499,8 +1500,8 @@ class Lookyloo():
|
||||||
json.dump(error, _error)
|
json.dump(error, _error)
|
||||||
|
|
||||||
if har:
|
if har:
|
||||||
with (dirpath / '0.har').open('w') as _har:
|
with gzip.open(dirpath / '0.har.gz', 'wt') as f_out:
|
||||||
json.dump(har, _har)
|
f_out.write(json.dumps(har))
|
||||||
|
|
||||||
if png:
|
if png:
|
||||||
with (dirpath / '0.png').open('wb') as _img:
|
with (dirpath / '0.png').open('wb') as _img:
|
||||||
|
|
|
@ -166,13 +166,13 @@ test = ["astroid", "pytest"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "async-timeout"
|
name = "async-timeout"
|
||||||
version = "4.0.2"
|
version = "4.0.3"
|
||||||
description = "Timeout context manager for asyncio programs"
|
description = "Timeout context manager for asyncio programs"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.6"
|
python-versions = ">=3.7"
|
||||||
files = [
|
files = [
|
||||||
{file = "async-timeout-4.0.2.tar.gz", hash = "sha256:2163e1640ddb52b7a8c80d0a67a08587e5d245cc9c553a74a847056bc2976b15"},
|
{file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"},
|
||||||
{file = "async_timeout-4.0.2-py3-none-any.whl", hash = "sha256:8ca1e4fcf50d07413d66d1a5e416e42cfdf5851c981d679a09851a6853383b3c"},
|
{file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -908,13 +908,13 @@ tornado = ["tornado (>=0.2)"]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "har2tree"
|
name = "har2tree"
|
||||||
version = "1.21.6"
|
version = "1.21.7"
|
||||||
description = "HTTP Archive (HAR) to ETE Toolkit generator"
|
description = "HTTP Archive (HAR) to ETE Toolkit generator"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.8,<3.12"
|
python-versions = ">=3.8,<3.12"
|
||||||
files = [
|
files = [
|
||||||
{file = "har2tree-1.21.6-py3-none-any.whl", hash = "sha256:22e152634d1307a3a096a46581f218b71ca8d7f0aee558eff0aa5303c2130cf2"},
|
{file = "har2tree-1.21.7-py3-none-any.whl", hash = "sha256:ffe14f2d21c53bd95839e67682aff4ecf4bb624bfda0aa23857df6dfe5aa70fa"},
|
||||||
{file = "har2tree-1.21.6.tar.gz", hash = "sha256:09e73fbbee97bab0da4e209332d1a017ae844a88d4d77896927a575f83a62926"},
|
{file = "har2tree-1.21.7.tar.gz", hash = "sha256:b03cd8c2ee7e954060c1109c08844c376db9d7fd0e2e2499bbdc5d9fe6879b7f"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
@ -926,7 +926,7 @@ numpy = [
|
||||||
{version = "1.23.3", markers = "python_version < \"3.10\""},
|
{version = "1.23.3", markers = "python_version < \"3.10\""},
|
||||||
{version = ">=1.23.4,<2.0.0", markers = "python_version >= \"3.10\""},
|
{version = ">=1.23.4,<2.0.0", markers = "python_version >= \"3.10\""},
|
||||||
]
|
]
|
||||||
publicsuffixlist = ">=0.10.0.20230804,<0.11.0.0"
|
publicsuffixlist = ">=0.10.0.20230811,<0.11.0.0"
|
||||||
w3lib = ">=2.1.2,<3.0.0"
|
w3lib = ">=2.1.2,<3.0.0"
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
|
@ -1243,13 +1243,13 @@ referencing = ">=0.28.0"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "lacuscore"
|
name = "lacuscore"
|
||||||
version = "1.6.5"
|
version = "1.6.6"
|
||||||
description = "Core of Lacus, usable as a module"
|
description = "Core of Lacus, usable as a module"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=3.8,<4.0"
|
python-versions = ">=3.8,<4.0"
|
||||||
files = [
|
files = [
|
||||||
{file = "lacuscore-1.6.5-py3-none-any.whl", hash = "sha256:373cde9898938521514d08e3a53e6b8d5c71dac59eaad1a067c33086fddbfd4f"},
|
{file = "lacuscore-1.6.6-py3-none-any.whl", hash = "sha256:456914de415fba8bbaeef32446ff74703015f55ad3d79691d97515e8eb14dc66"},
|
||||||
{file = "lacuscore-1.6.5.tar.gz", hash = "sha256:bc6da53bf58819e995db8f2ed33be032bcc9bac04e06c2f2d752cb30c6965e3a"},
|
{file = "lacuscore-1.6.6.tar.gz", hash = "sha256:279087601cca7730c061e4188c27cc7f72246157bde8ecf5dc345aea69e1009e"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.dependencies]
|
[package.dependencies]
|
||||||
|
@ -1926,13 +1926,13 @@ files = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "publicsuffixlist"
|
name = "publicsuffixlist"
|
||||||
version = "0.10.0.20230806"
|
version = "0.10.0.20230811"
|
||||||
description = "publicsuffixlist implement"
|
description = "publicsuffixlist implement"
|
||||||
optional = false
|
optional = false
|
||||||
python-versions = ">=2.6"
|
python-versions = ">=2.6"
|
||||||
files = [
|
files = [
|
||||||
{file = "publicsuffixlist-0.10.0.20230806-py2.py3-none-any.whl", hash = "sha256:c05dbd256d049d3fb94405e7e4a5215cffb39e7329471137e04b320d22cbf141"},
|
{file = "publicsuffixlist-0.10.0.20230811-py2.py3-none-any.whl", hash = "sha256:cd84fcabb7d5bbca45ac3a1ea876cd8fac4d093705a827b20afbe34f5b2e70e6"},
|
||||||
{file = "publicsuffixlist-0.10.0.20230806.tar.gz", hash = "sha256:8c30ea7a0019386d144ca3db8751f757ee46acc194ea6d9619eb175041491c96"},
|
{file = "publicsuffixlist-0.10.0.20230811.tar.gz", hash = "sha256:0ba20a5fa7b9fe5c6dc787d978c6be212e53c962a1a417a2a5948c9d28a4c549"},
|
||||||
]
|
]
|
||||||
|
|
||||||
[package.extras]
|
[package.extras]
|
||||||
|
@ -3154,4 +3154,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
|
||||||
[metadata]
|
[metadata]
|
||||||
lock-version = "2.0"
|
lock-version = "2.0"
|
||||||
python-versions = ">=3.8,<3.12"
|
python-versions = ">=3.8,<3.12"
|
||||||
content-hash = "056a4bf984207efa7930a8af07e93d4fd1ce7751288b0047d64df1918ad92cb1"
|
content-hash = "3d380da51d6c302c872b3bec918b8c3581946d43e91ba047f0984e9325293cdb"
|
||||||
|
|
|
@ -60,15 +60,15 @@ pyhashlookup = "^1.2.1"
|
||||||
lief = "^0.13.2"
|
lief = "^0.13.2"
|
||||||
ua-parser = "^0.18.0"
|
ua-parser = "^0.18.0"
|
||||||
Flask-Login = "^0.6.2"
|
Flask-Login = "^0.6.2"
|
||||||
har2tree = "^1.21.6"
|
har2tree = "^1.21.7"
|
||||||
passivetotal = "^2.5.9"
|
passivetotal = "^2.5.9"
|
||||||
werkzeug = "^2.3.6"
|
werkzeug = "^2.3.6"
|
||||||
filetype = "^1.2.0"
|
filetype = "^1.2.0"
|
||||||
pypandora = "^1.5.0"
|
pypandora = "^1.5.0"
|
||||||
lacuscore = "^1.6.5"
|
lacuscore = "^1.6.6"
|
||||||
pylacus = "^1.6.1"
|
pylacus = "^1.6.1"
|
||||||
pyipasnhistory = "^2.1.2"
|
pyipasnhistory = "^2.1.2"
|
||||||
publicsuffixlist = "^0.10.0.20230806"
|
publicsuffixlist = "^0.10.0.20230811"
|
||||||
pyfaup = "^1.2"
|
pyfaup = "^1.2"
|
||||||
chardet = "^5.2.0"
|
chardet = "^5.2.0"
|
||||||
pysecuritytxt = "^1.1.1"
|
pysecuritytxt = "^1.1.1"
|
||||||
|
|
Loading…
Reference in New Issue