mirror of https://github.com/CIRCL/lookyloo
chg: Make mypy happy.
parent
c25bf1bcf8
commit
d8d2c59f8b
|
@ -119,7 +119,7 @@ class Indexing():
|
|||
pipeline.execute()
|
||||
|
||||
def get_hash_uuids(self, body_hash: str) -> Tuple[str, str, str]:
|
||||
capture_uuid = self.redis.srandmember(f'bh|{body_hash}|captures')
|
||||
capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures') # type: ignore
|
||||
entry = self.redis.zrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, 1)[0]
|
||||
urlnode_uuid, hostnode_uuid, url = entry.split('|', 2)
|
||||
return capture_uuid, urlnode_uuid, hostnode_uuid
|
||||
|
|
|
@ -410,12 +410,12 @@ class Lookyloo():
|
|||
if not redis_pipeline:
|
||||
p = self.redis.pipeline()
|
||||
else:
|
||||
p = redis_pipeline
|
||||
p = redis_pipeline # type: ignore
|
||||
p.hset('lookup_dirs', uuid, str(capture_dir))
|
||||
if error_cache:
|
||||
if 'HTTP Error' not in error_cache['error']:
|
||||
self.logger.warning(error_cache['error'])
|
||||
p.hmset(str(capture_dir), error_cache)
|
||||
p.hmset(str(capture_dir), error_cache) # type: ignore
|
||||
|
||||
if not fatal_error:
|
||||
redirects = har.initial_redirects
|
||||
|
@ -440,7 +440,7 @@ class Lookyloo():
|
|||
if (capture_dir / 'no_index').exists(): # If the folders claims anonymity
|
||||
cache['no_index'] = 1
|
||||
|
||||
p.hmset(str(capture_dir), cache)
|
||||
p.hmset(str(capture_dir), cache) # type: ignore
|
||||
if not redis_pipeline:
|
||||
p.execute()
|
||||
|
||||
|
@ -455,7 +455,7 @@ class Lookyloo():
|
|||
@property
|
||||
def capture_uuids(self) -> List[str]:
|
||||
'''All the capture UUIDs present in the cache.'''
|
||||
return self.redis.hkeys('lookup_dirs') # type: ignore
|
||||
return self.redis.hkeys('lookup_dirs')
|
||||
|
||||
@property
|
||||
def sorted_cache(self) -> List[CaptureCache]:
|
||||
|
@ -484,7 +484,7 @@ class Lookyloo():
|
|||
if self.redis.hget(str(capture_dir), 'incomplete_redirects') == '1':
|
||||
# try to rebuild the cache
|
||||
self._set_capture_cache(capture_dir, force=True)
|
||||
cached: Dict[str, Any] = self.redis.hgetall(str(capture_dir)) # type: ignore
|
||||
cached: Dict[str, Any] = self.redis.hgetall(str(capture_dir))
|
||||
if not cached:
|
||||
self.logger.warning(f'No cache available for {capture_dir}.')
|
||||
return None
|
||||
|
@ -531,7 +531,7 @@ class Lookyloo():
|
|||
if isinstance(value, bool):
|
||||
# Yes, empty string because that's False.
|
||||
query[key] = 1 if value else ''
|
||||
p.hmset(perma_uuid, query)
|
||||
p.hmset(perma_uuid, query) # type: ignore
|
||||
p.sadd('to_capture', perma_uuid)
|
||||
p.execute()
|
||||
return perma_uuid
|
||||
|
@ -541,7 +541,7 @@ class Lookyloo():
|
|||
uuid = self.redis.spop('to_capture')
|
||||
if not uuid:
|
||||
return None
|
||||
to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid) # type: ignore
|
||||
to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid)
|
||||
self.redis.delete(uuid)
|
||||
to_capture['perma_uuid'] = uuid
|
||||
if self.capture(**to_capture): # type: ignore
|
||||
|
|
|
@ -1,17 +1,21 @@
|
|||
[[package]]
|
||||
name = "aiohttp"
|
||||
version = "2.3.10"
|
||||
version = "3.7.3"
|
||||
description = "Async http client/server framework (asyncio)"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=3.4.2"
|
||||
python-versions = ">=3.6"
|
||||
|
||||
[package.dependencies]
|
||||
async-timeout = ">=1.2.0"
|
||||
chardet = "*"
|
||||
idna-ssl = ">=1.0.0"
|
||||
multidict = ">=4.0.0"
|
||||
yarl = ">=1.0.0"
|
||||
async-timeout = ">=3.0,<4.0"
|
||||
attrs = ">=17.3.0"
|
||||
chardet = ">=2.0,<4.0"
|
||||
multidict = ">=4.5,<7.0"
|
||||
typing-extensions = ">=3.6.5"
|
||||
yarl = ">=1.0,<2.0"
|
||||
|
||||
[package.extras]
|
||||
speedups = ["aiodns", "brotlipy", "cchardet"]
|
||||
|
||||
[[package]]
|
||||
name = "appnope"
|
||||
|
@ -125,11 +129,11 @@ pycparser = "*"
|
|||
|
||||
[[package]]
|
||||
name = "chardet"
|
||||
version = "4.0.0"
|
||||
version = "3.0.4"
|
||||
description = "Universal encoding detector for Python 2 and 3"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
|
||||
python-versions = "*"
|
||||
|
||||
[[package]]
|
||||
name = "click"
|
||||
|
@ -337,17 +341,6 @@ category = "main"
|
|||
optional = false
|
||||
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
|
||||
|
||||
[[package]]
|
||||
name = "idna-ssl"
|
||||
version = "1.1.0"
|
||||
description = "Patch ssl.match_hostname for Unicode(idna) domains support"
|
||||
category = "main"
|
||||
optional = false
|
||||
python-versions = "*"
|
||||
|
||||
[package.dependencies]
|
||||
idna = ">=2.0"
|
||||
|
||||
[[package]]
|
||||
name = "importlib-metadata"
|
||||
version = "3.4.0"
|
||||
|
@ -1157,28 +1150,43 @@ content-hash = "d9e9bec0868fe91d80e67622585b71ef20584dcc0ea4622eec6193943449b3b0
|
|||
|
||||
[metadata.files]
|
||||
aiohttp = [
|
||||
{file = "aiohttp-2.3.10-cp34-cp34m-macosx_10_10_x86_64.whl", hash = "sha256:834f687b806fbf49cb135b5a208b5c27338e19c219d6e09e9049936e01e8bea8"},
|
||||
{file = "aiohttp-2.3.10-cp34-cp34m-macosx_10_11_x86_64.whl", hash = "sha256:6b8c5a00432b8a5a083792006e8fdfb558b8b10019ce254200855264d3a25895"},
|
||||
{file = "aiohttp-2.3.10-cp34-cp34m-macosx_10_12_x86_64.whl", hash = "sha256:7b407c22b0ab473ffe0a7d3231f2084a8ae80fdb64a31842b88d57d6b7bdab7c"},
|
||||
{file = "aiohttp-2.3.10-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:14821eb8613bfab9118be3c55afc87bf4cef97689896fa0874c6877b117afbeb"},
|
||||
{file = "aiohttp-2.3.10-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:8f32a4e157bad9c60ebc38c3bb93fcc907a020b017ddf8f7ab1580390e21940e"},
|
||||
{file = "aiohttp-2.3.10-cp34-cp34m-win32.whl", hash = "sha256:82a9068d9cb15eb2d99ecf39f8d56b4ed9f931a77a3622a0de747465fd2a7b96"},
|
||||
{file = "aiohttp-2.3.10-cp34-cp34m-win_amd64.whl", hash = "sha256:7ac6378ae364d8e5e5260c7224ea4a1965cb6f4719f15d0552349d0b0cc93953"},
|
||||
{file = "aiohttp-2.3.10-cp35-cp35m-macosx_10_10_x86_64.whl", hash = "sha256:5a952d4af7de5f78dfb3206dbc352717890b37d447f0bbd4b5969b3c8bb713af"},
|
||||
{file = "aiohttp-2.3.10-cp35-cp35m-macosx_10_11_x86_64.whl", hash = "sha256:b25c7720c495048ed658086a29925ab485ac7ececf1b346f2b459e5431d85016"},
|
||||
{file = "aiohttp-2.3.10-cp35-cp35m-macosx_10_12_x86_64.whl", hash = "sha256:528b0b811b6260a79222b055664b82093d01f35fe5c82521d8659cb2b28b8044"},
|
||||
{file = "aiohttp-2.3.10-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:46ace48789865a89992419205024ae451d577876f9919fbb0f22f71189822dea"},
|
||||
{file = "aiohttp-2.3.10-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:5436ca0ed752bb05a399fc07dc86dc23c756db523a3b7d5da46a457eacf4c4b5"},
|
||||
{file = "aiohttp-2.3.10-cp35-cp35m-win32.whl", hash = "sha256:f5e7d41d924a1d5274060c467539ee0c4f3bab318c1671ad65abd91f6b637baf"},
|
||||
{file = "aiohttp-2.3.10-cp35-cp35m-win_amd64.whl", hash = "sha256:a8c12f3184c7cad8f66cae6c945d2c97e598b0cb7afd655a5b9471475e67f30e"},
|
||||
{file = "aiohttp-2.3.10-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:756fc336a29c551b02252685f01bc87116bc9b04bbd02c1a6b8a96b3c6ad713b"},
|
||||
{file = "aiohttp-2.3.10-cp36-cp36m-macosx_10_11_x86_64.whl", hash = "sha256:cf790e61c2af0278f39dcedad9a22532bf81fb029c2cd73b1ceba7bea062c908"},
|
||||
{file = "aiohttp-2.3.10-cp36-cp36m-macosx_10_12_x86_64.whl", hash = "sha256:44c9cf24e63576244c13265ef0786b56d6751f5fb722225ecc021d6ecf91b4d2"},
|
||||
{file = "aiohttp-2.3.10-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:ef1a36a16e72b6689ce0a6c7fc6bd88837d8fef4590b16bd72817644ae1f414d"},
|
||||
{file = "aiohttp-2.3.10-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:3a4cdb9ca87c099d8ba5eb91cb8f000b60c21f8c1b50c75e04e8777e903bd278"},
|
||||
{file = "aiohttp-2.3.10-cp36-cp36m-win32.whl", hash = "sha256:f72bb19cece43483171264584bbaaf8b97717d9c0f244d1ef4a51df1cdb34085"},
|
||||
{file = "aiohttp-2.3.10-cp36-cp36m-win_amd64.whl", hash = "sha256:c77e29243a79e376a1b51d71a13df4a61bc54fd4d9597ce3790b8d82ec6eb44d"},
|
||||
{file = "aiohttp-2.3.10.tar.gz", hash = "sha256:8adda6583ba438a4c70693374e10b60168663ffa6564c5c75d3c7a9055290964"},
|
||||
{file = "aiohttp-3.7.3-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:328b552513d4f95b0a2eea4c8573e112866107227661834652a8984766aa7656"},
|
||||
{file = "aiohttp-3.7.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c733ef3bdcfe52a1a75564389bad4064352274036e7e234730526d155f04d914"},
|
||||
{file = "aiohttp-3.7.3-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:2858b2504c8697beb9357be01dc47ef86438cc1cb36ecb6991796d19475faa3e"},
|
||||
{file = "aiohttp-3.7.3-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:d2cfac21e31e841d60dc28c0ec7d4ec47a35c608cb8906435d47ef83ffb22150"},
|
||||
{file = "aiohttp-3.7.3-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:3228b7a51e3ed533f5472f54f70fd0b0a64c48dc1649a0f0e809bec312934d7a"},
|
||||
{file = "aiohttp-3.7.3-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:dcc119db14757b0c7bce64042158307b9b1c76471e655751a61b57f5a0e4d78e"},
|
||||
{file = "aiohttp-3.7.3-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:7d9b42127a6c0bdcc25c3dcf252bb3ddc70454fac593b1b6933ae091396deb13"},
|
||||
{file = "aiohttp-3.7.3-cp36-cp36m-win32.whl", hash = "sha256:df48a623c58180874d7407b4d9ec06a19b84ed47f60a3884345b1a5099c1818b"},
|
||||
{file = "aiohttp-3.7.3-cp36-cp36m-win_amd64.whl", hash = "sha256:0b795072bb1bf87b8620120a6373a3c61bfcb8da7e5c2377f4bb23ff4f0b62c9"},
|
||||
{file = "aiohttp-3.7.3-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:0d438c8ca703b1b714e82ed5b7a4412c82577040dadff479c08405e2a715564f"},
|
||||
{file = "aiohttp-3.7.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:8389d6044ee4e2037dca83e3f6994738550f6ee8cfb746762283fad9b932868f"},
|
||||
{file = "aiohttp-3.7.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3ea8c252d8df5e9166bcf3d9edced2af132f4ead8ac422eac723c5781063709a"},
|
||||
{file = "aiohttp-3.7.3-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:78e2f18a82b88cbc37d22365cf8d2b879a492faedb3f2975adb4ed8dfe994d3a"},
|
||||
{file = "aiohttp-3.7.3-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:df3a7b258cc230a65245167a202dd07320a5af05f3d41da1488ba0fa05bc9347"},
|
||||
{file = "aiohttp-3.7.3-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:f326b3c1bbfda5b9308252ee0dcb30b612ee92b0e105d4abec70335fab5b1245"},
|
||||
{file = "aiohttp-3.7.3-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:5e479df4b2d0f8f02133b7e4430098699450e1b2a826438af6bec9a400530957"},
|
||||
{file = "aiohttp-3.7.3-cp37-cp37m-win32.whl", hash = "sha256:6d42debaf55450643146fabe4b6817bb2a55b23698b0434107e892a43117285e"},
|
||||
{file = "aiohttp-3.7.3-cp37-cp37m-win_amd64.whl", hash = "sha256:c9c58b0b84055d8bc27b7df5a9d141df4ee6ff59821f922dd73155861282f6a3"},
|
||||
{file = "aiohttp-3.7.3-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:f411cb22115cb15452d099fec0ee636b06cf81bfb40ed9c02d30c8dc2bc2e3d1"},
|
||||
{file = "aiohttp-3.7.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:c1e0920909d916d3375c7a1fdb0b1c78e46170e8bb42792312b6eb6676b2f87f"},
|
||||
{file = "aiohttp-3.7.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:59d11674964b74a81b149d4ceaff2b674b3b0e4d0f10f0be1533e49c4a28408b"},
|
||||
{file = "aiohttp-3.7.3-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:41608c0acbe0899c852281978492f9ce2c6fbfaf60aff0cefc54a7c4516b822c"},
|
||||
{file = "aiohttp-3.7.3-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:16a3cb5df5c56f696234ea9e65e227d1ebe9c18aa774d36ff42f532139066a5f"},
|
||||
{file = "aiohttp-3.7.3-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:6ccc43d68b81c424e46192a778f97da94ee0630337c9bbe5b2ecc9b0c1c59001"},
|
||||
{file = "aiohttp-3.7.3-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:d03abec50df423b026a5aa09656bd9d37f1e6a49271f123f31f9b8aed5dc3ea3"},
|
||||
{file = "aiohttp-3.7.3-cp38-cp38-win32.whl", hash = "sha256:39f4b0a6ae22a1c567cb0630c30dd082481f95c13ca528dc501a7766b9c718c0"},
|
||||
{file = "aiohttp-3.7.3-cp38-cp38-win_amd64.whl", hash = "sha256:c68fdf21c6f3573ae19c7ee65f9ff185649a060c9a06535e9c3a0ee0bbac9235"},
|
||||
{file = "aiohttp-3.7.3-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:710376bf67d8ff4500a31d0c207b8941ff4fba5de6890a701d71680474fe2a60"},
|
||||
{file = "aiohttp-3.7.3-cp39-cp39-manylinux1_i686.whl", hash = "sha256:2406dc1dda01c7f6060ab586e4601f18affb7a6b965c50a8c90ff07569cf782a"},
|
||||
{file = "aiohttp-3.7.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:2a7b7640167ab536c3cb90cfc3977c7094f1c5890d7eeede8b273c175c3910fd"},
|
||||
{file = "aiohttp-3.7.3-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:684850fb1e3e55c9220aad007f8386d8e3e477c4ec9211ae54d968ecdca8c6f9"},
|
||||
{file = "aiohttp-3.7.3-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:1edfd82a98c5161497bbb111b2b70c0813102ad7e0aa81cbeb34e64c93863005"},
|
||||
{file = "aiohttp-3.7.3-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:77149002d9386fae303a4a162e6bce75cc2161347ad2ba06c2f0182561875d45"},
|
||||
{file = "aiohttp-3.7.3-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:756ae7efddd68d4ea7d89c636b703e14a0c686688d42f588b90778a3c2fc0564"},
|
||||
{file = "aiohttp-3.7.3-cp39-cp39-win32.whl", hash = "sha256:3b0036c978cbcc4a4512278e98e3e6d9e6b834dc973206162eddf98b586ef1c6"},
|
||||
{file = "aiohttp-3.7.3-cp39-cp39-win_amd64.whl", hash = "sha256:e1b95972a0ae3f248a899cdbac92ba2e01d731225f566569311043ce2226f5e7"},
|
||||
{file = "aiohttp-3.7.3.tar.gz", hash = "sha256:9c1a81af067e72261c9cbe33ea792893e83bc6aa987bfbd6fdc1e5e7b22777c4"},
|
||||
]
|
||||
appnope = [
|
||||
{file = "appnope-0.1.2-py2.py3-none-any.whl", hash = "sha256:93aa393e9d6c54c5cd570ccadd8edad61ea0c4b9ea7a01409020c9aa019eb442"},
|
||||
|
@ -1281,8 +1289,8 @@ cffi = [
|
|||
{file = "cffi-1.14.4.tar.gz", hash = "sha256:1a465cbe98a7fd391d47dce4b8f7e5b921e6cd805ef421d04f5f66ba8f06086c"},
|
||||
]
|
||||
chardet = [
|
||||
{file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"},
|
||||
{file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"},
|
||||
{file = "chardet-3.0.4-py2.py3-none-any.whl", hash = "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"},
|
||||
{file = "chardet-3.0.4.tar.gz", hash = "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"},
|
||||
]
|
||||
click = [
|
||||
{file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"},
|
||||
|
@ -1365,9 +1373,6 @@ idna = [
|
|||
{file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"},
|
||||
{file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"},
|
||||
]
|
||||
idna-ssl = [
|
||||
{file = "idna-ssl-1.1.0.tar.gz", hash = "sha256:a933e3bb13da54383f9e8f35dc4f9cb9eb9b3b78c6b36f311254d6d0d92c6c7c"},
|
||||
]
|
||||
importlib-metadata = [
|
||||
{file = "importlib_metadata-3.4.0-py3-none-any.whl", hash = "sha256:ace61d5fc652dc280e7b6b4ff732a9c2d40db2c0f92bc6cb74e07b73d53a1771"},
|
||||
{file = "importlib_metadata-3.4.0.tar.gz", hash = "sha256:fa5daa4477a7414ae34e95942e4dd07f62adf589143c875c133c1e53c4eff38d"},
|
||||
|
|
|
@ -5,9 +5,8 @@ from lookyloo.lookyloo import Lookyloo
|
|||
|
||||
lookyloo = Lookyloo()
|
||||
|
||||
for capture_dir in lookyloo.capture_dirs:
|
||||
for capture_uuid in lookyloo.capture_uuids:
|
||||
try:
|
||||
ct = lookyloo.get_crawled_tree(capture_dir)
|
||||
ct = lookyloo.get_crawled_tree(capture_uuid)
|
||||
except Exception:
|
||||
continue
|
||||
lookyloo._ensure_meta(capture_dir, ct)
|
||||
|
|
|
@ -2,15 +2,17 @@ from lookyloo.lookyloo import Lookyloo
|
|||
import calendar
|
||||
import datetime
|
||||
from urllib.parse import urlparse
|
||||
from typing import Dict, Any, Union, Set
|
||||
|
||||
lookyloo = Lookyloo()
|
||||
|
||||
stats = {}
|
||||
stats: Dict[Union[str, int], Any] = {}
|
||||
|
||||
today = datetime.date.today()
|
||||
calendar_week = today.isocalendar()[1]
|
||||
weeks_stats = {calendar_week - 1: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()},
|
||||
calendar_week: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}}
|
||||
weeks_stats: Dict[int, Dict[str, Union[int, Set[str]]]] = \
|
||||
{calendar_week - 1: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()},
|
||||
calendar_week: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}}
|
||||
|
||||
|
||||
def uniq_domains(uniq_urls):
|
||||
|
@ -23,26 +25,26 @@ def uniq_domains(uniq_urls):
|
|||
|
||||
for uuid in lookyloo.capture_uuids:
|
||||
cache = lookyloo.capture_cache(uuid)
|
||||
if 'timestamp' not in cache:
|
||||
if not cache or not hasattr(cache, 'timestamp'):
|
||||
continue
|
||||
date = datetime.datetime.fromisoformat(cache['timestamp'].rstrip('Z'))
|
||||
date = cache.timestamp
|
||||
if date.year not in stats:
|
||||
stats[date.year] = {}
|
||||
if date.month not in stats[date.year]:
|
||||
stats[date.year][date.month] = {'analysis': 0, 'analysis_with_redirects' :0, 'redirects': 0, 'uniq_urls': set()}
|
||||
stats[date.year][date.month] = {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}
|
||||
stats[date.year][date.month]['analysis'] += 1
|
||||
if len(cache['redirects']) > 0:
|
||||
if len(cache.redirects) > 0:
|
||||
stats[date.year][date.month]['analysis_with_redirects'] += 1
|
||||
stats[date.year][date.month]['redirects'] += len(cache['redirects'])
|
||||
stats[date.year][date.month]['uniq_urls'].update(cache['redirects'])
|
||||
stats[date.year][date.month]['uniq_urls'].add(cache['url'])
|
||||
stats[date.year][date.month]['redirects'] += len(cache.redirects)
|
||||
stats[date.year][date.month]['uniq_urls'].update(cache.redirects)
|
||||
stats[date.year][date.month]['uniq_urls'].add(cache.url)
|
||||
if date.isocalendar()[1] in weeks_stats:
|
||||
weeks_stats[date.isocalendar()[1]]['analysis'] += 1
|
||||
if len(cache['redirects']) > 0:
|
||||
weeks_stats[date.isocalendar()[1]]['analysis_with_redirects'] += 1
|
||||
weeks_stats[date.isocalendar()[1]]['redirects'] += len(cache['redirects'])
|
||||
weeks_stats[date.isocalendar()[1]]['uniq_urls'].update(cache['redirects'])
|
||||
weeks_stats[date.isocalendar()[1]]['uniq_urls'].add(cache['url'])
|
||||
weeks_stats[date.isocalendar()[1]]['analysis'] += 1 # type: ignore
|
||||
if len(cache.redirects) > 0:
|
||||
weeks_stats[date.isocalendar()[1]]['analysis_with_redirects'] += 1 # type: ignore
|
||||
weeks_stats[date.isocalendar()[1]]['redirects'] += len(cache.redirects) # type: ignore
|
||||
weeks_stats[date.isocalendar()[1]]['uniq_urls'].update(cache.redirects) # type: ignore
|
||||
weeks_stats[date.isocalendar()[1]]['uniq_urls'].add(cache.url) # type: ignore
|
||||
|
||||
print('Statistics for the last two weeks:')
|
||||
for week_number, week_stat in weeks_stats.items():
|
||||
|
@ -50,7 +52,7 @@ for week_number, week_stat in weeks_stats.items():
|
|||
print(' Number of analysis:', week_stat['analysis'])
|
||||
print(' Number of analysis with redirects:', week_stat['analysis_with_redirects'])
|
||||
print(' Number of redirects:', week_stat['redirects'])
|
||||
print(' Number of unique URLs:', len(week_stat['uniq_urls']))
|
||||
print(' Number of unique URLs:', len(week_stat['uniq_urls'])) # type: ignore
|
||||
domains = uniq_domains(week_stat['uniq_urls'])
|
||||
print(' Number of unique domains:', len(domains))
|
||||
|
||||
|
|
Loading…
Reference in New Issue