From d8d2c59f8b224d9fb6c5bf9848fbf50092efc752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Mon, 25 Jan 2021 13:14:33 +0100 Subject: [PATCH] chg: Make mypy happy. --- lookyloo/indexing.py | 2 +- lookyloo/lookyloo.py | 14 +++--- poetry.lock | 99 +++++++++++++++++++------------------ tools/generate_meta_file.py | 5 +- tools/stats.py | 36 +++++++------- 5 files changed, 81 insertions(+), 75 deletions(-) diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py index 3f0a2061..5e0b8ffb 100644 --- a/lookyloo/indexing.py +++ b/lookyloo/indexing.py @@ -119,7 +119,7 @@ class Indexing(): pipeline.execute() def get_hash_uuids(self, body_hash: str) -> Tuple[str, str, str]: - capture_uuid = self.redis.srandmember(f'bh|{body_hash}|captures') + capture_uuid: str = self.redis.srandmember(f'bh|{body_hash}|captures') # type: ignore entry = self.redis.zrange(f'bh|{body_hash}|captures|{capture_uuid}', 0, 1)[0] urlnode_uuid, hostnode_uuid, url = entry.split('|', 2) return capture_uuid, urlnode_uuid, hostnode_uuid diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index a4303ba0..2eb956d4 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -410,12 +410,12 @@ class Lookyloo(): if not redis_pipeline: p = self.redis.pipeline() else: - p = redis_pipeline + p = redis_pipeline # type: ignore p.hset('lookup_dirs', uuid, str(capture_dir)) if error_cache: if 'HTTP Error' not in error_cache['error']: self.logger.warning(error_cache['error']) - p.hmset(str(capture_dir), error_cache) + p.hmset(str(capture_dir), error_cache) # type: ignore if not fatal_error: redirects = har.initial_redirects @@ -440,7 +440,7 @@ class Lookyloo(): if (capture_dir / 'no_index').exists(): # If the folders claims anonymity cache['no_index'] = 1 - p.hmset(str(capture_dir), cache) + p.hmset(str(capture_dir), cache) # type: ignore if not redis_pipeline: p.execute() @@ -455,7 +455,7 @@ class Lookyloo(): @property def capture_uuids(self) -> List[str]: '''All the capture UUIDs present in the cache.''' - return self.redis.hkeys('lookup_dirs') # type: ignore + return self.redis.hkeys('lookup_dirs') @property def sorted_cache(self) -> List[CaptureCache]: @@ -484,7 +484,7 @@ class Lookyloo(): if self.redis.hget(str(capture_dir), 'incomplete_redirects') == '1': # try to rebuild the cache self._set_capture_cache(capture_dir, force=True) - cached: Dict[str, Any] = self.redis.hgetall(str(capture_dir)) # type: ignore + cached: Dict[str, Any] = self.redis.hgetall(str(capture_dir)) if not cached: self.logger.warning(f'No cache available for {capture_dir}.') return None @@ -531,7 +531,7 @@ class Lookyloo(): if isinstance(value, bool): # Yes, empty string because that's False. query[key] = 1 if value else '' - p.hmset(perma_uuid, query) + p.hmset(perma_uuid, query) # type: ignore p.sadd('to_capture', perma_uuid) p.execute() return perma_uuid @@ -541,7 +541,7 @@ class Lookyloo(): uuid = self.redis.spop('to_capture') if not uuid: return None - to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid) # type: ignore + to_capture: Dict[str, Union[str, int, float]] = self.redis.hgetall(uuid) self.redis.delete(uuid) to_capture['perma_uuid'] = uuid if self.capture(**to_capture): # type: ignore diff --git a/poetry.lock b/poetry.lock index b5a1d8d3..6c90a306 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,17 +1,21 @@ [[package]] name = "aiohttp" -version = "2.3.10" +version = "3.7.3" description = "Async http client/server framework (asyncio)" category = "main" optional = false -python-versions = ">=3.4.2" +python-versions = ">=3.6" [package.dependencies] -async-timeout = ">=1.2.0" -chardet = "*" -idna-ssl = ">=1.0.0" -multidict = ">=4.0.0" -yarl = ">=1.0.0" +async-timeout = ">=3.0,<4.0" +attrs = ">=17.3.0" +chardet = ">=2.0,<4.0" +multidict = ">=4.5,<7.0" +typing-extensions = ">=3.6.5" +yarl = ">=1.0,<2.0" + +[package.extras] +speedups = ["aiodns", "brotlipy", "cchardet"] [[package]] name = "appnope" @@ -125,11 +129,11 @@ pycparser = "*" [[package]] name = "chardet" -version = "4.0.0" +version = "3.0.4" description = "Universal encoding detector for Python 2 and 3" category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +python-versions = "*" [[package]] name = "click" @@ -337,17 +341,6 @@ category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -[[package]] -name = "idna-ssl" -version = "1.1.0" -description = "Patch ssl.match_hostname for Unicode(idna) domains support" -category = "main" -optional = false -python-versions = "*" - -[package.dependencies] -idna = ">=2.0" - [[package]] name = "importlib-metadata" version = "3.4.0" @@ -1157,28 +1150,43 @@ content-hash = "d9e9bec0868fe91d80e67622585b71ef20584dcc0ea4622eec6193943449b3b0 [metadata.files] aiohttp = [ - {file = "aiohttp-2.3.10-cp34-cp34m-macosx_10_10_x86_64.whl", hash = "sha256:834f687b806fbf49cb135b5a208b5c27338e19c219d6e09e9049936e01e8bea8"}, - {file = "aiohttp-2.3.10-cp34-cp34m-macosx_10_11_x86_64.whl", hash = "sha256:6b8c5a00432b8a5a083792006e8fdfb558b8b10019ce254200855264d3a25895"}, - {file = "aiohttp-2.3.10-cp34-cp34m-macosx_10_12_x86_64.whl", hash = "sha256:7b407c22b0ab473ffe0a7d3231f2084a8ae80fdb64a31842b88d57d6b7bdab7c"}, - {file = "aiohttp-2.3.10-cp34-cp34m-manylinux1_i686.whl", hash = "sha256:14821eb8613bfab9118be3c55afc87bf4cef97689896fa0874c6877b117afbeb"}, - {file = "aiohttp-2.3.10-cp34-cp34m-manylinux1_x86_64.whl", hash = "sha256:8f32a4e157bad9c60ebc38c3bb93fcc907a020b017ddf8f7ab1580390e21940e"}, - {file = "aiohttp-2.3.10-cp34-cp34m-win32.whl", hash = "sha256:82a9068d9cb15eb2d99ecf39f8d56b4ed9f931a77a3622a0de747465fd2a7b96"}, - {file = "aiohttp-2.3.10-cp34-cp34m-win_amd64.whl", hash = "sha256:7ac6378ae364d8e5e5260c7224ea4a1965cb6f4719f15d0552349d0b0cc93953"}, - {file = "aiohttp-2.3.10-cp35-cp35m-macosx_10_10_x86_64.whl", hash = "sha256:5a952d4af7de5f78dfb3206dbc352717890b37d447f0bbd4b5969b3c8bb713af"}, - {file = "aiohttp-2.3.10-cp35-cp35m-macosx_10_11_x86_64.whl", hash = "sha256:b25c7720c495048ed658086a29925ab485ac7ececf1b346f2b459e5431d85016"}, - {file = "aiohttp-2.3.10-cp35-cp35m-macosx_10_12_x86_64.whl", hash = "sha256:528b0b811b6260a79222b055664b82093d01f35fe5c82521d8659cb2b28b8044"}, - {file = "aiohttp-2.3.10-cp35-cp35m-manylinux1_i686.whl", hash = "sha256:46ace48789865a89992419205024ae451d577876f9919fbb0f22f71189822dea"}, - {file = "aiohttp-2.3.10-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:5436ca0ed752bb05a399fc07dc86dc23c756db523a3b7d5da46a457eacf4c4b5"}, - {file = "aiohttp-2.3.10-cp35-cp35m-win32.whl", hash = "sha256:f5e7d41d924a1d5274060c467539ee0c4f3bab318c1671ad65abd91f6b637baf"}, - {file = "aiohttp-2.3.10-cp35-cp35m-win_amd64.whl", hash = "sha256:a8c12f3184c7cad8f66cae6c945d2c97e598b0cb7afd655a5b9471475e67f30e"}, - {file = "aiohttp-2.3.10-cp36-cp36m-macosx_10_10_x86_64.whl", hash = "sha256:756fc336a29c551b02252685f01bc87116bc9b04bbd02c1a6b8a96b3c6ad713b"}, - {file = "aiohttp-2.3.10-cp36-cp36m-macosx_10_11_x86_64.whl", hash = "sha256:cf790e61c2af0278f39dcedad9a22532bf81fb029c2cd73b1ceba7bea062c908"}, - {file = "aiohttp-2.3.10-cp36-cp36m-macosx_10_12_x86_64.whl", hash = "sha256:44c9cf24e63576244c13265ef0786b56d6751f5fb722225ecc021d6ecf91b4d2"}, - {file = "aiohttp-2.3.10-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:ef1a36a16e72b6689ce0a6c7fc6bd88837d8fef4590b16bd72817644ae1f414d"}, - {file = "aiohttp-2.3.10-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:3a4cdb9ca87c099d8ba5eb91cb8f000b60c21f8c1b50c75e04e8777e903bd278"}, - {file = "aiohttp-2.3.10-cp36-cp36m-win32.whl", hash = "sha256:f72bb19cece43483171264584bbaaf8b97717d9c0f244d1ef4a51df1cdb34085"}, - {file = "aiohttp-2.3.10-cp36-cp36m-win_amd64.whl", hash = "sha256:c77e29243a79e376a1b51d71a13df4a61bc54fd4d9597ce3790b8d82ec6eb44d"}, - {file = "aiohttp-2.3.10.tar.gz", hash = "sha256:8adda6583ba438a4c70693374e10b60168663ffa6564c5c75d3c7a9055290964"}, + {file = "aiohttp-3.7.3-cp36-cp36m-macosx_10_14_x86_64.whl", hash = "sha256:328b552513d4f95b0a2eea4c8573e112866107227661834652a8984766aa7656"}, + {file = "aiohttp-3.7.3-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:c733ef3bdcfe52a1a75564389bad4064352274036e7e234730526d155f04d914"}, + {file = "aiohttp-3.7.3-cp36-cp36m-manylinux2014_aarch64.whl", hash = "sha256:2858b2504c8697beb9357be01dc47ef86438cc1cb36ecb6991796d19475faa3e"}, + {file = "aiohttp-3.7.3-cp36-cp36m-manylinux2014_i686.whl", hash = "sha256:d2cfac21e31e841d60dc28c0ec7d4ec47a35c608cb8906435d47ef83ffb22150"}, + {file = "aiohttp-3.7.3-cp36-cp36m-manylinux2014_ppc64le.whl", hash = "sha256:3228b7a51e3ed533f5472f54f70fd0b0a64c48dc1649a0f0e809bec312934d7a"}, + {file = "aiohttp-3.7.3-cp36-cp36m-manylinux2014_s390x.whl", hash = "sha256:dcc119db14757b0c7bce64042158307b9b1c76471e655751a61b57f5a0e4d78e"}, + {file = "aiohttp-3.7.3-cp36-cp36m-manylinux2014_x86_64.whl", hash = "sha256:7d9b42127a6c0bdcc25c3dcf252bb3ddc70454fac593b1b6933ae091396deb13"}, + {file = "aiohttp-3.7.3-cp36-cp36m-win32.whl", hash = "sha256:df48a623c58180874d7407b4d9ec06a19b84ed47f60a3884345b1a5099c1818b"}, + {file = "aiohttp-3.7.3-cp36-cp36m-win_amd64.whl", hash = "sha256:0b795072bb1bf87b8620120a6373a3c61bfcb8da7e5c2377f4bb23ff4f0b62c9"}, + {file = "aiohttp-3.7.3-cp37-cp37m-macosx_10_14_x86_64.whl", hash = "sha256:0d438c8ca703b1b714e82ed5b7a4412c82577040dadff479c08405e2a715564f"}, + {file = "aiohttp-3.7.3-cp37-cp37m-manylinux1_i686.whl", hash = "sha256:8389d6044ee4e2037dca83e3f6994738550f6ee8cfb746762283fad9b932868f"}, + {file = "aiohttp-3.7.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:3ea8c252d8df5e9166bcf3d9edced2af132f4ead8ac422eac723c5781063709a"}, + {file = "aiohttp-3.7.3-cp37-cp37m-manylinux2014_i686.whl", hash = "sha256:78e2f18a82b88cbc37d22365cf8d2b879a492faedb3f2975adb4ed8dfe994d3a"}, + {file = "aiohttp-3.7.3-cp37-cp37m-manylinux2014_ppc64le.whl", hash = "sha256:df3a7b258cc230a65245167a202dd07320a5af05f3d41da1488ba0fa05bc9347"}, + {file = "aiohttp-3.7.3-cp37-cp37m-manylinux2014_s390x.whl", hash = "sha256:f326b3c1bbfda5b9308252ee0dcb30b612ee92b0e105d4abec70335fab5b1245"}, + {file = "aiohttp-3.7.3-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:5e479df4b2d0f8f02133b7e4430098699450e1b2a826438af6bec9a400530957"}, + {file = "aiohttp-3.7.3-cp37-cp37m-win32.whl", hash = "sha256:6d42debaf55450643146fabe4b6817bb2a55b23698b0434107e892a43117285e"}, + {file = "aiohttp-3.7.3-cp37-cp37m-win_amd64.whl", hash = "sha256:c9c58b0b84055d8bc27b7df5a9d141df4ee6ff59821f922dd73155861282f6a3"}, + {file = "aiohttp-3.7.3-cp38-cp38-macosx_10_14_x86_64.whl", hash = "sha256:f411cb22115cb15452d099fec0ee636b06cf81bfb40ed9c02d30c8dc2bc2e3d1"}, + {file = "aiohttp-3.7.3-cp38-cp38-manylinux1_i686.whl", hash = "sha256:c1e0920909d916d3375c7a1fdb0b1c78e46170e8bb42792312b6eb6676b2f87f"}, + {file = "aiohttp-3.7.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:59d11674964b74a81b149d4ceaff2b674b3b0e4d0f10f0be1533e49c4a28408b"}, + {file = "aiohttp-3.7.3-cp38-cp38-manylinux2014_i686.whl", hash = "sha256:41608c0acbe0899c852281978492f9ce2c6fbfaf60aff0cefc54a7c4516b822c"}, + {file = "aiohttp-3.7.3-cp38-cp38-manylinux2014_ppc64le.whl", hash = "sha256:16a3cb5df5c56f696234ea9e65e227d1ebe9c18aa774d36ff42f532139066a5f"}, + {file = "aiohttp-3.7.3-cp38-cp38-manylinux2014_s390x.whl", hash = "sha256:6ccc43d68b81c424e46192a778f97da94ee0630337c9bbe5b2ecc9b0c1c59001"}, + {file = "aiohttp-3.7.3-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:d03abec50df423b026a5aa09656bd9d37f1e6a49271f123f31f9b8aed5dc3ea3"}, + {file = "aiohttp-3.7.3-cp38-cp38-win32.whl", hash = "sha256:39f4b0a6ae22a1c567cb0630c30dd082481f95c13ca528dc501a7766b9c718c0"}, + {file = "aiohttp-3.7.3-cp38-cp38-win_amd64.whl", hash = "sha256:c68fdf21c6f3573ae19c7ee65f9ff185649a060c9a06535e9c3a0ee0bbac9235"}, + {file = "aiohttp-3.7.3-cp39-cp39-macosx_10_14_x86_64.whl", hash = "sha256:710376bf67d8ff4500a31d0c207b8941ff4fba5de6890a701d71680474fe2a60"}, + {file = "aiohttp-3.7.3-cp39-cp39-manylinux1_i686.whl", hash = "sha256:2406dc1dda01c7f6060ab586e4601f18affb7a6b965c50a8c90ff07569cf782a"}, + {file = "aiohttp-3.7.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:2a7b7640167ab536c3cb90cfc3977c7094f1c5890d7eeede8b273c175c3910fd"}, + {file = "aiohttp-3.7.3-cp39-cp39-manylinux2014_i686.whl", hash = "sha256:684850fb1e3e55c9220aad007f8386d8e3e477c4ec9211ae54d968ecdca8c6f9"}, + {file = "aiohttp-3.7.3-cp39-cp39-manylinux2014_ppc64le.whl", hash = "sha256:1edfd82a98c5161497bbb111b2b70c0813102ad7e0aa81cbeb34e64c93863005"}, + {file = "aiohttp-3.7.3-cp39-cp39-manylinux2014_s390x.whl", hash = "sha256:77149002d9386fae303a4a162e6bce75cc2161347ad2ba06c2f0182561875d45"}, + {file = "aiohttp-3.7.3-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:756ae7efddd68d4ea7d89c636b703e14a0c686688d42f588b90778a3c2fc0564"}, + {file = "aiohttp-3.7.3-cp39-cp39-win32.whl", hash = "sha256:3b0036c978cbcc4a4512278e98e3e6d9e6b834dc973206162eddf98b586ef1c6"}, + {file = "aiohttp-3.7.3-cp39-cp39-win_amd64.whl", hash = "sha256:e1b95972a0ae3f248a899cdbac92ba2e01d731225f566569311043ce2226f5e7"}, + {file = "aiohttp-3.7.3.tar.gz", hash = "sha256:9c1a81af067e72261c9cbe33ea792893e83bc6aa987bfbd6fdc1e5e7b22777c4"}, ] appnope = [ {file = "appnope-0.1.2-py2.py3-none-any.whl", hash = "sha256:93aa393e9d6c54c5cd570ccadd8edad61ea0c4b9ea7a01409020c9aa019eb442"}, @@ -1281,8 +1289,8 @@ cffi = [ {file = "cffi-1.14.4.tar.gz", hash = "sha256:1a465cbe98a7fd391d47dce4b8f7e5b921e6cd805ef421d04f5f66ba8f06086c"}, ] chardet = [ - {file = "chardet-4.0.0-py2.py3-none-any.whl", hash = "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"}, - {file = "chardet-4.0.0.tar.gz", hash = "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa"}, + {file = "chardet-3.0.4-py2.py3-none-any.whl", hash = "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"}, + {file = "chardet-3.0.4.tar.gz", hash = "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae"}, ] click = [ {file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"}, @@ -1365,9 +1373,6 @@ idna = [ {file = "idna-2.10-py2.py3-none-any.whl", hash = "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"}, {file = "idna-2.10.tar.gz", hash = "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6"}, ] -idna-ssl = [ - {file = "idna-ssl-1.1.0.tar.gz", hash = "sha256:a933e3bb13da54383f9e8f35dc4f9cb9eb9b3b78c6b36f311254d6d0d92c6c7c"}, -] importlib-metadata = [ {file = "importlib_metadata-3.4.0-py3-none-any.whl", hash = "sha256:ace61d5fc652dc280e7b6b4ff732a9c2d40db2c0f92bc6cb74e07b73d53a1771"}, {file = "importlib_metadata-3.4.0.tar.gz", hash = "sha256:fa5daa4477a7414ae34e95942e4dd07f62adf589143c875c133c1e53c4eff38d"}, diff --git a/tools/generate_meta_file.py b/tools/generate_meta_file.py index 46c7fb89..2e05872a 100644 --- a/tools/generate_meta_file.py +++ b/tools/generate_meta_file.py @@ -5,9 +5,8 @@ from lookyloo.lookyloo import Lookyloo lookyloo = Lookyloo() -for capture_dir in lookyloo.capture_dirs: +for capture_uuid in lookyloo.capture_uuids: try: - ct = lookyloo.get_crawled_tree(capture_dir) + ct = lookyloo.get_crawled_tree(capture_uuid) except Exception: continue - lookyloo._ensure_meta(capture_dir, ct) diff --git a/tools/stats.py b/tools/stats.py index 6cc436d5..82155965 100644 --- a/tools/stats.py +++ b/tools/stats.py @@ -2,15 +2,17 @@ from lookyloo.lookyloo import Lookyloo import calendar import datetime from urllib.parse import urlparse +from typing import Dict, Any, Union, Set lookyloo = Lookyloo() -stats = {} +stats: Dict[Union[str, int], Any] = {} today = datetime.date.today() calendar_week = today.isocalendar()[1] -weeks_stats = {calendar_week - 1: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}, - calendar_week: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}} +weeks_stats: Dict[int, Dict[str, Union[int, Set[str]]]] = \ + {calendar_week - 1: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}, + calendar_week: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}} def uniq_domains(uniq_urls): @@ -23,26 +25,26 @@ def uniq_domains(uniq_urls): for uuid in lookyloo.capture_uuids: cache = lookyloo.capture_cache(uuid) - if 'timestamp' not in cache: + if not cache or not hasattr(cache, 'timestamp'): continue - date = datetime.datetime.fromisoformat(cache['timestamp'].rstrip('Z')) + date = cache.timestamp if date.year not in stats: stats[date.year] = {} if date.month not in stats[date.year]: - stats[date.year][date.month] = {'analysis': 0, 'analysis_with_redirects' :0, 'redirects': 0, 'uniq_urls': set()} + stats[date.year][date.month] = {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()} stats[date.year][date.month]['analysis'] += 1 - if len(cache['redirects']) > 0: + if len(cache.redirects) > 0: stats[date.year][date.month]['analysis_with_redirects'] += 1 - stats[date.year][date.month]['redirects'] += len(cache['redirects']) - stats[date.year][date.month]['uniq_urls'].update(cache['redirects']) - stats[date.year][date.month]['uniq_urls'].add(cache['url']) + stats[date.year][date.month]['redirects'] += len(cache.redirects) + stats[date.year][date.month]['uniq_urls'].update(cache.redirects) + stats[date.year][date.month]['uniq_urls'].add(cache.url) if date.isocalendar()[1] in weeks_stats: - weeks_stats[date.isocalendar()[1]]['analysis'] += 1 - if len(cache['redirects']) > 0: - weeks_stats[date.isocalendar()[1]]['analysis_with_redirects'] += 1 - weeks_stats[date.isocalendar()[1]]['redirects'] += len(cache['redirects']) - weeks_stats[date.isocalendar()[1]]['uniq_urls'].update(cache['redirects']) - weeks_stats[date.isocalendar()[1]]['uniq_urls'].add(cache['url']) + weeks_stats[date.isocalendar()[1]]['analysis'] += 1 # type: ignore + if len(cache.redirects) > 0: + weeks_stats[date.isocalendar()[1]]['analysis_with_redirects'] += 1 # type: ignore + weeks_stats[date.isocalendar()[1]]['redirects'] += len(cache.redirects) # type: ignore + weeks_stats[date.isocalendar()[1]]['uniq_urls'].update(cache.redirects) # type: ignore + weeks_stats[date.isocalendar()[1]]['uniq_urls'].add(cache.url) # type: ignore print('Statistics for the last two weeks:') for week_number, week_stat in weeks_stats.items(): @@ -50,7 +52,7 @@ for week_number, week_stat in weeks_stats.items(): print(' Number of analysis:', week_stat['analysis']) print(' Number of analysis with redirects:', week_stat['analysis_with_redirects']) print(' Number of redirects:', week_stat['redirects']) - print(' Number of unique URLs:', len(week_stat['uniq_urls'])) + print(' Number of unique URLs:', len(week_stat['uniq_urls'])) # type: ignore domains = uniq_domains(week_stat['uniq_urls']) print(' Number of unique domains:', len(domains))