From 9952e4de32a08c3f0eb085c2b1ff91850de49af7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Sun, 18 Apr 2021 17:58:16 +0200 Subject: [PATCH] fix: remove UA - IP mapping from redis --- lookyloo/lookyloo.py | 3 +++ website/web/__init__.py | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 6ce1f3e..b59b5da 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -123,6 +123,9 @@ class Lookyloo(): with self_generated_ua_file.open('w') as f: json.dump(to_store, f, indent=2) + # Remove the UA / IP mapping. + self.redis.delete(f'user_agents|{yesterday.isoformat()}') + def _cache_capture(self, capture_uuid: str) -> CrawledTree: '''Generate the pickle, set the cache, add capture in the indexes''' capture_dir = self._get_capture_dir(capture_uuid) diff --git a/website/web/__init__.py b/website/web/__init__.py index 0b5d44b..11f4c62 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -191,6 +191,14 @@ app.jinja_env.globals.update(month_name=month_name) @app.after_request def after_request(response): + # We keep a list of user agents in order to build a list to use in the capture + # interface: this is the easiest way to have something up to date. + # The reason we also get the IP address of the client is because we + # count the frequency of each user agent and use it to sort them on the + # capture page, and we want to avoid counting the same user (same IP) + # multiple times in a day. + # The cache of IPs is deleted after the UA file is generated (see lookyloo.build_ua_file), + # once a day. ua = request.headers.get('User-Agent') real_ip = request.headers.get('X-Real-IP') if ua: