From e6aded6c12a8aab938f82186fb112336871fdcf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Wed, 17 Jun 2020 11:01:03 +0200 Subject: [PATCH] chg: Add domains aggregation by cookies names --- lookyloo/lookyloo.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index e40c35a2..5fd6dd4c 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -102,6 +102,22 @@ class Indexing(): for capture_dir in self.lookyloo.capture_dirs: self.index_cookies_capture(capture_dir) + def aggregate_domain_cookies(self): + psl = publicsuffix2.PublicSuffixList() + pipeline = self.redis.pipeline() + for cn, cn_freq in self.cookies_names: + for domain, d_freq in self.get_cookie_domains(cn): + tld = psl.get_tld(domain) + main_domain_part = domain.strip(f'.{tld}').split('.')[-1] + pipeline.zincrby('aggregate_domains_cn', cn_freq, f'{main_domain_part}|{cn}') + pipeline.zincrby('aggregate_cn_domains', d_freq, f'{cn}|{main_domain_part}') + pipeline.execute() + aggregate_domains_cn = self.redis.zrevrange('aggregate_domains_cn', 0, -1, withscores=True) + aggregate_cn_domains = self.redis.zrevrange('aggregate_cn_domains', 0, -1, withscores=True) + self.redis.delete('aggregate_domains_cn') + self.redis.delete('aggregate_cn_domains') + return {'domains': aggregate_domains_cn, 'cookies': aggregate_cn_domains} + class Lookyloo():