diff --git a/lookyloo/indexing.py b/lookyloo/indexing.py
index dcecc74..289b158 100644
--- a/lookyloo/indexing.py
+++ b/lookyloo/indexing.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-from typing import Set, Tuple, List
+from typing import Set, Tuple, List, Optional, Dict, Any
 
 from redis import Redis
 
@@ -28,8 +28,25 @@ class Indexing():
     def get_cookie_domains(self, cookie_name: str) -> List[Tuple[str, float]]:
         return self.redis.zrevrange(f'cn|{cookie_name}', 0, -1, withscores=True)
 
+    def get_capture_cache(self, capture_uuid: str) -> Optional[Dict[str, Any]]:
+        """Return the cached data of the capture identified by capture_uuid.
+
+        Returns an empty dict when no capture directory is found for the UUID.
+        """
+        capture_dir = self.lookyloo.lookup_capture_dir(capture_uuid)
+        if capture_dir:
+            return self.lookyloo.capture_cache(capture_dir)
+        return {}
+
+    def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
+        """Return the (capture UUID, URL node UUID) pairs recorded for cookie_name."""
+        # Members are stored by index_cookies as '<capture uuid>|<urlnode uuid>'.
+        members = self.redis.smembers(f'cn|{cookie_name}|captures')
+        return [(capture, urlnode) for capture, _, urlnode in (m.partition('|') for m in members)]
+
     def index_cookies(self) -> None:
         for capture_dir in self.lookyloo.capture_dirs:
+            print(f'Processing {capture_dir}')
             try:
                 crawled_tree = self.lookyloo.get_crawled_tree(capture_dir)
             except Exception as e:
@@ -47,6 +64,7 @@ class Indexing():
                         already_loaded.add((name, domain))
                         pipeline.zincrby('cookies_names', 1, name)
                         pipeline.zincrby(f'cn|{name}', 1, domain)
+                        pipeline.sadd(f'cn|{name}|captures', f'{crawled_tree.uuid}|{urlnode.uuid}')
                         pipeline.zincrby(f'cn|{name}|{domain}', 1, value)
                         pipeline.sadd('lookyloo_domains', domain)
 
diff --git a/website/web/__init__.py b/website/web/__init__.py
index cbc67ed..bf5fca0 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -431,9 +431,10 @@ def cookies_lookup():
 @app.route('/cookies/', methods=['GET'])
 def cookies_name_detail(cookie_name: str):
     i = Indexing()
+    captures = [i.get_capture_cache(capture) for capture, url in i.get_cookies_names_captures(cookie_name)]
     domains = [(domain, freq, i.cookies_names_domains_values(cookie_name, domain))
                for domain, freq in i.get_cookie_domains(cookie_name)]
-    return render_template('cookie_name.html', cookie_name=cookie_name, domains=domains)
+    return render_template('cookie_name.html', cookie_name=cookie_name, domains=domains, captures=captures)
 
 
 # Query API