diff --git a/bin/background_processing.py b/bin/background_processing.py index a773769..369e5ad 100755 --- a/bin/background_processing.py +++ b/bin/background_processing.py @@ -9,7 +9,7 @@ from typing import Any, Dict from redis import Redis from lookyloo.default import AbstractManager, get_config, get_homedir, get_socket_path, safe_create_dir -from lookyloo.helpers import ParsedUserAgent +from lookyloo.helpers import ParsedUserAgent, serialize_to_json logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s', level=logging.INFO) @@ -58,13 +58,13 @@ class Processing(AbstractManager): if platform_key not in to_store: to_store[platform_key] = {} if browser_key not in to_store[platform_key]: - to_store[platform_key][browser_key] = [] - to_store[platform_key][browser_key].append(parsed_ua.string) + to_store[platform_key][browser_key] = set() + to_store[platform_key][browser_key].add(parsed_ua.string) to_store['by_frequency'].append({'os': platform_key, 'browser': browser_key, 'useragent': parsed_ua.string}) with self_generated_ua_file.open('w') as f: - json.dump(to_store, f, indent=2) + json.dump(to_store, f, indent=2, default=serialize_to_json) # Remove the UA / IP mapping. redis.delete(f'user_agents|{yesterday.isoformat()}') diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py index 842c19e..b5d1fe4 100644 --- a/lookyloo/helpers.py +++ b/lookyloo/helpers.py @@ -38,7 +38,7 @@ class CaptureStatus(IntEnum): # json.dumps(..., default=dump_to_json) def serialize_to_json(obj: Union[Set]) -> Union[List]: if isinstance(obj, set): - return list(obj) + return sorted(obj) def get_resources_hashes(har2tree_container: Union[CrawledTree, HostNode, URLNode]) -> Set[str]: @@ -96,13 +96,13 @@ class UserAgents: ua_files_path = sorted(self.path.glob('**/*.json'), reverse=True) self._load_newest_ua_file(ua_files_path[0]) - self._load_playwright_devices() def _load_newest_ua_file(self, path: Path): self.most_recent_ua_path = path with self.most_recent_ua_path.open() as f: self.most_recent_uas = json.load(f) self.by_freq = self.most_recent_uas.pop('by_frequency') + self._load_playwright_devices() def _load_playwright_devices(self): self.playwright_devices = get_devices() @@ -121,7 +121,10 @@ class UserAgents: self.most_recent_uas[platform_key] = {} if browser_key not in self.most_recent_uas[platform_key]: self.most_recent_uas[platform_key][browser_key] = [] - self.most_recent_uas[platform_key][browser_key].append(parsed_ua.string) + if parsed_ua.string in self.most_recent_uas[platform_key][browser_key]: + self.most_recent_uas[platform_key][browser_key].remove(parsed_ua.string) + # We want that one at the top of the list. + self.most_recent_uas[platform_key][browser_key].insert(0, parsed_ua.string) @property def user_agents(self) -> Dict[str, Dict[str, List[str]]]: diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 50a228f..47c7e36 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -876,7 +876,8 @@ class Lookyloo(): ct = self.get_crawled_tree(tree_uuid) return {node.name for node in ct.root_hartree.url_tree.traverse()} - def get_playwright_devices(self): + def get_playwright_devices(self) -> Dict: + """Get the preconfigured devices from Playwright""" return get_devices() def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]: diff --git a/tools/manual_parse_ua_list.py b/tools/manual_parse_ua_list.py index 97010fb..e207d65 100644 --- a/tools/manual_parse_ua_list.py +++ b/tools/manual_parse_ua_list.py @@ -14,7 +14,7 @@ except ImportError: HAS_CF = False from lookyloo.default import get_homedir, safe_create_dir -from lookyloo.helpers import ParsedUserAgent +from lookyloo.helpers import ParsedUserAgent, serialize_to_json def update_user_agents() -> None: @@ -65,8 +65,8 @@ def ua_parser(html_content: str) -> Dict[str, Any]: if platform_key not in to_store: to_store[platform_key] = {} if browser_key not in to_store[platform_key]: - to_store[platform_key][browser_key] = [] - to_store[platform_key][browser_key].append(parsed_ua.string) + to_store[platform_key][browser_key] = set() + to_store[platform_key][browser_key].add(parsed_ua.string) to_store['by_frequency'].append({'os': platform_key, 'browser': browser_key, 'useragent': parsed_ua.string}) @@ -85,7 +85,7 @@ def main(): to_store = ua_parser(f.read()) with open(ua_file_name, 'w') as f: - json.dump(to_store, f, indent=2) + json.dump(to_store, f, indent=2, default=serialize_to_json) if __name__ == '__main__': diff --git a/website/web/templates/capture.html b/website/web/templates/capture.html index d57bdd1..9ee1996 100644 --- a/website/web/templates/capture.html +++ b/website/web/templates/capture.html @@ -134,7 +134,7 @@
- {% for browser in browsers.keys()%} + {% for browser in browsers.keys()|sort(reverse=True) %} {% endfor%}
- {% for browser, user_agents in browsers.items()%} + {% for browser, user_agents in browsers.items() %}