chg: Improve UA rendering

pull/502/head
Raphaël Vinot 2022-08-23 17:44:48 +02:00
parent ebbe6e3ce9
commit f232eba662
5 changed files with 19 additions and 15 deletions

View File

@ -9,7 +9,7 @@ from typing import Any, Dict
from redis import Redis
from lookyloo.default import AbstractManager, get_config, get_homedir, get_socket_path, safe_create_dir
from lookyloo.helpers import ParsedUserAgent
from lookyloo.helpers import ParsedUserAgent, serialize_to_json
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
level=logging.INFO)
@ -58,13 +58,13 @@ class Processing(AbstractManager):
if platform_key not in to_store:
to_store[platform_key] = {}
if browser_key not in to_store[platform_key]:
to_store[platform_key][browser_key] = []
to_store[platform_key][browser_key].append(parsed_ua.string)
to_store[platform_key][browser_key] = set()
to_store[platform_key][browser_key].add(parsed_ua.string)
to_store['by_frequency'].append({'os': platform_key,
'browser': browser_key,
'useragent': parsed_ua.string})
with self_generated_ua_file.open('w') as f:
json.dump(to_store, f, indent=2)
json.dump(to_store, f, indent=2, default=serialize_to_json)
# Remove the UA / IP mapping.
redis.delete(f'user_agents|{yesterday.isoformat()}')

View File

@ -38,7 +38,7 @@ class CaptureStatus(IntEnum):
# json.dumps(..., default=serialize_to_json)
def serialize_to_json(obj: Union[Set]) -> Union[List]:
if isinstance(obj, set):
return list(obj)
return sorted(obj)
def get_resources_hashes(har2tree_container: Union[CrawledTree, HostNode, URLNode]) -> Set[str]:
@ -96,13 +96,13 @@ class UserAgents:
ua_files_path = sorted(self.path.glob('**/*.json'), reverse=True)
self._load_newest_ua_file(ua_files_path[0])
self._load_playwright_devices()
def _load_newest_ua_file(self, path: Path):
self.most_recent_ua_path = path
with self.most_recent_ua_path.open() as f:
self.most_recent_uas = json.load(f)
self.by_freq = self.most_recent_uas.pop('by_frequency')
self._load_playwright_devices()
def _load_playwright_devices(self):
self.playwright_devices = get_devices()
@ -121,7 +121,10 @@ class UserAgents:
self.most_recent_uas[platform_key] = {}
if browser_key not in self.most_recent_uas[platform_key]:
self.most_recent_uas[platform_key][browser_key] = []
self.most_recent_uas[platform_key][browser_key].append(parsed_ua.string)
if parsed_ua.string in self.most_recent_uas[platform_key][browser_key]:
self.most_recent_uas[platform_key][browser_key].remove(parsed_ua.string)
# We want that one at the top of the list.
self.most_recent_uas[platform_key][browser_key].insert(0, parsed_ua.string)
@property
def user_agents(self) -> Dict[str, Dict[str, List[str]]]:

View File

@ -876,7 +876,8 @@ class Lookyloo():
ct = self.get_crawled_tree(tree_uuid)
return {node.name for node in ct.root_hartree.url_tree.traverse()}
def get_playwright_devices(self):
def get_playwright_devices(self) -> Dict:
"""Get the preconfigured devices from Playwright"""
return get_devices()
def get_hostnode_investigator(self, capture_uuid: str, /, node_uuid: str) -> Tuple[HostNode, List[Dict[str, Any]]]:

View File

@ -14,7 +14,7 @@ except ImportError:
HAS_CF = False
from lookyloo.default import get_homedir, safe_create_dir
from lookyloo.helpers import ParsedUserAgent
from lookyloo.helpers import ParsedUserAgent, serialize_to_json
def update_user_agents() -> None:
@ -65,8 +65,8 @@ def ua_parser(html_content: str) -> Dict[str, Any]:
if platform_key not in to_store:
to_store[platform_key] = {}
if browser_key not in to_store[platform_key]:
to_store[platform_key][browser_key] = []
to_store[platform_key][browser_key].append(parsed_ua.string)
to_store[platform_key][browser_key] = set()
to_store[platform_key][browser_key].add(parsed_ua.string)
to_store['by_frequency'].append({'os': platform_key,
'browser': browser_key,
'useragent': parsed_ua.string})
@ -85,7 +85,7 @@ def main():
to_store = ua_parser(f.read())
with open(ua_file_name, 'w') as f:
json.dump(to_store, f, indent=2)
json.dump(to_store, f, indent=2, default=serialize_to_json)
if __name__ == '__main__':

View File

@ -134,7 +134,7 @@
<label for="os" class="col-sm-2 col-form-label">Operating System:</label>
<div class="col-sm-10">
<select class="form-select" name="os" id="os">
{% for os in user_agents.keys() %}
{% for os in user_agents.keys()|sort(reverse=True) %}
<!-- Select the default os -->
<option value="{{ os }}" {% if os==default['os'] %}selected{% endif %}>{{ os }}</option>
{% endfor%}
@ -149,14 +149,14 @@
<div class="col-sm-10">
<!-- Disable all the selects not related to the default os -->
<select class="form-select" name="browser" {% if not os==default['os'] %}disabled{%endif%}>
{% for browser in browsers.keys()%}
{% for browser in browsers.keys()|sort(reverse=True) %}
<!-- Select the default browser -->
<option value="{{ browser }}" {% if browser==default['browser'] %}selected{% endif %}>{{ browser }}</option>
{% endfor%}
</select>
</div>
</div>
{% for browser, user_agents in browsers.items()%}
{% for browser, user_agents in browsers.items() %}
<!-- Hide the user-agent list unless it belongs to both the default OS and the default browser -->
<div id="{{os.replace(' ', '_')}}_{{browser.replace(' ', '_')}}" class="style-sub-2 row mb-3" {% if not os==default['os'] or not browser==default['browser']%} style="display: none;"{%endif%}>
<label for="user_agent" class="col-sm-2 col-form-label">User-Agent:</label>