From 6ba019ec83a37dbf64f3eea671d3a0725402715d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Thu, 9 Jun 2022 18:57:40 +0200 Subject: [PATCH] chg: Improve somewhat the useragents available for capturing Fix #416 --- bin/async_capture.py | 6 +- config/generic.json.sample | 2 - lookyloo/helpers.py | 39 ++- user_agents/2022/06/2022-06-09.json | 500 ++++++++++++++++++++++++++++ website/web/__init__.py | 18 +- 5 files changed, 544 insertions(+), 21 deletions(-) create mode 100644 user_agents/2022/06/2022-06-09.json diff --git a/bin/async_capture.py b/bin/async_capture.py index bfe67be..ecaf1fa 100755 --- a/bin/async_capture.py +++ b/bin/async_capture.py @@ -16,7 +16,7 @@ from redis.asyncio import Redis from playwrightcapture import Capture from lookyloo.default import AbstractManager, get_config, get_socket_path, safe_create_dir -from lookyloo.helpers import get_captures_dir, load_cookies +from lookyloo.helpers import get_captures_dir, load_cookies, UserAgents from lookyloo.modules import FOX @@ -31,6 +31,7 @@ class AsyncCapture(AbstractManager): self.script_name = 'async_capture' self.only_global_lookups: bool = get_config('generic', 'only_global_lookups') self.capture_dir: Path = get_captures_dir() + self.user_agents = UserAgents() self.fox = FOX(get_config('modules', 'FOX')) if not self.fox.available: @@ -134,7 +135,8 @@ class AsyncCapture(AbstractManager): cookies = load_cookies(cookies_pseudofile) if not user_agent: # Catch case where the UA is broken on the UI, and the async submission. - ua: str = get_config('generic', 'default_user_agent') + self.user_agents.user_agents # triggers an update if needed + ua: str = self.user_agents.default['useragent'] else: ua = user_agent diff --git a/config/generic.json.sample b/config/generic.json.sample index 6b524d2..18fd090 100644 --- a/config/generic.json.sample +++ b/config/generic.json.sample @@ -7,7 +7,6 @@ "website_listen_port": 5100, "systemd_service_name": "lookyloo", "default_public": true, - "default_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36", "users": {}, "time_delta_on_index": { "weeks": 1, @@ -56,7 +55,6 @@ "website_listen_port": "Port Flask will listen on.", "systemd_service_name": "(Optional) Name of the systemd service if your project has one.", "default_public": "If true, the capture is public and will be visible on the index page by default (can be unticked on the capture page).", - "default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, doesn't provide a user agent.", "users": "It is some kind of an admin accounts. Format: {username: password}", "time_delta_on_index": "Time interval of the capture displayed on the index", "max_depth": "Maximum depth for scraping. Anything > 1 will be exponentially bigger.", diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py index fbae98c..fdcf154 100644 --- a/lookyloo/helpers.py +++ b/lookyloo/helpers.py @@ -20,7 +20,7 @@ from ua_parser import user_agent_parser # type: ignore from werkzeug.user_agent import UserAgent from werkzeug.utils import cached_property -from .default import get_homedir, safe_create_dir +from .default import get_homedir, safe_create_dir, get_config logger = logging.getLogger('Lookyloo - Helpers') @@ -85,11 +85,40 @@ def get_email_template() -> str: return f.read() -def get_user_agents(directory: str='user_agents') -> Dict[str, Any]: - ua_files_path = sorted((get_homedir() / directory).glob('**/*.json'), reverse=True) - with ua_files_path[0].open() as f: - return json.load(f) +class UserAgents: + def __init__(self): + if get_config('generic', 'use_user_agents_users'): + self.path = get_homedir() / 'own_user_agents' + else: + self.path = get_homedir() / 'user_agents' + + ua_files_path = sorted(self.path.glob('**/*.json'), reverse=True) + self.most_recent_ua_path = ua_files_path[0] + with self.most_recent_ua_path.open() as f: + self.most_recent_uas = json.load(f) + self.by_freq = self.most_recent_uas.pop('by_frequency') + + @property + def user_agents(self) -> Dict[str, Dict[str, List[str]]]: + ua_files_path = sorted(self.path.glob('**/*.json'), reverse=True) + if ua_files_path[0] != self.most_recent_ua_path: + self.most_recent_ua_path = ua_files_path[0] + with self.most_recent_ua_path.open() as f: + self.most_recent_uas = json.load(f) + self.by_freq = self.most_recent_uas.pop('by_frequency') + return self.most_recent_uas + + @property + def default(self) -> Dict[str, str]: + blocked_words = ['bot', 'bing'] + for ua in self.by_freq: + if ua["os"] == "Other": + continue + if any(blockedword in ua['useragent'].lower() for blockedword in blocked_words): + continue + return ua + raise Exception('Erros with the User agents.') def load_known_content(directory: str='known_content') -> Dict[str, Dict[str, Any]]: to_return: Dict[str, Dict[str, Any]] = {} diff --git a/user_agents/2022/06/2022-06-09.json b/user_agents/2022/06/2022-06-09.json new file mode 100644 index 0000000..b4162fe --- /dev/null +++ b/user_agents/2022/06/2022-06-09.json @@ -0,0 +1,500 @@ +{ + "by_frequency": [ + { + "os": "Win10", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Firefox 100.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0" + }, + { + "os": "Win10", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 102.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Firefox 91.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0" + }, + { + "os": "Linux", + "browser": "Firefox 100.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0" + }, + { + "os": "Win10", + "browser": "Firefox 101.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0" + }, + { + "os": "Win10", + "browser": "Chrome 102.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 102.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Safari 15.4", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15" + }, + { + "os": "macOS", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Firefox 100.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:100.0) Gecko/20100101 Firefox/100.0" + }, + { + "os": "Win10", + "browser": "Chrome 102.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Safari Generic", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" + }, + { + "os": "Linux", + "browser": "Chrome 102.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 100.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Firefox 101.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64; rv:101.0) Gecko/20100101 Firefox/101.0" + }, + { + "os": "Win10", + "browser": "Edge 101.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53" + }, + { + "os": "Linux", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Firefox 100.0", + "useragent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0" + }, + { + "os": "Win10", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 102.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.62 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 102.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Edge 101.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.47" + }, + { + "os": "Win10", + "browser": "Edge 101.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39" + }, + { + "os": "macOS", + "browser": "Chrome 100.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Safari 15.3", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.3 Safari/605.1.15" + }, + { + "os": "Linux", + "browser": "Firefox 91.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0" + }, + { + "os": "macOS", + "browser": "Firefox 101.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:101.0) Gecko/20100101 Firefox/101.0" + }, + { + "os": "Win10", + "browser": "Chrome 100.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Edge 102.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 Edg/102.0.1245.33" + }, + { + "os": "Linux", + "browser": "Firefox 99.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0" + }, + { + "os": "macOS", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Firefox 99.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0" + }, + { + "os": "Win7", + "browser": "Firefox 100.0", + "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0" + }, + { + "os": "Win10", + "browser": "Chrome 100.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 OPR/86.0.4363.59" + }, + { + "os": "Linux", + "browser": "Chrome 100.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Safari 15.1", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.1 Safari/605.1.15" + }, + { + "os": "Win10", + "browser": "Firefox 91.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0" + }, + { + "os": "Linux", + "browser": "Firefox 99.0", + "useragent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0" + }, + { + "os": "Linux", + "browser": "Chrome 102.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 79.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" + }, + { + "os": "Win7", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36" + }, + { + "os": "Win7", + "browser": "Chrome 102.0", + "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 100.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Yandex Browser Generic", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.141 YaBrowser/22.3.3.852 Yowser/2.5 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Opera Generic", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36 OPR/85.0.4341.71" + }, + { + "os": "Linux", + "browser": "Firefox 100.0", + "useragent": "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0" + }, + { + "os": "Win10", + "browser": "Edge 102.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 Edg/102.0.1245.30" + }, + { + "os": "Win7", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 100.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 OPR/86.0.4363.64" + }, + { + "os": "Win10", + "browser": "Yandex Browser Generic", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.143 YaBrowser/22.5.0.1814 Yowser/2.5 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Firefox 102.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0" + }, + { + "os": "Win8.1", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 100.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Safari 14.1", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15" + }, + { + "os": "Win10", + "browser": "Chrome 100.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 OPR/86.0.4363.70" + }, + { + "os": "Win10", + "browser": "Chrome 100.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Firefox 99.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:99.0) Gecko/20100101 Firefox/99.0" + }, + { + "os": "macOS", + "browser": "Safari 15.4", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15" + }, + { + "os": "Win10", + "browser": "Chrome 101.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 86.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 86.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36" + } + ], + "Win10": { + "Chrome 101.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36" + ], + "Firefox 100.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0" + ], + "Chrome 102.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.62 Safari/537.36" + ], + "Firefox 91.0": [ + "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0" + ], + "Firefox 101.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:101.0) Gecko/20100101 Firefox/101.0" + ], + "Chrome 100.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 OPR/86.0.4363.59", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 OPR/86.0.4363.64", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 OPR/86.0.4363.70", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36" + ], + "Edge 101.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.53", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36 Edg/101.0.1210.47", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36 Edg/101.0.1210.39" + ], + "Edge 102.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 Edg/102.0.1245.33", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.63 Safari/537.36 Edg/102.0.1245.30" + ], + "Firefox 99.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:99.0) Gecko/20100101 Firefox/99.0" + ], + "Yandex Browser Generic": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.141 YaBrowser/22.3.3.852 Yowser/2.5 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.143 YaBrowser/22.5.0.1814 Yowser/2.5 Safari/537.36" + ], + "Opera Generic": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36 OPR/85.0.4341.71" + ], + "Firefox 102.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0" + ], + "Chrome 86.0": [ + "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36" + ] + }, + "macOS": { + "Chrome 101.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36" + ], + "Chrome 102.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36" + ], + "Safari 15.4": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15" + ], + "Firefox 100.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:100.0) Gecko/20100101 Firefox/100.0" + ], + "Safari Generic": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15" + ], + "Chrome 100.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36" + ], + "Safari 15.3": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.3 Safari/605.1.15" + ], + "Firefox 101.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:101.0) Gecko/20100101 Firefox/101.0" + ], + "Safari 15.1": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.1 Safari/605.1.15" + ], + "Chrome 79.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36" + ], + "Safari 14.1": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Safari/605.1.15" + ], + "Firefox 99.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:99.0) Gecko/20100101 Firefox/99.0" + ] + }, + "Linux": { + "Firefox 100.0": [ + "Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0", + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0", + "Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0" + ], + "Chrome 102.0": [ + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.61 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36" + ], + "Firefox 101.0": [ + "Mozilla/5.0 (X11; Linux x86_64; rv:101.0) Gecko/20100101 Firefox/101.0" + ], + "Chrome 101.0": [ + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Safari/537.36" + ], + "Firefox 91.0": [ + "Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Firefox/91.0" + ], + "Firefox 99.0": [ + "Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0", + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0" + ], + "Chrome 100.0": [ + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36" + ] + }, + "Win7": { + "Firefox 100.0": [ + "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0" + ], + "Chrome 101.0": [ + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.54 Safari/537.36", + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36" + ], + "Chrome 102.0": [ + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36" + ] + }, + "Win8.1": { + "Chrome 101.0": [ + "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36" + ] + } +} \ No newline at end of file diff --git a/website/web/__init__.py b/website/web/__init__.py index a52cff3..d55e225 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -24,7 +24,7 @@ from werkzeug.security import check_password_hash from lookyloo.default import get_config from lookyloo.exceptions import MissingUUID, NoValidHarFile from lookyloo.helpers import (CaptureStatus, get_taxonomies, - get_user_agents, load_cookies) + UserAgents, load_cookies) from lookyloo.lookyloo import Indexing, Lookyloo from .genericapi import api as generic_api @@ -49,6 +49,9 @@ version = pkg_resources.get_distribution('lookyloo').version login_manager = flask_login.LoginManager() login_manager.init_app(app) +# User agents manager +user_agents = UserAgents() + @login_manager.user_loader def user_loader(username): @@ -796,17 +799,8 @@ def search(): def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[str]=None): - user_agents: Dict[str, Any] = {} - if use_own_ua: - user_agents = get_user_agents('own_user_agents') - if not user_agents: - user_agents = get_user_agents() - # get most frequest UA that isn't a bot (yes, it is dirty.) - for ua in user_agents.pop('by_frequency'): - if not any(blockedword in ua['useragent'].lower() for blockedword in ['bot', 'bing']): - default_ua = ua - break - return render_template('capture.html', user_agents=user_agents, default=default_ua, + return render_template('capture.html', user_agents=user_agents.user_agents, + default=user_agents.default, max_depth=max_depth, personal_ua=user_ua, default_public=get_config('generic', 'default_public'), predefined_url_to_capture=predefined_url if predefined_url else '')