From cde3d29892953ab68e4ff63e8819d27bec309857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Thu, 25 Jun 2020 16:43:36 +0200 Subject: [PATCH] new: Add option to use User agents of the Lookyloo users for scraping --- config/generic.json.sample | 2 + lookyloo/helpers.py | 15 +- lookyloo/lookyloo.py | 60 ++- tools/README.md | 4 + tools/generate_meta_file.py | 13 + tools/manual_parse_ua_list.py | 21 + user_agents/2020/06/2020-06-25.json | 574 ++++++++++++++++++++++++++++ website/web/__init__.py | 16 +- 8 files changed, 697 insertions(+), 8 deletions(-) create mode 100644 tools/README.md create mode 100644 tools/generate_meta_file.py create mode 100644 tools/manual_parse_ua_list.py create mode 100644 user_agents/2020/06/2020-06-25.json diff --git a/config/generic.json.sample b/config/generic.json.sample index 0bab9024..50abe424 100644 --- a/config/generic.json.sample +++ b/config/generic.json.sample @@ -10,6 +10,7 @@ "days": 0, "hours": 0 }, + "use_user_agents_users": false, "enable_mail_notification": false, "email": { "from": "Lookyloo ", @@ -27,6 +28,7 @@ "default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, don't provide a UA", "cache_clean_user": "Format: {username: password}", "time_delta_on_index": "Time interval of the capture displayed on the index", + "use_user_agents_users": "Only usable for medium/high use instances: use the user agents of the users of the platform", "enable_mail_notification": "Enable email notification or not", "email": "Configuration for sending email notifications." } diff --git a/lookyloo/helpers.py b/lookyloo/helpers.py index ae8b662a..add73c20 100644 --- a/lookyloo/helpers.py +++ b/lookyloo/helpers.py @@ -143,7 +143,13 @@ def update_user_agents() -> None: except Exception: traceback.print_exc() return - soup = BeautifulSoup(r.text, 'html.parser') + to_store = ua_parser(r.text) + with open(ua_file_name, 'w') as f: + json.dump(to_store, f, indent=2) + + +def ua_parser(html_content: str) -> Dict[str, Any]: + soup = BeautifulSoup(html_content, 'html.parser') try: uas = soup.find_all('textarea')[1].text @@ -161,12 +167,11 @@ def update_user_agents() -> None: to_store[os][browser] = [] to_store[os][browser].append(ua['useragent']) to_store['by_frequency'].append({'os': os, 'browser': browser, 'useragent': ua['useragent']}) - with open(ua_file_name, 'w') as f: - json.dump(to_store, f, indent=2) + return to_store -def get_user_agents() -> Dict[str, Any]: - ua_files_path = str(get_homedir() / 'user_agents' / '*' / '*' / '*.json') +def get_user_agents(directory: str='user_agents') -> Dict[str, Any]: + ua_files_path = str(get_homedir() / directory / '*' / '*' / '*.json') paths = sorted(glob(ua_files_path), reverse=True) if not paths: update_user_agents() diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 65128917..46d67178 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -2,9 +2,9 @@ # -*- coding: utf-8 -*- import base64 -from collections import defaultdict +from collections import defaultdict, Counter -from datetime import datetime +from datetime import datetime, date, timedelta from email.message import EmailMessage from io import BufferedIOBase, BytesIO import ipaddress @@ -24,6 +24,8 @@ from har2tree import CrawledTree, Har2TreeError, HarFile, HostNode, URLNode from redis import Redis from scrapysplashwrapper import crawl +from werkzeug.useragents import UserAgent + from .exceptions import NoValidHarFile, MissingUUID from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, safe_create_dir, get_email_template from .modules import VirusTotal, SaneJavaScript, PhishingInitiative @@ -63,6 +65,36 @@ class Lookyloo(): if not self.redis.exists('cache_loaded'): self._init_existing_dumps() + def cache_user_agents(self, user_agent: str, remote_ip: str) -> None: + today = date.today().isoformat() + self.redis.zincrby(f'user_agents|{today}', 1, f'{remote_ip}|{user_agent}') + + def build_ua_file(self) -> None: + yesterday = (date.today() - timedelta(days=1)) + self_generated_ua_file_path = get_homedir() / 'own_user_agents' / str(yesterday.year) / f'{yesterday.month:02}' + safe_create_dir(self_generated_ua_file_path) + self_generated_ua_file = self_generated_ua_file_path / f'{yesterday.isoformat()}.json' + if self_generated_ua_file.exists(): + return + entries = self.redis.zrevrange(f'user_agents|{yesterday.isoformat()}', 0, -1) + if not entries: + return + + to_store: Dict[str, Any] = {'by_frequency': []} + uas = Counter([entry.split('|', 1)[1] for entry in entries]) + for ua, count in uas.most_common(): + parsed_ua = UserAgent(ua) + if parsed_ua.platform not in to_store: + to_store[parsed_ua.platform] = {} + if f'{parsed_ua.browser} {parsed_ua.version}' not in to_store[parsed_ua.platform]: + to_store[parsed_ua.platform][f'{parsed_ua.browser} {parsed_ua.version}'] = [] + to_store[parsed_ua.platform][f'{parsed_ua.browser} {parsed_ua.version}'].append(parsed_ua.string) + to_store['by_frequency'].append({'os': parsed_ua.platform, + 'browser': f'{parsed_ua.browser} {parsed_ua.version}', + 'useragent': parsed_ua.string}) + with self_generated_ua_file.open('w') as f: + json.dump(to_store, f, indent=2) + def rebuild_cache(self) -> None: self.redis.flushdb() self._init_existing_dumps() @@ -314,6 +346,29 @@ class Lookyloo(): except Exception as e: logging.exception(e) + def _ensure_meta(self, capture_dir: Path, tree: CrawledTree) -> None: + metafile = capture_dir / 'meta' + if metafile.exists(): + return + ua = UserAgent(tree.root_hartree.user_agent) + to_dump = {} + if ua.platform: + to_dump['os'] = ua.platform + if ua.browser: + if ua.version: + to_dump['browser'] = f'{ua.browser} {ua.version}' + else: + to_dump['browser'] = ua.browser + if ua.language: + to_dump['language'] = ua.language + + if not to_dump: + # UA not recognized + self.logger.info(f'Unable to recognize the User agent: {ua}') + to_dump['user_agent'] = ua.string + with metafile.open('w') as f: + json.dump(to_dump, f) + def get_crawled_tree(self, capture_dir: Path) -> CrawledTree: pickle_file = capture_dir / 'tree.pickle' ct = self._load_pickle(pickle_file) @@ -323,6 +378,7 @@ class Lookyloo(): har_files = sorted(capture_dir.glob('*.har')) try: ct = CrawledTree(har_files, uuid) + self._ensure_meta(capture_dir, ct) except Har2TreeError as e: raise NoValidHarFile(e.message) with pickle_file.open('wb') as _p: diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 00000000..66e1515a --- /dev/null +++ b/tools/README.md @@ -0,0 +1,4 @@ +# Tools used for the maintenance of a Lookyloo instance + +* `generate_meta_file.py`: Make sure all the captures have a meta file (short view of the User Agent) +* `manual_parse_ua_list.py`: Parse html dump from https://techblog.willshouse.com/2012/01/03/most-common-user-agents/ diff --git a/tools/generate_meta_file.py b/tools/generate_meta_file.py new file mode 100644 index 00000000..46c7fb89 --- /dev/null +++ b/tools/generate_meta_file.py @@ -0,0 +1,13 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from lookyloo.lookyloo import Lookyloo + +lookyloo = Lookyloo() + +for capture_dir in lookyloo.capture_dirs: + try: + ct = lookyloo.get_crawled_tree(capture_dir) + except Exception: + continue + lookyloo._ensure_meta(capture_dir, ct) diff --git a/tools/manual_parse_ua_list.py b/tools/manual_parse_ua_list.py new file mode 100644 index 00000000..a05166d9 --- /dev/null +++ b/tools/manual_parse_ua_list.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from datetime import datetime +from pathlib import Path +import json + +from lookyloo.helpers import ua_parser, get_homedir, safe_create_dir + +to_parse = Path('Most Common User Agents - Tech Blog (wh).html') + +today = datetime.now() +ua_path = get_homedir() / 'user_agents' / str(today.year) / f'{today.month:02}' +safe_create_dir(ua_path) +ua_file_name: Path = ua_path / f'{today.date().isoformat()}.json' + +with to_parse.open() as f: + to_store = ua_parser(f.read()) + +with open(ua_file_name, 'w') as f: + json.dump(to_store, f, indent=2) diff --git a/user_agents/2020/06/2020-06-25.json b/user_agents/2020/06/2020-06-25.json new file mode 100644 index 00000000..551730f5 --- /dev/null +++ b/user_agents/2020/06/2020-06-25.json @@ -0,0 +1,574 @@ +{ + "by_frequency": [ + { + "os": "Win10", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Firefox 77.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0" + }, + { + "os": "Win10", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Firefox 76.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0" + }, + { + "os": "macOS", + "browser": "Safari 13.1", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15" + }, + { + "os": "macOS", + "browser": "Safari 13.1", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Safari/605.1.15" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Firefox 68.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; rv:68.0) Gecko/20100101 Firefox/68.0" + }, + { + "os": "Win7", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Firefox 77.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:77.0) Gecko/20100101 Firefox/77.0" + }, + { + "os": "Linux", + "browser": "Firefox 77.0", + "useragent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0" + }, + { + "os": "Win10", + "browser": "Chrome 74.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Opera 68", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125" + }, + { + "os": "macOS", + "browser": "Firefox 76.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:76.0) Gecko/20100101 Firefox/76.0" + }, + { + "os": "Linux", + "browser": "Firefox 76.0", + "useragent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" + }, + { + "os": "Win7", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Firefox 68.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0" + }, + { + "os": "Win10", + "browser": "IE 11.0 for Desktop", + "useragent": "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko" + }, + { + "os": "Linux", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Edge 18.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362" + }, + { + "os": "Win10", + "browser": "Edge 83.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36 Edg/83.0.478.37" + }, + { + "os": "Win7", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Firefox 77.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0" + }, + { + "os": "macOS", + "browser": "Safari 13.1", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15" + }, + { + "os": "Win10", + "browser": "Edge 18.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18363" + }, + { + "os": "Win10", + "browser": "Firefox 75.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0" + }, + { + "os": "Win7", + "browser": "Firefox 76.0", + "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + }, + { + "os": "Win7", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" + }, + { + "os": "Win7", + "browser": "Firefox 77.0", + "useragent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0" + }, + { + "os": "macOS", + "browser": "Firefox 77.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:77.0) Gecko/20100101 Firefox/77.0" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Safari 13.1", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15" + }, + { + "os": "Win10", + "browser": "Chrome 76.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36" + }, + { + "os": "Win8.1", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Safari 13.1", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Safari/605.1.15" + }, + { + "os": "Win10", + "browser": "Edge 83.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 Edg/83.0.478.45" + }, + { + "os": "macOS", + "browser": "Safari 13.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.5 Safari/605.1.15" + }, + { + "os": "Win10", + "browser": "Firefox 68.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0" + }, + { + "os": "Win8.1", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Headless Chrome 68.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/68.0.3440.106 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Safari 13.1", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Opera 68", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.142" + }, + { + "os": "macOS", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36" + }, + { + "os": "Linux", + "browser": "Firefox 76.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0" + }, + { + "os": "Linux", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Firefox 76.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:76.0) Gecko/20100101 Firefox/76.0" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Edge 83.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36 Edg/83.0.478.54" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Opera 68", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.165" + }, + { + "os": "Win7", + "browser": "Opera 68", + "useragent": "Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125" + }, + { + "os": "Linux", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Edge 18.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763" + }, + { + "os": "Win10", + "browser": "Chrome 79.0", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36" + }, + { + "os": "Win8.1", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + }, + { + "os": "Android", + "browser": "Android Browser 4.0", + "useragent": "Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36" + }, + { + "os": "Win10", + "browser": "Yandex Browser Generic", + "useragent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 YaBrowser/20.6.0.905 Yowser/2.5 Yptp/1.23 Safari/537.36" + }, + { + "os": "ChromeOS", + "browser": "Chrome 81.0", + "useragent": "Mozilla/5.0 (X11; CrOS x86_64 12871.102.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.141 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Firefox Generic", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + }, + { + "os": "macOS", + "browser": "Chrome 83.0", + "useragent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + } + ], + "Win10": { + "Chrome 83.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36" + ], + "Firefox 77.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0" + ], + "Chrome 81.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36" + ], + "Firefox 76.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0" + ], + "Firefox 68.0": [ + "Mozilla/5.0 (Windows NT 10.0; rv:68.0) Gecko/20100101 Firefox/68.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0" + ], + "Chrome 74.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36" + ], + "Opera 68": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.142", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.165" + ], + "IE 11.0 for Desktop": [ + "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko" + ], + "Edge 18.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18362", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18363", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36 Edge/18.17763" + ], + "Edge 83.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36 Edg/83.0.478.37", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36 Edg/83.0.478.45", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36 Edg/83.0.478.54" + ], + "Firefox 75.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:75.0) Gecko/20100101 Firefox/75.0" + ], + "Chrome 76.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.100 Safari/537.36" + ], + "Chrome 79.0": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36" + ], + "Yandex Browser Generic": [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 YaBrowser/20.6.0.905 Yowser/2.5 Yptp/1.23 Safari/537.36" + ] + }, + "macOS": { + "Safari 13.1": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Safari/605.1.15", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1 Safari/605.1.15", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15" + ], + "Chrome 83.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36" + ], + "Chrome 81.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" + ], + "Firefox 77.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:77.0) Gecko/20100101 Firefox/77.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:77.0) Gecko/20100101 Firefox/77.0" + ], + "Firefox 76.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:76.0) Gecko/20100101 Firefox/76.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:76.0) Gecko/20100101 Firefox/76.0" + ], + "Safari 13.0": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.5 Safari/605.1.15" + ], + "Firefox Generic": [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:78.0) Gecko/20100101 Firefox/78.0" + ] + }, + "Win7": { + "Chrome 83.0": [ + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + ], + "Firefox 76.0": [ + "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0" + ], + "Chrome 81.0": [ + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36" + ], + "Firefox 77.0": [ + "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0" + ], + "Opera 68": [ + "Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125" + ] + }, + "Linux": { + "Firefox 77.0": [ + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0", + "Mozilla/5.0 (X11; Linux x86_64; rv:77.0) Gecko/20100101 Firefox/77.0" + ], + "Firefox 76.0": [ + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0", + "Mozilla/5.0 (X11; Linux x86_64; rv:76.0) Gecko/20100101 Firefox/76.0" + ], + "Chrome 83.0": [ + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + ], + "Firefox 68.0": [ + "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0" + ], + "Chrome 81.0": [ + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36" + ], + "Headless Chrome 68.0": [ + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/68.0.3440.106 Safari/537.36" + ] + }, + "Win8.1": { + "Chrome 83.0": [ + "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36", + "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36", + "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36" + ] + }, + "Android": { + "Android Browser 4.0": [ + "Mozilla/5.0 (Linux; U; Android 4.3; en-us; SM-N900T Build/JSS15J) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30" + ] + }, + "ChromeOS": { + "Chrome 81.0": [ + "Mozilla/5.0 (X11; CrOS x86_64 12871.102.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.141 Safari/537.36" + ] + } +} \ No newline at end of file diff --git a/website/web/__init__.py b/website/web/__init__.py index ebe7f5c9..3300fe94 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -60,6 +60,12 @@ def sizeof_fmt(num, suffix='B'): app.jinja_env.globals.update(sizeof_fmt=sizeof_fmt) +@app.after_request +def after_request(response): + lookyloo.cache_user_agents(request.headers.get('User-Agent'), request.remote_addr) + return response + + @auth.get_password def get_pw(username: str) -> Optional[str]: if username in user: @@ -115,7 +121,15 @@ def scrape_web(): user_agent=request.form.get('user_agent'), os=request.form.get('os'), browser=request.form.get('browser')) return redirect(url_for('tree', tree_uuid=perma_uuid)) - user_agents = get_user_agents() + if lookyloo.get_config('use_user_agents_users'): + lookyloo.build_ua_file() + # NOTE: For now, just generate the file, so we have an idea of the size + # user_agents = get_user_agents('own_user_agents') + user_agents = {} + if not user_agents: + user_agents = get_user_agents() + else: + user_agents = get_user_agents() user_agents.pop('by_frequency') return render_template('scrape.html', user_agents=user_agents)