From 5baab519514286fbdb4e28bbaf262aa9577b8b14 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Sun, 24 May 2020 15:35:30 +0200 Subject: [PATCH] fix: Proper fallback for the user agent. --- config/generic.json.sample | 6 ++++-- lookyloo/lookyloo.py | 7 ++++++- lookyloo/modules.py | 1 + poetry.lock | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/config/generic.json.sample b/config/generic.json.sample index cb3b215c..0bab9024 100644 --- a/config/generic.json.sample +++ b/config/generic.json.sample @@ -3,10 +3,11 @@ "splash_loglevel": "WARNING", "only_global_lookups": true, "splash_url": "http://127.0.0.1:8050", + "default_user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36", "cache_clean_user": {}, "time_delta_on_index": { - "weeks": 0, - "days": 1, + "weeks": 1, + "days": 0, "hours": 0 }, "enable_mail_notification": false, @@ -23,6 +24,7 @@ "splash_loglevel": "(Splash) INFO is *very* verbose.", "only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network", "splash_url": "URL to connect to splash", + "default_user_agent": "Ultimate fallback if the capture form, or the asynchronous submission, don't provide a UA", "cache_clean_user": "Format: {username: password}", "time_delta_on_index": "Time interval of the capture displayed on the index", "enable_mail_notification": "Enable email notification or not", diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index d7eb7f52..d03a47b1 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -342,7 +342,12 @@ class Lookyloo(): return False cookies = load_cookies(cookies_pseudofile) - items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=user_agent, + if not user_agent: + # Catch case where the UA is broken on the UI, and the async submission. + ua: str = self.get_config('default_user_agent') # type: ignore + else: + ua = user_agent + items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua, log_enabled=True, log_level=self.get_config('splash_loglevel')) if not items: # broken diff --git a/lookyloo/modules.py b/lookyloo/modules.py index 68773d50..26d77457 100644 --- a/lookyloo/modules.py +++ b/lookyloo/modules.py @@ -48,6 +48,7 @@ class SaneJavaScript(): "69e2da5cdc318fc237eaa243b6ea7ecc83b68dbdea8478dc69154abdda86ecb4e16c35891cc1facb3ce7e0cf19d5abf189c50f59c769777706f4558f6442abbc": "This is a 1*1 pixel GIF", "16dd1560fdd43c3eee7bcf622d940be93e7e74dee90286da37992d69cea844130911b97f41c71f8287b54f00bd3a388191112f490470cf27c374d524f49ba516": "This is a 1*1 pixel GIF", "01211111688dc2007519ff56603fbe345d057337b911c829aaee97b8d02e7d885e7a2c2d51730f54a04aebc1821897c8041f15e216f1c973ed313087fa91a3fb": "This is a 1*1 pixel GIF", + "71db01662075fac031dea18b2c766826c77dbab01400a8642cdc7059394841d5df9020076554c3beca6f808187d42e1a1acc98fad9a0e1ad32ae869145f53746": "This is a 1*1 pixel GIF", # "": "This is a 1*1 pixel GIF", "f1c33e72643ce366fd578e3b5d393799e8c9ea27b180987826af43b4fc00b65a4eaae5e6426a23448956fee99e3108c6a86f32fb4896c156e24af0571a11c498": "This is a 1*1 pixel PNG", "dc7c40381b3d22919e32c1b700ccb77b1b0aea2690642d01c1ac802561e135c01d5a4d2a0ea18efc0ec3362e8c549814a10a23563f1f56bd62aee0ced7e2bd99": "This is a 1*1 pixel PNG", diff --git a/poetry.lock b/poetry.lock index 8715a1a1..6e0d2692 100644 --- a/poetry.lock +++ b/poetry.lock @@ -870,7 +870,7 @@ scrapy = "^1.8.0" scrapy-splash = "^0.7.2" [package.source] -reference = "9be949672aa7d90719e4d97c7b9aa70f106072bf" +reference = "d0ea9acbd769c73a15c2142633acef2870d70e08" type = "git" url = "https://github.com/viper-framework/ScrapySplashWrapper.git" [[package]]