From be2cd18d2b7708e6079ec70e3d06080db0bb5ea2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?=
Date: Thu, 1 Oct 2020 11:48:00 +0200
Subject: [PATCH] chg: Cleanup mypy config

---
 lookyloo/lookyloo.py    | 14 +++++++-------
 mypy.ini                | 14 ++++++++++++++
 website/web/__init__.py | 15 ++++++++-------
 3 files changed, 29 insertions(+), 14 deletions(-)
 create mode 100644 mypy.ini

diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index d91c429..dfb4959 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -556,7 +556,7 @@ class Lookyloo():
             to_store[parsed_ua.platform][f'{parsed_ua.browser} {parsed_ua.version}'].append(parsed_ua.string)  # type: ignore
             to_store['by_frequency'].append({'os': parsed_ua.platform,  # type: ignore
                                              'browser': f'{parsed_ua.browser} {parsed_ua.version}',  # type: ignore
-                                             'useragent': parsed_ua.string})  # type: ignore
+                                             'useragent': parsed_ua.string})

         with self_generated_ua_file.open('w') as f:
             json.dump(to_store, f, indent=2)
@@ -977,7 +977,7 @@ class Lookyloo():
         if not to_dump:
             # UA not recognized
             self.logger.info(f'Unable to recognize the User agent: {ua}')
-        to_dump['user_agent'] = ua.string  # type: ignore
+        to_dump['user_agent'] = ua.string
         with metafile.open('w') as f:
             json.dump(to_dump, f)

@@ -1013,8 +1013,8 @@ class Lookyloo():

     def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None,
                depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
-               referer: str='', perma_uuid: str=None, os: str=None,
-               browser: str=None) -> Union[bool, str]:
+               referer: str='', perma_uuid: Optional[str]=None, os: Optional[str]=None,
+               browser: Optional[str]=None) -> Union[bool, str]:
         url = url.strip()
         url = refang(url)
         if not url.startswith('http'):
@@ -1036,13 +1036,13 @@ class Lookyloo():
         cookies = load_cookies(cookies_pseudofile)
         if not user_agent:
             # Catch case where the UA is broken on the UI, and the async submission.
-            ua: str = get_config('generic', 'default_user_agent')  # type: ignore
+            ua: str = get_config('generic', 'default_user_agent')
         else:
             ua = user_agent
-        if int(depth) > int(get_config('generic', 'max_depth')):  # type: ignore
+        if int(depth) > int(get_config('generic', 'max_depth')):
             self.logger.warning(f'Not allowed to scrape on a depth higher than {get_config("generic", "max_depth")}: {depth}')
-            depth = int(get_config('generic', 'max_depth'))  # type: ignore
+            depth = int(get_config('generic', 'max_depth'))
         items = crawl(self.splash_url, url, cookies=cookies, depth=depth,
                       user_agent=ua, referer=referer, log_enabled=True,
                       log_level=get_config('generic', 'splash_loglevel'))
         if not items:
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..45178e4
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,14 @@
+[mypy]
+python_version = 3.8
+check_untyped_defs = True
+ignore_errors = False
+ignore_missing_imports = False
+strict_optional = True
+no_implicit_optional = True
+warn_unused_ignores = True
+warn_redundant_casts = True
+warn_unused_configs = True
+warn_unreachable = True
+
+show_error_context = True
+pretty = True
diff --git a/website/web/__init__.py b/website/web/__init__.py
index 8412cd3..b965c83 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -18,7 +18,7 @@ from lookyloo.lookyloo import Lookyloo, Indexing
 from lookyloo.exceptions import NoValidHarFile, MissingUUID
 from .proxied import ReverseProxied

-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, Union

 import logging

@@ -136,7 +136,7 @@ def scrape_web():
         perma_uuid = lookyloo.scrape(url=url, cookies_pseudofile=cookie_file,
                                      depth=depth, listing=listing,
                                      user_agent=request.form.get('user_agent'),
-                                     referer=request.form.get('referer'),
+                                     referer=request.form.get('referer'),  # type: ignore
                                      os=request.form.get('os'), browser=request.form.get('browser'))
         return redirect(url_for('tree', tree_uuid=perma_uuid))
     user_agents: Dict[str, Any] = {}
@@ -242,18 +242,19 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
     urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.posted_data:
         return
+    posted: Union[str, bytes]
     if isinstance(urlnode.posted_data, (dict, list)):
         # JSON blob, pretty print.
         posted = json.dumps(urlnode.posted_data, indent=2)
     else:
         posted = urlnode.posted_data

-    if isinstance(posted, bytes):
-        to_return = BytesIO(posted)
-        is_blob = True
-    else:
+    if isinstance(posted, str):
         to_return = BytesIO(posted.encode())
         is_blob = False
+    else:
+        to_return = BytesIO(posted)
+        is_blob = True

     to_return.seek(0)
     if is_blob:
@@ -454,7 +455,7 @@ def index_generic(show_hidden: bool=False):
         if 'timestamp' not in cached:
             # this is a buggy capture, skip
             continue
-        if cut_time and datetime.fromisoformat(cached['timestamp'][:-1]) < cut_time:  # type: ignore
+        if cut_time and datetime.fromisoformat(cached['timestamp'][:-1]) < cut_time:
             continue
         titles.append((cached['uuid'], cached['title'], cached['timestamp'], cached['url'],
                        cached['redirects'], True if cached['incomplete_redirects'] == '1' else False))
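
Note: the code changes above follow directly from the new mypy.ini. With no_implicit_optional = True, a parameter that defaults to None must be annotated Optional[...], and with warn_unused_ignores = True, mypy reports any "# type: ignore" comment that no longer suppresses an error, which is why the redundant ones are dropped. A minimal sketch of both effects follows; the function below is hypothetical and not part of the Lookyloo code base, and the exact mypy invocation for this repository is an assumption:

    # sketch.py -- hypothetical example, not Lookyloo code
    from typing import Optional, Union

    def scrape(url: str, perma_uuid: Optional[str] = None) -> Union[bool, str]:
        # Under no_implicit_optional, writing 'perma_uuid: str = None' is an
        # error; the None default forces the explicit Optional[str] annotation.
        if perma_uuid is None:
            return False
        return f'{url}/{perma_uuid}'

    # This call type-checks cleanly, so with warn_unused_ignores = True mypy
    # flags the comment below as an unused "type: ignore".
    result = scrape('https://example.com', 'abcd')  # type: ignore

Running mypy from the repository root picks up mypy.ini automatically, e.g. 'mypy lookyloo website' (the paths are an assumption); with these settings the check fails on implicit Optionals and on leftover ignore comments instead of silently accepting them.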