chg: Cleanup mypy config

pull/96/head
Raphaël Vinot 2020-10-01 11:48:00 +02:00
parent f93fe60a50
commit be2cd18d2b
3 changed files with 29 additions and 14 deletions

@@ -556,7 +556,7 @@ class Lookyloo():
            to_store[parsed_ua.platform][f'{parsed_ua.browser} {parsed_ua.version}'].append(parsed_ua.string)  # type: ignore
            to_store['by_frequency'].append({'os': parsed_ua.platform,  # type: ignore
                                             'browser': f'{parsed_ua.browser} {parsed_ua.version}',  # type: ignore
-                                            'useragent': parsed_ua.string})  # type: ignore
+                                            'useragent': parsed_ua.string})
        with self_generated_ua_file.open('w') as f:
            json.dump(to_store, f, indent=2)
@@ -977,7 +977,7 @@ class Lookyloo():
        if not to_dump:
            # UA not recognized
            self.logger.info(f'Unable to recognize the User agent: {ua}')
-       to_dump['user_agent'] = ua.string  # type: ignore
+       to_dump['user_agent'] = ua.string
        with metafile.open('w') as f:
            json.dump(to_dump, f)
@ -1013,8 +1013,8 @@ class Lookyloo():
def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None, def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None,
depth: int=1, listing: bool=True, user_agent: Optional[str]=None, depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
referer: str='', perma_uuid: str=None, os: str=None, referer: str='', perma_uuid: Optional[str]=None, os: Optional[str]=None,
browser: str=None) -> Union[bool, str]: browser: Optional[str]=None) -> Union[bool, str]:
url = url.strip() url = url.strip()
url = refang(url) url = refang(url)
if not url.startswith('http'): if not url.startswith('http'):
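
The signature change above is what no_implicit_optional = True in the mypy.ini added below demands: a parameter whose default is None must be annotated as Optional explicitly. A minimal sketch of the rule, using a hypothetical function rather than code from this repository:

    from typing import Optional

    # Rejected once no_implicit_optional is enabled:
    #   def submit(perma_uuid: str = None) -> None: ...
    # mypy: Incompatible default for argument "perma_uuid"

    # Accepted: the None default is spelled out in the annotation.
    def submit(perma_uuid: Optional[str] = None) -> None:
        if perma_uuid is None:
            perma_uuid = 'generated elsewhere'
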
@@ -1036,13 +1036,13 @@ class Lookyloo():
        cookies = load_cookies(cookies_pseudofile)
        if not user_agent:
            # Catch case where the UA is broken on the UI, and the async submission.
-           ua: str = get_config('generic', 'default_user_agent')  # type: ignore
+           ua: str = get_config('generic', 'default_user_agent')
        else:
            ua = user_agent
-       if int(depth) > int(get_config('generic', 'max_depth')):  # type: ignore
+       if int(depth) > int(get_config('generic', 'max_depth')):
            self.logger.warning(f'Not allowed to scrape on a depth higher than {get_config("generic", "max_depth")}: {depth}')
-           depth = int(get_config('generic', 'max_depth'))  # type: ignore
+           depth = int(get_config('generic', 'max_depth'))
        items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua,
                      referer=referer, log_enabled=True, log_level=get_config('generic', 'splash_loglevel'))
        if not items:
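
The # type: ignore comments dropped in this file are the kind that warn_unused_ignores = True in the mypy.ini below turns into errors once they no longer suppress anything. A small, self-contained illustration, not taken from the project:

    # With warn_unused_ignores = True, mypy reports an
    # 'Unused "type: ignore" comment' error on the return line,
    # because int(value) already type-checks without help.
    def max_depth(value: str) -> int:
        return int(value)  # type: ignore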

mypy.ini (new file, 14 additions)

@@ -0,0 +1,14 @@
+[mypy]
+python_version = 3.8
+check_untyped_defs = True
+ignore_errors = False
+ignore_missing_imports = False
+strict_optional = True
+no_implicit_optional = True
+warn_unused_ignores = True
+warn_redundant_casts = True
+warn_unused_configs = True
+warn_unreachable = True
+show_error_context = True
+pretty = True
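
mypy reads this file automatically when invoked from the directory that contains it, so no extra command-line flags are needed. The options mostly tighten checking: bodies of unannotated functions are still checked (check_untyped_defs), None defaults must be explicit (no_implicit_optional), and ignores or casts that do nothing are reported. As an illustration of what warn_unreachable = True catches (an assumed snippet, not project code):

    def label(value: str) -> str:
        if isinstance(value, str):
            return value.upper()
        # warn_unreachable = True: mypy flags this statement as unreachable,
        # because a parameter annotated str can never fail the isinstance check.
        return 'not a string'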

@@ -18,7 +18,7 @@ from lookyloo.lookyloo import Lookyloo, Indexing
from lookyloo.exceptions import NoValidHarFile, MissingUUID
from .proxied import ReverseProxied
-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, Union
import logging
@@ -136,7 +136,7 @@ def scrape_web():
        perma_uuid = lookyloo.scrape(url=url, cookies_pseudofile=cookie_file,
                                     depth=depth, listing=listing,
                                     user_agent=request.form.get('user_agent'),
-                                    referer=request.form.get('referer'),
+                                    referer=request.form.get('referer'),  # type: ignore
                                     os=request.form.get('os'), browser=request.form.get('browser'))
        return redirect(url_for('tree', tree_uuid=perma_uuid))
    user_agents: Dict[str, Any] = {}
@@ -242,18 +242,19 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
    if not urlnode.posted_data:
        return
+   posted: Union[str, bytes]
    if isinstance(urlnode.posted_data, (dict, list)):
        # JSON blob, pretty print.
        posted = json.dumps(urlnode.posted_data, indent=2)
    else:
        posted = urlnode.posted_data
-   if isinstance(posted, bytes):
-       to_return = BytesIO(posted)
-       is_blob = True
-   else:
+   if isinstance(posted, str):
        to_return = BytesIO(posted.encode())
        is_blob = False
+   else:
+       to_return = BytesIO(posted)
+       is_blob = True
    to_return.seek(0)
    if is_blob:
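
Declaring posted: Union[str, bytes] up front gives both branches of the first if a single declared type, and the later isinstance(posted, str) check narrows that union so each BytesIO(...) call receives the type it expects. Roughly, as a standalone sketch rather than the project's code:

    from io import BytesIO
    from typing import Union

    def to_stream(data: Union[str, bytes]) -> BytesIO:
        # isinstance() narrows the Union: data is str in this branch...
        if isinstance(data, str):
            return BytesIO(data.encode())
        # ...and bytes in this one, so no cast or ignore is needed.
        return BytesIO(data)
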
@@ -454,7 +455,7 @@ def index_generic(show_hidden: bool=False):
        if 'timestamp' not in cached:
            # this is a buggy capture, skip
            continue
-       if cut_time and datetime.fromisoformat(cached['timestamp'][:-1]) < cut_time:  # type: ignore
+       if cut_time and datetime.fromisoformat(cached['timestamp'][:-1]) < cut_time:
            continue
        titles.append((cached['uuid'], cached['title'], cached['timestamp'], cached['url'],
                       cached['redirects'], True if cached['incomplete_redirects'] == '1' else False))