mirror of https://github.com/CIRCL/lookyloo
chg: Cleanup mypy config
parent f93fe60a50
commit be2cd18d2b
@@ -556,7 +556,7 @@ class Lookyloo():
             to_store[parsed_ua.platform][f'{parsed_ua.browser} {parsed_ua.version}'].append(parsed_ua.string)  # type: ignore
             to_store['by_frequency'].append({'os': parsed_ua.platform,  # type: ignore
                                              'browser': f'{parsed_ua.browser} {parsed_ua.version}',  # type: ignore
-                                             'useragent': parsed_ua.string})  # type: ignore
+                                             'useragent': parsed_ua.string})
         with self_generated_ua_file.open('w') as f:
             json.dump(to_store, f, indent=2)

@@ -977,7 +977,7 @@ class Lookyloo():
         if not to_dump:
             # UA not recognized
             self.logger.info(f'Unable to recognize the User agent: {ua}')
-        to_dump['user_agent'] = ua.string  # type: ignore
+        to_dump['user_agent'] = ua.string
         with metafile.open('w') as f:
             json.dump(to_dump, f)

@@ -1013,8 +1013,8 @@ class Lookyloo():

     def scrape(self, url: str, cookies_pseudofile: Optional[BufferedIOBase]=None,
                depth: int=1, listing: bool=True, user_agent: Optional[str]=None,
-               referer: str='', perma_uuid: str=None, os: str=None,
-               browser: str=None) -> Union[bool, str]:
+               referer: str='', perma_uuid: Optional[str]=None, os: Optional[str]=None,
+               browser: Optional[str]=None) -> Union[bool, str]:
         url = url.strip()
         url = refang(url)
         if not url.startswith('http'):
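For context (not part of the diff): with no_implicit_optional = True in the new mypy config, a parameter that defaults to None must be annotated Optional[...] explicitly. A minimal standalone sketch of the difference, using hypothetical function names:

from typing import Optional

# Old style: with no_implicit_optional = True, mypy rejects a None default
# on a parameter annotated as plain `str` (incompatible default type).
def scrape_old(url: str, perma_uuid: str = None) -> None:
    pass

# New style: the Optional annotation makes the None default explicit and valid.
def scrape_new(url: str, perma_uuid: Optional[str] = None) -> None:
    pass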
@@ -1036,13 +1036,13 @@ class Lookyloo():
         cookies = load_cookies(cookies_pseudofile)
         if not user_agent:
             # Catch case where the UA is broken on the UI, and the async submission.
-            ua: str = get_config('generic', 'default_user_agent')  # type: ignore
+            ua: str = get_config('generic', 'default_user_agent')
         else:
             ua = user_agent

-        if int(depth) > int(get_config('generic', 'max_depth')):  # type: ignore
+        if int(depth) > int(get_config('generic', 'max_depth')):
             self.logger.warning(f'Not allowed to scrape on a depth higher than {get_config("generic", "max_depth")}: {depth}')
-            depth = int(get_config('generic', 'max_depth'))  # type: ignore
+            depth = int(get_config('generic', 'max_depth'))
         items = crawl(self.splash_url, url, cookies=cookies, depth=depth, user_agent=ua,
                       referer=referer, log_enabled=True, log_level=get_config('generic', 'splash_loglevel'))
         if not items:
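One plausible reading of this hunk (an assumption, the diff itself does not say so): get_config() can return more than one type, so the old code silenced mypy with # type: ignore; since the values are already wrapped in int(), which accepts both str and int, those ignores suppress nothing and would be rejected by warn_unused_ignores in the new config. A standalone sketch of that pattern, with a hypothetical stand-in for get_config:

from typing import Dict, Union

_CONFIG: Dict[str, Union[str, int]] = {'max_depth': 1}

def get_config(section: str, entry: str) -> Union[str, int]:
    # hypothetical stand-in for the real helper, which can return several types
    return _CONFIG[entry]

def clamp_depth(depth: int) -> int:
    # int() accepts str as well as int, so the comparison type-checks
    # without any "# type: ignore" comment
    if int(depth) > int(get_config('generic', 'max_depth')):
        depth = int(get_config('generic', 'max_depth'))
    return depth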
@@ -0,0 +1,14 @@
+[mypy]
+python_version = 3.8
+check_untyped_defs = True
+ignore_errors = False
+ignore_missing_imports = False
+strict_optional = True
+no_implicit_optional = True
+warn_unused_ignores = True
+warn_redundant_casts = True
+warn_unused_configs = True
+warn_unreachable = True
+
+show_error_context = True
+pretty = True
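This new config is what drives the rest of the commit: warn_unused_ignores = True makes mypy report any # type: ignore comment that no longer suppresses an error, which is why the redundant ignores in the other hunks are being dropped. A minimal illustration (not repository code):

# With warn_unused_ignores = True, mypy reports an unused "type: ignore"
# comment here, because the assignment is already well-typed and the
# comment suppresses nothing.
default_user_agent: str = 'Mozilla/5.0'  # type: ignore

mypy reads mypy.ini from the directory it is invoked in, so running it from the repository root applies these settings.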
@@ -18,7 +18,7 @@ from lookyloo.lookyloo import Lookyloo, Indexing
 from lookyloo.exceptions import NoValidHarFile, MissingUUID
 from .proxied import ReverseProxied

-from typing import Optional, Dict, Any
+from typing import Optional, Dict, Any, Union

 import logging

@@ -136,7 +136,7 @@ def scrape_web():
         perma_uuid = lookyloo.scrape(url=url, cookies_pseudofile=cookie_file,
                                      depth=depth, listing=listing,
                                      user_agent=request.form.get('user_agent'),
-                                     referer=request.form.get('referer'),
+                                     referer=request.form.get('referer'),  # type: ignore
                                      os=request.form.get('os'), browser=request.form.get('browser'))
         return redirect(url_for('tree', tree_uuid=perma_uuid))
     user_agents: Dict[str, Any] = {}
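Unlike the other hunks, this one adds an ignore rather than removing one. A likely reason (an assumption, not stated in the diff): request.form.get('referer') is Optional[str], while scrape() annotates referer as plain str, and with strict_optional = True mypy flags the mismatch. A standalone sketch:

from typing import Optional

def scrape(url: str, referer: str = '') -> None:
    pass

form_referer: Optional[str] = None  # what request.form.get('referer') may return

# mypy (strict_optional): argument "referer" has incompatible type
# Optional[str]; expected str -- hence the added "# type: ignore"
scrape('https://example.com', referer=form_referer)  # type: ignore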
@@ -242,18 +242,19 @@ def urlnode_post_request(tree_uuid: str, node_uuid: str):
     urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
     if not urlnode.posted_data:
         return
+    posted: Union[str, bytes]
     if isinstance(urlnode.posted_data, (dict, list)):
         # JSON blob, pretty print.
         posted = json.dumps(urlnode.posted_data, indent=2)
     else:
         posted = urlnode.posted_data

-    if isinstance(posted, bytes):
-        to_return = BytesIO(posted)
-        is_blob = True
-    else:
+    if isinstance(posted, str):
         to_return = BytesIO(posted.encode())
         is_blob = False
+    else:
+        to_return = BytesIO(posted)
+        is_blob = True
     to_return.seek(0)

     if is_blob:
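A standalone sketch (an assumed simplification, not the repository function) of the corrected branching: declaring posted as Union[str, bytes] up front lets mypy narrow the type in each branch, and checking for str first guarantees BytesIO always receives bytes:

from io import BytesIO
from typing import Union

def to_stream(posted: Union[str, bytes]) -> BytesIO:
    # str is encoded to bytes; bytes is passed through unchanged
    if isinstance(posted, str):
        stream = BytesIO(posted.encode())
    else:
        stream = BytesIO(posted)
    stream.seek(0)
    return stream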
@@ -454,7 +455,7 @@ def index_generic(show_hidden: bool=False):
         if 'timestamp' not in cached:
             # this is a buggy capture, skip
             continue
-        if cut_time and datetime.fromisoformat(cached['timestamp'][:-1]) < cut_time:  # type: ignore
+        if cut_time and datetime.fromisoformat(cached['timestamp'][:-1]) < cut_time:
             continue
         titles.append((cached['uuid'], cached['title'], cached['timestamp'], cached['url'],
                        cached['redirects'], True if cached['incomplete_redirects'] == '1' else False))