mirror of https://github.com/CIRCL/lookyloo
chg: remove reference to max depth (not used since playwright), cleanup
parent
4f2ee1e770
commit
ce9ecfefdb
|
@ -13,7 +13,6 @@
|
||||||
"days": 0,
|
"days": 0,
|
||||||
"hours": 0
|
"hours": 0
|
||||||
},
|
},
|
||||||
"max_depth": 1,
|
|
||||||
"async_capture_processes": 1,
|
"async_capture_processes": 1,
|
||||||
"use_user_agents_users": false,
|
"use_user_agents_users": false,
|
||||||
"enable_default_blur_screenshot": false,
|
"enable_default_blur_screenshot": false,
|
||||||
|
@ -57,7 +56,6 @@
|
||||||
"default_public": "If true, the capture is public and will be visible on the index page by default (can be unticked on the capture page).",
|
"default_public": "If true, the capture is public and will be visible on the index page by default (can be unticked on the capture page).",
|
||||||
"users": "These are, in effect, admin accounts. Format: {username: password}",
|
"users": "These are, in effect, admin accounts. Format: {username: password}",
|
||||||
"time_delta_on_index": "Time interval of the capture displayed on the index",
|
"time_delta_on_index": "Time interval of the capture displayed on the index",
|
||||||
"max_depth": "Maximum depth for scraping. Anything > 1 will be exponentially bigger.",
|
|
||||||
"async_capture_processes": "Number of async_capture processes to start. This should not be higher than the number of splash instances you have running. A very high number will use *a lot* of ram.",
|
"async_capture_processes": "Number of async_capture processes to start. This should not be higher than the number of splash instances you have running. A very high number will use *a lot* of ram.",
|
||||||
"use_user_agents_users": "Only usable for medium/high use instances: use the user agents of the users of the platform",
|
"use_user_agents_users": "Only usable for medium/high use instances: use the user agents of the users of the platform",
|
||||||
"enable_default_blur_screenshot": "If true, blur the screenshot by default (useful on public instances)",
|
"enable_default_blur_screenshot": "If true, blur the screenshot by default (useful on public instances)",
|
||||||
|
|
|
@ -94,7 +94,10 @@ class UserAgents:
|
||||||
self.path = get_homedir() / 'user_agents'
|
self.path = get_homedir() / 'user_agents'
|
||||||
|
|
||||||
ua_files_path = sorted(self.path.glob('**/*.json'), reverse=True)
|
ua_files_path = sorted(self.path.glob('**/*.json'), reverse=True)
|
||||||
self.most_recent_ua_path = ua_files_path[0]
|
self._load_newest_ua_file(ua_files_path[0])
|
||||||
|
|
||||||
|
def _load_newest_ua_file(self, path: Path):
|
||||||
|
self.most_recent_ua_path = path
|
||||||
with self.most_recent_ua_path.open() as f:
|
with self.most_recent_ua_path.open() as f:
|
||||||
self.most_recent_uas = json.load(f)
|
self.most_recent_uas = json.load(f)
|
||||||
self.by_freq = self.most_recent_uas.pop('by_frequency')
|
self.by_freq = self.most_recent_uas.pop('by_frequency')
|
||||||
|
@ -103,10 +106,7 @@ class UserAgents:
|
||||||
def user_agents(self) -> Dict[str, Dict[str, List[str]]]:
|
def user_agents(self) -> Dict[str, Dict[str, List[str]]]:
|
||||||
ua_files_path = sorted(self.path.glob('**/*.json'), reverse=True)
|
ua_files_path = sorted(self.path.glob('**/*.json'), reverse=True)
|
||||||
if ua_files_path[0] != self.most_recent_ua_path:
|
if ua_files_path[0] != self.most_recent_ua_path:
|
||||||
self.most_recent_ua_path = ua_files_path[0]
|
self._load_newest_ua_file(ua_files_path[0])
|
||||||
with self.most_recent_ua_path.open() as f:
|
|
||||||
self.most_recent_uas = json.load(f)
|
|
||||||
self.by_freq = self.most_recent_uas.pop('by_frequency')
|
|
||||||
return self.most_recent_uas
|
return self.most_recent_uas
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
@ -120,6 +120,7 @@ class UserAgents:
|
||||||
return ua
|
return ua
|
||||||
raise Exception('Erros with the User agents.')
|
raise Exception('Erros with the User agents.')
|
||||||
|
|
||||||
|
|
||||||
def load_known_content(directory: str='known_content') -> Dict[str, Dict[str, Any]]:
|
def load_known_content(directory: str='known_content') -> Dict[str, Dict[str, Any]]:
|
||||||
to_return: Dict[str, Dict[str, Any]] = {}
|
to_return: Dict[str, Dict[str, Any]] = {}
|
||||||
for known_content_file in (get_homedir() / directory).glob('*.json'):
|
for known_content_file in (get_homedir() / directory).glob('*.json'):
|
||||||
|
|
|
@ -105,7 +105,6 @@ lookyloo: Lookyloo = Lookyloo()
|
||||||
|
|
||||||
time_delta_on_index = get_config('generic', 'time_delta_on_index')
|
time_delta_on_index = get_config('generic', 'time_delta_on_index')
|
||||||
blur_screenshot = get_config('generic', 'enable_default_blur_screenshot')
|
blur_screenshot = get_config('generic', 'enable_default_blur_screenshot')
|
||||||
max_depth = get_config('generic', 'max_depth')
|
|
||||||
|
|
||||||
use_own_ua = get_config('generic', 'use_user_agents_users')
|
use_own_ua = get_config('generic', 'use_user_agents_users')
|
||||||
enable_mail_notification = get_config('generic', 'enable_mail_notification')
|
enable_mail_notification = get_config('generic', 'enable_mail_notification')
|
||||||
|
@ -801,7 +800,7 @@ def search():
|
||||||
def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[str]=None):
|
def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[str]=None):
|
||||||
return render_template('capture.html', user_agents=user_agents.user_agents,
|
return render_template('capture.html', user_agents=user_agents.user_agents,
|
||||||
default=user_agents.default,
|
default=user_agents.default,
|
||||||
max_depth=max_depth, personal_ua=user_ua,
|
personal_ua=user_ua,
|
||||||
default_public=get_config('generic', 'default_public'),
|
default_public=get_config('generic', 'default_public'),
|
||||||
predefined_url_to_capture=predefined_url if predefined_url else '')
|
predefined_url_to_capture=predefined_url if predefined_url else '')
|
||||||
|
|
||||||
|
@ -810,7 +809,8 @@ def _prepare_capture_template(user_ua: Optional[str], predefined_url: Optional[s
|
||||||
def recapture(tree_uuid: str):
|
def recapture(tree_uuid: str):
|
||||||
cache = lookyloo.capture_cache(tree_uuid)
|
cache = lookyloo.capture_cache(tree_uuid)
|
||||||
if cache:
|
if cache:
|
||||||
return _prepare_capture_template(user_ua=request.headers.get('User-Agent'), predefined_url=cache.url)
|
return _prepare_capture_template(user_ua=request.headers.get('User-Agent'),
|
||||||
|
predefined_url=cache.url)
|
||||||
flash(f'Unable to find the capture {tree_uuid} in the cache.', 'error')
|
flash(f'Unable to find the capture {tree_uuid} in the cache.', 'error')
|
||||||
return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))
|
return _prepare_capture_template(user_ua=request.headers.get('User-Agent'))
|
||||||
|
|
||||||
|
|
|
@ -150,19 +150,6 @@
|
||||||
|
|
||||||
<div id="collapseConfigCapture" class="collapse">
|
<div id="collapseConfigCapture" class="collapse">
|
||||||
<div class="card card-body">
|
<div class="card card-body">
|
||||||
{% if max_depth >= 2 %}
|
|
||||||
<div class="row mb-3">
|
|
||||||
<label for="depth" class="col-sm-2 col-form-label">Link Depth:</label>
|
|
||||||
<div class="col-sm-1">
|
|
||||||
<select class="form-select" name="depth" id=depth>
|
|
||||||
{% for depth in range(max_depth) %}
|
|
||||||
<option value="{{ depth + 1 }}">{{ depth + 1 }}</option>
|
|
||||||
{% endfor %}
|
|
||||||
</select>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
{%endif%}
|
|
||||||
|
|
||||||
<div class="row mb-3">
|
<div class="row mb-3">
|
||||||
<label for="referer" class="col-sm-2 col-form-label">Referer:</label>
|
<label for="referer" class="col-sm-2 col-form-label">Referer:</label>
|
||||||
<div class="col-sm-10">
|
<div class="col-sm-10">
|
||||||
|
|
Loading…
Reference in New Issue