chg: remove incomplete_redirects from cache, was always 0/False

pull/746/head
Raphaël Vinot 2023-07-28 14:05:28 +02:00
parent ea2ded9beb
commit c0f601e5db
5 changed files with 13 additions and 27 deletions

View File

@@ -44,7 +44,7 @@ class LookylooCacheLogAdapter(LoggerAdapter):
class CaptureCache():
__slots__ = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir',
'error', 'incomplete_redirects', 'no_index', 'categories', 'parent',
'error', 'no_index', 'categories', 'parent',
'user_agent', 'referer', 'logger')
def __init__(self, cache_entry: Dict[str, Any]):
@@ -87,7 +87,6 @@ class CaptureCache():
# Error without all the keys in __default_cache_keys was fatal.
# if the keys in __default_cache_keys are present, it was an HTTP error and we still need to pass the error along
self.error: Optional[str] = cache_entry.get('error')
self.incomplete_redirects: bool = True if cache_entry.get('incomplete_redirects') in [1, '1'] else False
self.no_index: bool = True if cache_entry.get('no_index') in [1, '1'] else False
self.categories: List[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
self.parent: Optional[str] = cache_entry.get('parent')
@@ -181,8 +180,7 @@ class CapturesIndex(Mapping):
def __getitem__(self, uuid: str) -> CaptureCache:
if uuid in self.__cache:
if (self.__cache[uuid].capture_dir.exists()
and not self.__cache[uuid].incomplete_redirects):
if self.__cache[uuid].capture_dir.exists():
return self.__cache[uuid]
del self.__cache[uuid]
capture_dir = self._get_capture_dir(uuid)
@@ -192,11 +190,9 @@ class CapturesIndex(Mapping):
# NOTE: checking for pickle to exist may be a bad idea here.
if (cc.capture_dir.exists()
and ((cc.capture_dir / 'tree.pickle.gz').exists()
or (cc.capture_dir / 'tree.pickle').exists())
and not cc.incomplete_redirects):
or (cc.capture_dir / 'tree.pickle').exists())):
self.__cache[uuid] = cc
return self.__cache[uuid]
self.__cache[uuid] = self._set_capture_cache(capture_dir)
return self.__cache[uuid]
@@ -398,7 +394,6 @@ class CapturesIndex(Mapping):
cache['title'] = har.initial_title
cache['timestamp'] = har.initial_start_time
cache['redirects'] = json.dumps(tree.redirects) if tree else ''
cache['incomplete_redirects'] = 0
cache['user_agent'] = har.root_user_agent if har.root_user_agent else 'No User Agent.'
if 'url' not in cache:
# if all went well, we already filled that one above.

View File

@@ -484,7 +484,9 @@ class Lookyloo():
# Do not try to build pickles
capture_uuids = set(capture_uuids) & self._captures_index.cached_captures
all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids if self.capture_cache(uuid) and hasattr(self._captures_index[uuid], 'timestamp')]
all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids
if self.capture_cache(uuid)
and hasattr(self._captures_index[uuid], 'timestamp')]
all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
return all_cache
@@ -1010,7 +1012,8 @@ class Lookyloo():
If a URL is given, it splits the results if the hash is seen on the same URL or an other one.
Capture UUID avoids duplicates on the same capture'''
captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, prefered_uuids=self._captures_index.cached_captures)
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1,
prefered_uuids=self._captures_index.cached_captures)
for h_capture_uuid, url_uuid, url_hostname, same_url in details:
cache = self.capture_cache(h_capture_uuid)
if cache and hasattr(cache, 'title'):

View File

@@ -866,7 +866,7 @@ def index_generic(show_hidden: bool=False, show_error: bool=True, category: Opti
continue
titles.append((cached.uuid, cached.title, cached.timestamp.isoformat(), cached.url,
cached.redirects, cached.incomplete_redirects))
cached.redirects))
titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)
return render_template('index.html', titles=titles, public_domain=lookyloo.public_domain,
show_project_page=get_config('generic', 'show_project_page'),

View File

@@ -143,18 +143,10 @@ class CaptureRedirects(Resource):
to_return: Dict[str, Any] = {}
try:
to_return = {'response': {'url': cache.url, 'redirects': []}}
to_return = {'response': {'url': cache.url,
'redirects': cache.redirects if cache.redirects else []}}
if not cache.redirects:
to_return['response']['info'] = 'No redirects'
return to_return
if cache.incomplete_redirects:
# Trigger tree build, get all redirects
lookyloo.get_crawled_tree(capture_uuid)
cache = lookyloo.capture_cache(capture_uuid)
if cache:
to_return['response']['redirects'] = cache.redirects
else:
to_return['response']['redirects'] = cache.redirects
except Exception as e:
if cache and hasattr(cache, 'error'):
to_return['error'] = cache.error

View File

@@ -97,7 +97,7 @@ $(document).ready(function () {
</tr>
</thead>
<tbody>
{% for uuid, page_title, datetime, url, redirects, incomplete_redirects in titles %}
{% for uuid, page_title, datetime, url, redirects in titles %}
<tr>
<td data-search="{{ page_title }} {{ url }}">
<p title="{{ page_title }}"><a href="{{ url_for('tree', tree_uuid=uuid) }}">{{ page_title }}</a></p>
@@ -115,11 +115,7 @@ $(document).ready(function () {
{%endif%}
</p>
{% endfor %}
{% if incomplete_redirects %}
<a style="float: right;" href="{{ url_for('cache_tree', tree_uuid=uuid) }}">Unable to find the redirects, click here to build the tree</a>
{%else%}
<a style="float: right;" href="{{ url_for('redirects', tree_uuid=uuid) }}">Download redirects</a>
{%endif%}
{% else%}
No redirect
{%endif%}