mirror of https://github.com/CIRCL/lookyloo
chg: remove incomplete_redirects from cache, was always 0/False
parent
ea2ded9beb
commit
c0f601e5db
|
@ -44,7 +44,7 @@ class LookylooCacheLogAdapter(LoggerAdapter):
|
|||
|
||||
class CaptureCache():
|
||||
__slots__ = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir',
|
||||
'error', 'incomplete_redirects', 'no_index', 'categories', 'parent',
|
||||
'error', 'no_index', 'categories', 'parent',
|
||||
'user_agent', 'referer', 'logger')
|
||||
|
||||
def __init__(self, cache_entry: Dict[str, Any]):
|
||||
|
@ -87,7 +87,6 @@ class CaptureCache():
|
|||
# Error without all the keys in __default_cache_keys was fatal.
|
||||
# if the keys in __default_cache_keys are present, it was an HTTP error and we still need to pass the error along
|
||||
self.error: Optional[str] = cache_entry.get('error')
|
||||
self.incomplete_redirects: bool = True if cache_entry.get('incomplete_redirects') in [1, '1'] else False
|
||||
self.no_index: bool = True if cache_entry.get('no_index') in [1, '1'] else False
|
||||
self.categories: List[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
|
||||
self.parent: Optional[str] = cache_entry.get('parent')
|
||||
|
@ -181,8 +180,7 @@ class CapturesIndex(Mapping):
|
|||
|
||||
def __getitem__(self, uuid: str) -> CaptureCache:
|
||||
if uuid in self.__cache:
|
||||
if (self.__cache[uuid].capture_dir.exists()
|
||||
and not self.__cache[uuid].incomplete_redirects):
|
||||
if self.__cache[uuid].capture_dir.exists():
|
||||
return self.__cache[uuid]
|
||||
del self.__cache[uuid]
|
||||
capture_dir = self._get_capture_dir(uuid)
|
||||
|
@ -192,11 +190,9 @@ class CapturesIndex(Mapping):
|
|||
# NOTE: checking for pickle to exist may be a bad idea here.
|
||||
if (cc.capture_dir.exists()
|
||||
and ((cc.capture_dir / 'tree.pickle.gz').exists()
|
||||
or (cc.capture_dir / 'tree.pickle').exists())
|
||||
and not cc.incomplete_redirects):
|
||||
or (cc.capture_dir / 'tree.pickle').exists())):
|
||||
self.__cache[uuid] = cc
|
||||
return self.__cache[uuid]
|
||||
|
||||
self.__cache[uuid] = self._set_capture_cache(capture_dir)
|
||||
return self.__cache[uuid]
|
||||
|
||||
|
@ -398,7 +394,6 @@ class CapturesIndex(Mapping):
|
|||
cache['title'] = har.initial_title
|
||||
cache['timestamp'] = har.initial_start_time
|
||||
cache['redirects'] = json.dumps(tree.redirects) if tree else ''
|
||||
cache['incomplete_redirects'] = 0
|
||||
cache['user_agent'] = har.root_user_agent if har.root_user_agent else 'No User Agent.'
|
||||
if 'url' not in cache:
|
||||
# if all went well, we already filled that one above.
|
||||
|
|
|
@ -484,7 +484,9 @@ class Lookyloo():
|
|||
# Do not try to build pickles
|
||||
capture_uuids = set(capture_uuids) & self._captures_index.cached_captures
|
||||
|
||||
all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids if self.capture_cache(uuid) and hasattr(self._captures_index[uuid], 'timestamp')]
|
||||
all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids
|
||||
if self.capture_cache(uuid)
|
||||
and hasattr(self._captures_index[uuid], 'timestamp')]
|
||||
all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
|
||||
return all_cache
|
||||
|
||||
|
@ -1010,7 +1012,8 @@ class Lookyloo():
|
|||
If a URL is given, it splits the results if the hash is seen on the same URL or another one.
|
||||
Capture UUID avoids duplicates on the same capture'''
|
||||
captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
|
||||
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, prefered_uuids=self._captures_index.cached_captures)
|
||||
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1,
|
||||
prefered_uuids=self._captures_index.cached_captures)
|
||||
for h_capture_uuid, url_uuid, url_hostname, same_url in details:
|
||||
cache = self.capture_cache(h_capture_uuid)
|
||||
if cache and hasattr(cache, 'title'):
|
||||
|
|
|
@ -866,7 +866,7 @@ def index_generic(show_hidden: bool=False, show_error: bool=True, category: Opti
|
|||
continue
|
||||
|
||||
titles.append((cached.uuid, cached.title, cached.timestamp.isoformat(), cached.url,
|
||||
cached.redirects, cached.incomplete_redirects))
|
||||
cached.redirects))
|
||||
titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)
|
||||
return render_template('index.html', titles=titles, public_domain=lookyloo.public_domain,
|
||||
show_project_page=get_config('generic', 'show_project_page'),
|
||||
|
|
|
@ -143,18 +143,10 @@ class CaptureRedirects(Resource):
|
|||
|
||||
to_return: Dict[str, Any] = {}
|
||||
try:
|
||||
to_return = {'response': {'url': cache.url, 'redirects': []}}
|
||||
to_return = {'response': {'url': cache.url,
|
||||
'redirects': cache.redirects if cache.redirects else []}}
|
||||
if not cache.redirects:
|
||||
to_return['response']['info'] = 'No redirects'
|
||||
return to_return
|
||||
if cache.incomplete_redirects:
|
||||
# Trigger tree build, get all redirects
|
||||
lookyloo.get_crawled_tree(capture_uuid)
|
||||
cache = lookyloo.capture_cache(capture_uuid)
|
||||
if cache:
|
||||
to_return['response']['redirects'] = cache.redirects
|
||||
else:
|
||||
to_return['response']['redirects'] = cache.redirects
|
||||
except Exception as e:
|
||||
if cache and hasattr(cache, 'error'):
|
||||
to_return['error'] = cache.error
|
||||
|
|
|
@ -97,7 +97,7 @@ $(document).ready(function () {
|
|||
</tr>
|
||||
</thead>
|
||||
<tbody>
|
||||
{% for uuid, page_title, datetime, url, redirects, incomplete_redirects in titles %}
|
||||
{% for uuid, page_title, datetime, url, redirects in titles %}
|
||||
<tr>
|
||||
<td data-search="{{ page_title }} {{ url }}">
|
||||
<p title="{{ page_title }}"><a href="{{ url_for('tree', tree_uuid=uuid) }}">{{ page_title }}</a></p>
|
||||
|
@ -115,11 +115,7 @@ $(document).ready(function () {
|
|||
{%endif%}
|
||||
</p>
|
||||
{% endfor %}
|
||||
{% if incomplete_redirects %}
|
||||
<a style="float: right;" href="{{ url_for('cache_tree', tree_uuid=uuid) }}">Unable to find the redirects, click here to build the tree</a>
|
||||
{%else%}
|
||||
<a style="float: right;" href="{{ url_for('redirects', tree_uuid=uuid) }}">Download redirects</a>
|
||||
{%endif%}
|
||||
{% else%}
|
||||
No redirect
|
||||
{%endif%}
|
||||
|
|
Loading…
Reference in New Issue