diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py index caf8e0a1..2ad6267c 100644 --- a/lookyloo/capturecache.py +++ b/lookyloo/capturecache.py @@ -44,7 +44,7 @@ class LookylooCacheLogAdapter(LoggerAdapter): class CaptureCache(): __slots__ = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir', - 'error', 'incomplete_redirects', 'no_index', 'categories', 'parent', + 'error', 'no_index', 'categories', 'parent', 'user_agent', 'referer', 'logger') def __init__(self, cache_entry: Dict[str, Any]): @@ -87,7 +87,6 @@ class CaptureCache(): # Error without all the keys in __default_cache_keys was fatal. # if the keys in __default_cache_keys are present, it was an HTTP error and we still need to pass the error along self.error: Optional[str] = cache_entry.get('error') - self.incomplete_redirects: bool = True if cache_entry.get('incomplete_redirects') in [1, '1'] else False self.no_index: bool = True if cache_entry.get('no_index') in [1, '1'] else False self.categories: List[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else [] self.parent: Optional[str] = cache_entry.get('parent') @@ -181,8 +180,7 @@ class CapturesIndex(Mapping): def __getitem__(self, uuid: str) -> CaptureCache: if uuid in self.__cache: - if (self.__cache[uuid].capture_dir.exists() - and not self.__cache[uuid].incomplete_redirects): + if self.__cache[uuid].capture_dir.exists(): return self.__cache[uuid] del self.__cache[uuid] capture_dir = self._get_capture_dir(uuid) @@ -192,11 +190,9 @@ class CapturesIndex(Mapping): # NOTE: checking for pickle to exist may be a bad idea here. if (cc.capture_dir.exists() and ((cc.capture_dir / 'tree.pickle.gz').exists() - or (cc.capture_dir / 'tree.pickle').exists()) - and not cc.incomplete_redirects): + or (cc.capture_dir / 'tree.pickle').exists())): self.__cache[uuid] = cc return self.__cache[uuid] - self.__cache[uuid] = self._set_capture_cache(capture_dir) return self.__cache[uuid] @@ -398,7 +394,6 @@ class CapturesIndex(Mapping): cache['title'] = har.initial_title cache['timestamp'] = har.initial_start_time cache['redirects'] = json.dumps(tree.redirects) if tree else '' - cache['incomplete_redirects'] = 0 cache['user_agent'] = har.root_user_agent if har.root_user_agent else 'No User Agent.' if 'url' not in cache: # if all went well, we already filled that one above. diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 96e98409..c563acd8 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -484,7 +484,9 @@ class Lookyloo(): # Do not try to build pickles capture_uuids = set(capture_uuids) & self._captures_index.cached_captures - all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids if self.capture_cache(uuid) and hasattr(self._captures_index[uuid], 'timestamp')] + all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids + if self.capture_cache(uuid) + and hasattr(self._captures_index[uuid], 'timestamp')] all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True) return all_cache @@ -1010,7 +1012,8 @@ class Lookyloo(): If a URL is given, it splits the results if the hash is seen on the same URL or an other one. Capture UUID avoids duplicates on the same capture''' captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []} - total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, prefered_uuids=self._captures_index.cached_captures) + total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, + prefered_uuids=self._captures_index.cached_captures) for h_capture_uuid, url_uuid, url_hostname, same_url in details: cache = self.capture_cache(h_capture_uuid) if cache and hasattr(cache, 'title'): diff --git a/website/web/__init__.py b/website/web/__init__.py index 20b90a05..e46e3bf0 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -866,7 +866,7 @@ def index_generic(show_hidden: bool=False, show_error: bool=True, category: Opti continue titles.append((cached.uuid, cached.title, cached.timestamp.isoformat(), cached.url, - cached.redirects, cached.incomplete_redirects)) + cached.redirects)) titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True) return render_template('index.html', titles=titles, public_domain=lookyloo.public_domain, show_project_page=get_config('generic', 'show_project_page'), diff --git a/website/web/genericapi.py b/website/web/genericapi.py index 4ddcbe9f..449288a2 100644 --- a/website/web/genericapi.py +++ b/website/web/genericapi.py @@ -143,18 +143,10 @@ class CaptureRedirects(Resource): to_return: Dict[str, Any] = {} try: - to_return = {'response': {'url': cache.url, 'redirects': []}} + to_return = {'response': {'url': cache.url, + 'redirects': cache.redirects if cache.redirects else []}} if not cache.redirects: to_return['response']['info'] = 'No redirects' - return to_return - if cache.incomplete_redirects: - # Trigger tree build, get all redirects - lookyloo.get_crawled_tree(capture_uuid) - cache = lookyloo.capture_cache(capture_uuid) - if cache: - to_return['response']['redirects'] = cache.redirects - else: - to_return['response']['redirects'] = cache.redirects except Exception as e: if cache and hasattr(cache, 'error'): to_return['error'] = cache.error diff --git a/website/web/templates/index.html b/website/web/templates/index.html index 2438da2d..a9ee30ff 100644 --- a/website/web/templates/index.html +++ b/website/web/templates/index.html @@ -97,7 +97,7 @@ $(document).ready(function () {
- {% for uuid, page_title, datetime, url, redirects, incomplete_redirects in titles %} + {% for uuid, page_title, datetime, url, redirects in titles %}