chg: remove incomplete_redirects from cache, was always 0/False

pull/746/head
Raphaël Vinot 2023-07-28 14:05:28 +02:00
parent ea2ded9beb
commit c0f601e5db
5 changed files with 13 additions and 27 deletions

View File

@@ -44,7 +44,7 @@ class LookylooCacheLogAdapter(LoggerAdapter):
class CaptureCache():
__slots__ = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir',
'error', 'incomplete_redirects', 'no_index', 'categories', 'parent',
'error', 'no_index', 'categories', 'parent',
'user_agent', 'referer', 'logger')
def __init__(self, cache_entry: Dict[str, Any]):
@@ -87,7 +87,6 @@ class CaptureCache():
# Error without all the keys in __default_cache_keys was fatal.
# if the keys in __default_cache_keys are present, it was an HTTP error and we still need to pass the error along
self.error: Optional[str] = cache_entry.get('error')
self.incomplete_redirects: bool = True if cache_entry.get('incomplete_redirects') in [1, '1'] else False
self.no_index: bool = True if cache_entry.get('no_index') in [1, '1'] else False
self.categories: List[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
self.parent: Optional[str] = cache_entry.get('parent')
@@ -181,8 +180,7 @@ class CapturesIndex(Mapping):
def __getitem__(self, uuid: str) -> CaptureCache:
if uuid in self.__cache:
if (self.__cache[uuid].capture_dir.exists()
and not self.__cache[uuid].incomplete_redirects):
if self.__cache[uuid].capture_dir.exists():
return self.__cache[uuid]
del self.__cache[uuid]
capture_dir = self._get_capture_dir(uuid)
@@ -192,11 +190,9 @@ class CapturesIndex(Mapping):
# NOTE: checking for pickle to exist may be a bad idea here.
if (cc.capture_dir.exists()
and ((cc.capture_dir / 'tree.pickle.gz').exists()
or (cc.capture_dir / 'tree.pickle').exists())
and not cc.incomplete_redirects):
or (cc.capture_dir / 'tree.pickle').exists())):
self.__cache[uuid] = cc
return self.__cache[uuid]
self.__cache[uuid] = self._set_capture_cache(capture_dir)
return self.__cache[uuid]
@@ -398,7 +394,6 @@ class CapturesIndex(Mapping):
cache['title'] = har.initial_title
cache['timestamp'] = har.initial_start_time
cache['redirects'] = json.dumps(tree.redirects) if tree else ''
cache['incomplete_redirects'] = 0
cache['user_agent'] = har.root_user_agent if har.root_user_agent else 'No User Agent.'
if 'url' not in cache:
# if all went well, we already filled that one above.

View File

@@ -484,7 +484,9 @@ class Lookyloo():
# Do not try to build pickles
capture_uuids = set(capture_uuids) & self._captures_index.cached_captures
all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids if self.capture_cache(uuid) and hasattr(self._captures_index[uuid], 'timestamp')]
all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids
if self.capture_cache(uuid)
and hasattr(self._captures_index[uuid], 'timestamp')]
all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
return all_cache
@@ -1010,7 +1012,8 @@ class Lookyloo():
If a URL is given, it splits the results if the hash is seen on the same URL or an other one.
Capture UUID avoids duplicates on the same capture'''
captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, prefered_uuids=self._captures_index.cached_captures)
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1,
prefered_uuids=self._captures_index.cached_captures)
for h_capture_uuid, url_uuid, url_hostname, same_url in details:
cache = self.capture_cache(h_capture_uuid)
if cache and hasattr(cache, 'title'):

View File

@@ -866,7 +866,7 @@ def index_generic(show_hidden: bool=False, show_error: bool=True, category: Opti
continue
titles.append((cached.uuid, cached.title, cached.timestamp.isoformat(), cached.url,
cached.redirects, cached.incomplete_redirects))
cached.redirects))
titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)
return render_template('index.html', titles=titles, public_domain=lookyloo.public_domain,
show_project_page=get_config('generic', 'show_project_page'),

View File

@@ -143,18 +143,10 @@ class CaptureRedirects(Resource):
to_return: Dict[str, Any] = {}
try:
to_return = {'response': {'url': cache.url, 'redirects': []}}
to_return = {'response': {'url': cache.url,
'redirects': cache.redirects if cache.redirects else []}}
if not cache.redirects:
to_return['response']['info'] = 'No redirects'
return to_return
if cache.incomplete_redirects:
# Trigger tree build, get all redirects
lookyloo.get_crawled_tree(capture_uuid)
cache = lookyloo.capture_cache(capture_uuid)
if cache:
to_return['response']['redirects'] = cache.redirects
else:
to_return['response']['redirects'] = cache.redirects
except Exception as e:
if cache and hasattr(cache, 'error'):
to_return['error'] = cache.error

View File

@@ -97,7 +97,7 @@ $(document).ready(function () {
</tr>
</thead>
<tbody>
{% for uuid, page_title, datetime, url, redirects, incomplete_redirects in titles %}
{% for uuid, page_title, datetime, url, redirects in titles %}
<tr>
<td data-search="{{ page_title }} {{ url }}">
<p title="{{ page_title }}"><a href="{{ url_for('tree', tree_uuid=uuid) }}">{{ page_title }}</a></p>
@@ -115,11 +115,7 @@ $(document).ready(function () {
{%endif%}
</p>
{% endfor %}
{% if incomplete_redirects %}
<a style="float: right;" href="{{ url_for('cache_tree', tree_uuid=uuid) }}">Unable to find the redirects, click here to build the tree</a>
{%else%}
<a style="float: right;" href="{{ url_for('redirects', tree_uuid=uuid) }}">Download redirects</a>
{%endif%}
{% else%}
No redirect
{%endif%}