From 0a9003f58e0f2aeee25f0405f331dd60c6d86a0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?=
Date: Thu, 8 Dec 2022 11:57:45 +0100
Subject: [PATCH] chg: Use cache whenever possible

---
 lookyloo/lookyloo.py    |  2 +-
 website/web/__init__.py | 14 ++++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py
index 8d4e2ce..06ace52 100644
--- a/lookyloo/lookyloo.py
+++ b/lookyloo/lookyloo.py
@@ -702,7 +702,7 @@ class Lookyloo():
         '''Get all the files related to this capture.'''
         return self._get_raw(capture_uuid)
 
-    def get_urls_rendered_page(self, capture_uuid: str, /):
+    def get_urls_rendered_page(self, capture_uuid: str, /) -> List[str]:
         ct = self.get_crawled_tree(capture_uuid)
         return sorted(set(ct.root_hartree.rendered_node.urls_in_rendered_page)
                       - set(ct.root_hartree.all_url_requests.keys()))
diff --git a/website/web/__init__.py b/website/web/__init__.py
index dd9e5ce..caa8fc9 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -578,15 +578,17 @@ def bulk_captures(base_tree_uuid: str):
     user = src_request_ip(request)
     selected_urls = request.form.getlist('url')
     urls = lookyloo.get_urls_rendered_page(base_tree_uuid)
-    ct = lookyloo.get_crawled_tree(base_tree_uuid)
     cache = lookyloo.capture_cache(base_tree_uuid)
+    if not cache:
+        flash(f'Unable to find capture {base_tree_uuid} in cache.', 'error')
+        return redirect(url_for('tree', tree_uuid=base_tree_uuid))
     cookies = load_cookies(lookyloo.get_cookies(base_tree_uuid))
     bulk_captures = []
     for url in [urls[int(selected_id) - 1] for selected_id in selected_urls]:
         capture = {'url': url,
                    'cookies': cookies,
-                   'referer': ct.redirects[-1] if ct.redirects else ct.root_url,
-                   'user_agent': ct.user_agent,
+                   'referer': cache.redirects[-1] if cache.redirects else cache.url,
+                   'user_agent': cache.user_agent,
                    'parent': base_tree_uuid,
                    'listing': False if cache and cache.no_index else True
                    }
@@ -1092,11 +1094,11 @@ def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
 def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str):
     # Note: we could simplify it with lookyloo.get_urls_rendered_page, but if at somepoint,
     # we have multiple page rendered on one tree, it will be a problem.
-    ct = lookyloo.get_crawled_tree(tree_uuid)
-    urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
-    if not urlnode.rendered_html:
+    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
+    if not hasattr(urlnode, 'rendered_html') or not urlnode.rendered_html:
         return
 
+    ct = lookyloo.get_crawled_tree(tree_uuid)
     not_loaded_urls = sorted(set(urlnode.urls_in_rendered_page)
                              - set(ct.root_hartree.all_url_requests.keys()))
     to_return = StringIO()
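
The get_urls_rendered_page() helper annotated above answers the question
"which URLs appear in the rendered page but were never fetched during the
capture?" with a plain set difference over the HAR tree. A minimal
standalone sketch of that logic, with hypothetical sample data standing in
for the real ct.root_hartree contents:

    from typing import List

    def urls_never_loaded(urls_in_rendered_page: List[str],
                          all_url_requests: List[str]) -> List[str]:
        # URLs referenced by the rendered DOM minus URLs actually requested,
        # sorted for a stable listing (same shape as the annotated helper).
        return sorted(set(urls_in_rendered_page) - set(all_url_requests))

    # Hypothetical sample data.
    rendered = ['https://a.example/x', 'https://b.example/y']
    requested = ['https://a.example/x']
    print(urls_never_loaded(rendered, requested))  # ['https://b.example/y']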
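
The common thread in the website changes is serving request metadata from
the capture cache instead of deserializing the whole crawled tree on every
request. A minimal sketch of the guard-then-use pattern from the patched
bulk_captures() view, assuming a CaptureCache object exposing the url,
redirects, user_agent and no_index attributes read in the diff;
build_bulk_capture() itself is a hypothetical helper, not part of Lookyloo:

    from typing import List, Optional

    class CaptureCache:
        # Stand-in for Lookyloo's cached capture metadata; the attribute
        # names are the ones the diff reads.
        def __init__(self, url: str, redirects: List[str],
                     user_agent: str, no_index: bool) -> None:
            self.url = url
            self.redirects = redirects
            self.user_agent = user_agent
            self.no_index = no_index

    def build_bulk_capture(cache: Optional[CaptureCache], target_url: str,
                           parent_uuid: str) -> Optional[dict]:
        # Build one bulk-capture request from cached metadata alone; None
        # mirrors the early flash-and-redirect in the patched view.
        if not cache:
            return None
        return {'url': target_url,
                # Last hop of the redirect chain if any, else the landing URL.
                'referer': cache.redirects[-1] if cache.redirects else cache.url,
                'user_agent': cache.user_agent,
                'parent': parent_uuid,
                # Equivalent to the diff's expression once cache is truthy.
                'listing': not cache.no_index}

    cache = CaptureCache('https://example.com', [], 'Mozilla/5.0', no_index=False)
    print(build_bulk_capture(cache, 'https://example.com/page', 'some-uuid'))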