From f40976eecdca7a319e1b8df9259fcca9a8e39200 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?=
Date: Mon, 11 Jan 2021 15:12:44 +0100
Subject: [PATCH] chg: cleanup download of urls in rendered content

fix #148
---
 website/web/__init__.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/website/web/__init__.py b/website/web/__init__.py
index 5be3977b..36fdf2d0 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -562,11 +562,15 @@ def urlnode_response_cookies(tree_uuid: str, node_uuid: str):
 
 @app.route('/tree/<string:tree_uuid>/url/<string:node_uuid>/urls_in_rendered_content', methods=['GET'])
 def urlnode_urls_in_rendered_content(tree_uuid: str, node_uuid: str):
-    urlnode = lookyloo.get_urlnode_from_tree(tree_uuid, node_uuid)
+    ct = lookyloo.get_crawled_tree(tree_uuid)
+    urlnode = ct.root_hartree.get_url_node_by_uuid(node_uuid)
     if not urlnode.rendered_html:
         return
+
+    not_loaded_urls = sorted(set(urlnode.urls_in_rendered_page)
+                             - set(ct.root_hartree.all_url_requests.keys()))
     to_return = StringIO()
-    to_return.writelines([f'{u}\n' for u in urlnode.urls_in_rendered_page])
+    to_return.writelines([f'{u}\n' for u in not_loaded_urls])
     return send_file(BytesIO(to_return.getvalue().encode()), mimetype='text/plain',
                      as_attachment=True, attachment_filename='urls_in_rendered_content.txt')
 