mirror of https://github.com/CIRCL/lookyloo
chg: remove incomplete_redirects from cache, was always 0/False
parent
ea2ded9beb
commit
c0f601e5db
|
@ -44,7 +44,7 @@ class LookylooCacheLogAdapter(LoggerAdapter):
|
||||||
|
|
||||||
class CaptureCache():
|
class CaptureCache():
|
||||||
__slots__ = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir',
|
__slots__ = ('uuid', 'title', 'timestamp', 'url', 'redirects', 'capture_dir',
|
||||||
'error', 'incomplete_redirects', 'no_index', 'categories', 'parent',
|
'error', 'no_index', 'categories', 'parent',
|
||||||
'user_agent', 'referer', 'logger')
|
'user_agent', 'referer', 'logger')
|
||||||
|
|
||||||
def __init__(self, cache_entry: Dict[str, Any]):
|
def __init__(self, cache_entry: Dict[str, Any]):
|
||||||
|
@ -87,7 +87,6 @@ class CaptureCache():
|
||||||
# Error without all the keys in __default_cache_keys was fatal.
|
# Error without all the keys in __default_cache_keys was fatal.
|
||||||
# if the keys in __default_cache_keys are present, it was an HTTP error and we still need to pass the error along
|
# if the keys in __default_cache_keys are present, it was an HTTP error and we still need to pass the error along
|
||||||
self.error: Optional[str] = cache_entry.get('error')
|
self.error: Optional[str] = cache_entry.get('error')
|
||||||
self.incomplete_redirects: bool = True if cache_entry.get('incomplete_redirects') in [1, '1'] else False
|
|
||||||
self.no_index: bool = True if cache_entry.get('no_index') in [1, '1'] else False
|
self.no_index: bool = True if cache_entry.get('no_index') in [1, '1'] else False
|
||||||
self.categories: List[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
|
self.categories: List[str] = json.loads(cache_entry['categories']) if cache_entry.get('categories') else []
|
||||||
self.parent: Optional[str] = cache_entry.get('parent')
|
self.parent: Optional[str] = cache_entry.get('parent')
|
||||||
|
@ -181,8 +180,7 @@ class CapturesIndex(Mapping):
|
||||||
|
|
||||||
def __getitem__(self, uuid: str) -> CaptureCache:
|
def __getitem__(self, uuid: str) -> CaptureCache:
|
||||||
if uuid in self.__cache:
|
if uuid in self.__cache:
|
||||||
if (self.__cache[uuid].capture_dir.exists()
|
if self.__cache[uuid].capture_dir.exists():
|
||||||
and not self.__cache[uuid].incomplete_redirects):
|
|
||||||
return self.__cache[uuid]
|
return self.__cache[uuid]
|
||||||
del self.__cache[uuid]
|
del self.__cache[uuid]
|
||||||
capture_dir = self._get_capture_dir(uuid)
|
capture_dir = self._get_capture_dir(uuid)
|
||||||
|
@ -192,11 +190,9 @@ class CapturesIndex(Mapping):
|
||||||
# NOTE: checking for pickle to exist may be a bad idea here.
|
# NOTE: checking for pickle to exist may be a bad idea here.
|
||||||
if (cc.capture_dir.exists()
|
if (cc.capture_dir.exists()
|
||||||
and ((cc.capture_dir / 'tree.pickle.gz').exists()
|
and ((cc.capture_dir / 'tree.pickle.gz').exists()
|
||||||
or (cc.capture_dir / 'tree.pickle').exists())
|
or (cc.capture_dir / 'tree.pickle').exists())):
|
||||||
and not cc.incomplete_redirects):
|
|
||||||
self.__cache[uuid] = cc
|
self.__cache[uuid] = cc
|
||||||
return self.__cache[uuid]
|
return self.__cache[uuid]
|
||||||
|
|
||||||
self.__cache[uuid] = self._set_capture_cache(capture_dir)
|
self.__cache[uuid] = self._set_capture_cache(capture_dir)
|
||||||
return self.__cache[uuid]
|
return self.__cache[uuid]
|
||||||
|
|
||||||
|
@ -398,7 +394,6 @@ class CapturesIndex(Mapping):
|
||||||
cache['title'] = har.initial_title
|
cache['title'] = har.initial_title
|
||||||
cache['timestamp'] = har.initial_start_time
|
cache['timestamp'] = har.initial_start_time
|
||||||
cache['redirects'] = json.dumps(tree.redirects) if tree else ''
|
cache['redirects'] = json.dumps(tree.redirects) if tree else ''
|
||||||
cache['incomplete_redirects'] = 0
|
|
||||||
cache['user_agent'] = har.root_user_agent if har.root_user_agent else 'No User Agent.'
|
cache['user_agent'] = har.root_user_agent if har.root_user_agent else 'No User Agent.'
|
||||||
if 'url' not in cache:
|
if 'url' not in cache:
|
||||||
# if all went well, we already filled that one above.
|
# if all went well, we already filled that one above.
|
||||||
|
|
|
@ -484,7 +484,9 @@ class Lookyloo():
|
||||||
# Do not try to build pickles
|
# Do not try to build pickles
|
||||||
capture_uuids = set(capture_uuids) & self._captures_index.cached_captures
|
capture_uuids = set(capture_uuids) & self._captures_index.cached_captures
|
||||||
|
|
||||||
all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids if self.capture_cache(uuid) and hasattr(self._captures_index[uuid], 'timestamp')]
|
all_cache: List[CaptureCache] = [self._captures_index[uuid] for uuid in capture_uuids
|
||||||
|
if self.capture_cache(uuid)
|
||||||
|
and hasattr(self._captures_index[uuid], 'timestamp')]
|
||||||
all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
|
all_cache.sort(key=operator.attrgetter('timestamp'), reverse=True)
|
||||||
return all_cache
|
return all_cache
|
||||||
|
|
||||||
|
@ -1010,7 +1012,8 @@ class Lookyloo():
|
||||||
If a URL is given, it splits the results if the hash is seen on the same URL or an other one.
|
If a URL is given, it splits the results if the hash is seen on the same URL or an other one.
|
||||||
Capture UUID avoids duplicates on the same capture'''
|
Capture UUID avoids duplicates on the same capture'''
|
||||||
captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
|
captures_list: Dict[str, List[Tuple[str, str, str, str, str]]] = {'same_url': [], 'different_url': []}
|
||||||
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1, prefered_uuids=self._captures_index.cached_captures)
|
total_captures, details = self.indexing.get_body_hash_captures(blob_hash, url, filter_capture_uuid=capture_uuid, limit=-1,
|
||||||
|
prefered_uuids=self._captures_index.cached_captures)
|
||||||
for h_capture_uuid, url_uuid, url_hostname, same_url in details:
|
for h_capture_uuid, url_uuid, url_hostname, same_url in details:
|
||||||
cache = self.capture_cache(h_capture_uuid)
|
cache = self.capture_cache(h_capture_uuid)
|
||||||
if cache and hasattr(cache, 'title'):
|
if cache and hasattr(cache, 'title'):
|
||||||
|
|
|
@ -866,7 +866,7 @@ def index_generic(show_hidden: bool=False, show_error: bool=True, category: Opti
|
||||||
continue
|
continue
|
||||||
|
|
||||||
titles.append((cached.uuid, cached.title, cached.timestamp.isoformat(), cached.url,
|
titles.append((cached.uuid, cached.title, cached.timestamp.isoformat(), cached.url,
|
||||||
cached.redirects, cached.incomplete_redirects))
|
cached.redirects))
|
||||||
titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)
|
titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)
|
||||||
return render_template('index.html', titles=titles, public_domain=lookyloo.public_domain,
|
return render_template('index.html', titles=titles, public_domain=lookyloo.public_domain,
|
||||||
show_project_page=get_config('generic', 'show_project_page'),
|
show_project_page=get_config('generic', 'show_project_page'),
|
||||||
|
|
|
@ -143,18 +143,10 @@ class CaptureRedirects(Resource):
|
||||||
|
|
||||||
to_return: Dict[str, Any] = {}
|
to_return: Dict[str, Any] = {}
|
||||||
try:
|
try:
|
||||||
to_return = {'response': {'url': cache.url, 'redirects': []}}
|
to_return = {'response': {'url': cache.url,
|
||||||
|
'redirects': cache.redirects if cache.redirects else []}}
|
||||||
if not cache.redirects:
|
if not cache.redirects:
|
||||||
to_return['response']['info'] = 'No redirects'
|
to_return['response']['info'] = 'No redirects'
|
||||||
return to_return
|
|
||||||
if cache.incomplete_redirects:
|
|
||||||
# Trigger tree build, get all redirects
|
|
||||||
lookyloo.get_crawled_tree(capture_uuid)
|
|
||||||
cache = lookyloo.capture_cache(capture_uuid)
|
|
||||||
if cache:
|
|
||||||
to_return['response']['redirects'] = cache.redirects
|
|
||||||
else:
|
|
||||||
to_return['response']['redirects'] = cache.redirects
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if cache and hasattr(cache, 'error'):
|
if cache and hasattr(cache, 'error'):
|
||||||
to_return['error'] = cache.error
|
to_return['error'] = cache.error
|
||||||
|
|
|
@ -97,7 +97,7 @@ $(document).ready(function () {
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% for uuid, page_title, datetime, url, redirects, incomplete_redirects in titles %}
|
{% for uuid, page_title, datetime, url, redirects in titles %}
|
||||||
<tr>
|
<tr>
|
||||||
<td data-search="{{ page_title }} {{ url }}">
|
<td data-search="{{ page_title }} {{ url }}">
|
||||||
<p title="{{ page_title }}"><a href="{{ url_for('tree', tree_uuid=uuid) }}">{{ page_title }}</a></p>
|
<p title="{{ page_title }}"><a href="{{ url_for('tree', tree_uuid=uuid) }}">{{ page_title }}</a></p>
|
||||||
|
@ -115,11 +115,7 @@ $(document).ready(function () {
|
||||||
{%endif%}
|
{%endif%}
|
||||||
</p>
|
</p>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% if incomplete_redirects %}
|
<a style="float: right;" href="{{ url_for('redirects', tree_uuid=uuid) }}">Download redirects</a>
|
||||||
<a style="float: right;" href="{{ url_for('cache_tree', tree_uuid=uuid) }}">Unable to find the redirects, click here to build the tree</a>
|
|
||||||
{%else%}
|
|
||||||
<a style="float: right;" href="{{ url_for('redirects', tree_uuid=uuid) }}">Download redirects</a>
|
|
||||||
{%endif%}
|
|
||||||
{% else%}
|
{% else%}
|
||||||
No redirect
|
No redirect
|
||||||
{%endif%}
|
{%endif%}
|
||||||
|
|
Loading…
Reference in New Issue