fix: cleanup

pull/78/head
Raphaël Vinot 2020-06-29 17:23:01 +02:00
parent c20dd170e0
commit 23419a31b9
4 changed files with 9 additions and 38 deletions

View File

@@ -4,6 +4,7 @@
import argparse
import logging
from lookyloo.helpers import load_pickle_tree
from lookyloo.lookyloo import Lookyloo, Indexing
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
@@ -23,5 +24,6 @@ if __name__ == '__main__':
indexing = Indexing()
indexing.clear_indexes()
for capture_dir in lookyloo.capture_dirs:
indexing.index_cookies_capture(capture_dir)
indexing.index_body_hashes_capture(capture_dir)
tree = load_pickle_tree(capture_dir)
indexing.index_cookies_capture(tree)
indexing.index_body_hashes_capture(tree)

View File

@@ -58,17 +58,7 @@ class Indexing():
def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
return [uuids.split('|')for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]
def index_cookies_capture(self, capture_dir: Path) -> None:
print(f'Index cookies {capture_dir}')
try:
crawled_tree = load_pickle_tree(capture_dir)
except Exception as e:
print(e)
return
if not crawled_tree:
return
def index_cookies_capture(self, crawled_tree: CrawledTree) -> None:
if self.redis.sismember('indexed_cookies', crawled_tree.uuid):
# Do not reindex
return
@@ -115,17 +105,7 @@ class Indexing():
return {'hash_freq': self.redis.zscore('body_hashes', body_hash),
'hash_domains_freq': self.redis.zcard(f'bh|{body_hash}')}
def index_body_hashes_capture(self, capture_dir: Path) -> None:
print(f'Index body hashes {capture_dir}')
try:
crawled_tree = load_pickle_tree(capture_dir)
except Exception as e:
print(e)
return
if not crawled_tree:
return
def index_body_hashes_capture(self, crawled_tree: CrawledTree) -> None:
if self.redis.sismember('indexed_body_hashes', crawled_tree.uuid):
# Do not reindex
return
@@ -239,6 +219,8 @@ class Lookyloo():
har_files = sorted(capture_dir.glob('*.har'))
try:
ct = CrawledTree(har_files, uuid)
self.indexing.index_cookies_capture(ct)
self.indexing.index_body_hashes_capture(ct)
except Har2TreeError as e:
raise NoValidHarFile(e.message)

View File

@@ -439,8 +439,8 @@ def body_hash_details(body_hash: str):
captures, domains = lookyloo.get_body_hash_investigator(body_hash)
return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures)
# Query API
# Query API
@app.route('/json/<string:tree_uuid>/redirects', methods=['GET'])
def json_redirects(tree_uuid: str):

View File

@@ -170,19 +170,6 @@
</a></p>
<div>
{% endif %}
{% if url['sane_js'] %}
<div>
{% if url['sane_js'] is string %}
<b>{{ url['sane_js'] }} </b>
{% else %}
This file is known as part of <b>{{ url['sane_js'][0] }}</b>
version <b>{{ url['sane_js'][1] }}</b>: <b>{{ url['sane_js'][2] }}</b>.
{% if url['sane_js'][3] > 1%}
It is also present in <b>{{ url['sane_js'][3] -1 }}</b> other libraries.
{%endif%}
{%endif%}
</div>
{% endif %}
{% if url['cookies_received'] %}
<div>