mirror of https://github.com/CIRCL/lookyloo
fix: cleanup
parent
c20dd170e0
commit
23419a31b9
|
@ -4,6 +4,7 @@
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
|
from lookyloo.helpers import load_pickle_tree
|
||||||
from lookyloo.lookyloo import Lookyloo, Indexing
|
from lookyloo.lookyloo import Lookyloo, Indexing
|
||||||
|
|
||||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||||
|
@ -23,5 +24,6 @@ if __name__ == '__main__':
|
||||||
indexing = Indexing()
|
indexing = Indexing()
|
||||||
indexing.clear_indexes()
|
indexing.clear_indexes()
|
||||||
for capture_dir in lookyloo.capture_dirs:
|
for capture_dir in lookyloo.capture_dirs:
|
||||||
indexing.index_cookies_capture(capture_dir)
|
tree = load_pickle_tree(capture_dir)
|
||||||
indexing.index_body_hashes_capture(capture_dir)
|
indexing.index_cookies_capture(tree)
|
||||||
|
indexing.index_body_hashes_capture(tree)
|
||||||
|
|
|
@ -58,17 +58,7 @@ class Indexing():
|
||||||
def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
|
def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
|
||||||
return [uuids.split('|')for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]
|
return [uuids.split('|')for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]
|
||||||
|
|
||||||
def index_cookies_capture(self, capture_dir: Path) -> None:
|
def index_cookies_capture(self, crawled_tree: CrawledTree) -> None:
|
||||||
print(f'Index cookies {capture_dir}')
|
|
||||||
try:
|
|
||||||
crawled_tree = load_pickle_tree(capture_dir)
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
return
|
|
||||||
|
|
||||||
if not crawled_tree:
|
|
||||||
return
|
|
||||||
|
|
||||||
if self.redis.sismember('indexed_cookies', crawled_tree.uuid):
|
if self.redis.sismember('indexed_cookies', crawled_tree.uuid):
|
||||||
# Do not reindex
|
# Do not reindex
|
||||||
return
|
return
|
||||||
|
@ -115,17 +105,7 @@ class Indexing():
|
||||||
return {'hash_freq': self.redis.zscore('body_hashes', body_hash),
|
return {'hash_freq': self.redis.zscore('body_hashes', body_hash),
|
||||||
'hash_domains_freq': self.redis.zcard(f'bh|{body_hash}')}
|
'hash_domains_freq': self.redis.zcard(f'bh|{body_hash}')}
|
||||||
|
|
||||||
def index_body_hashes_capture(self, capture_dir: Path) -> None:
|
def index_body_hashes_capture(self, crawled_tree: CrawledTree) -> None:
|
||||||
print(f'Index body hashes {capture_dir}')
|
|
||||||
try:
|
|
||||||
crawled_tree = load_pickle_tree(capture_dir)
|
|
||||||
except Exception as e:
|
|
||||||
print(e)
|
|
||||||
return
|
|
||||||
|
|
||||||
if not crawled_tree:
|
|
||||||
return
|
|
||||||
|
|
||||||
if self.redis.sismember('indexed_body_hashes', crawled_tree.uuid):
|
if self.redis.sismember('indexed_body_hashes', crawled_tree.uuid):
|
||||||
# Do not reindex
|
# Do not reindex
|
||||||
return
|
return
|
||||||
|
@ -239,6 +219,8 @@ class Lookyloo():
|
||||||
har_files = sorted(capture_dir.glob('*.har'))
|
har_files = sorted(capture_dir.glob('*.har'))
|
||||||
try:
|
try:
|
||||||
ct = CrawledTree(har_files, uuid)
|
ct = CrawledTree(har_files, uuid)
|
||||||
|
self.indexing.index_cookies_capture(ct)
|
||||||
|
self.indexing.index_body_hashes_capture(ct)
|
||||||
except Har2TreeError as e:
|
except Har2TreeError as e:
|
||||||
raise NoValidHarFile(e.message)
|
raise NoValidHarFile(e.message)
|
||||||
|
|
||||||
|
|
|
@ -439,8 +439,8 @@ def body_hash_details(body_hash: str):
|
||||||
captures, domains = lookyloo.get_body_hash_investigator(body_hash)
|
captures, domains = lookyloo.get_body_hash_investigator(body_hash)
|
||||||
return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures)
|
return render_template('body_hash.html', body_hash=body_hash, domains=domains, captures=captures)
|
||||||
|
|
||||||
# Query API
|
|
||||||
|
|
||||||
|
# Query API
|
||||||
|
|
||||||
@app.route('/json/<string:tree_uuid>/redirects', methods=['GET'])
|
@app.route('/json/<string:tree_uuid>/redirects', methods=['GET'])
|
||||||
def json_redirects(tree_uuid: str):
|
def json_redirects(tree_uuid: str):
|
||||||
|
|
|
@ -170,19 +170,6 @@
|
||||||
</a></p>
|
</a></p>
|
||||||
<div>
|
<div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if url['sane_js'] %}
|
|
||||||
<div>
|
|
||||||
{% if url['sane_js'] is string %}
|
|
||||||
<b>{{ url['sane_js'] }} </b>
|
|
||||||
{% else %}
|
|
||||||
This file is known as part of <b>{{ url['sane_js'][0] }}</b>
|
|
||||||
version <b>{{ url['sane_js'][1] }}</b>: <b>{{ url['sane_js'][2] }}</b>.
|
|
||||||
{% if url['sane_js'][3] > 1%}
|
|
||||||
It is also present in <b>{{ url['sane_js'][3] -1 }}</b> other libraries.
|
|
||||||
{%endif%}
|
|
||||||
{%endif%}
|
|
||||||
</div>
|
|
||||||
{% endif %}
|
|
||||||
|
|
||||||
{% if url['cookies_received'] %}
|
{% if url['cookies_received'] %}
|
||||||
<div>
|
<div>
|
||||||
|
|
Loading…
Reference in New Issue