mirror of https://github.com/CIRCL/lookyloo
new: Initial version of cookies indexing
parent
bc6df900c4
commit
5ae7f0f7e4
|
@ -22,12 +22,26 @@ def shutdown_cache(storage_directory: Path=None):
|
||||||
Popen(["./shutdown_redis.sh"], cwd=(storage_directory / 'cache'))
|
Popen(["./shutdown_redis.sh"], cwd=(storage_directory / 'cache'))
|
||||||
|
|
||||||
|
|
||||||
|
def launch_indexing(storage_directory: Path=None):
    """Start the Redis instance of the indexing backend if it is not running.

    :param storage_directory: Directory containing the ``indexing`` folder.
        Falls back to ``get_homedir()`` when omitted (or falsy).

    The server is started by spawning ``./run_redis.sh`` with the
    ``indexing`` folder as working directory; the call does not wait for
    the spawned process.
    """
    base_dir = storage_directory or get_homedir()
    if check_running('indexing'):
        # Already up according to check_running: nothing to launch.
        return
    Popen(["./run_redis.sh"], cwd=base_dir / 'indexing')
|
||||||
|
|
||||||
|
|
||||||
|
def shutdown_indexing(storage_directory: Path=None):
    """Stop the Redis instance of the indexing backend.

    :param storage_directory: Directory containing the ``indexing`` folder.
        Falls back to ``get_homedir()`` when omitted (or falsy).

    Spawns ``./shutdown_redis.sh`` with the ``indexing`` folder as working
    directory and returns immediately, without checking whether the server
    was running.
    """
    base_dir = storage_directory if storage_directory else get_homedir()
    Popen(["./shutdown_redis.sh"], cwd=base_dir / 'indexing')
|
||||||
|
|
||||||
|
|
||||||
def launch_all():
|
def launch_all():
|
||||||
launch_cache()
|
launch_cache()
|
||||||
|
launch_indexing()
|
||||||
|
|
||||||
|
|
||||||
def check_all(stop=False):
|
def check_all(stop=False):
|
||||||
backends = [['cache', False]]
|
backends = [['cache', False], ['indexing', False]]
|
||||||
while True:
|
while True:
|
||||||
for b in backends:
|
for b in backends:
|
||||||
try:
|
try:
|
||||||
|
@ -50,6 +64,7 @@ def check_all(stop=False):
|
||||||
|
|
||||||
def stop_all():
|
def stop_all():
|
||||||
shutdown_cache()
|
shutdown_cache()
|
||||||
|
shutdown_indexing()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,6 @@
|
||||||
|
#!/bin/bash

# Start the Redis server of the indexing backend with ./indexing.conf.
# All paths are relative, so the script has to be run from the directory
# it lives in.
#   -e  abort as soon as a command fails
#   -x  trace every command before it is executed
set -ex

../../redis/src/redis-server ./indexing.conf
|
|
@ -0,0 +1,6 @@
|
||||||
|
#!/bin/bash

# Ask the Redis server listening on ./indexing.sock to shut down.
# All paths are relative, so the script has to be run from the directory
# it lives in.
# NOTE(review): errexit is intentionally left disabled here, presumably so
# that a failing redis-cli (server already stopped) is not fatal -- confirm.
# set -e
set -o xtrace

../../redis/src/redis-cli -s ./indexing.sock shutdown
|
|
@ -92,6 +92,7 @@ def is_running() -> Dict[Any, Any]:
|
||||||
def get_socket_path(name: str) -> str:
|
def get_socket_path(name: str) -> str:
|
||||||
mapping = {
|
mapping = {
|
||||||
'cache': Path('cache', 'cache.sock'),
|
'cache': Path('cache', 'cache.sock'),
|
||||||
|
'indexing': Path('indexing', 'indexing.sock'),
|
||||||
'storage': Path('storage', 'storage.sock'),
|
'storage': Path('storage', 'storage.sock'),
|
||||||
}
|
}
|
||||||
return str(get_homedir() / mapping[name])
|
return str(get_homedir() / mapping[name])
|
||||||
|
|
|
@ -0,0 +1,57 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
|
from typing import Set, Tuple, List
|
||||||
|
|
||||||
|
from redis import Redis
|
||||||
|
|
||||||
|
from .helpers import get_socket_path
|
||||||
|
from .lookyloo import Lookyloo
|
||||||
|
|
||||||
|
|
||||||
|
class Indexing():
    """Index of the cookies seen in the captures, stored in Redis.

    Keys written by :meth:`index_cookies`:

    * ``cookies_names``: sorted set of cookie names.
    * ``cn|<name>``: sorted set of the domains that set cookie ``<name>``.
    * ``cn|<name>|<domain>``: sorted set of the values ``<domain>`` gave to
      cookie ``<name>``.
    * ``lookyloo_domains``: set of every domain that set a cookie.
    * ``<domain>``: set of the cookie names set by that domain.

    Every score is incremented by one for the first occurrence of a
    ``(name, domain)`` pair in a capture; later occurrences of the same pair
    in the same capture are ignored.
    """

    def __init__(self) -> None:
        self.lookyloo = Lookyloo()
        # decode_responses=True: Redis replies come back as str, not bytes.
        self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)

    @property
    def cookies_names(self) -> List[Tuple[str, float]]:
        """All indexed cookie names with their score, highest score first."""
        return self.redis.zrevrange('cookies_names', 0, -1, withscores=True)

    def cookies_names_number_domains(self, cookie_name: str) -> int:
        """Return the number of distinct domains indexed for ``cookie_name``."""
        return self.redis.zcard(f'cn|{cookie_name}')

    def cookies_names_domains_values(self, cookie_name: str, domain: str) -> List[Tuple[str, float]]:
        """Return the values ``domain`` gave to ``cookie_name``, highest score first."""
        return self.redis.zrevrange(f'cn|{cookie_name}|{domain}', 0, -1, withscores=True)

    def get_cookie_domains(self, cookie_name: str) -> List[Tuple[str, float]]:
        """Return the domains indexed for ``cookie_name``, highest score first."""
        return self.redis.zrevrange(f'cn|{cookie_name}', 0, -1, withscores=True)

    def index_cookies(self) -> None:
        """Walk every capture and add the cookies it received to the index.

        A capture whose tree cannot be loaded is reported on stdout and
        skipped. The Redis commands of one capture are queued in a pipeline
        and sent once the whole capture has been traversed.

        NOTE(review): nothing visible here records which captures were
        already processed, so calling this method again increments the same
        scores again -- confirm whether callers guard against that.
        """
        for capture_dir in self.lookyloo.capture_dirs:
            try:
                crawled_tree = self.lookyloo.get_crawled_tree(capture_dir)
            except Exception as e:
                # Best effort: one broken capture must not stop the indexing.
                print(e)
                continue
            pipeline = self.redis.pipeline()
            already_loaded: Set[Tuple[str, str]] = set()
            for urlnode in crawled_tree.root_hartree.url_tree.traverse():
                if not hasattr(urlnode, 'cookies_received'):
                    continue
                for domain, cookie, _ in urlnode.cookies_received:
                    # partition() instead of split('=', 1): a cookie string
                    # without '=' yields an empty value instead of raising
                    # ValueError and aborting the whole indexing run.
                    name, _sep, value = cookie.partition('=')
                    if (name, domain) in already_loaded:
                        # Only add cookie name once / capture
                        continue
                    already_loaded.add((name, domain))
                    pipeline.zincrby('cookies_names', 1, name)
                    pipeline.zincrby(f'cn|{name}', 1, domain)
                    pipeline.zincrby(f'cn|{name}|{domain}', 1, value)

                    pipeline.sadd('lookyloo_domains', domain)
                    pipeline.sadd(domain, name)

                    # pipeline.zincrby('lookyloo_cookies_index_values', 1, value)
                    # pipeline.zincrby(value, 1, name)
            pipeline.execute()
|
|
@ -13,7 +13,7 @@ from flask_bootstrap import Bootstrap # type: ignore
|
||||||
from flask_httpauth import HTTPDigestAuth # type: ignore
|
from flask_httpauth import HTTPDigestAuth # type: ignore
|
||||||
|
|
||||||
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents
|
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents
|
||||||
from lookyloo.lookyloo import Lookyloo
|
from lookyloo.lookyloo import Lookyloo, Indexing
|
||||||
from lookyloo.exceptions import NoValidHarFile, MissingUUID
|
from lookyloo.exceptions import NoValidHarFile, MissingUUID
|
||||||
from .proxied import ReverseProxied
|
from .proxied import ReverseProxied
|
||||||
|
|
||||||
|
@ -420,6 +420,21 @@ def index():
|
||||||
def index_hidden():
|
def index_hidden():
|
||||||
return index_generic(show_hidden=True)
|
return index_generic(show_hidden=True)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/cookies', methods=['GET'])
def cookies_lookup():
    """Render the list of every indexed cookie name.

    The template receives ``cookies_names``: one
    ``(name, score, number_of_domains)`` tuple per cookie name, in the order
    returned by ``Indexing.cookies_names``.
    """
    indexing = Indexing()
    rows = []
    for name, freq in indexing.cookies_names:
        # One extra lookup per name to get how many domains set it.
        rows.append((name, freq, indexing.cookies_names_number_domains(name)))
    return render_template('cookies.html', cookies_names=rows)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/cookies/<string:cookie_name>', methods=['GET'])
def cookies_name_detail(cookie_name: str):
    """Render the details of a single cookie name.

    The template receives ``domains``: one ``(domain, score, values)`` tuple
    per domain indexed for ``cookie_name``, where ``values`` is what
    ``Indexing.cookies_names_domains_values`` returns for that pair.
    """
    indexing = Indexing()
    per_domain = []
    for domain, freq in indexing.get_cookie_domains(cookie_name):
        values = indexing.cookies_names_domains_values(cookie_name, domain)
        per_domain.append((domain, freq, values))
    return render_template('cookie_name.html', cookie_name=cookie_name, domains=per_domain)
|
||||||
|
|
||||||
# Query API
|
# Query API
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -125,11 +125,17 @@
|
||||||
<div>This response contains 3rd party cookies:</div>
|
<div>This response contains 3rd party cookies:</div>
|
||||||
<ul>
|
<ul>
|
||||||
{% for cookie, details in url['cookies_received']['3rd_party'].items() %}
|
{% for cookie, details in url['cookies_received']['3rd_party'].items() %}
|
||||||
|
{% set cookie_name_value = cookie.split('=', 1) %}
|
||||||
{% for detail in details %}
|
{% for detail in details %}
|
||||||
{% if detail|length == 1 %}
|
{% if detail|length == 1 %}
|
||||||
<li>{{ detail[0] }}: {{ cookie }}</li>
|
<li>
|
||||||
|
{# url_for() only accepts route values as keyword arguments. #}
{{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
{{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }}
|
||||||
|
</li>
|
||||||
{% else %}
|
{% else %}
|
||||||
<li>{{ detail[0] }}: {{ cookie }} -
|
<li>
|
||||||
|
{# url_for() only accepts route values as keyword arguments. #}
{{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
{{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }} -
|
||||||
<button type="button" class="btn btn-info" onclick="whereAmI('{{ detail[1] }}')">Show node sending this cookie</button>
|
<button type="button" class="btn btn-info" onclick="whereAmI('{{ detail[1] }}')">Show node sending this cookie</button>
|
||||||
</li>
|
</li>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
@ -142,8 +148,11 @@
|
||||||
<div>Other cookies sent somewhere else in the capture</div>
|
<div>Other cookies sent somewhere else in the capture</div>
|
||||||
<ul>
|
<ul>
|
||||||
{% for cookie, details in url['cookies_received']['sent'].items() %}
|
{% for cookie, details in url['cookies_received']['sent'].items() %}
|
||||||
|
{% set cookie_name_value = cookie.split('=', 1) %}
|
||||||
{% for detail in details %}
|
{% for detail in details %}
|
||||||
<li>{{ detail[0] }}: {{ cookie }} -
|
<li>
|
||||||
|
{{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
|
||||||
|
{{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }}
|
||||||
<button type="button" class="btn btn-info" onclick="whereAmI('{{ detail[1] }}')">Show node sending this cookie</button>
|
<button type="button" class="btn btn-info" onclick="whereAmI('{{ detail[1] }}')">Show node sending this cookie</button>
|
||||||
</li>
|
</li>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
@ -155,8 +164,12 @@
|
||||||
<div>Other cookies, but never sent</div>
|
<div>Other cookies, but never sent</div>
|
||||||
<ul>
|
<ul>
|
||||||
{% for cookie, details in url['cookies_received']['not_sent'].items() %}
|
{% for cookie, details in url['cookies_received']['not_sent'].items() %}
|
||||||
|
{% set cookie_name_value = cookie.split('=', 1) %}
|
||||||
{% for detail in details %}
|
{% for detail in details %}
|
||||||
<li>{{ detail[0] }}: {{ cookie }}</li>
|
<li>
|
||||||
|
{{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
|
||||||
|
{{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }}
|
||||||
|
</li>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</ul>
|
</ul>
|
||||||
|
@ -193,11 +206,17 @@
|
||||||
<div>List of cookies sent in the request</div>
|
<div>List of cookies sent in the request</div>
|
||||||
<ul>
|
<ul>
|
||||||
{% for cookie, details in url['cookies_sent'].items() %}
|
{% for cookie, details in url['cookies_sent'].items() %}
|
||||||
|
{% set cookie_name_value = cookie.split('=', 1) %}
|
||||||
{% for detail in details %}
|
{% for detail in details %}
|
||||||
{% if detail|length == 1 %}
|
{% if detail|length == 1 %}
|
||||||
<li> {{ detail[0] }}: {{ cookie }}</li>
|
<li>
|
||||||
|
{{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
|
||||||
|
{{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }}
|
||||||
|
</li>
|
||||||
{%else %}
|
{%else %}
|
||||||
<li>{{ detail[0] }}: {{ cookie }} -
|
<li>
|
||||||
|
{{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
|
||||||
|
{{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }}
|
||||||
<button type="button" class="btn btn-info" onclick="whereAmI('{{ detail[1] }}')">Show on tree node setting this cookie</button>
|
<button type="button" class="btn btn-info" onclick="whereAmI('{{ detail[1] }}')">Show on tree node setting this cookie</button>
|
||||||
</li>
|
</li>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
Loading…
Reference in New Issue