new: Initial version of cookies indexing

pull/78/head
Raphaël Vinot 2020-06-13 02:49:07 +02:00
parent bc6df900c4
commit 5ae7f0f7e4
8 changed files with 1940 additions and 9 deletions

View File

@ -22,12 +22,26 @@ def shutdown_cache(storage_directory: Path=None):
Popen(["./shutdown_redis.sh"], cwd=(storage_directory / 'cache'))
def launch_indexing(storage_directory: Path=None):
    """Run ``./run_redis.sh`` from the ``indexing`` directory, unless the
    'indexing' backend is already reported as running.

    :param storage_directory: directory that contains the ``indexing`` folder.
        When omitted (or falsy), the value of ``get_homedir()`` is used.
    """
    base_dir = storage_directory or get_homedir()
    if check_running('indexing'):
        # Nothing to do, the backend is already up.
        return
    Popen(["./run_redis.sh"], cwd=(base_dir / 'indexing'))
def shutdown_indexing(storage_directory: Path=None):
    """Run ``./shutdown_redis.sh`` from the ``indexing`` directory.

    :param storage_directory: directory that contains the ``indexing`` folder.
        When omitted (or falsy), the value of ``get_homedir()`` is used.
    """
    base_dir = storage_directory if storage_directory else get_homedir()
    indexing_dir = base_dir / 'indexing'
    Popen(["./shutdown_redis.sh"], cwd=indexing_dir)
def launch_all():
    """Start every backend: the cache first, then the indexing database."""
    for launch_backend in (launch_cache, launch_indexing):
        launch_backend()
def check_all(stop=False):
backends = [['cache', False]]
backends = [['cache', False], ['indexing', False]]
while True:
for b in backends:
try:
@ -50,6 +64,7 @@ def check_all(stop=False):
def stop_all():
    """Shut down every backend: the cache first, then the indexing database."""
    for shutdown_backend in (shutdown_cache, shutdown_indexing):
        shutdown_backend()
if __name__ == '__main__':

1812
indexing/indexing.conf Normal file

File diff suppressed because it is too large Load Diff

6
indexing/run_redis.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/bash
# Start redis-server with the configuration file of the indexing database.
# Both paths are relative: the script has to be run from the directory that
# contains indexing.conf.

# Abort on the first failing command and trace every command executed.
set -o errexit -o xtrace

../../redis/src/redis-server ./indexing.conf

6
indexing/shutdown_redis.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/bash
# Ask the Redis instance listening on ./indexing.sock to shut down.
# Both paths are relative: the script has to be run from the directory that
# contains indexing.sock.

# errexit is not enabled here (the original `set -e` line is commented out).
# set -e

# Trace every command executed.
set -o xtrace

../../redis/src/redis-cli -s ./indexing.sock shutdown

View File

@ -92,6 +92,7 @@ def is_running() -> Dict[Any, Any]:
def get_socket_path(name: str) -> str:
    """Return, as a string, the path of the socket file of a backend.

    The path is built by joining ``get_homedir()`` with the location of the
    socket relative to it.

    :param name: backend name, one of ``'cache'``, ``'indexing'`` or ``'storage'``.
    :raises KeyError: if ``name`` is not one of the known backends.
    """
    relative_sockets = {
        'cache': Path('cache', 'cache.sock'),
        'indexing': Path('indexing', 'indexing.sock'),
        'storage': Path('storage', 'storage.sock'),
    }
    home = get_homedir()
    socket_path = home / relative_sockets[name]
    return str(socket_path)

57
lookyloo/indexing.py Normal file
View File

@ -0,0 +1,57 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import Set, Tuple, List
from redis import Redis
from .helpers import get_socket_path
from .lookyloo import Lookyloo
class Indexing():
    """Index of the cookies received in the captures, stored in Redis.

    Keys written by :meth:`index_cookies`:

    * ``cookies_names`` (sorted set): cookie name -> counter, incremented once
      per distinct (name, domain) pair in each capture.
    * ``cn|<name>`` (sorted set): domain -> counter, incremented once per
      capture in which the domain set a cookie with that name.
    * ``cn|<name>|<domain>`` (sorted set): cookie value -> counter. Only the
      first value met for a (name, domain) pair in a capture is counted.
    * ``lookyloo_domains`` (set): every domain that set a cookie.
    * ``<domain>`` (set): names of the cookies set by that domain.
    """

    def __init__(self) -> None:
        self.lookyloo = Lookyloo()
        # decode_responses=True: redis returns str instead of bytes.
        self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)

    @property
    def cookies_names(self) -> List[Tuple[str, float]]:
        """Every indexed cookie name with its score, highest score first."""
        return self.redis.zrevrange('cookies_names', 0, -1, withscores=True)

    def cookies_names_number_domains(self, cookie_name: str) -> int:
        """Number of distinct domains indexed for ``cookie_name``."""
        return self.redis.zcard(f'cn|{cookie_name}')

    def cookies_names_domains_values(self, cookie_name: str, domain: str) -> List[Tuple[str, float]]:
        """Values indexed for ``cookie_name`` on ``domain`` with their score, highest score first."""
        return self.redis.zrevrange(f'cn|{cookie_name}|{domain}', 0, -1, withscores=True)

    def get_cookie_domains(self, cookie_name: str) -> List[Tuple[str, float]]:
        """Domains indexed for ``cookie_name`` with their score, highest score first."""
        return self.redis.zrevrange(f'cn|{cookie_name}', 0, -1, withscores=True)

    def index_cookies(self) -> None:
        """Walk every capture and add the cookies it received to the index.

        Captures whose tree cannot be loaded are reported on stdout and
        skipped. The commands of one capture are sent to Redis in a single
        pipeline.

        NOTE: nothing here records which captures were already processed, so
        every call increments the counters again for all the captures.
        """
        for capture_dir in self.lookyloo.capture_dirs:
            try:
                crawled_tree = self.lookyloo.get_crawled_tree(capture_dir)
            except Exception as e:
                # Best effort: one broken capture must not stop the indexing.
                print(e)
                continue
            pipeline = self.redis.pipeline()
            already_loaded: Set[Tuple[str, str]] = set()
            for urlnode in crawled_tree.root_hartree.url_tree.traverse():
                if not hasattr(urlnode, 'cookies_received'):
                    continue
                for domain, cookie, _ in urlnode.cookies_received:
                    # partition never fails: a cookie string without '='
                    # is indexed with an empty value instead of raising
                    # ValueError and aborting the whole indexing run.
                    name, _sep, value = cookie.partition('=')
                    if (name, domain) in already_loaded:
                        # Only add cookie name once / capture
                        continue
                    already_loaded.add((name, domain))
                    pipeline.zincrby('cookies_names', 1, name)
                    pipeline.zincrby(f'cn|{name}', 1, domain)
                    pipeline.zincrby(f'cn|{name}|{domain}', 1, value)
                    pipeline.sadd('lookyloo_domains', domain)
                    pipeline.sadd(domain, name)
                    # Indexing by cookie value is currently disabled:
                    # pipeline.zincrby('lookyloo_cookies_index_values', 1, value)
                    # pipeline.zincrby(value, 1, name)
            pipeline.execute()

View File

@ -13,7 +13,7 @@ from flask_bootstrap import Bootstrap # type: ignore
from flask_httpauth import HTTPDigestAuth # type: ignore
from lookyloo.helpers import get_homedir, update_user_agents, get_user_agents
from lookyloo.lookyloo import Lookyloo
from lookyloo.lookyloo import Lookyloo, Indexing
from lookyloo.exceptions import NoValidHarFile, MissingUUID
from .proxied import ReverseProxied
@ -420,6 +420,21 @@ def index():
def index_hidden():
return index_generic(show_hidden=True)
@app.route('/cookies', methods=['GET'])
def cookies_lookup():
    """Render ``cookies.html`` with one ``(name, score, number of domains)``
    tuple per indexed cookie name, in the order returned by the index."""
    indexing = Indexing()
    rows = []
    for name, freq in indexing.cookies_names:
        nb_domains = indexing.cookies_names_number_domains(name)
        rows.append((name, freq, nb_domains))
    return render_template('cookies.html', cookies_names=rows)
@app.route('/cookies/<string:cookie_name>', methods=['GET'])
def cookies_name_detail(cookie_name: str):
    """Render ``cookie_name.html`` for one cookie name.

    The template receives one ``(domain, score, values)`` tuple per domain
    indexed for ``cookie_name``, where ``values`` is the list of
    ``(value, score)`` pairs indexed for that name on that domain.
    """
    indexing = Indexing()
    per_domain = []
    for domain, freq in indexing.get_cookie_domains(cookie_name):
        values = indexing.cookies_names_domains_values(cookie_name, domain)
        per_domain.append((domain, freq, values))
    return render_template('cookie_name.html', cookie_name=cookie_name, domains=per_domain)
# Query API

View File

@ -125,13 +125,19 @@
<div>This response contains 3rd party cookies:</div>
<ul>
{% for cookie, details in url['cookies_received']['3rd_party'].items() %}
{% set cookie_name_value = cookie.split('=', 1) %}
{% for detail in details %}
{% if detail|length == 1 %}
<li>{{ detail[0] }}: {{ cookie }}</li>
<li>
  {# url_for only accepts URL variables as keyword arguments #}
  {{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
  {{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }}
</li>
{% else %}
<li>{{ detail[0] }}: {{ cookie }} -
<li>
  {# url_for only accepts URL variables as keyword arguments #}
  {{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
  {{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }} -
  <button type="button" class="btn btn-info" onclick="whereAmI('{{ detail[1] }}')">Show node sending this cookie</button>
</li>
</li>
{% endif %}
{% endfor %}
{% endfor %}
@ -142,8 +148,11 @@
<div>Other cookies sent somewhere else in the capture</div>
<ul>
{% for cookie, details in url['cookies_received']['sent'].items() %}
{% set cookie_name_value = cookie.split('=', 1) %}
{% for detail in details %}
<li>{{ detail[0] }}: {{ cookie }} -
<li>
{{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
{{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }}
<button type="button" class="btn btn-info" onclick="whereAmI('{{ detail[1] }}')">Show node sending this cookie</button>
</li>
{% endfor %}
@ -155,8 +164,12 @@
<div>Other cookies, but never sent</div>
<ul>
{% for cookie, details in url['cookies_received']['not_sent'].items() %}
{% set cookie_name_value = cookie.split('=', 1) %}
{% for detail in details %}
<li>{{ detail[0] }}: {{ cookie }}</li>
<li>
{{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
{{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }}
</li>
{% endfor %}
{% endfor %}
</ul>
@ -193,11 +206,17 @@
<div>List of cookies sent in the request</div>
<ul>
{% for cookie, details in url['cookies_sent'].items() %}
{% set cookie_name_value = cookie.split('=', 1) %}
{% for detail in details %}
{% if detail|length == 1 %}
<li> {{ detail[0] }}: {{ cookie }}</li>
<li>
{{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
{{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }}
</li>
{%else %}
<li>{{ detail[0] }}: {{ cookie }} -
<li>
{{ detail[0] }}: <a href="{{ url_for('cookies_name_detail', cookie_name=cookie_name_value[0]) }}">
{{ cookie_name_value[0] }}</a>={{ cookie_name_value[1] }}
<button type="button" class="btn btn-info" onclick="whereAmI('{{ detail[1] }}')">Show on tree node setting this cookie</button>
</li>
{% endif %}