mirror of https://github.com/CIRCL/lookyloo
chg: Improve integration of cookies indexing
parent
e6c9f6dada
commit
67b41ca8fb
|
@ -4,7 +4,7 @@
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from lookyloo.lookyloo import Lookyloo
|
from lookyloo.lookyloo import Lookyloo, Indexing
|
||||||
|
|
||||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||||
level=logging.INFO, datefmt='%I:%M:%S')
|
level=logging.INFO, datefmt='%I:%M:%S')
|
||||||
|
@ -19,3 +19,7 @@ if __name__ == '__main__':
|
||||||
lookyloo.rebuild_all()
|
lookyloo.rebuild_all()
|
||||||
else:
|
else:
|
||||||
lookyloo.rebuild_cache()
|
lookyloo.rebuild_cache()
|
||||||
|
|
||||||
|
indexing = Indexing()
|
||||||
|
indexing.clear_indexes()
|
||||||
|
indexing.index_all()
|
||||||
|
|
|
@ -1,68 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
from typing import Set, Tuple, List, Optional, Dict, Any
|
|
||||||
|
|
||||||
from redis import Redis
|
|
||||||
|
|
||||||
from .helpers import get_socket_path
|
|
||||||
from .lookyloo import Lookyloo
|
|
||||||
|
|
||||||
|
|
||||||
class Indexing():
    """Build Redis-backed indexes of the cookies seen across all Lookyloo captures.

    Index layout (all in the 'indexing' Redis database):
      * 'cookies_names'       zset: cookie name -> number of (name, domain) sightings
      * 'cn|{name}'           zset: domain -> sightings of that cookie name on the domain
      * 'cn|{name}|{domain}'  zset: cookie value -> sightings
      * 'cn|{name}|captures'  set:  '{capture_uuid}|{urlnode_uuid}' entries
    """

    def __init__(self) -> None:
        self.lookyloo = Lookyloo()
        # decode_responses=True: all keys/values come back as str, not bytes.
        self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)

    @property
    def cookies_names(self) -> List[Tuple[str, float]]:
        """All indexed cookie names with their occurrence counts, most frequent first."""
        return self.redis.zrevrange('cookies_names', 0, -1, withscores=True)

    def cookies_names_number_domains(self, cookie_name: str) -> int:
        """Number of distinct domains that set ``cookie_name``."""
        return self.redis.zcard(f'cn|{cookie_name}')

    def cookies_names_domains_values(self, cookie_name: str, domain: str) -> List[Tuple[str, float]]:
        """Values seen for ``cookie_name`` on ``domain``, with counts, most frequent first."""
        return self.redis.zrevrange(f'cn|{cookie_name}|{domain}', 0, -1, withscores=True)

    def get_cookie_domains(self, cookie_name: str) -> List[Tuple[str, float]]:
        """Domains that set ``cookie_name``, with counts, most frequent first."""
        return self.redis.zrevrange(f'cn|{cookie_name}', 0, -1, withscores=True)

    def get_capture_cache(self, capture_uuid: str) -> Optional[Dict[str, Any]]:
        """Cached metadata for a capture, or {} when the UUID is unknown."""
        capture_dir = self.lookyloo.lookup_capture_dir(capture_uuid)
        if capture_dir:
            return self.lookyloo.capture_cache(capture_dir)
        return {}

    def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
        """(capture_uuid, urlnode_uuid) pairs of every capture where ``cookie_name`` was seen."""
        # Entries are stored as '{capture_uuid}|{urlnode_uuid}'; split them back.
        # FIX: wrap in tuple() so the elements match the declared annotation
        # (str.split returns a list).
        return [tuple(uuids.split('|')) for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]

    def index_cookies(self) -> None:
        """Walk every capture and index all cookies received by its URL nodes.

        Best effort: a capture whose tree cannot be loaded is reported and skipped.
        """
        for capture_dir in self.lookyloo.capture_dirs:
            print(f'Processing {capture_dir}')
            try:
                crawled_tree = self.lookyloo.get_crawled_tree(capture_dir)
            except Exception as e:
                # A broken/missing HAR must not abort the whole indexing run.
                print(e)
                continue
            pipeline = self.redis.pipeline()
            # Count each (cookie name, domain) pair at most once per capture.
            already_loaded: Set[Tuple[str, str]] = set()
            for urlnode in crawled_tree.root_hartree.url_tree.traverse():
                if hasattr(urlnode, 'cookies_received'):
                    for domain, cookie, _ in urlnode.cookies_received:
                        # Cookies arrive as 'name=value' (the value may itself contain '=').
                        # FIX: partition() instead of split('=', 1) so a malformed cookie
                        # without '=' yields an empty value instead of raising ValueError
                        # and aborting the capture mid-pipeline.
                        name, _sep, value = cookie.partition('=')
                        if (name, domain) in already_loaded:
                            # Only add cookie name once / capture
                            continue
                        already_loaded.add((name, domain))
                        pipeline.zincrby('cookies_names', 1, name)
                        pipeline.zincrby(f'cn|{name}', 1, domain)
                        pipeline.sadd(f'cn|{name}|captures', f'{crawled_tree.uuid}|{urlnode.uuid}')
                        pipeline.zincrby(f'cn|{name}|{domain}', 1, value)

                        pipeline.sadd('lookyloo_domains', domain)
                        pipeline.sadd(domain, name)
            pipeline.execute()
|
|
|
@ -32,6 +32,77 @@ from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, s
|
||||||
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
|
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
|
||||||
|
|
||||||
|
|
||||||
|
class Indexing():
    """Maintain Redis-backed indexes (currently cookies) over all Lookyloo captures.

    Index layout (all in the 'indexing' Redis database):
      * 'cookies_names'       zset: cookie name -> number of (name, domain) sightings
      * 'cn|{name}'           zset: domain -> sightings of that cookie name on the domain
      * 'cn|{name}|{domain}'  zset: cookie value -> sightings
      * 'cn|{name}|captures'  set:  '{capture_uuid}|{urlnode_uuid}' entries
      * 'indexed_cookies'     set:  capture UUIDs already processed (reindex guard)
    """

    def __init__(self) -> None:
        self.lookyloo = Lookyloo()
        # decode_responses=True: all keys/values come back as str, not bytes.
        self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)

    @property
    def cookies_names(self) -> List[Tuple[str, float]]:
        """All indexed cookie names with their occurrence counts, most frequent first."""
        return self.redis.zrevrange('cookies_names', 0, -1, withscores=True)

    def cookies_names_number_domains(self, cookie_name: str) -> int:
        """Number of distinct domains that set ``cookie_name``."""
        return self.redis.zcard(f'cn|{cookie_name}')

    def cookies_names_domains_values(self, cookie_name: str, domain: str) -> List[Tuple[str, float]]:
        """Values seen for ``cookie_name`` on ``domain``, with counts, most frequent first."""
        return self.redis.zrevrange(f'cn|{cookie_name}|{domain}', 0, -1, withscores=True)

    def get_cookie_domains(self, cookie_name: str) -> List[Tuple[str, float]]:
        """Domains that set ``cookie_name``, with counts, most frequent first."""
        return self.redis.zrevrange(f'cn|{cookie_name}', 0, -1, withscores=True)

    def get_capture_cache(self, capture_uuid: str) -> Optional[Dict[str, Any]]:
        """Cached metadata for a capture, or {} when the UUID is unknown."""
        capture_dir = self.lookyloo.lookup_capture_dir(capture_uuid)
        if capture_dir:
            return self.lookyloo.capture_cache(capture_dir)
        return {}

    def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
        """(capture_uuid, urlnode_uuid) pairs of every capture where ``cookie_name`` was seen."""
        # Entries are stored as '{capture_uuid}|{urlnode_uuid}'; split them back.
        # FIX: wrap in tuple() so the elements match the declared annotation
        # (str.split returns a list).
        return [tuple(uuids.split('|')) for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]

    def clear_indexes(self) -> None:
        """Drop the whole indexing database; a full re-index is required afterwards."""
        self.redis.flushdb()

    def index_all(self) -> None:
        """(Re)build every index over all known captures."""
        self.index_cookies()

    def index_cookies_capture(self, capture_dir: Path) -> None:
        """Index the cookies of a single capture; no-op when already indexed.

        Best effort: a capture whose tree cannot be loaded is reported and skipped.
        """
        print(f'Processing {capture_dir}')
        try:
            crawled_tree = self.lookyloo.get_crawled_tree(capture_dir)
        except Exception as e:
            # A broken/missing HAR must not abort the whole indexing run.
            print(e)
            return

        if self.redis.sismember('indexed_cookies', crawled_tree.uuid):
            # Do not reindex
            return
        self.redis.sadd('indexed_cookies', crawled_tree.uuid)

        pipeline = self.redis.pipeline()
        # Count each (cookie name, domain) pair at most once per capture.
        already_loaded: Set[Tuple[str, str]] = set()
        for urlnode in crawled_tree.root_hartree.url_tree.traverse():
            if hasattr(urlnode, 'cookies_received'):
                for domain, cookie, _ in urlnode.cookies_received:
                    # Cookies arrive as 'name=value' (the value may itself contain '=').
                    # FIX: partition() instead of split('=', 1) so a malformed cookie
                    # without '=' yields an empty value instead of raising ValueError
                    # and aborting the capture mid-pipeline.
                    name, _sep, value = cookie.partition('=')
                    if (name, domain) in already_loaded:
                        # Only add cookie name once / capture
                        continue
                    already_loaded.add((name, domain))
                    pipeline.zincrby('cookies_names', 1, name)
                    pipeline.zincrby(f'cn|{name}', 1, domain)
                    pipeline.sadd(f'cn|{name}|captures', f'{crawled_tree.uuid}|{urlnode.uuid}')
                    pipeline.zincrby(f'cn|{name}|{domain}', 1, value)

                    pipeline.sadd('lookyloo_domains', domain)
                    pipeline.sadd(domain, name)
        pipeline.execute()

    def index_cookies(self) -> None:
        """Index cookies for every capture directory known to Lookyloo."""
        for capture_dir in self.lookyloo.capture_dirs:
            self.index_cookies_capture(capture_dir)
|
||||||
|
|
||||||
|
|
||||||
class Lookyloo():
|
class Lookyloo():
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
|
|
Loading…
Reference in New Issue