mirror of https://github.com/CIRCL/lookyloo
chg: Improve intergration of cookies indexing
parent
e6c9f6dada
commit
67b41ca8fb
|
@ -4,7 +4,7 @@
|
|||
import argparse
|
||||
import logging
|
||||
|
||||
from lookyloo.lookyloo import Lookyloo
|
||||
from lookyloo.lookyloo import Lookyloo, Indexing
|
||||
|
||||
logging.basicConfig(format='%(asctime)s %(name)s %(levelname)s:%(message)s',
|
||||
level=logging.INFO, datefmt='%I:%M:%S')
|
||||
|
@ -19,3 +19,7 @@ if __name__ == '__main__':
|
|||
lookyloo.rebuild_all()
|
||||
else:
|
||||
lookyloo.rebuild_cache()
|
||||
|
||||
indexing = Indexing()
|
||||
indexing.clear_indexes()
|
||||
indexing.index_all()
|
||||
|
|
|
@ -1,68 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
from typing import Set, Tuple, List, Optional, Dict, Any
|
||||
|
||||
from redis import Redis
|
||||
|
||||
from .helpers import get_socket_path
|
||||
from .lookyloo import Lookyloo
|
||||
|
||||
|
||||
class Indexing():
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.lookyloo = Lookyloo()
|
||||
self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)
|
||||
|
||||
@property
|
||||
def cookies_names(self) -> List[Tuple[str, float]]:
|
||||
return self.redis.zrevrange('cookies_names', 0, -1, withscores=True)
|
||||
|
||||
def cookies_names_number_domains(self, cookie_name: str) -> int:
|
||||
return self.redis.zcard(f'cn|{cookie_name}')
|
||||
|
||||
def cookies_names_domains_values(self, cookie_name: str, domain: str) -> List[Tuple[str, float]]:
|
||||
return self.redis.zrevrange(f'cn|{cookie_name}|{domain}', 0, -1, withscores=True)
|
||||
|
||||
def get_cookie_domains(self, cookie_name: str) -> List[Tuple[str, float]]:
|
||||
return self.redis.zrevrange(f'cn|{cookie_name}', 0, -1, withscores=True)
|
||||
|
||||
def get_capture_cache(self, capture_uuid: str) -> Optional[Dict[str, Any]]:
|
||||
capture_dir = self.lookyloo.lookup_capture_dir(capture_uuid)
|
||||
if capture_dir:
|
||||
return self.lookyloo.capture_cache(capture_dir)
|
||||
return {}
|
||||
|
||||
def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
|
||||
return [uuids.split('|')for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]
|
||||
|
||||
def index_cookies(self) -> None:
|
||||
for capture_dir in self.lookyloo.capture_dirs:
|
||||
print(f'Processing {capture_dir}')
|
||||
try:
|
||||
crawled_tree = self.lookyloo.get_crawled_tree(capture_dir)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
continue
|
||||
pipeline = self.redis.pipeline()
|
||||
already_loaded: Set[Tuple[str, str]] = set()
|
||||
for urlnode in crawled_tree.root_hartree.url_tree.traverse():
|
||||
if hasattr(urlnode, 'cookies_received'):
|
||||
for domain, cookie, _ in urlnode.cookies_received:
|
||||
name, value = cookie.split('=', 1)
|
||||
if (name, domain) in already_loaded:
|
||||
# Only add cookie name once / capture
|
||||
continue
|
||||
already_loaded.add((name, domain))
|
||||
pipeline.zincrby('cookies_names', 1, name)
|
||||
pipeline.zincrby(f'cn|{name}', 1, domain)
|
||||
pipeline.sadd(f'cn|{name}|captures', f'{crawled_tree.uuid}|{urlnode.uuid}')
|
||||
pipeline.zincrby(f'cn|{name}|{domain}', 1, value)
|
||||
|
||||
pipeline.sadd('lookyloo_domains', domain)
|
||||
pipeline.sadd(domain, name)
|
||||
|
||||
# pipeline.zincrby('lookyloo_cookies_index_values', 1, value)
|
||||
# pipeline.zincrby(value, 1, name)
|
||||
pipeline.execute()
|
|
@ -32,6 +32,77 @@ from .helpers import get_homedir, get_socket_path, load_cookies, load_configs, s
|
|||
from .modules import VirusTotal, SaneJavaScript, PhishingInitiative
|
||||
|
||||
|
||||
class Indexing():
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.lookyloo = Lookyloo()
|
||||
self.redis: Redis = Redis(unix_socket_path=get_socket_path('indexing'), decode_responses=True)
|
||||
|
||||
@property
|
||||
def cookies_names(self) -> List[Tuple[str, float]]:
|
||||
return self.redis.zrevrange('cookies_names', 0, -1, withscores=True)
|
||||
|
||||
def cookies_names_number_domains(self, cookie_name: str) -> int:
|
||||
return self.redis.zcard(f'cn|{cookie_name}')
|
||||
|
||||
def cookies_names_domains_values(self, cookie_name: str, domain: str) -> List[Tuple[str, float]]:
|
||||
return self.redis.zrevrange(f'cn|{cookie_name}|{domain}', 0, -1, withscores=True)
|
||||
|
||||
def get_cookie_domains(self, cookie_name: str) -> List[Tuple[str, float]]:
|
||||
return self.redis.zrevrange(f'cn|{cookie_name}', 0, -1, withscores=True)
|
||||
|
||||
def get_capture_cache(self, capture_uuid: str) -> Optional[Dict[str, Any]]:
|
||||
capture_dir = self.lookyloo.lookup_capture_dir(capture_uuid)
|
||||
if capture_dir:
|
||||
return self.lookyloo.capture_cache(capture_dir)
|
||||
return {}
|
||||
|
||||
def get_cookies_names_captures(self, cookie_name: str) -> List[Tuple[str, str]]:
|
||||
return [uuids.split('|')for uuids in self.redis.smembers(f'cn|{cookie_name}|captures')]
|
||||
|
||||
def clear_indexes(self):
|
||||
self.redis.flushdb()
|
||||
|
||||
def index_all(self):
|
||||
self.index_cookies()
|
||||
|
||||
def index_cookies_capture(self, capture_dir: Path) -> None:
|
||||
print(f'Processing {capture_dir}')
|
||||
try:
|
||||
crawled_tree = self.lookyloo.get_crawled_tree(capture_dir)
|
||||
except Exception as e:
|
||||
print(e)
|
||||
return
|
||||
|
||||
if self.redis.sismember('indexed_cookies', crawled_tree.uuid):
|
||||
# Do not reindex
|
||||
return
|
||||
self.redis.sadd('indexed_cookies', crawled_tree.uuid)
|
||||
|
||||
pipeline = self.redis.pipeline()
|
||||
already_loaded: Set[Tuple[str, str]] = set()
|
||||
for urlnode in crawled_tree.root_hartree.url_tree.traverse():
|
||||
if hasattr(urlnode, 'cookies_received'):
|
||||
for domain, cookie, _ in urlnode.cookies_received:
|
||||
name, value = cookie.split('=', 1)
|
||||
if (name, domain) in already_loaded:
|
||||
# Only add cookie name once / capture
|
||||
continue
|
||||
already_loaded.add((name, domain))
|
||||
pipeline.zincrby('cookies_names', 1, name)
|
||||
pipeline.zincrby(f'cn|{name}', 1, domain)
|
||||
pipeline.sadd(f'cn|{name}|captures', f'{crawled_tree.uuid}|{urlnode.uuid}')
|
||||
pipeline.zincrby(f'cn|{name}|{domain}', 1, value)
|
||||
|
||||
pipeline.sadd('lookyloo_domains', domain)
|
||||
pipeline.sadd(domain, name)
|
||||
pipeline.execute()
|
||||
|
||||
def index_cookies(self) -> None:
|
||||
for capture_dir in self.lookyloo.capture_dirs:
|
||||
self.index_cookies_capture(capture_dir)
|
||||
|
||||
|
||||
class Lookyloo():
|
||||
|
||||
def __init__(self) -> None:
|
||||
|
|
Loading…
Reference in New Issue