mirror of https://github.com/CIRCL/lookyloo
chg: More cleanup
parent
f886b8676b
commit
33d30a3f4c
|
@ -247,13 +247,13 @@ class CapturesIndex(Mapping):
|
||||||
try:
|
try:
|
||||||
tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
|
tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
|
||||||
except NoValidHarFile:
|
except NoValidHarFile:
|
||||||
self.logger.warning('Unable to rebuild the tree, the HAR files are broken.')
|
self.logger.debug('Unable to rebuild the tree, the HAR files are broken.')
|
||||||
except TreeNeedsRebuild:
|
except TreeNeedsRebuild:
|
||||||
try:
|
try:
|
||||||
tree = self._create_pickle(capture_dir)
|
tree = self._create_pickle(capture_dir)
|
||||||
self.indexing.new_internal_uuids(tree)
|
self.indexing.new_internal_uuids(tree)
|
||||||
except NoValidHarFile:
|
except NoValidHarFile:
|
||||||
self.logger.warning('Unable to rebuild the tree, the HAR files are broken.')
|
self.logger.info('Unable to rebuild the tree, the HAR files are broken.')
|
||||||
|
|
||||||
cache: Dict[str, Union[str, int]] = {'uuid': uuid, 'capture_dir': capture_dir_str}
|
cache: Dict[str, Union[str, int]] = {'uuid': uuid, 'capture_dir': capture_dir_str}
|
||||||
if (capture_dir / 'error.txt').exists():
|
if (capture_dir / 'error.txt').exists():
|
||||||
|
@ -285,8 +285,9 @@ class CapturesIndex(Mapping):
|
||||||
|
|
||||||
if (cache.get('error')
|
if (cache.get('error')
|
||||||
and isinstance(cache['error'], str)
|
and isinstance(cache['error'], str)
|
||||||
and 'HTTP Error' not in cache['error']):
|
and 'HTTP Error' not in cache['error']
|
||||||
self.logger.warning(cache['error'])
|
and "No har files in" not in cache['error']):
|
||||||
|
self.logger.info(cache['error'])
|
||||||
|
|
||||||
if (capture_dir / 'categories').exists():
|
if (capture_dir / 'categories').exists():
|
||||||
with (capture_dir / 'categories').open() as _categories:
|
with (capture_dir / 'categories').open() as _categories:
|
||||||
|
|
|
@ -1,11 +1,9 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import hashlib
|
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import operator
|
import operator
|
||||||
import pickle
|
|
||||||
import smtplib
|
import smtplib
|
||||||
|
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
@ -462,15 +460,8 @@ class Lookyloo():
|
||||||
query[key] = json.dumps(value) if value else None
|
query[key] = json.dumps(value) if value else None
|
||||||
|
|
||||||
query = self._prepare_lacus_query(query)
|
query = self._prepare_lacus_query(query)
|
||||||
# dirty deduplicate
|
|
||||||
hash_query = hashlib.sha512(pickle.dumps(query)).hexdigest()
|
|
||||||
# FIXME The line below should work, but it doesn't
|
|
||||||
# if (existing_uuid := self.redis.set(f'query_hash:{hash_query}', temp_uuid, get=True, nx=True, ex=300)):
|
|
||||||
if (existing_uuid := self.redis.get(f'query_hash:{hash_query}')):
|
|
||||||
return existing_uuid
|
|
||||||
|
|
||||||
priority = get_priority(source, user, authenticated)
|
priority = get_priority(source, user, authenticated)
|
||||||
|
|
||||||
perma_uuid = self.lacus.enqueue(
|
perma_uuid = self.lacus.enqueue(
|
||||||
url=query.pop('url', None),
|
url=query.pop('url', None),
|
||||||
document_name=query.pop('document_name', None),
|
document_name=query.pop('document_name', None),
|
||||||
|
@ -492,17 +483,18 @@ class Lookyloo():
|
||||||
priority=priority
|
priority=priority
|
||||||
)
|
)
|
||||||
|
|
||||||
if priority < -10:
|
if self.redis.zscore('to_capture', perma_uuid) is None:
|
||||||
# Someone is probably abusing the system with useless URLs, remove them from the index
|
if priority < -10:
|
||||||
query['listing'] = 0
|
# Someone is probably abusing the system with useless URLs, remove them from the index
|
||||||
|
query['listing'] = 0
|
||||||
|
|
||||||
p = self.redis.pipeline()
|
p = self.redis.pipeline()
|
||||||
p.zadd('to_capture', {perma_uuid: priority})
|
p.zadd('to_capture', {perma_uuid: priority})
|
||||||
if query:
|
if query:
|
||||||
p.hset(perma_uuid, mapping=query) # This will add the remaining entries that are lookyloo specific
|
p.hset(perma_uuid, mapping=query) # This will add the remaining entries that are lookyloo specific
|
||||||
p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
|
p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
|
||||||
p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
|
p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
|
||||||
p.execute()
|
p.execute()
|
||||||
return perma_uuid
|
return perma_uuid
|
||||||
|
|
||||||
def send_mail(self, capture_uuid: str, /, email: str='', comment: str='') -> None:
|
def send_mail(self, capture_uuid: str, /, email: str='', comment: str='') -> None:
|
||||||
|
|
Loading…
Reference in New Issue