mirror of https://github.com/CIRCL/lookyloo
chg: More cleanup
parent
f886b8676b
commit
33d30a3f4c
|
@ -247,13 +247,13 @@ class CapturesIndex(Mapping):
|
|||
try:
|
||||
tree = load_pickle_tree(capture_dir, capture_dir.stat().st_mtime)
|
||||
except NoValidHarFile:
|
||||
self.logger.warning('Unable to rebuild the tree, the HAR files are broken.')
|
||||
self.logger.debug('Unable to rebuild the tree, the HAR files are broken.')
|
||||
except TreeNeedsRebuild:
|
||||
try:
|
||||
tree = self._create_pickle(capture_dir)
|
||||
self.indexing.new_internal_uuids(tree)
|
||||
except NoValidHarFile:
|
||||
self.logger.warning('Unable to rebuild the tree, the HAR files are broken.')
|
||||
self.logger.info('Unable to rebuild the tree, the HAR files are broken.')
|
||||
|
||||
cache: Dict[str, Union[str, int]] = {'uuid': uuid, 'capture_dir': capture_dir_str}
|
||||
if (capture_dir / 'error.txt').exists():
|
||||
|
@ -285,8 +285,9 @@ class CapturesIndex(Mapping):
|
|||
|
||||
if (cache.get('error')
|
||||
and isinstance(cache['error'], str)
|
||||
and 'HTTP Error' not in cache['error']):
|
||||
self.logger.warning(cache['error'])
|
||||
and 'HTTP Error' not in cache['error']
|
||||
and "No har files in" not in cache['error']):
|
||||
self.logger.info(cache['error'])
|
||||
|
||||
if (capture_dir / 'categories').exists():
|
||||
with (capture_dir / 'categories').open() as _categories:
|
||||
|
|
|
@ -1,11 +1,9 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import base64
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import operator
|
||||
import pickle
|
||||
import smtplib
|
||||
|
||||
from collections import defaultdict
|
||||
|
@ -462,15 +460,8 @@ class Lookyloo():
|
|||
query[key] = json.dumps(value) if value else None
|
||||
|
||||
query = self._prepare_lacus_query(query)
|
||||
# dirty deduplicate
|
||||
hash_query = hashlib.sha512(pickle.dumps(query)).hexdigest()
|
||||
# FIXME The line below should work, but it doesn't
|
||||
# if (existing_uuid := self.redis.set(f'query_hash:{hash_query}', temp_uuid, get=True, nx=True, ex=300)):
|
||||
if (existing_uuid := self.redis.get(f'query_hash:{hash_query}')):
|
||||
return existing_uuid
|
||||
|
||||
priority = get_priority(source, user, authenticated)
|
||||
|
||||
perma_uuid = self.lacus.enqueue(
|
||||
url=query.pop('url', None),
|
||||
document_name=query.pop('document_name', None),
|
||||
|
@ -492,17 +483,18 @@ class Lookyloo():
|
|||
priority=priority
|
||||
)
|
||||
|
||||
if priority < -10:
|
||||
# Someone is probably abusing the system with useless URLs, remove them from the index
|
||||
query['listing'] = 0
|
||||
if self.redis.zscore('to_capture', perma_uuid) is None:
|
||||
if priority < -10:
|
||||
# Someone is probably abusing the system with useless URLs, remove them from the index
|
||||
query['listing'] = 0
|
||||
|
||||
p = self.redis.pipeline()
|
||||
p.zadd('to_capture', {perma_uuid: priority})
|
||||
if query:
|
||||
p.hset(perma_uuid, mapping=query) # This will add the remaining entries that are lookyloo specific
|
||||
p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
|
||||
p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
|
||||
p.execute()
|
||||
p = self.redis.pipeline()
|
||||
p.zadd('to_capture', {perma_uuid: priority})
|
||||
if query:
|
||||
p.hset(perma_uuid, mapping=query) # This will add the remaining entries that are lookyloo specific
|
||||
p.zincrby('queues', 1, f'{source}|{authenticated}|{user}')
|
||||
p.set(f'{perma_uuid}_mgmt', f'{source}|{authenticated}|{user}')
|
||||
p.execute()
|
||||
return perma_uuid
|
||||
|
||||
def send_mail(self, capture_uuid: str, /, email: str='', comment: str='') -> None:
|
||||
|
|
Loading…
Reference in New Issue