lookyloo/bin/archiver.py

105 lines
3.9 KiB
Python
Raw Normal View History

2021-08-20 17:46:22 +02:00
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from collections import defaultdict
import csv
from datetime import datetime, timedelta
import logging
from typing import Dict, List, Tuple
from pathlib import Path
from redis import Redis
2021-08-20 17:46:22 +02:00
from lookyloo.abstractmanager import AbstractManager
from lookyloo.helpers import get_config, get_homedir, get_socket_path, get_captures_dir
2021-08-20 17:46:22 +02:00
# Configure the root logger once at import time: INFO level, with a
# "time logger-name LEVEL:message" line layout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(name)s %(levelname)s:%(message)s',
    datefmt='%I:%M:%S',
)
class Archiver(AbstractManager):
    """Move old capture directories into a ``<year>/<month>`` archive tree.

    Each archive month directory holds a CSV file named ``index`` whose rows
    are ``uuid,dirname``. The Redis hash ``lookup_dirs_archived`` maps every
    archived capture UUID to the full path of its archived directory, and
    archived UUIDs are removed from the ``lookup_dirs`` hash.
    """

    def __init__(self, loglevel: int=logging.INFO):
        """Connect to the cache Redis socket, create the archive root and
        rebuild the archived-captures lookup hash from the on-disk indexes.

        :param loglevel: logging level forwarded to ``AbstractManager``.
        """
        super().__init__(loglevel)
        self.script_name = 'archiver'
        self.redis = Redis(unix_socket_path=get_socket_path('cache'))

        # make sure archived captures dir exists
        # NOTE: get_homedir is a function and must be called; applying `/` to
        # the function object itself raises TypeError.
        self.archived_captures_dir = get_homedir() / 'archived_captures'
        self.archived_captures_dir.mkdir(parents=True, exist_ok=True)

        self._load_archives()

    def _to_run_forever(self) -> None:
        """Run one archiving pass.

        NOTE(review): presumably invoked repeatedly by ``AbstractManager.run``
        - confirm against the base class.
        """
        self._archive()

    def _archive(self) -> None:
        """Archive every capture directory older than the configured interval.

        The cut-off is ``now - <generic/archive config value> days``, rounded
        down to the first day of that month, so only captures from earlier
        months are moved. Captures are grouped by year and month, moved into
        ``archived_captures/<year>/<month>/``, recorded in that directory's
        ``index`` file, and finally the Redis lookup hashes are updated.

        :raises ValueError: if a capture directory name does not match
            ``%Y-%m-%dT%H:%M:%S.%f``.
        """
        archive_interval = timedelta(days=get_config('generic', 'archive'))
        cut_time = (datetime.now() - archive_interval).date()
        # Round down to the 1st of the month: a month is archived only once
        # all of it lies before the cut-off.
        cut_time = cut_time.replace(day=1)

        # Format:
        # { 2020: { 12: [(directory, uuid)] } }
        to_archive: Dict[int, Dict[int, List[Tuple[Path, str]]]] = defaultdict(lambda: defaultdict(list))
        for capture_path in get_captures_dir().glob('*'):
            if not capture_path.is_dir():
                continue
            # The directory name is the capture timestamp.
            timestamp = datetime.strptime(capture_path.name, '%Y-%m-%dT%H:%M:%S.%f')
            if timestamp.date() >= cut_time:
                # do not archive.
                continue
            with (capture_path / 'uuid').open() as _f:
                uuid = _f.read().strip()
            to_archive[timestamp.year][timestamp.month].append((capture_path, uuid))
            self.logger.info(f'Archiving {capture_path}.')

        if not to_archive:
            self.logger.info('Nothing to archive.')
            return

        # uuid -> full path of the archived capture directory
        archived_uuids: Dict[str, str] = {}
        for year, month_captures in to_archive.items():
            for month, captures in month_captures.items():
                dest_dir = self.archived_captures_dir / str(year) / f'{month:02}'
                dest_dir.mkdir(parents=True, exist_ok=True)
                index_path = dest_dir / 'index'

                # Merge with an existing index so earlier entries are kept.
                if index_path.exists():
                    # newline='' is what the csv module expects for its files.
                    with index_path.open('r', newline='') as _f:
                        current_index = {uuid: dirname for uuid, dirname in csv.reader(_f)}
                else:
                    current_index = {}

                for capture_path, uuid in captures:
                    current_index[uuid] = capture_path.name
                    capture_path.rename(dest_dir / capture_path.name)
                    archived_uuids[uuid] = str(dest_dir / capture_path.name)

                with index_path.open('w', newline='') as _f:
                    csv.writer(_f).writerows(current_index.items())

        if archived_uuids:
            # Queue both commands on the pipeline object itself; a redis-py
            # pipeline has no `.redis` attribute.
            p = self.redis.pipeline()
            p.hdel('lookup_dirs', *archived_uuids.keys())
            p.hset('lookup_dirs_archived', mapping=archived_uuids)
            p.execute()
        self.logger.info('Archiving done.')

    def _load_archives(self) -> None:
        """Rebuild the ``lookup_dirs_archived`` Redis hash from disk.

        The hash is deleted, then refilled from every ``<year>/<month>/index``
        file found under the archive root. Entries that are not directories,
        months without an index and empty indexes are skipped.
        """
        # Initialize archives
        self.redis.delete('lookup_dirs_archived')
        for year in self.archived_captures_dir.iterdir():
            if not year.is_dir():
                # Stray file in the archive root; iterdir() on it would raise.
                continue
            for month in year.iterdir():
                index_path = month / 'index'
                if not index_path.exists():
                    continue
                with index_path.open('r', newline='') as _f:
                    archived_uuids = {uuid: str(month / dirname) for uuid, dirname in csv.reader(_f)}
                if archived_uuids:
                    # hset with an empty mapping raises DataError in redis-py.
                    self.redis.hset('lookup_dirs_archived', mapping=archived_uuids)
2021-08-23 15:14:08 +02:00
2021-08-20 17:46:22 +02:00
def main():
    """Create an Archiver and start it with a sleep interval of one day."""
    seconds_per_day = 3600 * 24
    archiver = Archiver()
    archiver.run(sleep_in_sec=seconds_per_day)
# Start the archiver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()