From 5d0772380983117add2ae36a9ab5e18b9f590ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Wed, 22 Apr 2020 12:03:10 +0200 Subject: [PATCH] new: Make it possible to strip older captures from the index --- config/generic.json.sample | 8 +++++++- website/web/__init__.py | 9 +++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/config/generic.json.sample b/config/generic.json.sample index 86531e3a..b4ab9ea4 100644 --- a/config/generic.json.sample +++ b/config/generic.json.sample @@ -4,11 +4,17 @@ "only_global_lookups": true, "splash_url": "http://127.0.0.1:8050", "cache_clean_user": {}, + "time_delta_on_index": { + "weeks": 0, + "days": 1, + "hours": 0 + }, "_notes": { "loglevel": "(lookyloo) Can be one of the value listed here: https://docs.python.org/3/library/logging.html#levels", "splash_loglevel": "(Splash) INFO is *very* verbose.", "only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network", "splash_url": "URL to connect to splash", - "cache_clean_user": "Format: {username: password}" + "cache_clean_user": "Format: {username: password}", + "time_delta_on_index": "Time interval of the capture displayed on the index" } } diff --git a/website/web/__init__.py b/website/web/__init__.py index a12a084e..f3c4d3d1 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -7,6 +7,7 @@ from zipfile import ZipFile, ZIP_DEFLATED from io import BytesIO import os from pathlib import Path +from datetime import datetime, timedelta from flask import Flask, render_template, request, session, send_file, redirect, url_for, Response, flash from flask_bootstrap import Bootstrap # type: ignore @@ -40,6 +41,7 @@ auth = HTTPDigestAuth() lookyloo: Lookyloo = Lookyloo() user = lookyloo.get_config('cache_clean_user') +time_delta_on_index = lookyloo.get_config('time_delta_on_index') logging.basicConfig(level=lookyloo.get_config('loglevel')) @@ -262,10 +264,17 @@ def index(): return 'Ack' update_user_agents() titles = [] + if time_delta_on_index: + # We want to filter the captures on the index + cut_time = datetime.now() - timedelta(**time_delta_on_index) + else: + cut_time = None for capture_dir in lookyloo.capture_dirs: cached = lookyloo.capture_cache(capture_dir) if not cached or 'no_index' in cached or 'error' in cached: continue + if cut_time and datetime.fromisoformat(cached['timestamp'][:-1]) < cut_time: + continue titles.append((cached['uuid'], cached['title'], cached['timestamp'], cached['url'], cached['redirects'], True if cached['incomplete_redirects'] == '1' else False)) titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)