From 5d0772380983117add2ae36a9ab5e18b9f590ebf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= <raphael@vinot.info>
Date: Wed, 22 Apr 2020 12:03:10 +0200
Subject: [PATCH] new: Make it possible to strip older captures from the index

---
 config/generic.json.sample | 8 +++++++-
 website/web/__init__.py    | 9 +++++++++
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/config/generic.json.sample b/config/generic.json.sample
index 86531e3a..b4ab9ea4 100644
--- a/config/generic.json.sample
+++ b/config/generic.json.sample
@@ -4,11 +4,17 @@
   "only_global_lookups": true,
   "splash_url": "http://127.0.0.1:8050",
   "cache_clean_user": {},
+  "time_delta_on_index": {
+    "weeks": 0,
+    "days": 1,
+    "hours": 0
+  },
   "_notes": {
     "loglevel": "(lookyloo) Can be one of the value listed here: https://docs.python.org/3/library/logging.html#levels",
     "splash_loglevel": "(Splash) INFO is *very* verbose.",
     "only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network",
     "splash_url": "URL to connect to splash",
-    "cache_clean_user": "Format: {username: password}"
+    "cache_clean_user": "Format: {username: password}",
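+    "time_delta_on_index": "Maximum age of the captures displayed on the index, given as keyword arguments for Python's datetime.timedelta (weeks, days, hours, ...). An empty value ({}) disables the filtering and displays every capture."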
   }
 }
diff --git a/website/web/__init__.py b/website/web/__init__.py
index a12a084e..f3c4d3d1 100644
--- a/website/web/__init__.py
+++ b/website/web/__init__.py
@@ -7,6 +7,7 @@ from zipfile import ZipFile, ZIP_DEFLATED
 from io import BytesIO
 import os
 from pathlib import Path
+from datetime import datetime, timedelta
 
 from flask import Flask, render_template, request, session, send_file, redirect, url_for, Response, flash
 from flask_bootstrap import Bootstrap  # type: ignore
@@ -40,6 +41,7 @@ auth = HTTPDigestAuth()
 lookyloo: Lookyloo = Lookyloo()
 
 user = lookyloo.get_config('cache_clean_user')
+time_delta_on_index = lookyloo.get_config('time_delta_on_index')
 
 logging.basicConfig(level=lookyloo.get_config('loglevel'))
 
@@ -262,10 +264,17 @@ def index():
         return 'Ack'
     update_user_agents()
     titles = []
+    if time_delta_on_index:
+        # Captures older than cut_time (now minus the configured delta) are skipped on the index
+        cut_time = datetime.now() - timedelta(**time_delta_on_index)
+    else:
+        cut_time = None
     for capture_dir in lookyloo.capture_dirs:
         cached = lookyloo.capture_cache(capture_dir)
         if not cached or 'no_index' in cached or 'error' in cached:
             continue
+        if cut_time and datetime.fromisoformat(cached['timestamp'][:-1]) < cut_time:
+            continue
         titles.append((cached['uuid'], cached['title'], cached['timestamp'], cached['url'],
                        cached['redirects'], True if cached['incomplete_redirects'] == '1' else False))
     titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)