new: Make it possible to strip older captures from the index

pull/79/head
Raphaël Vinot 2020-04-22 12:03:10 +02:00
parent 3738876c89
commit 5d07723809
2 changed files with 16 additions and 1 deletion

View File

@@ -4,11 +4,17 @@
   "only_global_lookups": true,
   "splash_url": "http://127.0.0.1:8050",
   "cache_clean_user": {},
+  "time_delta_on_index": {
+    "weeks": 0,
+    "days": 1,
+    "hours": 0
+  },
   "_notes": {
     "loglevel": "(lookyloo) Can be one of the value listed here: https://docs.python.org/3/library/logging.html#levels",
     "splash_loglevel": "(Splash) INFO is *very* verbose.",
     "only_global_lookups": "Set it to True if your instance is publicly available so users aren't able to scan your internal network",
     "splash_url": "URL to connect to splash",
-    "cache_clean_user": "Format: {username: password}"
+    "cache_clean_user": "Format: {username: password}",
+    "time_delta_on_index": "Time interval of the capture displayed on the index"
   }
 }
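The new time_delta_on_index block is consumed as keyword arguments to datetime.timedelta, so the weeks/days/hours values combine into a single cutoff interval. A minimal sketch of that expansion, using the default values from the sample config above:

    from datetime import datetime, timedelta

    # Values mirroring the sample config above; the keys map directly onto
    # datetime.timedelta keyword arguments.
    time_delta_on_index = {'weeks': 0, 'days': 1, 'hours': 0}

    # Anything captured before this point in time is hidden from the index.
    cut_time = datetime.now() - timedelta(**time_delta_on_index)
    print(cut_time)  # roughly 24 hours before now with the defaults

An empty dict (or a missing key) makes time_delta_on_index falsy, in which case the view below sets cut_time to None and skips the filtering entirely.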

View File

@@ -7,6 +7,7 @@ from zipfile import ZipFile, ZIP_DEFLATED
 from io import BytesIO
 import os
 from pathlib import Path
+from datetime import datetime, timedelta
 from flask import Flask, render_template, request, session, send_file, redirect, url_for, Response, flash
 from flask_bootstrap import Bootstrap  # type: ignore
@@ -40,6 +41,7 @@ auth = HTTPDigestAuth()
 lookyloo: Lookyloo = Lookyloo()
 user = lookyloo.get_config('cache_clean_user')
+time_delta_on_index = lookyloo.get_config('time_delta_on_index')
 logging.basicConfig(level=lookyloo.get_config('loglevel'))
@@ -262,10 +264,17 @@ def index():
         return 'Ack'
     update_user_agents()
     titles = []
+    if time_delta_on_index:
+        # We want to filter the captures on the index
+        cut_time = datetime.now() - timedelta(**time_delta_on_index)
+    else:
+        cut_time = None
     for capture_dir in lookyloo.capture_dirs:
         cached = lookyloo.capture_cache(capture_dir)
         if not cached or 'no_index' in cached or 'error' in cached:
             continue
+        if cut_time and datetime.fromisoformat(cached['timestamp'][:-1]) < cut_time:
+            continue
         titles.append((cached['uuid'], cached['title'], cached['timestamp'], cached['url'],
                        cached['redirects'], True if cached['incomplete_redirects'] == '1' else False))
     titles = sorted(titles, key=lambda x: (x[2], x[3]), reverse=True)
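For reference, the new check in index() compares naive datetimes: the cached timestamp ends in 'Z', which is stripped with [:-1] because datetime.fromisoformat() on the Python versions targeted here does not accept that suffix. A standalone sketch of the check, with a hypothetical cache entry standing in for what lookyloo.capture_cache() returns:

    from datetime import datetime, timedelta

    # Hypothetical cache entry; the real dict comes from lookyloo.capture_cache().
    cached = {'timestamp': '2020-04-21T10:00:00Z'}

    cut_time = datetime.now() - timedelta(days=1)

    # Strip the trailing 'Z' so fromisoformat() can parse the timestamp,
    # then drop the capture if it is older than the cutoff (cut_time may be
    # None when no time_delta_on_index is configured).
    capture_time = datetime.fromisoformat(cached['timestamp'][:-1])
    if cut_time and capture_time < cut_time:
        print('older than the cutoff: skipped on the index')
    else:
        print('recent enough: listed on the index')

Since datetime.now() is naive local time while the stored timestamp is UTC, the comparison implicitly assumes the server clock is close to UTC; at worst the displayed window is shifted by the server's UTC offset.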