mirror of https://github.com/CIRCL/lookyloo
chg: Add to stats: week, previous week, and unique URL/hostname count
parent
1d03f7ed9c
commit
89d06246f1
|
@ -1,11 +1,26 @@
|
||||||
from lookyloo.lookyloo import Lookyloo
|
from lookyloo.lookyloo import Lookyloo
|
||||||
import calendar
|
import calendar
|
||||||
import datetime
|
import datetime
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
lookyloo = Lookyloo()
|
lookyloo = Lookyloo()
|
||||||
|
|
||||||
stats = {}
|
stats = {}
|
||||||
|
|
||||||
|
# Initialize per-week counters for the current ISO week and the one before it.
# Each bucket tracks the number of analyses, the total redirect count, and the
# set of unique URLs seen (redirect chain entries plus the capture URL).
today = datetime.date.today()
calendar_week = today.isocalendar()[1]
# Compute the previous week with date arithmetic: a plain `calendar_week - 1`
# would produce 0 (an invalid ISO week number) during the first week of a
# year, so captures from the last week of the previous year would be dropped.
previous_week = (today - datetime.timedelta(days=7)).isocalendar()[1]
weeks_stats = {previous_week: {'analysis': 0, 'redirects': 0, 'uniq_urls': set()},
               calendar_week: {'analysis': 0, 'redirects': 0, 'uniq_urls': set()}}
|
||||||
|
|
||||||
|
|
||||||
|
def uniq_domains(uniq_urls):
    """Return the set of hostnames found in the given iterable of URLs.

    NOTE(review): urlparse().hostname is None for URLs without a netloc,
    so the returned set may contain None for such entries — same as the
    original implementation.
    """
    return {urlparse(url).hostname for url in uniq_urls}
|
||||||
|
|
||||||
|
|
||||||
for uuid in lookyloo.capture_uuids:
|
for uuid in lookyloo.capture_uuids:
|
||||||
cache = lookyloo.capture_cache(uuid)
|
cache = lookyloo.capture_cache(uuid)
|
||||||
if 'timestamp' not in cache:
|
if 'timestamp' not in cache:
|
||||||
|
@ -14,9 +29,25 @@ for uuid in lookyloo.capture_uuids:
|
||||||
if date.year not in stats:
|
if date.year not in stats:
|
||||||
stats[date.year] = {}
|
stats[date.year] = {}
|
||||||
if date.month not in stats[date.year]:
|
if date.month not in stats[date.year]:
|
||||||
stats[date.year][date.month] = {'analysis': 0, 'redirects': 0}
|
stats[date.year][date.month] = {'analysis': 0, 'redirects': 0, 'uniq_urls': set()}
|
||||||
stats[date.year][date.month]['analysis'] += 1
|
stats[date.year][date.month]['analysis'] += 1
|
||||||
stats[date.year][date.month]['redirects'] += len(cache['redirects'])
|
stats[date.year][date.month]['redirects'] += len(cache['redirects'])
|
||||||
|
stats[date.year][date.month]['uniq_urls'].update(cache['redirects'])
|
||||||
|
stats[date.year][date.month]['uniq_urls'].add(cache['url'])
|
||||||
|
if date.isocalendar()[1] in weeks_stats:
|
||||||
|
weeks_stats[date.isocalendar()[1]]['analysis'] += 1
|
||||||
|
weeks_stats[date.isocalendar()[1]]['redirects'] += len(cache['redirects'])
|
||||||
|
weeks_stats[date.isocalendar()[1]]['uniq_urls'].update(cache['redirects'])
|
||||||
|
weeks_stats[date.isocalendar()[1]]['uniq_urls'].add(cache['url'])
|
||||||
|
|
||||||
|
# Print a short report for the two tracked ISO weeks (previous and current).
print('Statistics for the last two weeks:')
for weeknum, wstats in weeks_stats.items():
    print(f'Week {weeknum}:')
    print(' Number of analysis:', wstats['analysis'])
    print(' Number of redirects:', wstats['redirects'])
    print(' Number of unique URLs:', len(wstats['uniq_urls']))
    # Derive the distinct hostnames from the collected unique URLs.
    print(' Number of unique domains:', len(uniq_domains(wstats['uniq_urls'])))
|
|
||||||
|
|
||||||
for year, data in stats.items():
|
for year, data in stats.items():
|
||||||
|
@ -28,6 +59,9 @@ for year, data in stats.items():
|
||||||
print(' ', calendar.month_name[month])
|
print(' ', calendar.month_name[month])
|
||||||
print("\tNumber of analysis :", stats['analysis'])
|
print("\tNumber of analysis :", stats['analysis'])
|
||||||
print("\tNumber of redirects :", stats['redirects'])
|
print("\tNumber of redirects :", stats['redirects'])
|
||||||
|
print('\tNumber of unique URLs:', len(stats['uniq_urls']))
|
||||||
|
domains = uniq_domains(stats['uniq_urls'])
|
||||||
|
print('\tNumber of unique domains:', len(domains))
|
||||||
yearly_analysis += stats['analysis']
|
yearly_analysis += stats['analysis']
|
||||||
yearly_redirects += stats['redirects']
|
yearly_redirects += stats['redirects']
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue