mirror of https://github.com/CIRCL/lookyloo
chg: properly count unique URLs/domains in stats
parent
89d06246f1
commit
7dc38d0ff2
|
@ -9,8 +9,8 @@ stats = {}
|
|||
|
||||
today = datetime.date.today()
|
||||
calendar_week = today.isocalendar()[1]
|
||||
weeks_stats = {calendar_week - 1: {'analysis': 0, 'redirects': 0, 'uniq_urls': set()},
|
||||
calendar_week: {'analysis': 0, 'redirects': 0, 'uniq_urls': set()}}
|
||||
weeks_stats = {calendar_week - 1: {'analysis': 0, 'uniq_urls': set()},
|
||||
calendar_week: {'analysis': 0, 'uniq_urls': set()}}
|
||||
|
||||
|
||||
def uniq_domains(uniq_urls):
|
||||
|
@ -29,14 +29,12 @@ for uuid in lookyloo.capture_uuids:
|
|||
if date.year not in stats:
|
||||
stats[date.year] = {}
|
||||
if date.month not in stats[date.year]:
|
||||
stats[date.year][date.month] = {'analysis': 0, 'redirects': 0, 'uniq_urls': set()}
|
||||
stats[date.year][date.month] = {'analysis': 0, 'uniq_urls': set()}
|
||||
stats[date.year][date.month]['analysis'] += 1
|
||||
stats[date.year][date.month]['redirects'] += len(cache['redirects'])
|
||||
stats[date.year][date.month]['uniq_urls'].update(cache['redirects'])
|
||||
stats[date.year][date.month]['uniq_urls'].add(cache['url'])
|
||||
if date.isocalendar()[1] in weeks_stats:
|
||||
weeks_stats[date.isocalendar()[1]]['analysis'] += 1
|
||||
weeks_stats[date.isocalendar()[1]]['redirects'] += len(cache['redirects'])
|
||||
weeks_stats[date.isocalendar()[1]]['uniq_urls'].update(cache['redirects'])
|
||||
weeks_stats[date.isocalendar()[1]]['uniq_urls'].add(cache['url'])
|
||||
|
||||
|
@ -44,7 +42,6 @@ print('Statistics for the last two weeks:')
|
|||
for week_number, week_stat in weeks_stats.items():
|
||||
print(f'Week {week_number}:')
|
||||
print(' Number of analysis:', week_stat['analysis'])
|
||||
print(' Number of redirects:', week_stat['redirects'])
|
||||
print(' Number of unique URLs:', len(week_stat['uniq_urls']))
|
||||
domains = uniq_domains(week_stat['uniq_urls'])
|
||||
print(' Number of unique domains:', len(domains))
|
||||
|
@ -53,17 +50,19 @@ for week_number, week_stat in weeks_stats.items():
|
|||
for year, data in stats.items():
|
||||
print('Year:', year)
|
||||
yearly_analysis = 0
|
||||
yearly_redirects = 0
|
||||
yearly_urls = 0
|
||||
yearly_domains = 0
|
||||
for month in sorted(data.keys()):
|
||||
stats = data[month]
|
||||
print(' ', calendar.month_name[month])
|
||||
print("\tNumber of analysis :", stats['analysis'])
|
||||
print("\tNumber of redirects :", stats['redirects'])
|
||||
print('\tNumber of unique URLs:', len(stats['uniq_urls']))
|
||||
domains = uniq_domains(stats['uniq_urls'])
|
||||
print('\tNumber of unique domains:', len(domains))
|
||||
yearly_analysis += stats['analysis']
|
||||
yearly_redirects += stats['redirects']
|
||||
yearly_urls += len(stats['uniq_urls'])
|
||||
yearly_domains += len(domains)
|
||||
|
||||
print(" Sum analysis:", yearly_analysis)
|
||||
print(" Sum redirects:", yearly_redirects)
|
||||
print(" Sum unique urls:", yearly_urls)
|
||||
print(" Sum unique domains:", yearly_domains)
|
||||
|
|
Loading…
Reference in New Issue