chg: Cleanup stats page

pull/135/head
Raphaël Vinot 2020-11-27 16:27:29 +01:00
parent 9fe6dfd306
commit 1a7a74cccd
3 changed files with 62 additions and 65 deletions


@@ -996,62 +996,64 @@ class Lookyloo():
             urls.append(to_append)
         return hostnode, urls
 
-    def get_stats(self) -> Dict[str, Union[List, Dict]]:
-        stats: Dict[int, Dict[int, Dict[str, Any]]] = {}
+    def get_stats(self) -> Dict[str, List]:
         today = date.today()
         calendar_week = today.isocalendar()[1]
-        weeks_stats: Dict[int, Dict] = {calendar_week - 1: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()},
-                                        calendar_week: {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}}
-        statistics: Dict[str, Union[List, Dict]] = {'weeks': [], 'years': {}}
+
+        stats_dict = {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0}
+        stats: Dict[int, Dict[int, Dict[str, Any]]] = {}
+        weeks_stats: Dict[int, Dict] = {}
+
         for uuid in self.capture_uuids:
+            # What we get here is in a random order. This loop sorts the captures.
             cache = self.capture_cache(uuid)
             if 'timestamp' not in cache:
                 continue
             date_analysis: datetime = datetime.fromisoformat(cache['timestamp'].rstrip('Z'))  # type: ignore
+
             if date_analysis.year not in stats:
                 stats[date_analysis.year] = {}
             if date_analysis.month not in stats[date_analysis.year]:
-                stats[date_analysis.year][date_analysis.month] = {'analysis': 0, 'analysis_with_redirects': 0, 'redirects': 0, 'uniq_urls': set()}
+                stats[date_analysis.year][date_analysis.month] = defaultdict(dict, **stats_dict)
+                stats[date_analysis.year][date_analysis.month]['uniq_urls'] = set()
             stats[date_analysis.year][date_analysis.month]['analysis'] += 1
+            stats[date_analysis.year][date_analysis.month]['uniq_urls'].add(cache['url'])
             if len(cache['redirects']) > 0:  # type: ignore
                 stats[date_analysis.year][date_analysis.month]['analysis_with_redirects'] += 1
                 stats[date_analysis.year][date_analysis.month]['redirects'] += len(cache['redirects'])  # type: ignore
                 stats[date_analysis.year][date_analysis.month]['uniq_urls'].update(cache['redirects'])
-            stats[date_analysis.year][date_analysis.month]['uniq_urls'].add(cache['url'])
 
-            if date_analysis.isocalendar()[1] in weeks_stats:
+            if date_analysis.isocalendar()[1] >= calendar_week - 1:
+                if date_analysis.isocalendar()[1] not in weeks_stats:
+                    weeks_stats[date_analysis.isocalendar()[1]] = defaultdict(dict, **stats_dict)
+                    weeks_stats[date_analysis.isocalendar()[1]]['uniq_urls'] = set()
                 weeks_stats[date_analysis.isocalendar()[1]]['analysis'] += 1
+                weeks_stats[date_analysis.isocalendar()[1]]['uniq_urls'].add(cache['url'])
                 if len(cache['redirects']) > 0:  # type: ignore
                     weeks_stats[date_analysis.isocalendar()[1]]['analysis_with_redirects'] += 1
                     weeks_stats[date_analysis.isocalendar()[1]]['redirects'] += len(cache['redirects'])  # type: ignore
                     weeks_stats[date_analysis.isocalendar()[1]]['uniq_urls'].update(cache['redirects'])
-                weeks_stats[date_analysis.isocalendar()[1]]['uniq_urls'].add(cache['url'])
 
-        for week_number, week_stat in weeks_stats.items():
-            week = {}
-            week['week'] = week_number
-            week['analysis'] = week_stat['analysis']
-            week['analysis_with_redirects'] = week_stat['analysis_with_redirects']
-            week['redirects'] = week_stat['redirects']
-            week['uniq_urls'] = len(week_stat['uniq_urls'])
-            week['uniq_domains'] = len(uniq_domains(week_stat['uniq_urls']))
-            statistics['weeks'].append(week)  # type: ignore
+        statistics: Dict[str, List] = {'weeks': [], 'years': []}
+        for week_number in sorted(weeks_stats.keys()):
+            week_stat = weeks_stats[week_number]
+            urls = week_stat.pop('uniq_urls')
+            week_stat['week_number'] = week_number
+            week_stat['uniq_urls'] = len(urls)
+            week_stat['uniq_domains'] = len(uniq_domains(urls))
+            statistics['weeks'].append(week_stat)
 
-        for year, data in stats.items():
-            years: Dict[Union[int, str], Union[Dict, int]] = {}
-            yearly_analysis = 0
-            yearly_redirects = 0
-            for month in sorted(data.keys()):
-                _stats = data[month]
-                mstats = {}
-                mstats['analysis'] = _stats['analysis']
-                mstats['analysis_with_redirects'] = _stats['analysis_with_redirects']
-                mstats['redirects'] = _stats['redirects']
-                mstats['uniq_urls'] = len(_stats['uniq_urls'])
-                mstats['uniq_domains'] = len(uniq_domains(_stats['uniq_urls']))
-                yearly_analysis += _stats['analysis']
-                yearly_redirects += _stats['redirects']
-                years[month] = mstats
-            years['yearly_analysis'] = yearly_analysis
-            years['yearly_redirects'] = yearly_redirects
-            statistics['years'][year] = years
+        for year in sorted(stats.keys()):
+            year_stats: Dict[str, Union[int, List]] = {'year': year, 'months': [], 'yearly_analysis': 0, 'yearly_redirects': 0}
+            for month in sorted(stats[year].keys()):
+                month_stats = stats[year][month]
+                urls = month_stats.pop('uniq_urls')
+                month_stats['month_number'] = month
+                month_stats['uniq_urls'] = len(urls)
+                month_stats['uniq_domains'] = len(uniq_domains(urls))
+                year_stats['months'].append(month_stats)  # type: ignore
+
+                year_stats['yearly_analysis'] += month_stats['analysis']
+                year_stats['yearly_redirects'] += month_stats['redirects']
+            statistics['years'].append(year_stats)
+
         return statistics
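
For reference, the reworked get_stats() returns plain lists instead of dicts keyed by week and month numbers, which is what the JavaScript and template changes below rely on. A rough sketch of the resulting structure served at /json/stats, with made-up numbers, assuming two weeks and one month of captures:

    statistics = {
        'weeks': [
            {'week_number': 47, 'analysis': 12, 'analysis_with_redirects': 5,
             'redirects': 9, 'uniq_urls': 14, 'uniq_domains': 8},
            {'week_number': 48, 'analysis': 20, 'analysis_with_redirects': 7,
             'redirects': 11, 'uniq_urls': 25, 'uniq_domains': 13},
        ],
        'years': [
            {'year': 2020, 'yearly_analysis': 32, 'yearly_redirects': 20,
             'months': [
                 {'month_number': 11, 'analysis': 32, 'analysis_with_redirects': 12,
                  'redirects': 20, 'uniq_urls': 39, 'uniq_domains': 21},
             ]},
        ],
    }

Note that 'uniq_urls' and 'uniq_domains' are already counts at this point: the URL sets are consumed by uniq_domains() and replaced by their lengths before the structure is returned, so it serialises cleanly to JSON.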


@@ -8,15 +8,12 @@ var xScale = d3.scaleLinear()
     .range([0, width]);
 
 d3.json('/json/stats').then(json => {
-  for (var year in json['years']) {
+  json['years'].forEach(year => {
     var dataset = [];
-    for (var month in json['years'][year]) {
-      var i_month = parseInt(month)
-      if (Number.isInteger(i_month)) {
-        dataset.push([month, json['years'][year][month]['analysis']]);
-        height = Math.max(json['years'][year][month]['analysis'] + 50, height);
-      };
-    };
+    year['months'].forEach(month => {
+      dataset.push([month['month_number'], month['analysis']]);
+      height = Math.max(month['analysis'] + 50, height);
+    });
     var yScale = d3.scaleLinear()
         .domain([0, height])
         .range([height, 0]);
@@ -37,7 +34,7 @@ d3.json('/json/stats').then(json => {
        .attr("y", 0 - (margin.top / 2))
        .attr("text-anchor", "middle")
        .style("font-size", "20px")
-       .text(year);
+       .text(year['year']);
 
     svg.append("g")
        .attr("class", "x axis")
@@ -60,5 +57,5 @@ d3.json('/json/stats').then(json => {
       .attr("cx", d => { return xScale(d[0]) })
      .attr("cy", d => { return yScale(d[1]) })
       .attr("r", 5);
-  };
+  });
 });
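
The graph is fed by the /json/stats endpoint, which serialises the structure sketched above. To eyeball what the new forEach loops receive, something along these lines works; the base URL is an assumption for a local instance, adjust it to your deployment:

    # Minimal sketch: dump the payload the D3 code iterates over.
    # http://127.0.0.1:5100 is a placeholder for your own Lookyloo instance.
    import requests

    r = requests.get('http://127.0.0.1:5100/json/stats')
    r.raise_for_status()
    stats = r.json()
    for year in stats['years']:
        print(year['year'], '-', year['yearly_analysis'], 'captures')
        for month in year['months']:
            print('  month', month['month_number'], ':', month['analysis'], 'captures')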


@@ -4,8 +4,8 @@
 
 {% block content %}
 <div>
-  {% for weeks in stats['weeks'] %}
-  <h2> Week: {{ weeks['week'] }}</h2>
+  {% for week in stats['weeks'] %}
+  <h2> Week: {{ week['week_number'] }}</h2>
   <div class="table-responsive">
     <table id="table" class="table" style="width:96%">
       <thead>
@@ -19,11 +19,11 @@
       </thead>
       <tbody>
         <tr>
-          <td> {{ weeks['analysis'] }} </td>
-          <td> {{ weeks['analysis_with_redirects'] }} </td>
-          <td> {{ weeks['redirects'] }} </td>
-          <td> {{ weeks['uniq_urls'] }} </td>
-          <td> {{ weeks['uniq_domains'] }} </td>
+          <td> {{ week['analysis'] }} </td>
+          <td> {{ week['analysis_with_redirects'] }} </td>
+          <td> {{ week['redirects'] }} </td>
+          <td> {{ week['uniq_urls'] }} </td>
+          <td> {{ week['uniq_domains'] }} </td>
         </tr>
       </tbody>
     </table>
@@ -32,16 +32,15 @@
 </div>
 
 <div>
-  {% for name, dict_ in stats['years'].items() %}
-  <h2>Year: {{ name }}</h2>
+  {% for year in stats['years'] %}
+  <h2>Year: {{ year['year'] }}</h2>
   <ul>
-    <li><b>Total analysis</b>: {{ dict_['yearly_analysis'] }}</li>
-    <li><b>Total redirects</b>: {{ dict_['yearly_redirects'] }}</li>
+    <li><b>Total analysis</b>: {{ year['yearly_analysis'] }}</li>
+    <li><b>Total redirects</b>: {{ year['yearly_redirects'] }}</li>
   </ul>
   <div>
-    {% for monthnumber, month in dict_.items() %}
-    {% if monthnumber is number %}
-    <h4>{{ month_name(monthnumber) }}</h4>
+    {% for month in year['months'] %}
+    <h4>{{ month_name(month['month_number']) }}</h4>
     <div class="table-responsive">
       <table id="table" class="table" style="width:96%">
         <thead>
@@ -64,7 +63,6 @@
         </tbody>
       </table>
     </div>
-    {% endif %}
   {% endfor %}
   </div>
 {% endfor %}
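
The template relies on a month_name() helper exposed to Jinja to turn month_number into a label. A minimal stand-in with the expected behaviour, assuming month numbers from 1 to 12 (this is not the helper shipped with Lookyloo, just an illustration of the mapping):

    # Hypothetical equivalent of the month_name() helper used in the template.
    import calendar

    def month_name(month_number: int) -> str:
        # calendar.month_name[11] == 'November'
        return calendar.month_name[month_number]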