chg: [event:getTrendsForTags] Created TrendingTool to help generate trends

pull/8575/head
Sami Mokaddem 2022-09-08 14:03:45 +02:00
parent 4c7d892f4b
commit f33bb2dbc9
No known key found for this signature in database
GPG Key ID: 164C473F627A06FA
2 changed files with 138 additions and 111 deletions

View File

@ -0,0 +1,133 @@
<?php
/**
 * Computes tag trends over consecutive time windows for a set of events.
 *
 * The injected event model is used for exactly two operations:
 *  - resolveTimeDelta(string): turns a day delta such as "7d" into a window boundary
 *  - extractAllTagNames(array): lists the tag names attached to an event
 */
class TrendingTool
{
    /** @var object Model providing resolveTimeDelta() and extractAllTagNames() */
    private $eventModel;

    /**
     * @param object $eventModel Model providing resolveTimeDelta() and extractAllTagNames()
     */
    public function __construct($eventModel)
    {
        $this->eventModel = $eventModel;
    }

    /**
     * Cluster the tags of the given events per rolling window and derive the trend of each tag.
     *
     * @param array $events Events; each one must expose $event['Event']['timestamp']
     * @param int $baseDayRange Size of one window in days (must be at least 1)
     * @param int $rollingWindows Number of additional windows to look at
     * @param array|null $tagFilterPrefixes Only tags starting with one of these prefixes are kept; null keeps all tags
     * @return array ['clustered_tags' => clustering result, 'trend_analysis' => per-window tag statistics]
     * @throws InvalidArgumentException When $baseDayRange is lower than 1
     */
    public function getTrendsForTags(array $events, int $baseDayRange, int $rollingWindows=3, $tagFilterPrefixes=null): array
    {
        $clusteredTags = $this->__clusterTagsForRollingWindow($events, $baseDayRange, $rollingWindows, $tagFilterPrefixes);
        $trendAnalysis = $this->__computeTrendAnalysis($clusteredTags);
        return [
            'clustered_tags' => $clusteredTags,
            'trend_analysis' => $trendAnalysis,
        ];
    }

    /**
     * Compare the tag counts of each window with the window that follows it in the clustering result.
     *
     * For every tag of a window the result holds its occurrence ratio (count divided by the number
     * of events of that window), the raw and percent change against the following window, and the
     * sign of that change. A tag that is missing from the preceding window is back-filled there
     * with an occurrence of 0 and a full drop (-100%).
     *
     * @param array $clusteredTags Result of __clusterTagsForRollingWindow()
     * @return array Statistics indexed by window key, then by tag name
     */
    private function __computeTrendAnalysis(array $clusteredTags): array
    {
        $tagsPerRollingWindow = $clusteredTags['tagsPerRollingWindow'];
        $eventNumberPerRollingWindow = $clusteredTags['eventNumberPerRollingWindow'];
        $trendAnalysis = [];
        $allTimestamps = array_keys($tagsPerRollingWindow);
        foreach ($allTimestamps as $i => $timestamp) {
            $trendAnalysis[$timestamp] = [];
            $tags = $tagsPerRollingWindow[$timestamp];
            $nextTimestamp = isset($allTimestamps[$i + 1]) ? $allTimestamps[$i + 1] : false;
            $previousTimestamp = isset($allTimestamps[$i - 1]) ? $allTimestamps[$i - 1] : false;
            foreach ($tags as $tag => $amount) {
                $rawChange = 0;
                $percentChange = 0;
                if (!empty($nextTimestamp)) {
                    $nextAmount = !empty($tagsPerRollingWindow[$nextTimestamp][$tag]) ? $tagsPerRollingWindow[$nextTimestamp][$tag] : 0;
                    $rawChange = $amount - $nextAmount;
                    $percentChange = 100 * $rawChange / $amount;
                }
                $trendAnalysis[$timestamp][$tag] = [
                    'occurence' => round($amount / $eventNumberPerRollingWindow[$timestamp], 2),
                    'raw_change' => $rawChange,
                    'percent_change' => $percentChange,
                    'change_sign' => $rawChange > 0 ? 1 : ($rawChange < 0 ? -1 : 0),
                ];
            }
            if (!empty($previousTimestamp)) {
                // Iterate over the tag => amount pairs again so that every back-filled entry
                // uses the count of its own tag, not the count left over from the loop above.
                foreach ($tags as $tag => $amount) {
                    if (empty($trendAnalysis[$previousTimestamp][$tag])) {
                        $trendAnalysis[$previousTimestamp][$tag] = [
                            'occurence' => 0,
                            'raw_change' => -$amount,
                            // Going from $amount to 0 is always a drop of 100%
                            'percent_change' => -100,
                            'change_sign' => -1,
                        ];
                    }
                }
            }
        }
        return $trendAnalysis;
    }

    /**
     * Count tags and events per rolling window.
     *
     * Window boundaries are resolved through the event model for 0, $baseDayRange,
     * 2 * $baseDayRange, ... days. The first boundary (delta "0d") only acts as a limit:
     * the returned windows are indexed by every boundary except that first one.
     *
     * @param array $events Events; each one must expose $event['Event']['timestamp']
     * @param int $baseDayRange Size of one window in days (must be at least 1)
     * @param int $rollingWindows Number of additional windows
     * @param array|null $tagFilterPrefixes Only tags starting with one of these prefixes are kept; null keeps all tags
     * @return array Keys: 'tagsPerRollingWindow' (window => tag => count),
     *               'eventNumberPerRollingWindow' (window => number of events),
     *               'allTagsPerPrefix' (prefix => list of matching tag names)
     * @throws InvalidArgumentException When $baseDayRange is lower than 1
     */
    private function __clusterTagsForRollingWindow(array $events, int $baseDayRange, int $rollingWindows = 3, $tagFilterPrefixes = null): array
    {
        if ($baseDayRange < 1) {
            // A step of 0 would never let the boundary loop below terminate
            throw new InvalidArgumentException('The base day range must be at least 1 day.');
        }
        $fullDayNumber = $baseDayRange + $baseDayRange * $rollingWindows;
        $timestampRollingWindow = [];
        for ($i = 0; $i <= $fullDayNumber; $i += $baseDayRange) {
            $timestampRollingWindow[] = $this->eventModel->resolveTimeDelta($i . 'd');
        }
        $windowKeys = array_slice($timestampRollingWindow, 1);
        $tagsPerRollingWindow = array_fill_keys($windowKeys, []);
        $eventNumberPerRollingWindow = array_fill_keys($windowKeys, 0);
        $allTagsPerPrefix = [];
        foreach ($events as $event) {
            $allTags = $this->eventModel->extractAllTagNames($event);
            $rollingTimestamps = $this->__getTimestampFromRollingWindow($event['Event']['timestamp'], $timestampRollingWindow);
            $currentWindow = $rollingTimestamps['current'];
            $filteredTags = array_filter($allTags, function ($tag) use ($tagFilterPrefixes, &$allTagsPerPrefix) {
                if (is_null($tagFilterPrefixes)) {
                    return true;
                }
                foreach ($tagFilterPrefixes as $tagPrefix) {
                    if (substr($tag, 0, strlen($tagPrefix)) === $tagPrefix) {
                        // A tag is only registered under the first prefix it matches
                        $allTagsPerPrefix[$tagPrefix][$tag] = true;
                        return true;
                    }
                }
                return false;
            });
            foreach ($filteredTags as $tag) {
                if (empty($tagsPerRollingWindow[$currentWindow][$tag])) {
                    $tagsPerRollingWindow[$currentWindow][$tag] = 0;
                }
                $tagsPerRollingWindow[$currentWindow][$tag] += 1;
            }
            if (!isset($eventNumberPerRollingWindow[$currentWindow])) {
                // The event was mapped to a boundary that is not one of the pre-filled windows
                // (e.g. the first boundary); create its counter instead of incrementing an
                // undefined index.
                $eventNumberPerRollingWindow[$currentWindow] = 0;
            }
            $eventNumberPerRollingWindow[$currentWindow] += 1;
        }
        return [
            'tagsPerRollingWindow' => $tagsPerRollingWindow,
            'eventNumberPerRollingWindow' => $eventNumberPerRollingWindow,
            'allTagsPerPrefix' => array_map(function ($clusteredTags) {
                return array_keys($clusteredTags);
            }, $allTagsPerPrefix),
        ];
    }

    /**
     * Find the window boundary an event timestamp belongs to.
     *
     * With more than two boundaries, the first boundary that is lower than or equal to the
     * event timestamp is selected; when none matches, the last boundary is used. With two
     * boundaries or fewer, the first boundary is always selected.
     *
     * @param int $eventTimestamp Timestamp of the event
     * @param array $rollingWindow List of window boundaries (presumably newest first - verify against resolveTimeDelta())
     * @return array The 'previous', 'current' and 'next' boundaries; missing neighbours are null
     */
    private function __getTimestampFromRollingWindow(int $eventTimestamp, array $rollingWindow): array
    {
        $i = 0;
        $boundaryCount = count($rollingWindow);
        if ($boundaryCount > 2) {
            for ($i = 0; $i < $boundaryCount - 1; $i++) {
                if ($eventTimestamp >= $rollingWindow[$i]) {
                    break;
                }
            }
        }
        return [
            'previous' => isset($rollingWindow[$i - 1]) ? $rollingWindow[$i - 1] : null,
            'current' => $rollingWindow[$i],
            'next' => isset($rollingWindow[$i + 1]) ? $rollingWindow[$i + 1] : null,
        ];
    }
}

View File

@ -7629,8 +7629,11 @@ class Event extends AppModel
$eventFilters['last'] = $fullRange . 'd';
$eventFilters['order'] = 'timestamp DESC';
$events = $this->fetchEvent($user, $eventFilters);
$clusteredTags = $this->__clusterTagsForRollingWindow($events, $baseDayRange, $rollingWindows, $tagFilterPrefixes);
$trendAnalysis = $this->__computeTrendAnalysis($clusteredTags);
App::uses('TrendingTool', 'Tools');
$trendingTool = new TrendingTool($this);
$trendAnalysis = $trendingTool->getTrendsForTags($events, $baseDayRange, $rollingWindows, $tagFilterPrefixes);
$clusteredTags = $trendAnalysis['clustered_tags'];
$trendAnalysis = $trendAnalysis['trend_analysis'];
return [
'clustered_tags' => $trendAnalysis,
'clustered_events' => $clusteredTags['eventNumberPerRollingWindow'],
@ -7638,113 +7641,4 @@ class Event extends AppModel
'all_timestamps' => array_keys($clusteredTags['eventNumberPerRollingWindow']),
];
}
/**
 * Compare the tag counts of each window with the window that follows it in the clustering result.
 *
 * For every tag of a window the result holds its occurrence ratio (count divided by the number
 * of events of that window), the raw and percent change against the following window, and the
 * sign of that change. A tag that is missing from the preceding window is back-filled there
 * with an occurrence of 0 and a full drop (-100%).
 *
 * @param array $clusteredTags Must contain 'tagsPerRollingWindow' and 'eventNumberPerRollingWindow'
 * @return array Statistics indexed by window key, then by tag name
 */
private function __computeTrendAnalysis(array $clusteredTags): array
{
    $tagsPerRollingWindow = $clusteredTags['tagsPerRollingWindow'];
    $eventNumberPerRollingWindow = $clusteredTags['eventNumberPerRollingWindow'];
    $trendAnalysis = [];
    $allTimestamps = array_keys($tagsPerRollingWindow);
    foreach ($allTimestamps as $i => $timestamp) {
        $trendAnalysis[$timestamp] = [];
        $tags = $tagsPerRollingWindow[$timestamp];
        $nextTimestamp = isset($allTimestamps[$i+1]) ? $allTimestamps[$i+1] : false;
        $previousTimestamp = isset($allTimestamps[$i-1]) ? $allTimestamps[$i-1] : false;
        foreach ($tags as $tag => $amount) {
            $rawChange = 0;
            $percentChange = 0;
            if (!empty($nextTimestamp)) {
                $nextAmount = !empty($tagsPerRollingWindow[$nextTimestamp][$tag]) ? $tagsPerRollingWindow[$nextTimestamp][$tag] : 0;
                $rawChange = $amount - $nextAmount;
                $percentChange = 100*$rawChange/$amount;
            }
            $trendAnalysis[$timestamp][$tag] = [
                'occurence' => round($amount / $eventNumberPerRollingWindow[$timestamp], 2),
                'raw_change' => $rawChange,
                'percent_change' => $percentChange,
                'change_sign' => $rawChange > 0 ? 1 : ($rawChange < 0 ? -1 : 0),
            ];
        }
        if (!empty($previousTimestamp)) {
            // Iterate over the tag => amount pairs again so that every back-filled entry
            // uses the count of its own tag, not the count left over from the loop above.
            foreach ($tags as $tag => $amount) {
                if (empty($trendAnalysis[$previousTimestamp][$tag])) {
                    $trendAnalysis[$previousTimestamp][$tag] = [
                        'occurence' => 0,
                        'raw_change' => -$amount,
                        // Going from $amount to 0 is always a drop of 100%
                        'percent_change' => -100,
                        'change_sign' => -1,
                    ];
                }
            }
        }
    }
    return $trendAnalysis;
}
/**
 * Count tags and events per rolling window.
 *
 * Window boundaries are resolved with resolveTimeDelta() for 0, $baseDayRange,
 * 2 * $baseDayRange, ... days. The first boundary (delta "0d") only acts as a limit:
 * the returned windows are indexed by every boundary except that first one.
 *
 * @param array $events Events; each one must expose $event['Event']['timestamp']
 * @param int $baseDayRange Size of one window in days (must be at least 1)
 * @param int $rollingWindows Number of additional windows
 * @param array|null $tagFilterPrefixes Only tags starting with one of these prefixes are kept; null keeps all tags
 * @return array Keys: 'tagsPerRollingWindow' (window => tag => count),
 *               'eventNumberPerRollingWindow' (window => number of events),
 *               'allTagsPerPrefix' (prefix => list of matching tag names)
 * @throws InvalidArgumentException When $baseDayRange is lower than 1
 */
private function __clusterTagsForRollingWindow(array $events, int $baseDayRange, int $rollingWindows=3, $tagFilterPrefixes=null): array
{
    if ($baseDayRange < 1) {
        // A step of 0 would never let the boundary loop below terminate
        throw new InvalidArgumentException('The base day range must be at least 1 day.');
    }
    $fullDayNumber = $baseDayRange + $baseDayRange * $rollingWindows;
    $timestampRollingWindow = [];
    for ($i=0; $i <= $fullDayNumber; $i += $baseDayRange) {
        $timestampRollingWindow[] = $this->resolveTimeDelta($i . 'd');
    }
    $windowKeys = array_slice($timestampRollingWindow, 1);
    $tagsPerRollingWindow = array_fill_keys($windowKeys, []);
    $eventNumberPerRollingWindow = array_fill_keys($windowKeys, 0);
    $allTagsPerPrefix = [];
    foreach ($events as $event) {
        $allTags = $this->extractAllTagNames($event);
        $rollingTimestamps = $this->__getTimestampFromRollingWindow($event['Event']['timestamp'], $timestampRollingWindow);
        $currentWindow = $rollingTimestamps['current'];
        $filteredTags = array_filter($allTags, function($tag) use ($tagFilterPrefixes, &$allTagsPerPrefix) {
            if (is_null($tagFilterPrefixes)) {
                return true;
            }
            foreach ($tagFilterPrefixes as $tagPrefix) {
                if (substr($tag, 0, strlen($tagPrefix)) === $tagPrefix) {
                    // A tag is only registered under the first prefix it matches
                    $allTagsPerPrefix[$tagPrefix][$tag] = true;
                    return true;
                }
            }
            return false;
        });
        foreach ($filteredTags as $tag) {
            if (empty($tagsPerRollingWindow[$currentWindow][$tag])) {
                $tagsPerRollingWindow[$currentWindow][$tag] = 0;
            }
            $tagsPerRollingWindow[$currentWindow][$tag] += 1;
        }
        if (!isset($eventNumberPerRollingWindow[$currentWindow])) {
            // The event was mapped to a boundary that is not one of the pre-filled windows
            // (e.g. the first boundary); create its counter instead of incrementing an
            // undefined index.
            $eventNumberPerRollingWindow[$currentWindow] = 0;
        }
        $eventNumberPerRollingWindow[$currentWindow] += 1;
    }
    return [
        'tagsPerRollingWindow' => $tagsPerRollingWindow,
        'eventNumberPerRollingWindow' => $eventNumberPerRollingWindow,
        'allTagsPerPrefix' => array_map(function($clusteredTags) {
            return array_keys($clusteredTags);
        }, $allTagsPerPrefix),
    ];
}
/**
 * Find the window boundary an event timestamp belongs to.
 *
 * With more than two boundaries, the first boundary that is lower than or equal to the
 * event timestamp is selected; when none matches, the last boundary is used. With two
 * boundaries or fewer, the first boundary is always selected.
 *
 * @param int $eventTimestamp Timestamp of the event
 * @param array $rollingWindow List of window boundaries
 * @return array The 'previous', 'current' and 'next' boundaries; missing neighbours are null
 */
private function __getTimestampFromRollingWindow(int $eventTimestamp, array $rollingWindow): array
{
    $boundaryCount = count($rollingWindow);
    $position = 0;
    if ($boundaryCount > 2) {
        $lastPosition = $boundaryCount - 1;
        // Walk forward while the event is older than the boundary under the cursor
        while ($position < $lastPosition && $eventTimestamp < $rollingWindow[$position]) {
            $position++;
        }
    }
    $neighbours = [];
    $neighbours['previous'] = $rollingWindow[$position - 1] ?? null;
    $neighbours['current'] = $rollingWindow[$position];
    $neighbours['next'] = $rollingWindow[$position + 1] ?? null;
    return $neighbours;
}
}