chg: [performance] fix for events with large numbers of attributes and multiple tags from the same taxonomy

- the taxonomy conflict checks were causing multiple issues:

- non-taxonomy tags were counted as belonging to a taxonomy with the empty namespace ''
- once we identified a tag pair that could cause a conflict (same taxonomy) we loaded the taxonomy into redis
  - however, in order to see if we already have the taxonomy loaded, we went to redis to do a GET
  - In the case of 1 million attributes with at least 1 tag pair, this means at minimum 1 million GETs on redis for a single event

- Resolution
  - remove the checks for non-taxonomy tags
  - store the identified taxonomies temporarily on the model itself in memory
    - only go to redis when the model doesn't have the taxonomy cached in memory
    - still using the old approach when dealing with multiple small events

- thanks to @github-germ for flagging the issue
bad_encoding_pymisp
iglocska 2023-06-08 08:16:50 +02:00
parent 338c2e37b5
commit a326cc3506
No known key found for this signature in database
GPG Key ID: BEA224F1FEF113AC
1 changed files with 13 additions and 6 deletions

View File

@ -33,6 +33,8 @@ class Taxonomy extends AppModel
)
);
private $__taxonomyConflicts = [];
public function update()
{
$existing = $this->find('all', array(
@ -593,7 +595,6 @@ class Taxonomy extends AppModel
if ($splits === null) {
return false; // not a taxonomy tag
}
$key = "misp:taxonomies_cache:tagName=$tagName&fullTaxonomy=$fullTaxonomy";
try {
@ -729,13 +730,19 @@ class Taxonomy extends AppModel
$conflictingTaxonomy = array();
foreach ($tagNameList as $tagName) {
$tagShortened = $this->stripLastTagComponent($tagName);
// No exclusivity in non taxonomy tags.
if ($tagShortened === '') {
continue;
}
if (isset($potentiallyConflictingTaxonomy[$tagShortened])) {
$potentiallyConflictingTaxonomy[$tagShortened]['taxonomy'] = $this->getTaxonomyForTag($tagName);
if (!isset($this->__taxonomyConflicts[$tagShortened])) {
$this->__taxonomyConflicts[$tagShortened] = $this->getTaxonomyForTag($tagName);
}
$potentiallyConflictingTaxonomy[$tagShortened]['count']++;
} else {
$potentiallyConflictingTaxonomy[$tagShortened] = array(
$potentiallyConflictingTaxonomy[$tagShortened] = [
'count' => 1
);
];
}
$potentiallyConflictingTaxonomy[$tagShortened]['tagNames'][] = $tagName;
}
@ -747,9 +754,9 @@ class Taxonomy extends AppModel
) {
unset($potentiallyConflictingTaxonomy['tlp']);
}
foreach ($potentiallyConflictingTaxonomy as $potTaxonomy) {
foreach ($potentiallyConflictingTaxonomy as $taxonomyName => $potTaxonomy) {
if ($potTaxonomy['count'] > 1) {
$taxonomy = $potTaxonomy['taxonomy'];
$taxonomy = $this->__taxonomyConflicts[$taxonomyName];
if (isset($taxonomy['Taxonomy']['exclusive']) && $taxonomy['Taxonomy']['exclusive']) {
$conflictingTaxonomy[] = array(
'tags' => $potTaxonomy['tagNames'],