new: [freetext] Faster freetext parsing with more tests

pull/7181/head
Jakub Onderka 2021-03-09 16:05:02 +01:00
parent 126fe6fae5
commit fc3924c8e8
2 changed files with 40 additions and 29 deletions

View File

@ -41,7 +41,7 @@ class ComplexTypeTool
public static function refangValue($value, $type)
{
foreach (self::$__refangRegexTable as $regex) {
if (in_array($type, $regex['types'])) {
if (in_array($type, $regex['types'], true)) {
$value = preg_replace($regex['from'], $regex['to'], $value);
}
}
@ -131,16 +131,6 @@ class ComplexTypeTool
return array('type' => 'other', 'value' => $input);
}
/**
 * Keep only the elements stored under odd keys and renumber them from zero.
 *
 * @param array $array Input array; every element whose key modulo 2 is not 1 is dropped.
 * @return array The surviving values in their original order, reindexed sequentially.
 */
private function __returnOddElements($array)
{
    $oddKeyed = array_filter(
        $array,
        static function ($key) {
            // The modulo result is always an integer, so a strict check is equivalent here.
            return $key % 2 === 1;
        },
        ARRAY_FILTER_USE_KEY
    );
    return array_values($oddKeyed);
}
/**
* Parse a CSV file with the given settings
* All lines starting with # are stripped
@ -203,27 +193,21 @@ class ComplexTypeTool
public function checkFreeText($input, $settings = array())
{
$charactersToTrim = '\'",() ' . "\t\n\r\0\x0B"; // custom + default PHP trim
$charactersToTrim = '\'".,() ' . "\t\n\r\0\x0B"; // custom + default PHP trim
$input = str_replace("\xc2\xa0", ' ', $input); // non breaking space to normal space
$input = preg_replace('/\p{C}+/u', ' ', $input);
$iocArray = preg_split("/\r\n|\n|\r|\s|\s+|,|\<|\>|;/", $input);
$quotedText = explode('"', $input);
foreach ($quotedText as $k => $temp) {
$temp = trim($temp);
if (empty($temp)) {
unset($quotedText[$k]);
} else {
$quotedText[$k] = $temp;
}
}
$iocArray = array_merge($iocArray, $this->__returnOddElements($quotedText));
$resultArray = array();
preg_match_all('/\"([^\"]*)\"/', $input, $matches);
foreach ($matches[1] as $match) {
if ($match !== '') {
$iocArray[] = $match;
}
}
$resultArray = [];
foreach ($iocArray as $ioc) {
// remove trailing .
$ioc = rtrim($ioc, '.');
// remove brackets if enclosed
$ioc = trim($ioc, '()');
$ioc = str_replace("\xc2\xa0", '', $ioc); // remove non breaking space
$ioc = trim($ioc, $charactersToTrim);
$ioc = preg_replace('/\p{C}+/u', '', $ioc);
if (empty($ioc)) {
continue;
}

View File

@ -94,6 +94,15 @@ EOT;
$this->assertEquals('ip-dst', $results[0]['default_type']);
}
/**
 * An IPv4 address wrapped in brackets and followed by a full stop is
 * expected to come back as a single bare ip-dst result.
 */
public function testCheckFreeTextIpv4Bracket(): void
{
    $sentence = 'we also saw an IP address (8.8.8.8).';
    $parsed = (new ComplexTypeTool())->checkFreeText($sentence);

    $this->assertCount(1, $parsed);

    $first = $parsed[0];
    $this->assertEquals('8.8.8.8', $first['value']);
    $this->assertEquals('ip-dst', $first['default_type']);
}
public function testCheckFreeTextIpv4WithPort(): void
{
$complexTypeTool = new ComplexTypeTool();
@ -497,6 +506,24 @@ EOT;
}
}
/**
 * Two IPv4 addresses joined only by a UTF-8 non-breaking space (0xC2 0xA0)
 * are expected to be reported as two results, the first being an ip-dst.
 */
public function testCheckFreeTextNonBreakableSpace(): void
{
    $text = "127.0.0.1\xc2\xa0127.0.0.2";
    $parsed = (new ComplexTypeTool())->checkFreeText($text);

    $this->assertCount(2, $parsed);

    $first = $parsed[0];
    $this->assertEquals('127.0.0.1', $first['value']);
    $this->assertEquals('ip-dst', $first['default_type']);
}
/**
 * Spreadsheet-style quoted cells (="...") are parsed: the input below is
 * expected to yield exactly two results, the first being the 127.0.0.1 ip-dst.
 */
public function testCheckFreeTextQuoted(): void
{
    $cells = '="127.0.0.1",="127.0.0.2","","1"';
    $parsed = (new ComplexTypeTool())->checkFreeText($cells);

    $this->assertCount(2, $parsed);

    $first = $parsed[0];
    $this->assertEquals('127.0.0.1', $first['value']);
    $this->assertEquals('ip-dst', $first['default_type']);
}
public function testCheckFreeTextRemoveDuplicates(): void
{
$complexTypeTool = new ComplexTypeTool();