mirror of https://github.com/MISP/MISP
new: [freetext] Faster freetext parsing with more tests
parent
126fe6fae5
commit
fc3924c8e8
|
@ -41,7 +41,7 @@ class ComplexTypeTool
|
|||
public static function refangValue($value, $type)
|
||||
{
|
||||
foreach (self::$__refangRegexTable as $regex) {
|
||||
if (in_array($type, $regex['types'])) {
|
||||
if (in_array($type, $regex['types'], true)) {
|
||||
$value = preg_replace($regex['from'], $regex['to'], $value);
|
||||
}
|
||||
}
|
||||
|
@ -131,16 +131,6 @@ class ComplexTypeTool
|
|||
return array('type' => 'other', 'value' => $input);
|
||||
}
|
||||
|
||||
/**
 * Keep only the values stored under odd keys and return them as a
 * zero-indexed list, preserving their original order.
 *
 * A key counts as odd when `$key % 2` equals 1; every other entry is dropped.
 *
 * @param array $array Source array (presumably integer-keyed; confirm against callers)
 * @return array Values found at odd keys, reindexed from 0
 */
private function __returnOddElements($array)
{
    $oddElements = [];
    foreach ($array as $position => $element) {
        // Same predicate as "drop when $k % 2 != 1", expressed as a keep rule.
        if ($position % 2 == 1) {
            $oddElements[] = $element;
        }
    }
    return $oddElements;
}
|
||||
|
||||
/**
|
||||
* Parse a CSV file with the given settings
|
||||
* All lines starting with # are stripped
|
||||
|
@ -203,27 +193,21 @@ class ComplexTypeTool
|
|||
|
||||
public function checkFreeText($input, $settings = array())
|
||||
{
|
||||
$charactersToTrim = '\'",() ' . "\t\n\r\0\x0B"; // custom + default PHP trim
|
||||
$charactersToTrim = '\'".,() ' . "\t\n\r\0\x0B"; // custom + default PHP trim
|
||||
$input = str_replace("\xc2\xa0", ' ', $input); // non breaking space to normal space
|
||||
$input = preg_replace('/\p{C}+/u', ' ', $input);
|
||||
$iocArray = preg_split("/\r\n|\n|\r|\s|\s+|,|\<|\>|;/", $input);
|
||||
$quotedText = explode('"', $input);
|
||||
foreach ($quotedText as $k => $temp) {
|
||||
$temp = trim($temp);
|
||||
if (empty($temp)) {
|
||||
unset($quotedText[$k]);
|
||||
} else {
|
||||
$quotedText[$k] = $temp;
|
||||
}
|
||||
}
|
||||
$iocArray = array_merge($iocArray, $this->__returnOddElements($quotedText));
|
||||
$resultArray = array();
|
||||
|
||||
preg_match_all('/\"([^\"]*)\"/', $input, $matches);
|
||||
foreach ($matches[1] as $match) {
|
||||
if ($match !== '') {
|
||||
$iocArray[] = $match;
|
||||
}
|
||||
}
|
||||
|
||||
$resultArray = [];
|
||||
foreach ($iocArray as $ioc) {
|
||||
// remove trailing .
|
||||
$ioc = rtrim($ioc, '.');
|
||||
// remove brackets if enclosed
|
||||
$ioc = trim($ioc, '()');
|
||||
$ioc = str_replace("\xc2\xa0", '', $ioc); // remove non breaking space
|
||||
$ioc = trim($ioc, $charactersToTrim);
|
||||
$ioc = preg_replace('/\p{C}+/u', '', $ioc);
|
||||
if (empty($ioc)) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -94,6 +94,15 @@ EOT;
|
|||
$this->assertEquals('ip-dst', $results[0]['default_type']);
|
||||
}
|
||||
|
||||
/**
 * An IPv4 address wrapped in parentheses and followed by a full stop,
 * embedded in a sentence, must come back as exactly one `ip-dst` result
 * whose value is the bare address.
 */
public function testCheckFreeTextIpv4Bracket(): void
{
    $sentence = 'we also saw an IP address (8.8.8.8).';

    $tool = new ComplexTypeTool();
    $parsed = $tool->checkFreeText($sentence);

    $this->assertCount(1, $parsed);
    $first = $parsed[0];
    $this->assertEquals('8.8.8.8', $first['value']);
    $this->assertEquals('ip-dst', $first['default_type']);
}
|
||||
|
||||
public function testCheckFreeTextIpv4WithPort(): void
|
||||
{
|
||||
$complexTypeTool = new ComplexTypeTool();
|
||||
|
@ -497,6 +506,24 @@ EOT;
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Two IPv4 addresses separated only by a UTF-8 non-breaking space
 * (bytes 0xC2 0xA0) must be parsed as two results; the first one is
 * checked for its value and default type.
 */
public function testCheckFreeTextNonBreakableSpace(): void
{
    $text = "127.0.0.1\xc2\xa0127.0.0.2";

    $tool = new ComplexTypeTool();
    $parsed = $tool->checkFreeText($text);

    $this->assertCount(2, $parsed);
    $first = $parsed[0];
    $this->assertEquals('127.0.0.1', $first['value']);
    $this->assertEquals('ip-dst', $first['default_type']);
}
|
||||
|
||||
/**
 * Double-quoted values prefixed with `=` must be extracted from the input.
 * The input also carries an empty quoted value and a quoted `1`; exactly
 * two results are expected overall, the first being 127.0.0.1 as `ip-dst`.
 */
public function testCheckFreeTextQuoted(): void
{
    $text = '="127.0.0.1",="127.0.0.2","","1"';

    $tool = new ComplexTypeTool();
    $parsed = $tool->checkFreeText($text);

    $this->assertCount(2, $parsed);
    $first = $parsed[0];
    $this->assertEquals('127.0.0.1', $first['value']);
    $this->assertEquals('ip-dst', $first['default_type']);
}
|
||||
|
||||
public function testCheckFreeTextRemoveDuplicates(): void
|
||||
{
|
||||
$complexTypeTool = new ComplexTypeTool();
|
||||
|
|
Loading…
Reference in New Issue