mirror of https://github.com/MISP/MISP
Merge pull request #6115 from JakubOnderka/freetext-fixes-vol2
chg: [feed] Use less memory when parsing CSV feeds
pull/4627/merge
commit
7eeb15a450
|
@ -1,4 +1,5 @@
|
|||
<?php
|
||||
require_once __DIR__ . '/TmpFileTool.php';
|
||||
|
||||
class ComplexTypeTool
|
||||
{
|
||||
|
@ -140,31 +141,28 @@ class ComplexTypeTool
|
|||
return array_values($array);
|
||||
}
|
||||
|
||||
/*
|
||||
* parse a CSV file with the given settings
|
||||
/**
|
||||
* Parse a CSV file with the given settings
|
||||
* All lines starting with # are stripped
|
||||
* The settings can contain the following:
|
||||
* delimiter: Expects a delimiter string (default is a simple comma).
|
||||
* For example, to split the following line: "value1##comma##value2" simply pass $settings['delimiter'] = "##comma##";
|
||||
* value: Expects an array (or a comma separated string) with numeric values denoting the columns containing indicators. If this is not set then every value will be checked. (column numbers start at 1)
|
||||
* @param string $input
|
||||
* @param array $settings
|
||||
* @return array
|
||||
* @throws Exception
|
||||
*/
|
||||
public function checkCSV($input, $settings = array())
|
||||
{
|
||||
$delimiter = !empty($settings['delimiter']) ? $settings['delimiter'] : ",";
|
||||
$rows = str_getcsv($input, "\n");
|
||||
unset($input);
|
||||
$data = array();
|
||||
foreach ($rows as $k => $row) {
|
||||
if (empty($row[0]) || $row[0] === '#') {
|
||||
continue;
|
||||
}
|
||||
if ($delimiter == '\t') {
|
||||
$data[$k] = explode("\t", $row);
|
||||
} else {
|
||||
$data[$k] = str_getcsv($row, $delimiter);
|
||||
}
|
||||
if (empty($input)) {
|
||||
return [];
|
||||
}
|
||||
|
||||
$delimiter = !empty($settings['delimiter']) ? $settings['delimiter'] : ",";
|
||||
if ($delimiter === '\t') {
|
||||
$delimiter = "\t";
|
||||
}
|
||||
unset($rows);
|
||||
$values = !empty($settings['value']) ? $settings['value'] : array();
|
||||
if (!is_array($values)) {
|
||||
$values = explode(',', $values);
|
||||
|
@ -172,23 +170,34 @@ class ComplexTypeTool
|
|||
foreach ($values as $key => $value) {
|
||||
$values[$key] = intval($value);
|
||||
}
|
||||
$iocArray = array();
|
||||
foreach ($data as $rowPos => $row) {
|
||||
|
||||
// Write to tmp file to save memory
|
||||
$tmpFile = new TmpFileTool();
|
||||
$tmpFile->write($input);
|
||||
unset($input);
|
||||
|
||||
$iocArray = [];
|
||||
foreach ($tmpFile->csv($delimiter) as $row) {
|
||||
if (!empty($row[0][0]) && $row[0][0] === '#') { // Comment
|
||||
continue;
|
||||
}
|
||||
foreach ($row as $elementPos => $element) {
|
||||
if ((!empty($values) && in_array(($elementPos + 1), $values)) || empty($values)) {
|
||||
if (empty($values) || in_array(($elementPos + 1), $values)) {
|
||||
$element = trim($element, " \t\n\r\0\x0B\"\'");
|
||||
if (isset($settings['excluderegex']) && !empty($settings['excluderegex'])) {
|
||||
if (preg_match($settings['excluderegex'], $element)) {
|
||||
continue;
|
||||
}
|
||||
if (empty($element)) {
|
||||
continue;
|
||||
}
|
||||
if (!empty($settings['excluderegex']) && preg_match($settings['excluderegex'], $element)) {
|
||||
continue;
|
||||
}
|
||||
$resolvedResult = $this->__resolveType($element);
|
||||
if (!empty($resolvedResult)) {
|
||||
if ($resolvedResult) {
|
||||
$iocArray[] = $resolvedResult;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return $iocArray;
|
||||
}
|
||||
|
||||
|
@ -244,9 +253,8 @@ class ComplexTypeTool
|
|||
|
||||
private function __resolveType($raw_input)
|
||||
{
|
||||
$input = array(
|
||||
'raw' => trim($raw_input)
|
||||
);
|
||||
$input = array('raw' => trim($raw_input));
|
||||
|
||||
$input = $this->__refangInput($input);
|
||||
$input = $this->__extractPort($input);
|
||||
|
||||
|
|
|
@ -37,6 +37,31 @@ class TmpFileTool
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Iterate over the temporary file and yield every record parsed as CSV.
 *
 * rewind() is called first, then records are read one at a time with fgetcsv(), so only a
 * single record is held in memory at once. After the last record has been yielded the file
 * handle is closed and cleared.
 *
 * @param string $delimiter Field delimiter handed to fgetcsv().
 * @param string $enclosure Field enclosure character handed to fgetcsv().
 * @param string $escape Escape character handed to fgetcsv().
 * @return Generator Yields the value returned by fgetcsv() for each record.
 * @throws Exception If a record cannot be read for a reason other than reaching end of file.
 */
public function csv($delimiter = ',', $enclosure = '"', $escape = "\\")
{
    $this->rewind();
    $line = 0;
    while (!feof($this->tmpfile)) {
        $result = fgetcsv($this->tmpfile, 0, $delimiter, $enclosure, $escape);
        if ($result === false) {
            // feof() only becomes true after a read has actually hit the end of the stream.
            // Input that ends with a newline (or is empty) therefore gets here with nothing
            // left to read; that is the normal end of data, not a read failure.
            if (feof($this->tmpfile)) {
                break;
            }
            throw new Exception("Could not read line $line from temporary CSV file.");
        }
        $line++;
        yield $result;
    }
    fclose($this->tmpfile);
    $this->tmpfile = null;
}
|
||||
|
||||
/**
|
||||
* @return Generator
|
||||
* @throws Exception
|
||||
|
|
|
@ -5,25 +5,84 @@ use PHPUnit\Framework\TestCase;
|
|||
|
||||
class ComplexTypeToolTest extends TestCase
|
||||
{
|
||||
public function testCheckCSV(): void
{
    // Feed with one comment line, one blank line and two addresses (one of them quoted).
    $feed = <<<CSV
# Downloaded from 1.1.1.1

127.0.0.1
"127.0.0.2"
CSV;
    $tool = new ComplexTypeTool();

    // Only the two addresses are expected in the result.
    $this->assertCount(2, $tool->checkCSV($feed));
}
|
||||
|
||||
public function testCheckCSVTabulator(): void
{
    // Tab separated feed that starts with a banner of comment lines.
    $feed = <<<CSV
###########################################################################################
# Downloaded from 1.1.1.1
###########################################################################################
127.0.0.1\t127.0.0.3
"127.0.0.2"
58.214.25.190
58.214.239.53
CSV;
    $tool = new ComplexTypeTool();

    // The delimiter setting is the two-character string '\t', not a real tab character.
    $parsed = $tool->checkCSV($feed, ['delimiter' => '\t']);

    $this->assertCount(5, $parsed);
}
|
||||
|
||||
public function testCheckCSVValues(): void
{
    // Two rows with two tab separated columns each.
    $feed = <<<CSV
127.0.0.1\t127.0.0.2
127.0.0.3\t127.0.0.4
CSV;
    $tool = new ComplexTypeTool();

    // Restrict parsing to the first column.
    $parsed = $tool->checkCSV($feed, ['value' => '1', 'delimiter' => '\t']);

    $this->assertCount(2, $parsed);
    $expectedValues = ['127.0.0.1', '127.0.0.3'];
    foreach ($expectedValues as $index => $expected) {
        $this->assertEquals($expected, $parsed[$index]['value']);
        $this->assertEquals('ip-dst', $parsed[$index]['default_type']);
    }
}
|
||||
|
||||
public function testCheckCSVEmpty(): void
{
    // An empty input string must not produce any result.
    $tool = new ComplexTypeTool();

    $this->assertCount(0, $tool->checkCSV(''));
}
|
||||
|
||||
public function testCheckCSVEmptyLines(): void
{
    // Rows made only of delimiters and whitespace must not produce any result.
    $onlySeparators = ",,,\t\n,,,,,";
    $tool = new ComplexTypeTool();

    $this->assertCount(0, $tool->checkCSV($onlySeparators));
}
|
||||
|
||||
public function testCheckCSVTestFile(): void
{
    // Parse the CSV fixture that lives in the tests directory.
    $fixture = file_get_contents(__DIR__ . '/../../tests/event.csv');
    $tool = new ComplexTypeTool();

    $parsed = $tool->checkCSV($fixture);

    $this->assertCount(37, $parsed);
}
|
||||
|
||||
public function testCheckFreeTextHeader(): void
|
||||
{
|
||||
$complexTypeTool = new ComplexTypeTool();
|
||||
$results = $complexTypeTool->checkFreeText(<<<EOT
|
||||
$text = <<<EOT
|
||||
# LAST 1000 # UTC UPDATE 2020-07-13 08:15:00
|
||||
127.0.0.1,(127.0.0.2), <127.0.0.3>; "127.0.0.4" '127.0.0.5'
|
||||
EOT
|
||||
);
|
||||
127.0.0.1,(127.0.0.2), <127.0.0.3>; "127.0.0.4" "'127.0.0.5'"
|
||||
EOT;
|
||||
$results = $complexTypeTool->checkFreeText($text);
|
||||
$this->assertCount(5, $results);
|
||||
$this->assertEquals('127.0.0.1', $results[0]['value']);
|
||||
$this->assertEquals('ip-dst', $results[0]['default_type']);
|
||||
$this->assertEquals('127.0.0.2', $results[1]['value']);
|
||||
$this->assertEquals('ip-dst', $results[1]['default_type']);
|
||||
$this->assertEquals('127.0.0.3', $results[2]['value']);
|
||||
$this->assertEquals('ip-dst', $results[2]['default_type']);
|
||||
$this->assertEquals('127.0.0.4', $results[3]['value']);
|
||||
$this->assertEquals('ip-dst', $results[3]['default_type']);
|
||||
$this->assertEquals('127.0.0.5', $results[4]['value']);
|
||||
$this->assertEquals('ip-dst', $results[4]['default_type']);
|
||||
foreach (['127.0.0.1', '127.0.0.2', '127.0.0.3', '127.0.0.4', '127.0.0.5'] as $k => $test) {
|
||||
$this->assertEquals($test, $results[$k]['value']);
|
||||
$this->assertEquals('ip-dst', $results[$k]['default_type']);
|
||||
}
|
||||
}
|
||||
|
||||
public function testCheckFreeTextIpv4(): void
|
||||
|
@ -267,6 +326,15 @@ EOT
|
|||
$this->assertEquals('url', $results[0]['default_type']);
|
||||
}
|
||||
|
||||
public function testCheckFreeTextUrlWithPort(): void
{
    // A URL carrying an explicit port must come back unchanged and typed as a URL.
    $url = 'https://github.com:443/MISP/MISP';
    $tool = new ComplexTypeTool();

    $parsed = $tool->checkFreeText($url);

    $this->assertCount(1, $parsed);
    $first = $parsed[0];
    $this->assertEquals($url, $first['value']);
    $this->assertEquals('url', $first['default_type']);
}
|
||||
|
||||
public function testCheckFreeTextUrlWithoutProtocol(): void
|
||||
{
|
||||
$complexTypeTool = new ComplexTypeTool();
|
||||
|
|
Loading…
Reference in New Issue