chg: Refactor of the complex type tool

- makes it more readable
pull/3134/head
iglocska 2018-04-03 22:25:52 +02:00
parent ed70624354
commit 9485dfe5e2
1 changed files with 104 additions and 65 deletions

View File

@ -192,67 +192,111 @@ class ComplexTypeTool {
128 => array('single' => array('sha512'), 'composite' => array('filename|sha512'))
);
private function __resolveType($input) {
$input = trim($input);
if (strpos($input, '|')) {
$compositeParts = explode('|', $input);
if (count($compositeParts) == 2) {
if ($this->__resolveFilename($compositeParts[0])) {
foreach ($this->__hexHashTypes as $k => $v) {
if (strlen($compositeParts[1]) == $k && preg_match("#[0-9a-f]{" . $k . "}$#i", $compositeParts[1])) return array('types' => $v['composite'], 'to_ids' => true, 'default_type' => $v['composite'][0], 'value' => $input);
}
if (preg_match('#^[0-9]+:[0-9a-zA-Z\/\+]+:[0-9a-zA-Z\/\+]+$#', $compositeParts[1]) && !preg_match('#^[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}$#', $compositeParts[1])) {
return array('types' => array('filename|ssdeep'), 'to_ids' => true, 'default_type' => 'filename|ssdeep', 'value' => $input);
// Check routines to run through, in order. Each name is appended to '__checkFor'
// to build the method that __resolveType calls; the first truthy result is returned.
private $__checks = array('Hashes', 'Email', 'IP', 'DomainOrFilename', 'SimpleRegex');
/**
 * Work out which attribute type(s) a free-text value could be.
 *
 * The trimmed value is refanged, a trailing port is split off, and then every
 * check named in $this->__checks is tried in order.
 *
 * @param string $raw_input value to classify
 * @return mixed the first truthy check result, or false when no check matches
 */
private function __resolveType($raw_input) {
    $input = $this->__extractPort(
        $this->__refangInput(array('raw' => trim($raw_input)))
    );
    foreach ($this->__checks as $check) {
        // Method names are built dynamically: 'Email' => __checkForEmail, etc.
        $method = '__checkFor' . $check;
        $result = $this->$method($input);
        if ($result) {
            return $result;
        }
    }
    return false;
}
/**
 * Recognise an e-mail address in the refanged input.
 *
 * @param array $input prepared input; only the 'refanged' entry is read
 * @return array|false type description for an e-mail address, false otherwise
 */
private function __checkForEmail($input) {
    $candidate = $input['refanged'];
    // Cheap pre-filter: without an @ there is no point running the validator.
    if (strpos($candidate, '@') === false) {
        return false;
    }
    if (!filter_var($candidate, FILTER_VALIDATE_EMAIL)) {
        return false;
    }
    return array(
        'types' => array('email-src', 'email-dst', 'target-email', 'whois-registrant-email'),
        'to_ids' => true,
        'default_type' => 'email-src',
        'value' => $candidate
    );
}
private function __checkForHashes($input) {
// handle prepared composite values with the filename|hash format
if (strpos($input['raw'], '|')) {
$compositeParts = explode('|', $input['raw']);
if (count($compositeParts) == 2) {
if ($this->__resolveFilename($compositeParts[0])) {
foreach ($this->__hexHashTypes as $k => $v) {
if (strlen($compositeParts[1]) == $k && preg_match("#[0-9a-f]{" . $k . "}$#i", $compositeParts[1])) return array('types' => $v['composite'], 'to_ids' => true, 'default_type' => $v['composite'][0], 'value' => $input['raw']);
}
if (preg_match('#^[0-9]+:[0-9a-zA-Z\/\+]+:[0-9a-zA-Z\/\+]+$#', $compositeParts[1]) && !preg_match('#^[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}$#', $compositeParts[1])) {
return array('types' => array('filename|ssdeep'), 'to_ids' => true, 'default_type' => 'filename|ssdeep', 'value' => $input['raw']);
}
}
}
}
}
// check for hashes
foreach ($this->__hexHashTypes as $k => $v) {
if (strlen($input) == $k && preg_match("#[0-9a-f]{" . $k . "}$#i", $input)) return array('types' => $v['single'], 'to_ids' => true, 'default_type' => $v['single'][0], 'value' => $input);
}
if (preg_match('#^[0-9]+:[0-9a-zA-Z\/\+]+:[0-9a-zA-Z\/\+]+$#', $input) && !preg_match('#^[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}$#', $input)) return array('types' => array('ssdeep'), 'to_ids' => true, 'default_type' => 'ssdeep', 'value' => $input);
$inputRefanged = $input;
foreach ($this->__refangRegexTable as $regex => $replacement) {
$inputRefanged = preg_replace($regex, $replacement , $inputRefanged);
}
$inputRefanged = rtrim($inputRefanged, ".");
if (strpos($inputRefanged, '@') !== false) {
if (filter_var($inputRefanged, FILTER_VALIDATE_EMAIL)) return array('types' => array('email-src', 'email-dst', 'target-email', 'whois-registrant-email'), 'to_ids' => true, 'default_type' => 'email-src', 'value' => $inputRefanged);
}
// check for hashes
foreach ($this->__hexHashTypes as $k => $v) {
if (strlen($input['raw']) == $k && preg_match("#[0-9a-f]{" . $k . "}$#i", $input['raw'])) return array('types' => $v['single'], 'to_ids' => true, 'default_type' => $v['single'][0], 'value' => $input['raw']);
}
// ssdeep has a different pattern
if (preg_match('#^[0-9]+:[0-9a-zA-Z\/\+]+:[0-9a-zA-Z\/\+]+$#', $input['raw']) && !preg_match('#^[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}$#', $input['raw'])) return array('types' => array('ssdeep'), 'to_ids' => true, 'default_type' => 'ssdeep', 'value' => $input['raw']);
return false;
}
private function __extractPort($input) {
// note down and remove the port if it's a url / domain name / hostname / ip
// $input['refanged_no_port'] from here on holds the refanged input with the port removed. It is only used by the url / domain name / hostname / ip checks
$comment = false;
if (preg_match('/(:[0-9]{2,5})$/', $inputRefanged, $port)) {
$comment = 'On port ' . substr($port[0], 1);
$inputRefangedNoPort = str_replace($port[0], '', $inputRefanged);
$port = substr($port[0], 1);
$input['comment'] = false;
if (preg_match('/(:[0-9]{2,5})$/', $input['refanged'], $input['port'])) {
$input['comment'] = 'On port ' . substr($input['port'][0], 1);
$input['refanged_no_port'] = str_replace($input['port'][0], '', $input['refanged']);
$input['port'] = substr($input['port'][0], 1);
} else {
unset($port);
$inputRefangedNoPort = $inputRefanged;
unset($input['port']);
$input['refanged_no_port'] = $input['refanged'];
}
// check for IP
if (filter_var($inputRefangedNoPort, FILTER_VALIDATE_IP)) {
if (isset($port)) {
return array('types' => array('ip-dst|port', 'ip-src|port', 'ip-src|port/ip-dst|port'), 'to_ids' => true, 'default_type' => 'ip-dst|port', 'comment' => $comment, 'value' => $inputRefangedNoPort . '|' . $port);
return $input;
}
/**
 * Add a 'refanged' entry to the input array.
 *
 * Starts from $input['raw'], applies every pattern => replacement pair of
 * $this->__refangRegexTable in turn, then strips trailing dots.
 *
 * @param array $input prepared input containing at least 'raw'
 * @return array the same input with 'refanged' set
 */
private function __refangInput($input) {
    $refanged = $input['raw'];
    foreach ($this->__refangRegexTable as $pattern => $substitute) {
        $refanged = preg_replace($pattern, $substitute, $refanged);
    }
    $input['refanged'] = rtrim($refanged, '.');
    return $input;
}
/**
 * Match the raw input against simple one-shot patterns (CVE ids, phone numbers).
 *
 * @param array $input prepared input; only the 'raw' entry is read
 * @return array|false type description on a match, false otherwise
 */
private function __checkForSimpleRegex($input) {
    // CVE numbers
    if (preg_match("#^cve-[0-9]{4}-[0-9]{4,9}$#i", $input['raw'])) {
        return array('types' => array('vulnerability'), 'categories' => array('External analysis'), 'to_ids' => false, 'default_type' => 'vulnerability', 'value' => $input['raw']);
    }
    // Phone numbers
    if (preg_match("#^(\+)?([0-9]{1,3}(\(0\))?)?[0-9\/\-]{5,}[0-9]$#i", $input['raw'])) {
        return array('types' => array('phone-number', 'prtn', 'whois-registrant-phone'), 'categories' => array('Other'), 'to_ids' => false, 'default_type' => 'phone-number', 'value' => $input['raw']);
    }
    // Explicit miss, like every other __checkFor* routine, instead of an implicit NULL.
    return false;
}
private function __checkForIP($input) {
if (filter_var($input['refanged_no_port'], FILTER_VALIDATE_IP)) {
if (isset($input['port'])) {
return array('types' => array('ip-dst|port', 'ip-src|port', 'ip-src|port/ip-dst|port'), 'to_ids' => true, 'default_type' => 'ip-dst|port', 'comment' => $input['comment'], 'value' => $input['refanged_no_port'] . '|' . $input['port']);
} else {
return array('types' => array('ip-dst', 'ip-src', 'ip-src/ip-dst'), 'to_ids' => true, 'default_type' => 'ip-dst', 'comment' => $comment, 'value' => $inputRefangedNoPort);
return array('types' => array('ip-dst', 'ip-src', 'ip-src/ip-dst'), 'to_ids' => true, 'default_type' => 'ip-dst', 'comment' => $input['comment'], 'value' => $input['refanged_no_port']);
}
}
if (strpos($inputRefangedNoPort, '/')) {
$temp = explode('/', $inputRefangedNoPort);
// it could still be a CIDR block
if (strpos($input['refanged_no_port'], '/')) {
$temp = explode('/', $input['refanged_no_port']);
if (count($temp) == 2) {
if (filter_var($temp[0], FILTER_VALIDATE_IP) && is_numeric($temp[1])) return array('types' => array('ip-dst', 'ip-src', 'ip-src/ip-dst'), 'to_ids' => true, 'default_type' => 'ip-dst', 'comment' => $comment, 'value' => $inputRefangedNoPort);
if (filter_var($temp[0], FILTER_VALIDATE_IP) && is_numeric($temp[1])) return array('types' => array('ip-dst', 'ip-src', 'ip-src/ip-dst'), 'to_ids' => true, 'default_type' => 'ip-dst', 'comment' => $input['comment'], 'value' => $input['refanged_no_port']);
}
}
// check for domain name, hostname, filename
if (strpos($inputRefanged, '.') !== false) {
$temp = explode('.', $inputRefanged);
}
private function __checkForDomainOrFilename($input) {
if (strpos($input['refanged'], '.') !== false) {
$temp = explode('.', $input['refanged']);
// TODO: use a more flexible matching approach, like the one below (that still doesn't support non-ASCII domains)
//if (filter_var($input, FILTER_VALIDATE_URL)) {
$domainDetection = true;
if (preg_match('/^([-\pL\pN]+\.)+[a-z]+(:[0-9]{2,5})?$/iu', $inputRefanged)) {
if (preg_match('/^([-\pL\pN]+\.)+[a-z]+(:[0-9]{2,5})?$/iu', $input['refanged'])) {
if (empty($this->__tlds) || count($this->__tlds) == 1) {
$this->__generateTLDList();
}
@ -265,42 +309,37 @@ class ComplexTypeTool {
}
if ($domainDetection) {
if (count($temp) > 2) {
return array('types' => array('hostname', 'domain', 'url'), 'to_ids' => true, 'default_type' => 'hostname', 'comment' => $comment, 'value' => $inputRefangedNoPort);
return array('types' => array('hostname', 'domain', 'url'), 'to_ids' => true, 'default_type' => 'hostname', 'comment' => $input['comment'], 'value' => $input['refanged_no_port']);
} else {
return array('types' => array('domain'), 'to_ids' => true, 'default_type' => 'domain', 'comment' => $comment, 'value' => $inputRefangedNoPort);
return array('types' => array('domain'), 'to_ids' => true, 'default_type' => 'domain', 'comment' => $input['comment'], 'value' => $input['refanged_no_port']);
}
} else {
// check if it is a URL
// Adding http:// infront of the input in case it was left off. github.com/MISP/MISP should still be counted as a valid link
if (count($temp) > 1 && (filter_var($inputRefangedNoPort, FILTER_VALIDATE_URL) || filter_var('http://' . $inputRefangedNoPort, FILTER_VALIDATE_URL))) {
// TODO: add comment explaining why there is a check for a specific domain
if (preg_match('/^https:\/\/(www.)?virustotal.com\//i', $inputRefangedNoPort)) return array('types' => array('link'), 'to_ids' => false, 'default_type' => 'link', 'comment' => $comment, 'value' => $inputRefangedNoPort);
if (preg_match('/^https:\/\/www\.hybrid-analysis\.com\//i', $inputRefangedNoPort)) return array('types' => array('link'), 'categories' => array('External analysis'), 'to_ids' => false, 'default_type' => 'link', 'comment' => $comment, 'value' => $inputRefangedNoPort);
if (strpos($inputRefangedNoPort, '/')) return array('types' => array('url'), 'to_ids' => true, 'default_type' => 'url', 'comment' => $comment, 'value' => $inputRefangedNoPort);
if (count($temp) > 1 && (filter_var($input['refanged_no_port'], FILTER_VALIDATE_URL) || filter_var('http://' . $input['refanged_no_port'], FILTER_VALIDATE_URL))) {
// Even though some domains are valid, we want to exclude them as they are known security vendors / etc
// TODO, replace that with the appropriate warninglist.
if (preg_match('/^https:\/\/(www.)?virustotal.com\//i', $input['refanged_no_port'])) return array('types' => array('link'), 'to_ids' => false, 'default_type' => 'link', 'comment' => $input['comment'], 'value' => $input['refanged_no_port']);
if (preg_match('/^https:\/\/www\.hybrid-analysis\.com\//i', $input['refanged_no_port'])) return array('types' => array('link'), 'categories' => array('External analysis'), 'to_ids' => false, 'default_type' => 'link', 'comment' => $input['comment'], 'value' => $input['refanged_no_port']);
if (strpos($input['refanged_no_port'], '/')) return array('types' => array('url'), 'to_ids' => true, 'default_type' => 'url', 'comment' => $input['comment'], 'value' => $input['refanged_no_port']);
}
if ($this->__resolveFilename($input)) return array('types' => array('filename'), 'to_ids' => true, 'default_type' => 'filename', 'value' => $input);
if ($this->__resolveFilename($input['raw'])) return array('types' => array('filename'), 'to_ids' => true, 'default_type' => 'filename', 'value' => $input['raw']);
}
}
if (strpos($input, '\\') !== false) {
$temp = explode('\\', $input);
if (strpos($input['raw'], '\\') !== false) {
$temp = explode('\\', $input['raw']);
if (strpos($temp[count($temp)-1], '.') || preg_match('/^.:/i', $temp[0])) {
if ($this->__resolveFilename($temp[count($temp)-1])) return array('types' => array('filename'), 'categories' => array('Payload installation'), 'to_ids' => true, 'default_type' => 'filename', 'value' => $input);
if ($this->__resolveFilename($temp[count($temp)-1])) return array('types' => array('filename'), 'categories' => array('Payload installation'), 'to_ids' => true, 'default_type' => 'filename', 'value' => $input['raw']);
} else {
return array('types' => array('regkey'), 'to_ids' => false, 'default_type' => 'regkey', 'value' => $input);
return array('types' => array('regkey'), 'to_ids' => false, 'default_type' => 'regkey', 'value' => $input['raw']);
}
}
// check for CVE
if (preg_match("#^cve-[0-9]{4}-[0-9]{4,9}$#i", $input)) return array('types' => array('vulnerability'), 'categories' => array('External analysis'), 'to_ids' => false, 'default_type' => 'vulnerability', 'value' => $input);
if (preg_match("#^(\+)?([0-9]{1,3}(\(0\))?)?[0-9\/\-]{5,}[0-9]$#i", $input)) return array('types' => array('phone-number', 'prtn', 'whois-registrant-phone'), 'categories' => array('Other'), 'to_ids' => false, 'default_type' => 'phone-number', 'value' => $input);
return false;
}
private function __resolveFilename($input) {
if ((preg_match('/^.:/', $input) || strpos($input, '.') !=0)) {
$parts = explode('.', $input);
private function __resolveFilename($param) {
if ((preg_match('/^.:/', $param) || strpos($param, '.') !=0)) {
$parts = explode('.', $param);
if (!is_numeric($parts[count($parts)-1]) && ctype_alnum($parts[count($parts)-1])) return true;
}
return false;