'http', '/\[\.\]/' => '.', '/\[dot\]/' => '.', '/\\\./' => '.', '/\.+/' => '.' ); public function checkComplexRouter($input, $type, $IANATLDentries) { switch ($type) { case 'File': return $this->checkComplexFile($input); break; case 'CnC': return $this->checkComplexCnC($input); break; case 'FreeText': return $this->checkFreeText($input, $IANATLDentries); break; default: return false; } } // checks if the passed input matches a valid file description attribute's pattern (filename, md5, sha1, sha256, filename|md5, filename|sha1, filename|sha256) public function checkComplexFile($input) { $original = $input; $type = ''; $composite = false; if (strpos($input, '|')) { $composite = true; $result = explode('|', $input); if (count($result) != 2 || !preg_match("#^.+#", $result[0])) { $type = 'other'; } else { $type = 'filename|'; } $input = $result[1]; } if (strlen($input) == 32 && preg_match("#[0-9a-f]{32}$#", $input)) $type .= 'md5'; if (strlen($input) == 40 && preg_match("#[0-9a-f]{40}$#", $input)) $type .= 'sha1'; if (strlen($input) == 64 && preg_match("#[0-9a-f]{64}$#", $input)) $type .= 'sha256'; if ($type == '' && !$composite && preg_match("#^.+#", $input)) $type = 'filename'; if ($type == '') $type = 'other'; return array('type' => $type, 'value' => $original); } public function checkComplexCnC($input) { $toReturn = array(); // check if it's an IP address if (filter_var($input, FILTER_VALIDATE_IP)) return array('type' => 'ip-dst', 'value' => $input); if (preg_match("#^[A-Z0-9.-]+\.[A-Z]{2,4}$#i", $input)) { $result = explode('.', $input); if (count($result) > 2) { $toReturn['multi'][] = array('type' => 'hostname', 'value' => $input); $pos = strpos($input, '.'); $toReturn['multi'][] = array('type' => 'domain', 'value' => substr($input, (1 + $pos))); return $toReturn; } return array('type' => 'domain', 'value' => $input); } if (!preg_match("#\n#", $input)) return array('type' => 'url', 'value' => $input); return array('type' => 'other', 'value' => $input); } private function __returnOddElements($array) { foreach ($array as $k => $v) if ($k % 2 != 1) unset($array[$k]); return array_values($array); } public function checkFreeText($input, $IANATLDentries) { $iocArray = preg_split("/\r\n|\n|\r|\s|\s+|,|;/", $input); $quotedText = explode('"', $input); foreach ($quotedText as $k => $temp) { $temp = trim($temp); if (empty($temp)) { unset($quotedText[$k]); } else { $quotedText[$k] = $temp; } } $iocArray = array_merge($iocArray, $this->__returnOddElements($quotedText)); $resultArray = array(); if (!empty($iocArray)) { foreach ($iocArray as $ioc) { $ioc = trim($ioc); $ioc = trim($ioc, '"'); $ioc = trim($ioc, ','); $ioc = preg_replace('/\p{C}+/u', '', $ioc); if (empty($ioc)) continue; $typeArray = $this->__resolveType($ioc, $IANATLDentries); if ($typeArray === false) continue; $temp = $typeArray; if (!isset($temp['value'])) $temp['value'] = $ioc; $resultArray[] = $temp; } } return $resultArray; } private $__hexHashTypes = array( 32 => array('single' => array('md5', 'imphash'), 'composite' => array('filename|md5', 'filename|imphash')), 40 => array('single' => array('sha1', 'pehash', 'x509-fingerprint-sha1'), 'composite' => array('filename|sha1', 'filename|pehash')), 56 => array('single' => array('sha224', 'sha512/224'), 'composite' => array('filename|sha224', 'filename|sha512/224')), 64 => array('single' => array('sha256', 'authentihash', 'sha512/256'), 'composite' => array('filename|sha256', 'filename|authentihash', 'filename|sha512/256')), 96 => array('single' => array('sha384'), 'composite' => array('filename|sha384')), 128 => array('single' => array('sha512'), 'composite' => array('filename|sha512')) ); private function __resolveType($input, $IANATLDentries) { $input = trim($input); // check for composite (|) attributes if (strpos($input, '|')) { $compositeParts = explode('|', $input); if (count($compositeParts) == 2) { if ($this->__resolveFilename($compositeParts[0])) { foreach ($this->__hexHashTypes as $k => $v) { if (strlen($compositeParts[1]) == $k && preg_match("#[0-9a-f]{" . $k . "}$#i", $compositeParts[1])) return array('types' => $v['composite'], 'to_ids' => true, 'default_type' => $v['composite'][0]); } if (preg_match('#^[0-9]+:.+:.+$#', $compositeParts[1])) return array('types' => array('ssdeep'), 'to_ids' => true, 'default_type' => 'filename|ssdeep'); } } } // check for hashes foreach ($this->__hexHashTypes as $k => $v) { if (strlen($input) == $k && preg_match("#[0-9a-f]{" . $k . "}$#i", $input)) return array('types' => $v['single'], 'to_ids' => true, 'default_type' => $v['single'][0]); } if (preg_match('#^[0-9]+:.+:.+$#', $input)) return array('types' => array('ssdeep'), 'to_ids' => true, 'default_type' => 'ssdeep'); $inputRefanged = $input; foreach ($this->__refangRegexTable as $regex => $replacement) $inputRefanged = preg_replace($regex, $replacement , $inputRefanged); $inputRefanged = rtrim($inputRefanged, "."); if (strpos($input, '@') !== false) { if (filter_var($input, FILTER_VALIDATE_EMAIL)) return array('types' => array('email-src', 'email-dst'), 'to_ids' => true, 'default_type' => 'email-src'); } // note down and remove the port if it's a url / domain name / hostname / ip // input2 from here on is the variable containing the original input with the port removed. It is only used by url / domain name / hostname / ip $comment = false; if (preg_match('/(:[0-9]{2,5})$/', $inputRefanged, $port)) { $comment = 'On port ' . substr($port[0], 1); $inputRefangedNoPort = str_replace($port[0], '', $inputRefanged); } else $inputRefangedNoPort = $inputRefanged; // check for IP if (filter_var($inputRefangedNoPort, FILTER_VALIDATE_IP)) return array('types' => array('ip-dst', 'ip-src', 'ip-src/ip-dst'), 'to_ids' => true, 'default_type' => 'ip-dst', 'comment' => $comment, 'value' => $inputRefangedNoPort); if (strpos($inputRefangedNoPort, '/')) { $temp = explode('/', $inputRefangedNoPort); if (count($temp) == 2) { if (filter_var($temp[0], FILTER_VALIDATE_IP) && is_numeric($temp[1])) return array('types' => array('ip-dst', 'ip-src', 'ip-src/ip-dst'), 'to_ids' => true, 'default_type' => 'ip-dst', 'comment' => $comment, 'value' => $inputRefangedNoPort); } } // check for domain name, hostname, filename if (strpos($inputRefanged, '.') !== false) { $temp = explode('.', $inputRefanged); // check for the new TLDs as known by IANA (if the Warninglists are not empty) if (!empty($IANATLDentries)) { $stringEnd = $temp[count($temp)-1]; if (in_array($stringEnd, $IANATLDentries)) { $types = array('filename', 'domain'); if (count($temp) > 2) $types[] = 'url'; return array('types' => $types, 'to_ids' => true, 'default_type' => 'filename', 'merge_categories' => true); } } // TODO: use a more flexible matching approach, like the one below (that still doesn't support non-ASCII domains) //if (filter_var($input, FILTER_VALIDATE_URL)) { if (preg_match('/^([-\pL\pN]+\.)+([a-z][a-z]|biz|cat|com|edu|gov|int|mil|net|org|pro|tel|aero|arpa|asia|coop|info|jobs|mobi|name|museum|travel)(:[0-9]{2,5})?$/iu', $inputRefanged)) { if (count($temp) > 2) { return array('types' => array('hostname', 'domain', 'url'), 'to_ids' => true, 'default_type' => 'hostname', 'comment' => $comment, 'value' => $inputRefangedNoPort); } else { return array('types' => array('domain'), 'to_ids' => true, 'default_type' => 'domain', 'comment' => $comment, 'value' => $inputRefangedNoPort); } } else { // check if it is a URL // Adding http:// infront of the input in case it was left off. github.com/MISP/MISP should still be counted as a valid link if (count($temp) > 1 && (filter_var($inputRefangedNoPort, FILTER_VALIDATE_URL) || filter_var('http://' . $inputRefangedNoPort, FILTER_VALIDATE_URL))) { // TODO: add comment explaining why there is a check for a specific domain if (preg_match('/^https:\/\/(www.)?virustotal.com\//i', $inputRefangedNoPort)) return array('types' => array('link'), 'to_ids' => false, 'default_type' => 'link', 'comment' => $comment, 'value' => $inputRefangedNoPort); if (strpos($inputRefangedNoPort, '/')) return array('types' => array('url'), 'to_ids' => true, 'default_type' => 'url', 'comment' => $comment, 'value' => $inputRefangedNoPort); } if ($this->__resolveFilename($input)) return array('types' => array('filename'), 'to_ids' => true, 'default_type' => 'filename'); } } if (strpos($input, '\\') !== false) { $temp = explode('\\', $input); if (strpos($temp[count($temp)-1], '.') || preg_match('/^.:/i', $temp[0])) { if ($this->__resolveFilename($temp[count($temp)-1])) return array('types' => array('filename'), 'categories' => array('Payload installation'), 'to_ids' => true, 'default_type' => 'filename'); } else { return array('types' => array('regkey'), 'to_ids' => false, 'default_type' => 'regkey'); } } // check for CVE if (preg_match("#^cve-[0-9]{4}-[0-9]{4,9}$#i", $input)) return array('types' => array('vulnerability'), 'categories' => array('External analysis'), 'to_ids' => false, 'default_type' => 'vulnerability'); return false; } private function __resolveFilename($input) { if ((preg_match('/^.:/', $input) || strpos($input, '.') !=0)) return true; return false; } }