2021-04-22 09:46:10 +02:00
|
|
|
<?php
|
|
|
|
App::uses('AppModel', 'Model');
|
|
|
|
App::uses('RandomTool', 'Tools');
|
|
|
|
|
2021-04-29 11:41:02 +02:00
|
|
|
/**
|
|
|
|
* @property Attribute $Attribute
|
|
|
|
*/
|
2021-04-22 09:46:10 +02:00
|
|
|
class Correlation extends AppModel
|
|
|
|
{
|
2021-04-29 11:41:02 +02:00
|
|
|
/** Redis key of the sorted set that holds correlated values scored by their number of occurrences. */
const CACHE_NAME = 'misp:top_correlations';

/** Redis key that stores the Unix timestamp of the last top-correlation rebuild. */
const CACHE_AGE = 'misp:top_correlations_age';

/** @var array Each correlation row points to one attribute and one event. */
public $belongsTo = [
    'Attribute' => [
        'className' => 'Attribute',
        'foreignKey' => 'attribute_id',
    ],
    'Event' => [
        'className' => 'Event',
        'foreignKey' => 'event_id',
    ],
];

/**
 * Correlation exclusion patterns. Stays null until they are first read from Redis.
 * @var array|null
 */
private $exclusions;

/**
 * Use old schema with `date` and `info` fields.
 * @var bool
 */
private $oldSchema;

/**
 * Value of the `MISP.deadlock_avoidance` setting, read once in the constructor.
 * @var bool
 */
private $deadlockAvoidance;
|
|
|
|
|
2021-04-29 11:47:38 +02:00
|
|
|
/**
 * Builds the model and caches two values that are consulted whenever correlations are created.
 *
 * @param mixed $id Passed unchanged to the parent constructor.
 * @param mixed $table Passed unchanged to the parent constructor.
 * @param mixed $ds Passed unchanged to the parent constructor.
 */
public function __construct($id = false, $table = null, $ds = null)
{
    parent::__construct($id, $table, $ds);

    // The old table layout is recognised by the presence of a `date` column.
    $dateColumn = $this->schema('date');
    $this->oldSchema = ($dateColumn !== null);

    $this->deadlockAvoidance = Configure::read('MISP.deadlock_avoidance');
}
|
|
|
|
|
2021-04-22 09:46:10 +02:00
|
|
|
/**
 * Correlates a value, either through a queued background job or directly in the current process.
 *
 * @param mixed $value Value whose correlations should be generated.
 * @return mixed True once the background job is queued, otherwise the result of correlateValue().
 */
public function correlateValueRouter($value)
{
    // Without background jobs the work is done synchronously.
    if (!Configure::read('MISP.background_jobs')) {
        return $this->correlateValue($value);
    }

    /** @var Job $jobModel */
    $jobModel = ClassRegistry::init('Job');
    $jobId = $jobModel->createJob(
        'SYSTEM',
        Job::WORKER_DEFAULT,
        'correlateValue',
        $value,
        'Recorrelating'
    );

    $arguments = ['correlateValue', $value, $jobId];
    $this->getBackgroundJobsTool()->enqueue(
        BackgroundJobsTool::DEFAULT_QUEUE,
        BackgroundJobsTool::CMD_EVENT,
        $arguments,
        true,
        $jobId
    );

    return true;
}
|
|
|
|
|
2021-04-25 17:36:29 +02:00
|
|
|
/**
 * Builds the extra query conditions used by advanced (CIDR and ssdeep) correlation.
 *
 * @param array $a Attribute data, either bare or wrapped under an `Attribute` key.
 * @return array|false|null Conditions from cidrCorrelation() or ssdeepCorrelation(); null when the
 *                          attribute type has no advanced correlation (or the ssdeep extension is missing).
 */
private function __buildAdvancedCorrelationConditions($a)
{
    $attribute = isset($a['Attribute']) ? $a['Attribute'] : $a;

    $ipTypes = ['ip-src', 'ip-dst', 'ip-src|port', 'ip-dst|port'];
    if (in_array($attribute['type'], $ipTypes, true)) {
        return $this->cidrCorrelation($attribute);
    }

    // Fuzzy hash comparison is only possible when the ssdeep PHP extension is loaded.
    if ($attribute['type'] === 'ssdeep' && function_exists('ssdeep_fuzzy_compare')) {
        return $this->ssdeepCorrelation($attribute);
    }

    return null;
}
|
|
|
|
|
2021-04-25 17:36:29 +02:00
|
|
|
/**
 * Fetches the attributes that match the advanced (CIDR / ssdeep) conditions of the given attribute.
 *
 * @param array $correlatingAttribute Attribute to find advanced correlations for.
 * @return array Matching attributes with their event; empty when advanced correlations are
 *               disabled or no extra conditions apply.
 */
private function __addAdvancedCorrelations($correlatingAttribute)
{
    if (!Configure::read('MISP.enable_advanced_correlations')) {
        return [];
    }

    $advancedConditions = $this->__buildAdvancedCorrelationConditions($correlatingAttribute);
    if (empty($advancedConditions)) {
        return [];
    }

    $query = [
        'conditions' => [
            'AND' => $advancedConditions,
            'NOT' => [
                'Attribute.type' => Attribute::NON_CORRELATING_TYPES,
            ],
            'Attribute.disable_correlation' => 0,
            'Event.disable_correlation' => 0,
            'Attribute.deleted' => 0
        ],
        'recursive' => -1,
        'fields' => [
            'Attribute.event_id',
            'Attribute.id',
            'Attribute.distribution',
            'Attribute.sharing_group_id',
            'Attribute.value1',
            'Attribute.value2',
        ],
        'contain' => [
            'Event' => [
                'fields' => ['Event.id', 'Event.org_id', 'Event.distribution', 'Event.sharing_group_id']
            ]
        ],
        'order' => [],
    ];

    return $this->Attribute->find('all', $query);
}
|
|
|
|
|
|
|
|
/**
 * Finds every correlatable attribute whose value1 or value2 equals the given value.
 *
 * A value2 match is ignored for types that only correlate on their primary value. Deleted
 * attributes, non-correlating types and attributes or events with correlation disabled are excluded.
 *
 * @param mixed $value Value to look up.
 * @return array Matching attributes, each with its event data.
 */
private function __getMatchingAttributes($value)
{
    return $this->Attribute->find('all', [
        'conditions' => [
            'OR' => [
                'Attribute.value1' => $value,
                'AND' => [
                    'Attribute.value2' => $value,
                    'NOT' => ['Attribute.type' => Attribute::PRIMARY_ONLY_CORRELATING_TYPES]
                ]
            ],
            'NOT' => [
                'Attribute.type' => Attribute::NON_CORRELATING_TYPES,
            ],
            'Attribute.disable_correlation' => 0,
            'Event.disable_correlation' => 0,
            'Attribute.deleted' => 0
        ],
        'recursive' => -1,
        'fields' => [
            'Attribute.event_id',
            'Attribute.id',
            'Attribute.type',
            'Attribute.distribution',
            'Attribute.sharing_group_id',
            'Attribute.value1',
            'Attribute.value2',
        ],
        'contain' => [
            'Event' => [
                'fields' => ['Event.id', 'Event.org_id', 'Event.distribution', 'Event.sharing_group_id']
            ]
        ],
        'order' => [],
    ]);
}
|
|
|
|
|
|
|
|
/**
 * Appends one correlation row (from attribute $a towards attribute $b) to the given list.
 *
 * Nothing is added when both attributes belong to the same event. The row is keyed by column
 * name when deadlock avoidance is enabled and is a plain positional list otherwise.
 *
 * @param mixed $value Correlating value stored in the row.
 * @param array $a Source side; must provide `Attribute` and `Event` data.
 * @param array $b Target side; must provide `Attribute` and `Event` data.
 * @param array $correlations Rows collected so far.
 * @return array The list, including the new row if one was added.
 */
private function __addCorrelationEntry($value, $a, $b, $correlations)
{
    if ($a['Attribute']['event_id'] === $b['Attribute']['event_id']) {
        return $correlations;
    }

    // Values in the column order used by __saveCorrelations().
    $row = [
        $value,
        $a['Event']['id'],
        $a['Attribute']['id'],
        $b['Attribute']['event_id'],
        $b['Attribute']['id'],
        $b['Event']['org_id'],
        $b['Event']['distribution'],
        $b['Attribute']['distribution'],
        $b['Event']['sharing_group_id'],
        $b['Attribute']['sharing_group_id'],
    ];

    if ($this->deadlockAvoidance) {
        $columns = [
            'value',
            '1_event_id',
            '1_attribute_id',
            'event_id',
            'attribute_id',
            'org_id',
            'distribution',
            'a_distribution',
            'sharing_group_id',
            'a_sharing_group_id',
        ];
        $row = array_combine($columns, $row);
    }

    $correlations[] = $row;
    return $correlations;
}
|
|
|
|
|
|
|
|
/**
 * Generates and saves the correlations between all attributes that share the given value.
 *
 * @param mixed $value Value to correlate.
 * @param mixed $jobId Optional job ID used for progress reporting; ignored when the job does not exist.
 * @return mixed Result of __saveCorrelations().
 */
public function correlateValue($value, $jobId = false)
{
    $matches = $this->__getMatchingAttributes($value);
    $total = count($matches);
    $rows = [];

    if ($jobId) {
        if (empty($this->Job)) {
            $this->Job = ClassRegistry::init('Job');
        }
        $existingJob = $this->Job->find('first', [
            'recursive' => -1,
            'conditions' => ['id' => $jobId]
        ]);
        // Progress is only reported for jobs that exist.
        if (empty($existingJob)) {
            $jobId = false;
        }
    }

    foreach ($matches as $position => $source) {
        // Pair the attribute with every match; same-event pairs are dropped by __addCorrelationEntry().
        foreach ($matches as $target) {
            $rows = $this->__addCorrelationEntry($value, $source, $target, $rows);
        }

        $advancedMatches = $this->__addAdvancedCorrelations($source);
        if (!empty($advancedMatches)) {
            // Only the direction source -> advanced match is recorded here.
            foreach ($advancedMatches as $advancedMatch) {
                $rows = $this->__addCorrelationEntry($value, $source, $advancedMatch, $rows);
            }
        }

        if ($jobId && $position % 100 === 0) {
            $message = __('Correlating Attributes based on value. %s attributes correlated out of %s.', $position, $total);
            $this->Job->saveProgress($jobId, $message, floor(100 * $position / $total));
        }
    }

    return $this->__saveCorrelations($rows);
}
|
2021-04-22 09:46:10 +02:00
|
|
|
|
2021-04-25 17:36:29 +02:00
|
|
|
/**
 * Persists the prepared correlation rows.
 *
 * Rows are keyed by column name and stored with saveMany() when deadlock avoidance is enabled.
 * Otherwise they are positional lists written with a single multi-row insert.
 *
 * @param array $correlations Rows produced by __addCorrelationEntry().
 * @return mixed True when there is nothing to save, otherwise the result of the save call.
 */
private function __saveCorrelations($correlations)
{
    if (empty($correlations)) {
        return true;
    }

    $fields = [
        'value', '1_event_id', '1_attribute_id', 'event_id', 'attribute_id', 'org_id',
        'distribution', 'a_distribution', 'sharing_group_id', 'a_sharing_group_id',
    ];

    // Older MISP instances also have `date` and `info` columns in the correlations table, holding
    // the date and title of the correlated event. That information can be fetched from the Event
    // table, so the columns are not needed. They are declared NOT NULL though, and dropping them
    // can take a long time on big instances, so on the old schema they are filled with dummy
    // values. New installations no longer have these columns.
    if ($this->oldSchema) {
        $fields[] = 'date';
        $fields[] = 'info';

        foreach ($correlations as $index => $row) {
            if ($this->deadlockAvoidance) {
                $row['date'] = '1000-01-01'; // Dummy value
                $row['info'] = ''; // Dummy value
            } else {
                $row[] = '1000-01-01'; // Dummy value
                $row[] = ''; // Dummy value
            }
            $correlations[$index] = $row;
        }
    }

    if ($this->deadlockAvoidance) {
        return $this->saveMany($correlations, [
            'atomic' => false,
            'callbacks' => false,
            'deep' => false,
            'validate' => false,
            'fieldList' => $fields
        ]);
    }

    return $this->getDataSource()->insertMulti('correlations', $fields, $correlations);
}
|
2021-04-25 17:36:29 +02:00
|
|
|
|
|
|
|
/**
 * Removes the stored correlations of an attribute that is about to be updated.
 *
 * Only an attribute that already has an ID can have stored correlations, so nothing is done for
 * a new attribute. For ssdeep attributes the fuzzy-hash data of that attribute is purged as well.
 *
 * @param array $attribute Attribute data; `type` is required, `id` is present on updates only.
 * @return void
 */
public function beforeSaveCorrelation($attribute)
{
    // No ID means the attribute is being created: there is nothing of its own to clean up.
    // This also keeps purge() from being called with a null attribute ID.
    // NOTE(review): how FuzzyCorrelateSsdeep::purge() treats a null ID is not visible here - confirm.
    if (!isset($attribute['id'])) {
        return;
    }

    // DELETE FROM correlations WHERE 1_attribute_id = $id OR attribute_id = $id
    $this->deleteAll([
        'OR' => [
            'Correlation.1_attribute_id' => $attribute['id'],
            'Correlation.attribute_id' => $attribute['id']
        ],
    ], false);

    if ($attribute['type'] === 'ssdeep') {
        $this->FuzzyCorrelateSsdeep = ClassRegistry::init('FuzzyCorrelateSsdeep');
        $this->FuzzyCorrelateSsdeep->purge(null, $attribute['id']);
    }
}
|
|
|
|
|
|
|
|
/**
 * Creates the correlations of a freshly saved attribute with attributes of other events.
 *
 * Nothing is correlated when correlation is disabled (globally, on the attribute or on its
 * event), when the attribute type never correlates, or when the value matches an exclusion.
 *
 * @param array $a Saved attribute data (bare, not wrapped under an `Attribute` key).
 * @param bool $full When true, only attributes with a higher ID than $a are matched.
 * @param array|bool $event Event of the attribute; looked up by `event_id` when not provided.
 * @return mixed True when nothing had to be correlated, otherwise the result of __saveCorrelations().
 */
public function afterSaveCorrelation($a, $full = false, $event = false)
{
    if (!empty($a['disable_correlation']) || Configure::read('MISP.completely_disable_correlation')) {
        return true;
    }
    // Don't do any correlation if the type is a non correlating type
    if (in_array($a['type'], Attribute::NON_CORRELATING_TYPES, true)) {
        return true;
    }
    if ($this->__preventExcludedCorrelations($a)) {
        return true;
    }

    if (!$event) {
        $event = $this->Attribute->Event->find('first', [
            'recursive' => -1,
            'fields' => ['Event.distribution', 'Event.id', 'Event.org_id', 'Event.sharing_group_id', 'Event.disable_correlation'],
            'conditions' => ['id' => $a['event_id']],
            'order' => [],
        ]);
    }
    if (!empty($event['Event']['disable_correlation'])) {
        return true;
    }

    // Additional conditions for the advanced (CIDR / ssdeep) correlations
    $extraConditions = $this->__buildAdvancedCorrelationConditions($a);

    // value2 only takes part in correlation for types that are not primary-value-only.
    $correlatingValues = [$a['value1']];
    if (!empty($a['value2']) && !in_array($a['type'], Attribute::PRIMARY_ONLY_CORRELATING_TYPES, true)) {
        $correlatingValues[] = $a['value2'];
    }

    $correlations = [];
    foreach ($correlatingValues as $cV) {
        $conditions = [
            'OR' => [
                'Attribute.value1' => $cV,
                'AND' => [
                    'Attribute.value2' => $cV,
                    'NOT' => ['Attribute.type' => Attribute::PRIMARY_ONLY_CORRELATING_TYPES]
                ]
            ],
            'NOT' => [
                'Attribute.event_id' => $a['event_id'],
                'Attribute.type' => Attribute::NON_CORRELATING_TYPES,
            ],
            'Attribute.disable_correlation' => 0,
            'Event.disable_correlation' => 0,
            'Attribute.deleted' => 0
        ];
        if (!empty($extraConditions)) {
            $conditions['OR'][] = $extraConditions;
        }
        if ($full) {
            $conditions['Attribute.id > '] = $a['id'];
        }

        $matches = $this->Attribute->find('all', [
            'conditions' => $conditions,
            'recursive' => -1,
            'fields' => [
                'Attribute.event_id', 'Attribute.id', 'Attribute.distribution', 'Attribute.sharing_group_id',
                'Attribute.value1', 'Attribute.value2'
            ],
            'contain' => ['Event.id', 'Event.org_id', 'Event.distribution', 'Event.sharing_group_id'],
            'order' => []
        ]);

        foreach ($matches as $corr) {
            $saved = ['Attribute' => $a, 'Event' => $event['Event']];
            // Both directions store the value that was searched for. The matched attribute may
            // hold it in either value1 or value2, so picking one of its fields by position
            // could store a value that is unrelated to the match.
            $correlations = $this->__addCorrelationEntry($cV, $saved, $corr, $correlations);
            $correlations = $this->__addCorrelationEntry($cV, $corr, $saved, $correlations);
        }
    }

    return $this->__saveCorrelations($correlations);
}
|
|
|
|
|
2021-08-12 15:05:39 +02:00
|
|
|
/**
 * Checks the attribute value against the correlation exclusion list kept in Redis.
 *
 * The checked value is value1, followed by `|` and value2 when value2 is not empty. A `%` at
 * the start and/or end of an exclusion acts as a wildcard (ends with / starts with / contains);
 * without it the value has to be identical to the exclusion.
 *
 * @param array $a Attribute data with `value1` and optionally `value2`.
 * @return bool True if attribute value is excluded; false also when Redis is not available.
 */
private function __preventExcludedCorrelations($a)
{
    if ($this->exclusions === null) {
        // First use: load the list once and keep it on the instance.
        try {
            $redis = $this->setupRedisWithException();
            $this->exclusions = $redis->sMembers('misp:correlation_exclusions');
        } catch (Exception $e) {
            return false;
        }
    } else if (empty($this->exclusions)) {
        return false;
    }

    $value = $a['value1'];
    if (!empty($a['value2'])) {
        $value .= '|' . $a['value2'];
    }

    foreach ($this->exclusions as $pattern) {
        if (empty($pattern)) {
            continue;
        }

        $wildcardStart = $pattern[0] === '%';
        $wildcardEnd = substr($pattern, -1) === '%';

        if ($wildcardStart && $wildcardEnd) {
            // %text% - value contains text
            $needle = substr($pattern, 1, -1);
            $excluded = strpos($value, $needle) !== false;
        } else if ($wildcardStart) {
            // %text - value ends with text
            $needle = substr($pattern, 1);
            $excluded = substr($value, -strlen($needle)) === $needle;
        } else if ($wildcardEnd) {
            // text% - value starts with text
            $needle = substr($pattern, 0, -1);
            $excluded = substr($value, 0, strlen($needle)) === $needle;
        } else {
            $excluded = $value === $pattern;
        }

        if ($excluded) {
            return true;
        }
    }

    return false;
}
|
|
|
|
|
2021-04-29 11:41:02 +02:00
|
|
|
/**
 * Builds the condition that selects ssdeep attributes similar to the given one.
 *
 * Candidates come from FuzzyCorrelateSsdeep::query_ssdeep_chunks(). A candidate is kept when its
 * ssdeep_fuzzy_compare() score reaches `MISP.ssdeep_correlation_threshold` (40 when not set).
 *
 * @param array $a Attribute data with `value1` (the ssdeep hash) and `id`.
 * @return array|false Condition on `Attribute.id`, or false when there are no candidates.
 */
private function ssdeepCorrelation($a)
{
    if (!isset($this->FuzzyCorrelateSsdeep)) {
        $this->FuzzyCorrelateSsdeep = ClassRegistry::init('FuzzyCorrelateSsdeep');
    }

    $candidateIds = $this->FuzzyCorrelateSsdeep->query_ssdeep_chunks($a['value1'], $a['id']);
    if (empty($candidateIds)) {
        return false;
    }

    // attribute ID => ssdeep hash
    $hashesById = $this->Attribute->find('list', [
        'recursive' => -1,
        'conditions' => [
            'Attribute.type' => 'ssdeep',
            'Attribute.id' => $candidateIds
        ],
        'fields' => ['Attribute.id', 'Attribute.value1']
    ]);
    $minimumScore = Configure::read('MISP.ssdeep_correlation_threshold') ?: 40;

    $similarIds = [];
    foreach ($hashesById as $candidateId => $candidateHash) {
        $score = ssdeep_fuzzy_compare($a['value1'], $candidateHash);
        if ($score >= $minimumScore) {
            $similarIds[] = $candidateId;
        }
    }

    return ['Attribute.id' => $similarIds];
}
|
|
|
|
|
2021-04-29 11:41:02 +02:00
|
|
|
/**
 * Builds the condition that links an IP attribute with CIDR attributes and vice versa.
 *
 * When value1 is a CIDR block, the plain IP values that fall inside it are collected. When
 * value1 is a plain IP, the known CIDR blocks that contain it are collected.
 *
 * @param array $a Attribute data; `value1` holds the IP address or CIDR block.
 * @return array OR-condition on value1/value2 for the collected values, empty when none were found.
 */
private function cidrCorrelation($a)
{
    $collected = [];
    $ip = $a['value1'];

    if (strpos($ip, '/') === false) {
        // Plain IP: look for the CIDR blocks of the same IP version that contain it.
        $version = filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) ? 4 : 6;
        foreach ($this->Attribute->getSetCIDRList() as $cidr) {
            // A dot in the block marks it as IPv4.
            if (strpos($cidr, '.') !== false) {
                $contains = $version === 4 && $this->__ipv4InCidr($ip, $cidr);
            } else {
                $contains = $version === 6 && $this->__ipv6InCidr($ip, $cidr);
            }
            if ($contains) {
                $collected[] = $cidr;
            }
        }
    } else {
        // CIDR block: look for the plain IPs of the same IP version inside it.
        list($networkIp, $mask) = explode('/', $ip);
        $version = filter_var($networkIp, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) ? 4 : 6;

        $conditions = [
            'type' => ['ip-src', 'ip-dst', 'ip-src|port', 'ip-dst|port'],
            'value1 NOT LIKE' => '%/%', // do not return CIDR, just plain IPs
            'disable_correlation' => 0,
            'deleted' => 0,
        ];

        $datasource = $this->getDataSource()->config['datasource'];
        if (in_array($datasource, ['Database/Mysql', 'Database/MysqlObserver'])) {
            // Massive speed up: the database narrows the candidates instead of PHP testing all of
            // them. The functions used here are only supported by MySQL.
            if ($version === 4) {
                $hostBits = 32 - $mask;
                $firstIp = ip2long($networkIp) & (-1 << $hostBits);
                $lastIp = $firstIp + pow(2, $hostBits) - 1;
                // Just fetch IP addresses that fit in the CIDR range.
                $conditions['INET_ATON(value1) BETWEEN ? AND ?'] = [$firstIp, $lastIp];

                // Additionally restrict to values starting with the fixed octets, which is fast
                // because value1 is indexed. Only possible for masks of at least 8 bits.
                if ($mask >= 8) {
                    $octets = array_slice(explode('.', $networkIp), 0, intval($mask / 8));
                    $conditions['value1 LIKE'] = implode('.', $octets) . '%';
                }
            } else {
                $conditions[] = 'IS_IPV6(value1)';
                // Same prefix restriction for IPv6, per fixed 16-bit group.
                if ($mask >= 16) {
                    $groups = array_slice(explode(':', rtrim($networkIp, ':')), 0, intval($mask / 16));
                    $conditions['value1 LIKE'] = implode(':', $groups) . '%';
                }
            }
        }

        $candidates = $this->Attribute->find('column', [
            'conditions' => $conditions,
            'fields' => ['Attribute.value1'],
            'unique' => true,
            'order' => false,
        ]);

        foreach ($candidates as $candidate) {
            $candidateVersion = filter_var($candidate, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) ? 4 : 6;
            if ($candidateVersion !== $version) {
                continue;
            }
            $inside = $version === 4
                ? $this->__ipv4InCidr($candidate, $ip)
                : $this->__ipv6InCidr($candidate, $ip);
            if ($inside) {
                $collected[] = $candidate;
            }
        }
    }

    if (empty($collected)) {
        return [];
    }

    return ['OR' => [
        'Attribute.value1' => $collected,
        'Attribute.value2' => $collected
    ]];
}
|
|
|
|
|
|
|
|
/**
 * Tells whether an IPv4 address lies inside a CIDR block.
 *
 * Based on Alnitak's solution from
 * http://stackoverflow.com/questions/594112/matching-an-ip-to-a-cidr-mask-in-php5
 *
 * @param string $ip IPv4 address in dotted notation.
 * @param string $cidr Block in `address/bits` notation, bits between 0 and 32.
 * @return bool True when $ip is inside $cidr; false also when either argument is malformed.
 */
private function __ipv4InCidr($ip, $cidr)
{
    $parts = explode('/', $cidr, 2);
    // The prefix length must be a plain number no larger than 32, otherwise the shift below is invalid.
    if (count($parts) !== 2 || !ctype_digit($parts[1]) || (int)$parts[1] > 32) {
        return false;
    }
    $bits = (int)$parts[1];

    $ipLong = ip2long($ip);
    $subnetLong = ip2long($parts[0]);
    // ip2long() returns false for unparsable input; false would act as 0 in the bit operations
    // and wrongly match blocks such as 0.0.0.0/8.
    if ($ipLong === false || $subnetLong === false) {
        return false;
    }

    $mask = -1 << (32 - $bits);
    // The subnet is masked too, in case the supplied subnet wasn't correctly aligned.
    return ($ipLong & $mask) === ($subnetLong & $mask);
}
|
|
|
|
|
|
|
|
/**
 * Tells whether an IPv6 address lies inside a CIDR block.
 *
 * Using solution from
 * https://github.com/symfony/symfony/blob/master/src/Symfony/Component/HttpFoundation/IpUtils.php
 *
 * @param string $ip IPv6 address.
 * @param string $cidr Block in `address/bits` notation, bits between 0 and 128.
 * @return bool True when $ip is inside $cidr; false also when either argument is malformed.
 */
private function __ipv6InCidr($ip, $cidr)
{
    $parts = explode('/', $cidr, 2);
    // More than 128 bits would read past the eight 16-bit groups of an address.
    if (count($parts) !== 2 || !ctype_digit($parts[1]) || (int)$parts[1] > 128) {
        return false;
    }
    $address = $parts[0];
    $netmask = (int)$parts[1];

    // Validate first: inet_pton() returns false for anything that is not an address and
    // unpack() cannot work with that.
    if (
        filter_var($address, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) === false ||
        filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV6) === false
    ) {
        return false;
    }

    // 1-based lists of the eight 16-bit groups of each address
    $bytesAddr = unpack('n*', inet_pton($address));
    $bytesTest = unpack('n*', inet_pton($ip));

    for ($i = 1, $ceil = (int)ceil($netmask / 16); $i <= $ceil; ++$i) {
        // Number of prefix bits that fall into this group (16 for every group but the last one)
        $left = min(16, $netmask - 16 * ($i - 1));
        $mask = ~(0xffff >> $left) & 0xffff;
        if (($bytesAddr[$i] & $mask) !== ($bytesTest[$i] & $mask)) {
            return false;
        }
    }

    return true;
}
|
2021-04-27 00:40:40 +02:00
|
|
|
|
2021-04-30 15:56:07 +02:00
|
|
|
/**
 * Rebuilds the top correlation list, in a background job when background jobs are enabled.
 *
 * @return int|bool ID of the queued job, or the result of generateTopCorrelations() when run directly.
 * @throws Exception
 */
public function generateTopCorrelationsRouter()
{
    // Without background jobs the list is rebuilt synchronously.
    if (!Configure::read('MISP.background_jobs')) {
        return $this->generateTopCorrelations();
    }

    /** @var Job $jobModel */
    $jobModel = ClassRegistry::init('Job');
    $jobId = $jobModel->createJob(
        'SYSTEM',
        Job::WORKER_DEFAULT,
        'generateTopCorrelations',
        '',
        'Starting generation of top correlations.'
    );

    $arguments = ['generateTopCorrelations', $jobId];
    $this->getBackgroundJobsTool()->enqueue(
        BackgroundJobsTool::DEFAULT_QUEUE,
        BackgroundJobsTool::CMD_EVENT,
        $arguments,
        true,
        $jobId
    );

    return $jobId;
}
|
|
|
|
|
|
|
|
/**
 * Rebuilds the Redis sorted set that counts how often each value occurs in the correlations table.
 *
 * The table is read in ID ranges of one million rows. For every range the occurrences per value
 * are counted and added to the score of that value in Redis.
 *
 * @param mixed $jobId Optional job ID used for progress reporting; ignored when the job does not exist.
 * @return bool False when the maximum ID could not be fetched, true otherwise.
 * @throws NotFoundException When Redis is not available.
 */
public function generateTopCorrelations($jobId = false)
{
    try {
        $redis = $this->setupRedisWithException();
    } catch (Exception $e) {
        throw new NotFoundException(__('No redis connection found.'));
    }

    $maxIdRow = $this->find('first', [
        'fields' => ['MAX(id) AS max_id'],
        'recursive' => -1,
    ]);
    if (empty($maxIdRow)) {
        return false;
    }

    if ($jobId) {
        if (empty($this->Job)) {
            $this->Job = ClassRegistry::init('Job');
        }
        $existingJob = $this->Job->find('first', [
            'recursive' => -1,
            'conditions' => ['id' => $jobId]
        ]);
        // Progress is only reported for jobs that exist.
        if (empty($existingJob)) {
            $jobId = false;
        }
    }
    $maxId = $maxIdRow[0]['max_id'];

    // Start from an empty list and remember when it was generated.
    $redis->del(self::CACHE_NAME);
    $redis->set(self::CACHE_AGE, time());

    $chunkSize = 1000000;
    $maxPage = ceil($maxId / $chunkSize);
    for ($page = 0; $page < $maxPage; $page++) {
        $lowerId = $page * $chunkSize;
        $values = $this->find('column', [
            'fields' => ['value'],
            'conditions' => [
                'id >' => $lowerId,
                'id <=' => $lowerId + $chunkSize
            ],
            'callbacks' => false, // when callbacks are enabled, memory is leaked
        ]);
        $fetched = count($values);
        $occurrences = array_count_values($values);

        // Send all score increments of this range to Redis in one batch.
        $pipeline = $redis->pipeline();
        foreach ($occurrences as $value => $times) {
            $pipeline->zadd(self::CACHE_NAME, ['INCR'], $times, $value);
        }
        $pipeline->exec();

        if ($jobId) {
            $message = __('Generating top correlations. Processed %s IDs.', $lowerId + $fetched);
            $this->Job->saveProgress($jobId, $message, floor(100 * $page / $maxPage));
        }
    }

    return true;
}
|
|
|
|
|
2021-04-30 15:56:07 +02:00
|
|
|
/**
 * Returns one page of the most frequently correlated values, highest count first.
 *
 * @param array $query Must contain `limit` (page size) and `page` (1-based page number).
 * @return array|bool Rows with the `value`, its `count` and whether the value is `excluded`
 *                    from correlation; false when Redis is not available.
 */
public function findTop(array $query)
{
    try {
        $redis = $this->setupRedisWithException();
    } catch (Exception $e) {
        return false;
    }

    // Both bounds of zRevRange are inclusive, so the last index of a page is one less than
    // the first index of the next page.
    $start = $query['limit'] * ($query['page'] - 1);
    $end = $query['limit'] * $query['page'] - 1;
    $list = $redis->zRevRange(self::CACHE_NAME, $start, $end, true);

    $results = [];
    foreach ($list as $value => $count) {
        $results[] = [
            'Correlation' => [
                'value' => $value,
                'count' => $count,
                'excluded' => $this->__preventExcludedCorrelations(['value1' => $value]),
            ]
        ];
    }
    return $results;
}
|
|
|
|
|
|
|
|
/**
 * Returns the value stored under the cache-age key, which generateTopCorrelations() sets to the
 * Unix timestamp of the rebuild.
 *
 * @return mixed Whatever Redis returns for the key; false when Redis is not available.
 */
public function getTopTime()
{
    $redis = null;
    try {
        $redis = $this->setupRedisWithException();
    } catch (Exception $e) {
        // No Redis connection, so the generation time is unknown.
    }

    if ($redis === null) {
        return false;
    }
    return $redis->get(self::CACHE_AGE);
}
|
2021-04-22 09:46:10 +02:00
|
|
|
}
|