From c042a2a66f1137c7ecd9b431c8c00c205e4ff6c6 Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Tue, 11 Feb 2020 15:50:56 +0100
Subject: [PATCH] chg: [MISP import] import files obj: decoded + screenshot

---
 bin/packages/Correlation.py | 510 ++++++++++++++++++++++++++++++++++++
 1 file changed, 510 insertions(+)
 create mode 100755 bin/packages/Correlation.py

diff --git a/bin/packages/Correlation.py b/bin/packages/Correlation.py
new file mode 100755
index 00000000..26558ff0
--- /dev/null
+++ b/bin/packages/Correlation.py
@@ -0,0 +1,510 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import sys
+import redis
+
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
+import ConfigLoader
+
+sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
+import Date
+#import Tag
+
+config_loader = ConfigLoader.ConfigLoader()
+r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+config_loader = None
+
+def get_all_correlation_objects():
+    '''
+    Return a list of all correlated objects
+    '''
+    return ['domain', 'paste']
+
+class Correlation(object):
+    '''
+    Correlation between objects of a given family (correlation_name)
+    and the items (pastes) and domains where they were seen.
+
+    All the data is read from and written to r_serv_metadata.
+    '''
+
+    def __init__(self, correlation_name, all_correlation_types):
+        '''
+        :param correlation_name: name inserted in every key used by this object
+        :type correlation_name: str
+        :param all_correlation_types: all the valid correlation types (subtypes)
+        :type all_correlation_types: list
+        '''
+        self.correlation_name = correlation_name
+        self.all_correlation_types = all_correlation_types
+
+    def _exist_corelation_field(self, correlation_type, field_name, item_type='paste'):
+        '''
+        Check if a correlation value is linked to at least one item or domain.
+
+        :param item_type: 'paste' checks the items set, any other value checks the domains set
+        :type item_type: str
+        '''
+        if item_type == 'paste':
+            return r_serv_metadata.exists('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
+        else:
+            return r_serv_metadata.exists('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
+
+    def exist_correlation(self, subtype, obj_id):
+        '''
+        Check if an object is registered in the sorted set of its subtype.
+
+        :rtype: bool
+        '''
+        res = r_serv_metadata.zscore('{}_all:{}'.format(self.correlation_name, subtype), obj_id)
+        # save_correlation() registers new objects with a score of 0:
+        # check the membership (None if missing), not the truthiness of the score
+        return res is not None
+
+    def _get_items(self, correlation_type, field_name):
+        '''
+        Return all the items linked to a correlation value.
+
+        :rtype: list
+        '''
+        res = r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
+        if res:
+            return list(res)
+        else:
+            return []
+
+    def get_correlation_first_seen(self, subtype, obj_id, r_int=False):
+        '''
+        Return the first_seen date of an object.
+
+        :param r_int: return the date as an int, 99999999 if there is no date
+        :type r_int: bool
+        '''
+        res = r_serv_metadata.hget('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id), 'first_seen')
+        if r_int:
+            if res:
+                return int(res)
+            else:
+                return 99999999
+        else:
+            return res
+
+    def get_correlation_last_seen(self, subtype, obj_id, r_int=False):
+        '''
+        Return the last_seen date of an object.
+
+        :param r_int: return the date as an int, 0 if there is no date
+        :type r_int: bool
+        '''
+        res = r_serv_metadata.hget('{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id), 'last_seen')
+        if r_int:
+            if res:
+                return int(res)
+            else:
+                return 0
+        else:
+            return res
+
+    def _get_metadata(self, subtype, obj_id):
+        '''
+        Return the raw metadata of an object: first_seen, last_seen
+        and nb_seen (number of items linked to this object).
+
+        :rtype: dict
+        '''
+        meta_dict = {}
+        meta_dict['first_seen'] = self.get_correlation_first_seen(subtype, obj_id)
+        meta_dict['last_seen'] = self.get_correlation_last_seen(subtype, obj_id)
+        meta_dict['nb_seen'] = r_serv_metadata.scard('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id))
+        return meta_dict
+
+    def get_metadata(self, correlation_type, field_name, date_format='str_date'):
+        '''
+        Return the metadata of an object.
+
+        :param date_format: 'str_date' joins the [0:4], [4:6] and [6:8] slices of each date with '/'
+                            (presumably YYYYMMDD -> YYYY/MM/DD), any other value keeps the stored dates
+        :type date_format: str
+
+        :rtype: dict
+        '''
+        meta_dict = self._get_metadata(correlation_type, field_name)
+        if date_format == "str_date":
+            if meta_dict['first_seen']:
+                meta_dict['first_seen'] = '{}/{}/{}'.format(meta_dict['first_seen'][0:4], meta_dict['first_seen'][4:6], meta_dict['first_seen'][6:8])
+            if meta_dict['last_seen']:
+                meta_dict['last_seen'] = '{}/{}/{}'.format(meta_dict['last_seen'][0:4], meta_dict['last_seen'][4:6], meta_dict['last_seen'][6:8])
+        return meta_dict
+
+    def get_nb_object_seen_by_date(self, correlation_type, field_name, date_day):
+        '''
+        Return the number of times an object was seen on a given day.
+
+        :rtype: int
+        '''
+        nb = r_serv_metadata.hget('{}:{}:{}'.format(self.correlation_name, correlation_type, date_day), field_name)
+        if nb is None:
+            return 0
+        else:
+            return int(nb)
+
+    def get_list_nb_previous_correlation_object(self, correlation_type, field_name, numDay):
+        '''
+        Return the daily counters of an object,
+        one per date returned by Date.get_previous_date_list(numDay).
+
+        :rtype: list
+        '''
+        nb_previous_correlation = []
+        for date_day in Date.get_previous_date_list(numDay):
+            nb_previous_correlation.append(self.get_nb_object_seen_by_date(correlation_type, field_name, date_day))
+        return nb_previous_correlation
+
+    def _get_correlation_by_date(self, correlation_type, date):
+        '''
+        Return all the objects of a given type seen on a given day.
+        '''
+        return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date))
+
+    def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'):
+        '''
+        Validate a correlation request.
+
+        :return: an (error dict, status code) tuple if the request is invalid, else None
+        '''
+        if not request_dict:
+            return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)
+
+        field_name = request_dict.get(correlation_type, None)
+        if not field_name:
+            return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 )
+        if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type):
+            return ( {'status': 'error', 'reason': 'Item not found'}, 404 )
+
+    def get_correlation(self, request_dict, correlation_type, field_name):
+        '''
+        Build the response of a correlation request.
+
+        :param request_dict: request, the 'items' and 'metadata' keys select the returned fields
+        :type request_dict: dict
+
+        :return: a (response dict, 200) tuple
+        :rtype: tuple
+        '''
+        dict_resp = {}
+
+        if request_dict.get('items'):
+            dict_resp['items'] = self._get_items(correlation_type, field_name)
+
+        if request_dict.get('metadata'):
+            dict_resp['metadata'] = self._get_metadata(correlation_type, field_name)
+
+        dict_resp[correlation_type] = field_name
+
+        return (dict_resp, 200)
+
+    def get_all_correlation_types(self):
+        '''
+        Get all correlation types
+
+        :return: A list of all the correlation types
+        :rtype: list
+        '''
+        return self.all_correlation_types
+
+    def get_correlation_obj_type(self):
+        '''
+        Return the object type of this correlation:
+        'pgp' if correlation_name is 'pgpdump', else 'cryptocurrency'.
+        '''
+        if self.correlation_name=='pgpdump':
+            return 'pgp'
+        else:
+            return 'cryptocurrency'
+
+    def sanythise_correlation_types(self, correlation_types, r_boolean=False):
+        '''
+        Check if all correlation types in the list are valid.
+
+        :param correlation_types: list of correlation type
+        :type correlation_types: list
+        :param r_boolean: return a boolean instead of a list
+        :type r_boolean: bool
+
+        :return: If a type is invalid, return the full list of correlation types else return the provided list.
+                 If r_boolean is set, return True if all the types are valid else False
+        :rtype: list or bool
+        '''
+        all_correlation_types = self.get_all_correlation_types()
+        if correlation_types is None:
+            is_valid = False
+        else:
+            is_valid = all(correl in all_correlation_types for correl in correlation_types)
+        if r_boolean:
+            return is_valid
+        if is_valid:
+            return correlation_types
+        return all_correlation_types
+
+
+    def _get_domain_correlation_obj(self, domain, correlation_type):
+        '''
+        Return correlation of a given domain.
+
+        :param domain: crawled domain
+        :type domain: str
+        :param correlation_type: correlation type
+        :type correlation_type: str
+
+        :return: a list of correlation
+        :rtype: list
+        '''
+        res = r_serv_metadata.smembers('domain_{}_{}:{}'.format(self.correlation_name, correlation_type, domain))
+        if res:
+            return list(res)
+        else:
+            return []
+
+    def get_domain_correlation_dict(self, domain, correlation_type=None, get_nb=False):
+        '''
+        Return all correlation of a given domain.
+
+        :param domain: crawled domain
+        :param correlation_type: list of correlation types
+        :type correlation_type: list, optional
+        :param get_nb: add a 'nb' key: total number of correlations returned
+        :type get_nb: bool
+
+        :return: a dictionary of all the requested correlations
+        :rtype: dict
+        '''
+        correlation_type = self.sanythise_correlation_types(correlation_type)
+        dict_correlation = {}
+        for correl in correlation_type:
+            res = self._get_domain_correlation_obj(domain, correl)
+            if res:
+                dict_correlation[correl] = res
+                if get_nb:
+                    dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl])
+        return dict_correlation
+
+    def _get_correlation_obj_domain(self, field_name, correlation_type):
+        '''
+        Return all domains that contain this correlation.
+
+        :param field_name: field name
+        :type field_name: str
+        :param correlation_type: correlation type
+        :type correlation_type: str
+
+        :return: a list of domains
+        :rtype: list
+        '''
+        res = r_serv_metadata.smembers('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
+        if res:
+            return list(res)
+        else:
+            return []
+
+    def get_correlation_obj_domain(self, field_name, correlation_type=None):
+        '''
+        Return all domain correlation of a given correlation_value.
+
+        :param field_name: field_name
+        :param correlation_type: list of correlation types
+        :type correlation_type: list, optional
+
+        :return: the domains of the first correlation type with a result, else an empty list
+        :rtype: list
+        '''
+        correlation_type = self.sanythise_correlation_types(correlation_type)
+        for correl in correlation_type:
+            res = self._get_correlation_obj_domain(field_name, correl)
+            if res:
+                return res
+        return []
+
+
+
+    def _get_item_correlation_obj(self, item_id, correlation_type):
+        '''
+        Return correlation of a given item id.
+
+        :param item_id: item id
+        :type item_id: str
+        :param correlation_type: correlation type
+        :type correlation_type: str
+
+        :return: a list of correlation
+        :rtype: list
+        '''
+        res = r_serv_metadata.smembers('item_{}_{}:{}'.format(self.correlation_name, correlation_type, item_id))
+        if res:
+            return list(res)
+        else:
+            return []
+
+    def get_item_correlation_dict(self, item_id, correlation_type=None, get_nb=False):
+        '''
+        Return all correlation of a given item id.
+
+        :param item_id: item id
+        :param correlation_type: list of correlation types
+        :type correlation_type: list, optional
+        :param get_nb: add a 'nb' key: total number of correlations returned
+        :type get_nb: bool
+
+        :return: a dictionary of all the requested correlations
+        :rtype: dict
+        '''
+        correlation_type = self.sanythise_correlation_types(correlation_type)
+        dict_correlation = {}
+        for correl in correlation_type:
+            res = self._get_item_correlation_obj(item_id, correl)
+            if res:
+                dict_correlation[correl] = res
+                if get_nb:
+                    dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl])
+        return dict_correlation
+
+
+    def get_correlation_all_object(self, correlation_type, correlation_value, correlation_objects=None):
+        '''
+        Return all the objects correlated to a correlation value.
+
+        :param correlation_type: correlation type
+        :type correlation_type: str
+        :param correlation_value: correlation value
+        :param correlation_objects: objects to search ('paste', 'domain'), all the objects if None
+        :type correlation_objects: list, optional
+
+        :return: a dictionary {object: list of correlated ids}, without the empty results
+        :rtype: dict
+        '''
+        if correlation_objects is None:
+            correlation_objects = get_all_correlation_objects()
+        correlation_obj = {}
+        for correlation_object in correlation_objects:
+            if correlation_object == 'paste':
+                res = self._get_items(correlation_type, correlation_value)
+            elif correlation_object == 'domain':
+                # get_correlation_obj_domain expects a list of types
+                res = self.get_correlation_obj_domain(correlation_value, correlation_type=[correlation_type])
+            else:
+                res = None
+            if res:
+                correlation_obj[correlation_object] = res
+        return correlation_obj
+
+    def update_correlation_daterange(self, subtype, obj_id, date):
+        '''
+        Extend the first_seen/last_seen range of an object to include a date.
+
+        :param date: date, converted with int()
+        '''
+        date = int(date)
+        metadata_key = '{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id)
+        # obj_id doesn't exist
+        if not r_serv_metadata.exists(metadata_key):
+            r_serv_metadata.hset(metadata_key, 'first_seen', date)
+            r_serv_metadata.hset(metadata_key, 'last_seen', date)
+        else:
+            first_seen = self.get_correlation_first_seen(subtype, obj_id, r_int=True)
+            last_seen = self.get_correlation_last_seen(subtype, obj_id, r_int=True)
+            if date < first_seen:
+                r_serv_metadata.hset(metadata_key, 'first_seen', date)
+            if date > last_seen:
+                r_serv_metadata.hset(metadata_key, 'last_seen', date)
+
+    def save_item_correlation(self, subtype, date, obj_id, item_id, item_date):
+        '''
+        Save the correlation between an object and an item.
+
+        :param date: unused, the date range and the daily counter use item_date
+        '''
+        # NOTE(review): Item was referenced without being imported in this module.
+        # Presumably the project's Item module, imported here (not at module level)
+        # in case Item itself imports a Correlation subclass - to be confirmed
+        import Item
+
+        self.update_correlation_daterange(subtype, obj_id, item_date)
+
+        # global set
+        r_serv_metadata.sadd('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), item_id)
+
+        # daily
+        r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, 1)
+
+        # all type
+        # keywords: value/amount positional order differs between redis-py 2.x and 3.x (assumes a redis-py client)
+        r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), value=obj_id, amount=1)
+
+        ## object_metadata
+        # item
+        r_serv_metadata.sadd('item_{}_{}:{}'.format(self.correlation_name, subtype, item_id), obj_id)
+
+        # domain
+        if Item.is_crawled(item_id):
+            domain = Item.get_item_domain(item_id)
+            self.save_domain_correlation(domain, subtype, obj_id)
+
+    def save_domain_correlation(self, domain, subtype, obj_id):
+        '''
+        Save the correlation between an object and a domain (both directions).
+        '''
+        r_serv_metadata.sadd('domain_{}_{}:{}'.format(self.correlation_name, subtype, domain), obj_id)
+        r_serv_metadata.sadd('set_domain_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), domain)
+
+
+    def save_correlation(self, subtype, obj_id, date_range):
+        '''
+        Register an object (without any item) and update its date range.
+
+        :param date_range: dictionary with the 'date_from' and 'date_to' keys
+        :type date_range: dict
+
+        :rtype: bool
+        '''
+        # increment by 0: create the member if it doesn't exist, keep its score otherwise
+        r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), value=obj_id, amount=0)
+        self.update_correlation_daterange(subtype, obj_id, date_range['date_from'])
+        if date_range['date_from'] != date_range['date_to']:
+            self.update_correlation_daterange(subtype, obj_id, date_range['date_to'])
+        return True
+
+    def create_correlation(self, subtype, obj_id, obj_meta):
+        '''
+        Create an object from its metadata.
+
+        :param obj_meta: metadata, the 'first_seen' and 'last_seen' keys are optional
+        :type obj_meta: dict
+
+        :return: False if the subtype is invalid, else True
+        :rtype: bool
+        '''
+        res = self.sanythise_correlation_types([subtype], r_boolean=True)
+        if not res:
+            print('invalid subtype')
+            return False
+        first_seen = obj_meta.get('first_seen', None)
+        last_seen = obj_meta.get('last_seen', None)
+        date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime')
+        res = self.save_correlation(subtype, obj_id, date_range)
+        if res and 'tags' in obj_meta:
+            # # TODO: handle mixed tags: taxonomies and Galaxies
+            pass
+            #Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type=self.get_correlation_obj_type())
+        return True
+
+    def delete_correlation(self, subtype, obj_id):
+        '''
+        Not implemented yet.
+        '''
+        pass
+
+######## API EXPOSED ########
+
+
+######## ########