From 4d2449cd6144b4e52eba7057d9f1a4b613f0565a Mon Sep 17 00:00:00 2001
From: Terrtia
Date: Thu, 13 Feb 2020 15:03:05 +0100
Subject: [PATCH] chg: [MISP import export] decoded with relationships

---
 bin/Decoder.py              | 102 ++-------------------
 bin/export/MispImport.py    |  12 ++--
 bin/lib/Correlate_object.py |   4 +-
 bin/lib/Decoded.py          | 177 ++++++++++++++++++++++++++++++------
 bin/lib/Screenshot.py       |   5 +-
 5 files changed, 167 insertions(+), 133 deletions(-)

diff --git a/bin/Decoder.py b/bin/Decoder.py
index ef2cfc9a..2dd0abd2 100755
--- a/bin/Decoder.py
+++ b/bin/Decoder.py
@@ -43,113 +43,25 @@ def binary_decoder(binary_string):
 def base64_decoder(base64_string):
     return base64.b64decode(base64_string)
 
-def decode_string(content, message, date, encoded_list, decoder_name, encoded_min_size):
+def decode_string(content, item_id, item_date, encoded_list, decoder_name, encoded_min_size):
     find = False
     for encoded in encoded_list:
         if len(encoded) >= encoded_min_size:
-            decode = decoder_function[decoder_name](encoded)
+            decoded_file = decoder_function[decoder_name](encoded)
             find = True
 
-            save_hash(decoder_name, message, date, decode)
+            sha1_string = sha1(decoded_file).hexdigest()
+            mimetype = Decoded.get_file_mimetype(decoded_file)
+            Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
+            Decoded.save_item_relationship(sha1_string, item_id, decoder_type=decoder_name)
 
             #remove encoded from paste content
             content = content.replace(encoded, '', 1)
-
     if(find):
-        set_out_paste(decoder_name, message)
+        set_out_paste(decoder_name, item_id)
 
     return content
 
-# # TODO: FIXME check db
-def save_hash(decoder_name, message, date, decoded):
-    print(decoder_name)
-    type = magic.from_buffer(decoded, mime=True)
-    hash = sha1(decoded).hexdigest()
-    print(hash)
-
-    data = {}
-    data['name'] = hash
-    data['date'] = datetime.datetime.now().strftime("%d/%m/%y")
-    data['origin'] = message
-    data['estimated type'] = type
-    json_data = json.dumps(data)
-
-    date_paste = '{}/{}/{}'.format(date[0:4], date[4:6], date[6:8])
-    date_key = date[0:4] + date[4:6] + date[6:8]
-
-    serv_metadata.incrby(decoder_name+'_decoded:'+date_key, 1)
-    serv_metadata.zincrby('hash_date:'+date_key, hash, 1)
-    serv_metadata.zincrby(decoder_name+'_date:'+date_key, hash, 1)
-
-    # first time we see this hash
-    if not serv_metadata.hexists('metadata_hash:'+hash, 'estimated_type'):
-        serv_metadata.hset('metadata_hash:'+hash, 'first_seen', date_paste)
-        serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
-    else:
-        serv_metadata.hset('metadata_hash:'+hash, 'last_seen', date_paste)
-
-    # first time we see this hash (all encoding) on this paste
-    if serv_metadata.zscore('nb_seen_hash:'+hash, message) is None:
-        serv_metadata.hincrby('metadata_hash:'+hash, 'nb_seen_in_all_pastes', 1)
-        serv_metadata.sadd('hash_paste:'+message, hash) # paste - hash map
-        # create hash metadata
-        serv_metadata.hset('metadata_hash:'+hash, 'estimated_type', type)
-        serv_metadata.sadd('hash_all_type', type)
-
-    # first time we see this hash encoding on this paste
-    if serv_metadata.zscore(decoder_name+'_hash:'+hash, message) is None:
-        print('first '+decoder_name)
-
-        serv_metadata.sadd(decoder_name+'_paste:'+message, hash) # paste - hash map
-
-        # create hash metadata
-        serv_metadata.sadd('hash_'+ decoder_name +'_all_type', type)
-
-        # first time we see this hash today
-        #if serv_metadata.zscore('hash_date:'+date_key, hash) is None:
-        #    serv_metadata.zincrby('hash_type:'+type, date_key, 1)
-
-        # first time we see this hash encoding today
-        if serv_metadata.zscore(decoder_name+'_date:'+date_key, hash) is None:
-            serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1)
-
-        save_hash_on_disk(decoded, type, hash, json_data)
-        print('found {} '.format(type))
-
-    serv_metadata.hincrby('metadata_hash:'+hash, decoder_name+'_decoder', 1)
-
-    serv_metadata.zincrby(decoder_name+'_type:'+type, date_key, 1)
-
-    serv_metadata.zincrby('nb_seen_hash:'+hash, message, 1)# hash - paste map
-    serv_metadata.zincrby(decoder_name+'_hash:'+hash, message, 1) # number of b64 on this paste
-
-    # Domain Object
-    if Item.is_crawled(message):
-        domain = Item.get_item_domain(message)
-        Decoded.save_domain_decoded(domain, hash)
-
-
-def save_hash_on_disk(decode, type, hash, json_data):
-
-    local_filename_hash = os.path.join(p.config.get("Directories", "hash"), type, hash[:2], hash)
-    filename_hash = os.path.join(os.environ['AIL_HOME'], local_filename_hash)
-
-    filename_json = os.path.join(os.environ['AIL_HOME'],
-                                 p.config.get("Directories", "hash"), type, hash[:2], hash + '.json')
-
-    dirname = os.path.dirname(filename_hash)
-    if not os.path.exists(dirname):
-        os.makedirs(dirname)
-
-    with open(filename_hash, 'wb') as f:
-        f.write(decode)
-
-    # create hash metadata
-    serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_hash))
-
-    with open(filename_json, 'w') as f:
-        f.write(json_data)
-
 def set_out_paste(decoder_name, message):
     publisher.warning(decoder_name+' decoded')
     #Send to duplicate
diff --git a/bin/export/MispImport.py b/bin/export/MispImport.py
index a1a08727..11191a54 100755
--- a/bin/export/MispImport.py
+++ b/bin/export/MispImport.py
@@ -95,8 +95,6 @@ def unpack_obj_pgp(map_uuid_global_id, misp_obj):
     if obj_id and obj_subtype:
         obj_meta = get_object_metadata(misp_obj)
-        print(obj_id)
-        print(obj_meta)
         res = Pgp.pgp.create_correlation(obj_subtype, obj_id, obj_meta)
         map_uuid_global_id[misp_obj.uuid] = get_global_id('pgp', obj_id, obj_subtype=obj_subtype)
 
@@ -116,7 +114,7 @@ def unpack_obj_cryptocurrency(map_uuid_global_id, misp_obj):
         obj_meta = get_object_metadata(misp_obj)
         res = Cryptocurrency.cryptocurrency.create_correlation(obj_subtype, obj_id, obj_meta)
-        map_uuid_global_id[misp_obj.uuid] = get_global_id('pgp', obj_id, obj_subtype=obj_subtype)
+        map_uuid_global_id[misp_obj.uuid] = get_global_id('cryptocurrency', obj_id, obj_subtype=obj_subtype)
 
 def get_obj_type_from_relationship(misp_obj):
     obj_uuid = misp_obj.uuid
@@ -160,11 +158,11 @@ def unpack_file(map_uuid_global_id, misp_obj):
         obj_meta = get_object_metadata(misp_obj)
         if obj_type == 'screenshot':
             Screenshot.create_screenshot(obj_id, obj_meta, io_content)
-            pass
+            map_uuid_global_id[misp_obj.uuid] = get_global_id('image', obj_id)
         else: #decoded
             Decoded.create_decoded(obj_id, obj_meta, io_content)
+            map_uuid_global_id[misp_obj.uuid] = get_global_id('decoded', obj_id)
-
-        map_uuid_global_id[misp_obj.uuid] = get_global_id('item', obj_id)
 
 def get_misp_import_fct(map_uuid_global_id, misp_obj):
     if misp_obj.name == 'ail-leak':
@@ -214,7 +212,7 @@ if __name__ == '__main__':
 
     import_objs_from_file('test_import_item.json')
 
-    #Screenshot.delete_screenshot('d4065d632a232a323d33a30144924763ae229a972c363e12abf48009017ec10c')
+    #Screenshot.delete_screenshot('a92d459f70c4dea8a14688f585a5e2364be8b91fbf924290ead361d9b909dcf1')
 
-    #Decoded.delete_correlation('23a44cc266880d26386a0a77318afbe09696f935')
+    #Decoded.delete_decoded('bfd5f1d89e55b10a8b122a9d7ce31667ec1d086a')
     #Pgp.pgp.delete_correlation('key', '0xA4BB02A75E6AF448')
diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py
index 2c971f07..d9db9c4e 100755
--- a/bin/lib/Correlate_object.py
+++ b/bin/lib/Correlate_object.py
@@ -213,9 +213,9 @@ def create_obj_relationship(obj1_type, obj1_id, obj2_type, obj2_id, obj1_subtype
     elif obj1_type == 'cryptocurrency':
         Cryptocurrency.cryptocurrency.save_obj_relationship(obj1_subtype, obj1_type, obj2_type, obj2_id)
     elif obj1_type == 'decoded':
-        pass
+        Decoded.save_obj_relationship(obj1_id, obj2_type, obj2_id)
     elif obj1_type == 'image':
-        pass
+        Screenshot.save_obj_relationship(obj1_id, obj2_type, obj2_id)
 
 
 def create_graph_links(links_set):
diff --git a/bin/lib/Decoded.py b/bin/lib/Decoded.py
index 1d86426a..03b80aa0 100755
--- a/bin/lib/Decoded.py
+++ b/bin/lib/Decoded.py
@@ -30,6 +30,16 @@ def get_all_correlation_objects():
     '''
     return ['domain', 'paste']
 
+def get_all_decoder():
+    return ['base64', 'binary', 'hexadecimal']
+
+# TODO: # REVIEW: default => base64
+def sanitize_decoder_name(decoder_name):
+    if decoder_name in get_all_decoder():
+        return decoder_name
+    else:
+        return 'base64'
+
 def get_decoded_item_type(sha1_string):
     '''
     Return the estimated type of a given decoded item.
@@ -66,6 +76,28 @@ def get_decoded_filepath(sha1_string, mimetype=None):
 def exist_decoded(sha1_string):
     return r_serv_metadata.exists('metadata_hash:{}'.format(sha1_string))
 
+def get_decoded_first_seen(sha1_string, r_int=False):
+    res = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'first_seen')
+    if res:
+        res = res.replace('/', '')
+    if r_int:
+        if res:
+            return int(res)
+        else:
+            return 99999999
+    return res
+
+def get_decoded_last_seen(sha1_string, r_int=False):
+    res = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'last_seen')
+    if res:
+        res = res.replace('/', '')
+    if r_int:
+        if res:
+            return int(res)
+        else:
+            return 0
+    return res
+
 def get_decoded_metadata(sha1_string, nb_seen=False, size=False, file_type=False, tag=False):
     metadata_dict = {}
     metadata_dict['first_seen'] = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'first_seen')
@@ -156,7 +188,7 @@ def get_decoded_correlated_object(sha1_string, correlation_objects=[]):
     :return: a dict of all correlation for a given sha1
     :rtype: dict
     '''
-    if correlation_objects is None:
+    if not correlation_objects:
         correlation_objects = get_all_correlation_objects()
     decoded_correlation = {}
     for correlation_object in correlation_objects:
@@ -171,25 +203,72 @@ def get_decoded_correlated_object(sha1_string, correlation_objects=[]):
     return decoded_correlation
 
 # # # TODO: check if item and decoded exist
-def save_decoded_item_correlation(sha1_string, item_id, decoder_type):
+def save_item_relationship(sha1_string, item_id, decoder_type='base64'):
+    estimated_type = get_decoded_item_type(sha1_string)
+    if not estimated_type:
+        print('error, unknown sha1_string')
+
+    decoder_type = sanitize_decoder_name(decoder_type)
     item_date = Item.get_item_date(item_id)
+
+    r_serv_metadata.incrby('{}_decoded:{}'.format(decoder_type, item_date), 1)
+    r_serv_metadata.zincrby('hash_date:{}'.format(item_date), sha1_string, 1)
+    r_serv_metadata.zincrby('{}_date:{}'.format(decoder_type, item_date), sha1_string, 1)
+
+    update_decoded_daterange(sha1_string, item_date)
+
+    # first time we see this hash (all encoding) on this item
+    if r_serv_metadata.zscore('nb_seen_hash:{}'.format(sha1_string), item_id) is None:
+        r_serv_metadata.hincrby('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes', 1)
+        r_serv_metadata.sadd('hash_paste:{}'.format(item_id), sha1_string) # item - hash map
+
+    # first time we see this hash encoding on this item
+    if r_serv_metadata.zscore('{}_hash:{}'.format(decoder_type, sha1_string), item_id) is None:
+
+        # create hash metadata
+        r_serv_metadata.sadd('hash_{}_all_type'.format(decoder_type), estimated_type)
+
+        # first time we see this hash encoding today
+        if r_serv_metadata.zscore('{}_date:{}'.format(decoder_type, item_date), sha1_string) is None:
+            r_serv_metadata.zincrby('{}_type:{}'.format(decoder_type, estimated_type), item_date, 1) # # TODO: # DUP1
+
+    r_serv_metadata.hincrby('metadata_hash:{}'.format(sha1_string), '{}_decoder'.format(decoder_type), 1)
+    r_serv_metadata.zincrby('{}_type:{}'.format(decoder_type, estimated_type), item_date, 1) # # TODO: # DUP1
+
+    r_serv_metadata.zincrby('nb_seen_hash:{}'.format(sha1_string), item_id, 1)# hash - paste map
+    r_serv_metadata.zincrby('{}_hash:{}'.format(decoder_type, sha1_string), item_id, 1) # number of b64 on this paste
 
     # domain
     if Item.is_crawled(item_id):
         domain = Item.get_item_domain(item_id)
-        save_domain_decoded(domain, sha1_string)
+        save_domain_relationship(domain, sha1_string)
 
     pass
 
-def save_domain_decoded(domain, sha1_string):
+def save_domain_relationship(domain, sha1_string):
     r_serv_metadata.sadd('hash_domain:{}'.format(domain), sha1_string) # domain - hash map
-    r_serv_metadata.sadd('domain_hash:{}'.format(sha1_string), domain) # hash - domain ma
+    r_serv_metadata.sadd('domain_hash:{}'.format(sha1_string), domain) # hash - domain map
 
-def save_decoded_correlation(sha1_string, referenced_obj_type, referenced_obj_id):
-    if referenced_obj_type=='domain':
-        save_domain_decoded(referenced_obj_type, sha1_string)
-    elif referenced_obj_type=='item':
-        pass
+def update_decoded_daterange(obj_id, new_date):
+    new_date = int(new_date)
+    new_date_str = str(new_date)
+    new_date_str = '{}/{}/{}'.format(new_date_str[0:4], new_date_str[4:6], new_date_str[6:8])
+    # obj_id doesn't exist yet
+    if not r_serv_metadata.hexists('metadata_hash:{}'.format(obj_id), 'first_seen'):
+        r_serv_metadata.hset('metadata_hash:{}'.format(obj_id), 'first_seen', new_date_str)
+        r_serv_metadata.hset('metadata_hash:{}'.format(obj_id), 'last_seen', new_date_str)
+    else:
+        first_seen = get_decoded_first_seen(obj_id, r_int=True)
+        last_seen = get_decoded_last_seen(obj_id, r_int=True)
+        if new_date < first_seen:
+            r_serv_metadata.hset('metadata_hash:{}'.format(obj_id), 'first_seen', new_date_str)
+        if new_date > last_seen:
+            r_serv_metadata.hset('metadata_hash:{}'.format(obj_id), 'last_seen', new_date_str)
+
+def save_obj_relationship(obj_id, referenced_obj_type, referenced_obj_id):
+    if referenced_obj_type == 'domain':
+        save_domain_relationship(referenced_obj_id, obj_id)
+    elif referenced_obj_type == 'item':
+        save_item_relationship(obj_id, referenced_obj_id, decoder_type='base64') # # TODO: handle decoder type
 
 def get_decoded_file_content(sha1_string, mimetype=None):
     filepath = get_decoded_filepath(sha1_string, mimetype=mimetype)
@@ -198,14 +277,12 @@ def get_decoded_file_content(sha1_string, mimetype=None):
     return file_content
 
 # # TODO: check file format
-def save_decoded_file_content(sha1_string, io_content, date_range, mimetype=None):
+def save_decoded_file_content(sha1_string, file_content, date_from, date_to=None, mimetype=None):
     if not mimetype:
         if exist_decoded(sha1_string):
             mimetype = get_decoded_item_type(sha1_string)
         else:
-            mimetype = get_file_mimetype(io_content.getvalue())
-
-
+            mimetype = get_file_mimetype(file_content)
 
     filepath = get_decoded_filepath(sha1_string, mimetype=mimetype)
     if os.path.isfile(filepath):
@@ -218,32 +295,76 @@ def save_decoded_file_content(sha1_string, file_content, date_from, date_to=None, mimetype=None
         os.makedirs(dirname)
 
     with open(filepath, 'wb') as f:
-        f.write(io_content.getvalue())
+        f.write(file_content)
 
     # create hash metadata
-    # # TODO: save estimated type
     r_serv_metadata.hset('metadata_hash:{}'.format(sha1_string), 'size', os.path.getsize(filepath))
+    r_serv_metadata.hset('metadata_hash:{}'.format(sha1_string), 'estimated_type', mimetype)
+    r_serv_metadata.sadd('hash_all_type', mimetype)
 
-    r_serv_metadata.hset('metadata_hash:{}'.format(sha1_string), 'first_seen', date_range['date_from'])
-    r_serv_metadata.hset('metadata_hash:{}'.format(sha1_string), 'last_seen', date_range['date_to'])
+    update_decoded_daterange(sha1_string, date_from)
+    if date_to and date_to != date_from:
+        update_decoded_daterange(sha1_string, date_to)
 
     return True
 
-def delete_decoded_file(obj_id, io_content):
-    # check if item exists
-    if not exist_decoded(obj_id):
+def delete_decoded_file(obj_id):
+    filepath = get_decoded_filepath(obj_id)
+    if not os.path.isfile(filepath):
         return False
-    else:
-        Tag.delete_obj_tags(obj_id, 'decoded', Tag.get_obj_tag(obj_id))
-        os.remove(get_decoded_filepath(sha1_string))
-        r_serv_metadata.delete('metadata_hash:{}'.format(obj_id))
-        return True
+
+    Tag.delete_obj_tags(obj_id, 'decoded', Tag.get_obj_tag(obj_id))
+    os.remove(filepath)
+    return True
 
 def create_decoded(obj_id, obj_meta, io_content):
     first_seen = obj_meta.get('first_seen', None)
     last_seen = obj_meta.get('last_seen', None)
     date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime')
+    decoded_file_content = io_content.getvalue()
 
-    res = save_decoded_file_content(obj_id, io_content, date_range, mimetype=None)
+    res = save_decoded_file_content(obj_id, decoded_file_content, date_range['date_from'], date_to=date_range['date_to'], mimetype=None)
     if res and 'tags' in obj_meta:
-        Tag.api_add_obj_tags(tags=obj_metadata['tags'], object_id=obj_id, object_type="decoded")
+        Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type="decoded")
+
+def delete_decoded(obj_id):
+    if not exist_decoded(obj_id):
+        return False
+
+    res = delete_decoded_file(obj_id)
+    if not res:
+        return False
+
+    obj_correlations = get_decoded_correlated_object(obj_id)
+    if 'domain' in obj_correlations:
+        for domain in obj_correlations['domain']:
+            r_serv_metadata.srem('hash_domain:{}'.format(domain), obj_id)
+            r_serv_metadata.srem('domain_hash:{}'.format(obj_id), domain)
+
+    if 'paste' in obj_correlations: # TODO: handle item
+        for item_id in obj_correlations['paste']:
+            item_date = Item.get_item_date(item_id)
+
+            r_serv_metadata.zrem('hash_date:{}'.format(item_date), obj_id)
+            r_serv_metadata.srem('hash_paste:{}'.format(item_id), obj_id)
+            for decoder_name in get_all_decoder():
+
+                r_serv_metadata.incrby('{}_decoded:{}'.format(decoder_name, item_date), -1)
+                r_serv_metadata.zrem('{}_date:{}'.format(decoder_name, item_date), obj_id)
+
+    for decoder_name in get_all_decoder():
+        r_serv_metadata.delete('{}_hash:{}'.format(decoder_name, obj_id))
+
+    r_serv_metadata.delete('nb_seen_hash:{}'.format(obj_id))
+
+
+    ####### # TODO: DUP1
+    #r_serv_metadata.zincrby('{}_type:{}'.format(decoder_type, estimated_type), item_date, 1)
+    #######
+
+    ###
+    #r_serv_metadata.sadd('hash_{}_all_type'.format(decoder_type), estimated_type)
+    #r_serv_metadata.sadd('hash_all_type', estimated_type)
+    ###
+
+    r_serv_metadata.delete('metadata_hash:{}'.format(obj_id))
diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py
index 7491efb9..c4e19e68 100755
--- a/bin/lib/Screenshot.py
+++ b/bin/lib/Screenshot.py
@@ -131,6 +131,9 @@ def get_screenshot_correlated_object(sha256_string, correlation_objects=[]):
 def save_item_relationship(obj_id, item_id):
     r_serv_metadata.hset('paste_metadata:{}'.format(item_id), 'screenshot', obj_id)
     r_serv_onion.sadd('screenshot:{}'.format(obj_id), item_id)
+    if Item.is_crawled(item_id):
+        domain = Item.get_item_domain(item_id)
+        save_domain_relationship(obj_id, domain)
 
 def save_domain_relationship(obj_id, domain):
     r_serv_onion.sadd('domain_screenshot:{}'.format(domain), obj_id)
@@ -196,6 +199,6 @@ def delete_screenshot(obj_id):
     if 'paste' in obj_correlations: # TODO: handle item
         for item_id in obj_correlations['paste']:
             r_serv_metadata.hdel('paste_metadata:{}'.format(item_id), 'screenshot')
-            r_serv_onion.sadd('screenshot:{}'.format(obj_id), item_id)
+            r_serv_onion.srem('screenshot:{}'.format(obj_id), item_id)
 
     return True
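
Reviewer note (not part of the patch): a minimal usage sketch of the reworked Decoded API, as exercised by Decoder.py and MispImport.py above. The payload bytes, item id, and date are made-up examples, and the sys.path setup assumes AIL_HOME points at an AIL checkout with bin/lib/ on disk; save_item_relationship also expects the item to already exist so Item.get_item_date() can resolve its date.

    #!/usr/bin/env python3
    import os
    import sys
    from hashlib import sha1

    # Assumption: AIL_HOME is set, as elsewhere in this repo.
    sys.path.append(os.path.join(os.environ['AIL_HOME'], 'bin', 'lib'))
    import Decoded

    file_content = b'%PDF-1.4 ...'                                 # hypothetical decoded payload
    item_id = 'archive/pastebin.com_pro/2020/02/13/example.gz'     # hypothetical AIL item

    sha1_string = sha1(file_content).hexdigest()
    mimetype = Decoded.get_file_mimetype(file_content)

    # Write the file under the hash directory, set size/estimated_type metadata,
    # then record the item relationship (decoder counters, first/last seen dates).
    Decoded.save_decoded_file_content(sha1_string, file_content, '20200213', mimetype=mimetype)
    Decoded.save_item_relationship(sha1_string, item_id, decoder_type='base64')

    # delete_decoded() reverses everything written above: file, tags,
    # domain/item correlations, and the metadata_hash entry.
    Decoded.delete_decoded(sha1_string)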