chg: [MISP import] import files obj: decoded + screenshot

pull/486/head
Terrtia 2020-02-11 15:48:30 +01:00
parent 6bbcef024b
commit 25e3022eab
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
11 changed files with 154 additions and 404 deletions

View File

@ -144,7 +144,6 @@ def save_hash_on_disk(decode, type, hash, json_data):
f.write(decode)
# create hash metadata
serv_metadata.hset('metadata_hash:'+hash, 'saved_path', local_filename_hash)
serv_metadata.hset('metadata_hash:'+hash, 'size', os.path.getsize(filename_hash))
with open(filename_json, 'w') as f:

View File

@ -316,11 +316,12 @@ def get_relationship_between_global_obj(obj_global_id_1, obj_global_id_2):
if __name__ == '__main__':
l_obj = [#{'id': 'crawled/2019/11/08/6d3zimnpbwbzdgnp.onionf58258c8-c990-4707-b236-762a2b881183', 'type': 'item', 'lvl': 3},
#{'id': '6d3zimnpbwbzdgnp.onion', 'type': 'domain', 'lvl': 0},
l_obj = [{'id': 'crawled/2019/11/08/6d3zimnpbwbzdgnp.onionf58258c8-c990-4707-b236-762a2b881183', 'type': 'item', 'lvl': 3},
{'id': '6d3zimnpbwbzdgnp.onion', 'type': 'domain', 'lvl': 0},
{'id': 'bfd5f1d89e55b10a8b122a9d7ce31667ec1d086a', 'type': 'decoded', 'lvl': 2},
#{'id': 'a92d459f70c4dea8a14688f585a5e2364be8b91fbf924290ead361d9b909dcf1', 'type': 'image', 'lvl': 3},
#{'id': 'archive/pastebin.com_pro/2020/01/27/iHjcWhkD.gz', 'type': 'item', 'lvl': 3},
{'id': '15efuhpw5V9B1opHAgNXKPBPqdYALXP4hc', 'type': 'cryptocurrency', 'subtype': 'bitcoin', 'lvl': 0}
{'id': 'archive/pastebin.com_pro/2020/01/27/iHjcWhkD.gz', 'type': 'item', 'lvl': 1},
{'id': '15efuhpw5V9B1opHAgNXKPBPqdYALXP4hc', 'type': 'cryptocurrency', 'subtype': 'bitcoin', 'lvl': 1}
]
create_list_of_objs_to_export(l_obj, mode='union')

View File

@ -69,10 +69,6 @@ def unpack_item_obj(map_uuid_global_id, misp_obj):
map_uuid_global_id[misp_obj.uuid] = get_global_id('item', obj_id)
def get_obj_relationship(misp_obj):
    """Debug helper: print the JSON form of every reference held by ``misp_obj``.

    :param misp_obj: object exposing an iterable ``ObjectReference`` whose
        elements provide ``to_json()``
    """
    for reference in misp_obj.ObjectReference:
        reference_json = reference.to_json()
        print(reference_json)
## TODO: handle multiple pgp in the same object
@ -118,7 +114,57 @@ def unpack_obj_cryptocurrency(map_uuid_global_id, misp_obj):
map_uuid_global_id[misp_obj.uuid] = get_global_id('pgp', obj_id, obj_subtype=obj_subtype)
get_obj_relationship(misp_obj)
#get_obj_relationship(misp_obj)
def get_obj_type_from_relationship(misp_obj):
    """Derive the AIL object type of a MISP object from its own references.

    Only references whose ``object_uuid`` equals the uuid of ``misp_obj`` are
    considered. A "screenshot-of" relationship wins immediately; an
    "included-in" relationship marks the object as a decoded file.

    :return: 'screenshot', 'decoded', or None when no known relationship is found
    """
    own_uuid = misp_obj.uuid
    is_decoded = False
    for reference in misp_obj.ObjectReference:
        # skip references that were not emitted by this object
        if reference.object_uuid != own_uuid:
            continue
        if reference.relationship_type == "screenshot-of":
            return 'screenshot'
        if reference.relationship_type == "included-in":
            is_decoded = True
    return 'decoded' if is_decoded else None
def get_obj_relationship(misp_obj):
    """Debug helper: print the JSON form of every reference held by ``misp_obj``.

    :param misp_obj: object exposing an iterable ``ObjectReference`` whose
        elements provide ``to_json()``
    """
    for reference in misp_obj.ObjectReference:
        reference_json = reference.to_json()
        print(reference_json)
# # TODO: convert md5 and sha1 to the expected hash type
def unpack_file(map_uuid_global_id, misp_obj):
    """Import a MISP 'file' object as an AIL decoded file (screenshots are not saved yet).

    The object type is taken from get_obj_type_from_relationship(); objects
    without a recognised relationship are ignored. The content is read from
    the 'attachment' / 'malware-sample' attribute and the object id from the
    hash attribute matching the type (sha1 for decoded, sha256 for screenshot).

    :param map_uuid_global_id: uuid -> global id mapping (not modified here)
    :param misp_obj: MISP object to import
    """
    obj_type = get_obj_type_from_relationship(misp_obj)
    if not obj_type:
        return

    obj_id = None
    io_content = None
    for attribute in misp_obj.attributes:
        relation = attribute.object_relation
        # get file content
        if relation == 'attachment' or relation == 'malware-sample':
            io_content = attribute.data
        # # TODO: use/verify specified mimetype
        elif relation == 'mimetype':
            print(attribute.value)
        # # TODO: support more
        elif (relation == 'sha1' and obj_type == 'decoded') or \
                (relation == 'sha256' and obj_type == 'screenshot'):
            obj_id = attribute.value

    # both an id and a content are required to create the object
    if not (obj_id and io_content):
        return

    print(obj_type)
    obj_meta = get_object_metadata(misp_obj)
    if obj_type != 'screenshot':  # decoded
        Decoded.create_decoded(obj_id, obj_meta, io_content)
    # screenshot: creation is still disabled
    # Screenshot.create_screenshot(obj_id, obj_meta, io_content)
def get_misp_import_fct(map_uuid_global_id, misp_obj):
#print(misp_obj.ObjectReference)
@ -129,17 +175,22 @@ def get_misp_import_fct(map_uuid_global_id, misp_obj):
#print(misp_obj.name)
if misp_obj.name == 'ail-leak':
unpack_item_obj(map_uuid_global_id, misp_obj)
#unpack_item_obj(map_uuid_global_id, misp_obj)
#print(misp_obj.to_json())
pass
elif misp_obj.name == 'domain-ip':
pass
elif misp_obj.name == 'pgp-meta':
unpack_obj_pgp(map_uuid_global_id, misp_obj)
#unpack_obj_pgp(map_uuid_global_id, misp_obj)
pass
elif misp_obj.name == 'coin-address':
#unpack_obj_cryptocurrency(map_uuid_global_id, misp_obj)
pass
elif misp_obj.name == 'file':
unpack_file(map_uuid_global_id, misp_obj)
print()
print('---')
print()
#unpack_item_obj(map_uuid_global_id, misp_obj)
pass
@ -160,4 +211,4 @@ if __name__ == '__main__':
# misp = PyMISP('https://127.0.0.1:8443/', 'uXgcN42b7xuL88XqK5hubwD8Q8596VrrBvkHQzB0', False)
#import_objs_from_file('test_import_item.json')
import_objs_from_file('test_export.json')
import_objs_from_file('test_import_item.json')

View File

@ -44,7 +44,7 @@ def get_all_correlation_objects():
'''
return ['domain', 'paste']
def exist_object(object_type, correlation_id, type_id=None):
def exist_object(object_type, correlation_id, type_id=None): # => work on object level
if object_type == 'domain':
return Domain.verify_if_domain_exist(correlation_id)
elif object_type == 'paste' or object_type == 'item':

View File

@ -2,6 +2,7 @@
# -*-coding:UTF-8 -*
import os
import magic
import sys
import redis
@ -13,7 +14,6 @@ import Date
import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import correlation
import ConfigLoader
@ -23,6 +23,13 @@ r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
HASH_DIR = config_loader.get_config_str('Directories', 'hash')
config_loader = None
# # TODO: move me in another file
def get_all_correlation_objects():
    '''
    Return the list of all correlated object types.
    '''
    correlation_objects = ['domain', 'paste']
    return correlation_objects
def get_decoded_item_type(sha1_string):
'''
Retun the estimed type of a given decoded item.
@ -31,6 +38,9 @@ def get_decoded_item_type(sha1_string):
'''
return r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'estimated_type')
def get_file_mimetype(bytes_content):
    '''
    Return the mimetype detected by ``magic.from_buffer`` for the given content.

    :param bytes_content: raw file content
    '''
    mimetype = magic.from_buffer(bytes_content, mime=True)
    return mimetype
def nb_decoded_seen_in_item(sha1_string):
nb = r_serv_metadata.hget('metadata_hash:{}'.format(sha1_string), 'nb_seen_in_all_pastes')
if nb is None:
@ -147,7 +157,7 @@ def get_decoded_correlated_object(sha1_string, correlation_objects=[]):
:rtype: dict
'''
if correlation_objects is None:
correlation_objects = correlation.get_all_correlation_objects()
correlation_objects = get_all_correlation_objects()
decoded_correlation = {}
for correlation_object in correlation_objects:
if correlation_object == 'paste':
@ -170,3 +180,53 @@ def get_decoded_file_content(sha1_string, mimetype=None):
with open(filepath, 'rb') as f:
file_content = BytesIO(f.read())
return file_content
# # TODO: check file format
def save_decoded_file_content(sha1_string, io_content, date_range, mimetype=None):
    '''
    Write a decoded file on disk and create its metadata.

    When no mimetype is given, the type already stored for this decoded item
    is reused; if none is stored, the type is detected from the content.

    :param sha1_string: sha1 of the decoded file (object id)
    :param io_content: in-memory buffer exposing ``getvalue()``
    :param date_range: dict providing 'date_from' and 'date_to'
    :param mimetype: optional mimetype of the content

    :return: False if the file already exists on disk, True once saved
    :rtype: bool
    '''
    if not mimetype:
        if exist_decoded(sha1_string):
            mimetype = get_decoded_item_type(sha1_string)
        # unknown item, or known item without a stored type: detect it
        if not mimetype:
            mimetype = get_file_mimetype(io_content.getvalue())

    filepath = get_decoded_filepath(sha1_string, mimetype=mimetype)
    if os.path.isfile(filepath):
        print('File already exist')
        return False

    # create dir (exist_ok avoids the check-then-create race)
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    with open(filepath, 'wb') as f:
        f.write(io_content.getvalue())

    # create hash metadata
    metadata_key = 'metadata_hash:{}'.format(sha1_string)
    r_serv_metadata.hset(metadata_key, 'size', os.path.getsize(filepath))
    # persist the type so get_decoded_item_type() can return it later
    r_serv_metadata.hset(metadata_key, 'estimated_type', mimetype)
    r_serv_metadata.hset(metadata_key, 'first_seen', date_range['date_from'])
    r_serv_metadata.hset(metadata_key, 'last_seen', date_range['date_to'])
    return True
def delete_decoded_file(obj_id, io_content):
    '''
    Delete a decoded file: its tags, its file on disk and its metadata.

    :param obj_id: sha1 of the decoded file
    :param io_content: unused, kept for interface compatibility

    :return: False if the decoded file does not exist, True once deleted
    :rtype: bool
    '''
    # check if item exists
    if not exist_decoded(obj_id):
        return False

    Tag.delete_obj_tags(obj_id, 'decoded', Tag.get_obj_tag(obj_id))
    # the path is resolved from obj_id (the original referenced an undefined name)
    os.remove(get_decoded_filepath(obj_id))
    r_serv_metadata.delete('metadata_hash:{}'.format(obj_id))
    return True
def create_decoded(obj_id, obj_meta, io_content):
    '''
    Create a decoded file from imported content and tag it.

    :param obj_id: sha1 of the decoded file
    :param obj_meta: dict of metadata; 'first_seen', 'last_seen' and 'tags' are read when present
    :param io_content: in-memory buffer holding the file content
    '''
    first_seen = obj_meta.get('first_seen', None)
    last_seen = obj_meta.get('last_seen', None)
    date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime')

    res = save_decoded_file_content(obj_id, io_content, date_range, mimetype=None)
    # tags are only added when the file was actually saved
    if res and 'tags' in obj_meta:
        Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type="decoded")

View File

@ -145,7 +145,7 @@ def save_screenshot_file(sha256_string, io_content):
f.write(io_content.getvalue())
return True
def create_screenshot(sha256_string, io_content):
def create_screenshot(sha256_string, obj_meta, io_content):
# check if sha256
res = save_screenshot_file(sha256_string, io_content)
if res:

View File

@ -8,7 +8,8 @@ import redis
from hashlib import sha256
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import correlation
import Correlation
import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
@ -19,7 +20,7 @@ config_loader = None
digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'
cryptocurrency = correlation.Correlation('cryptocurrency', ['bitcoin', 'ethereum', 'bitcoin-cash', 'litecoin', 'monero', 'zcash', 'dash'])
cryptocurrency = Correlation.Correlation('cryptocurrency', ['bitcoin', 'ethereum', 'bitcoin-cash', 'litecoin', 'monero', 'zcash', 'dash'])
# http://rosettacode.org/wiki/Bitcoin/address_validation#Python
def decode_base58(bc, length):
@ -74,18 +75,18 @@ def get_cryptocurrency_symbol(crypto_type):
return None
def get_cryptocurrency_type(crypto_symbol):
    '''
    Return the cryptocurrency type name matching a ticker symbol.

    :param crypto_symbol: ticker symbol, e.g. 'BTC'
    :type crypto_symbol: str

    :return: the cryptocurrency type (e.g. 'bitcoin'), or None for an unknown symbol
    '''
    # every comparison uses the parameter name; 'crypto_type' is not defined here
    symbol_to_type = {
        'BTC': 'bitcoin',
        'ETH': 'ethereum',
        'BCH': 'bitcoin-cash',
        'LTC': 'litecoin',
        'XMR': 'monero',
        'ZEC': 'zcash',
        'DASH': 'dash',
    }
    return symbol_to_type.get(crypto_symbol)

View File

@ -402,6 +402,7 @@ def delete_item(obj_id):
### REQUIRE MORE WORK
# delete child/son !!!
### TODO in inport V2
# delete from tracked items
# delete from queue
###

View File

@ -6,7 +6,8 @@ import sys
import redis
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import correlation
import Correlation
import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
@ -15,7 +16,7 @@ config_loader = ConfigLoader.ConfigLoader()
serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
pgp = correlation.Correlation('pgpdump', ['key', 'mail', 'name'])
pgp = Correlation.Correlation('pgpdump', ['key', 'mail', 'name'])
def get_pgp(request_dict, pgp_type):
# basic verification

View File

@ -12,7 +12,6 @@ import Item
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
import Correlate_object
from pytaxonomies import Taxonomies
from pymispgalaxies import Galaxies, Clusters
@ -311,12 +310,15 @@ def update_tag_global_by_obj_type(object_type, tag):
# update object global tags
r_serv_tags.srem('list_tags:{}'.format(object_type), tag)
# update global tags
for obj_type in Correlate_object.get_all_objects():
for obj_type in get_all_objects():
if r_serv_tags.exists('{}:{}'.format(obj_type, tag)):
tag_deleted = False
if tag_deleted:
r_serv_tags.srem('list_tags', tag)
def get_all_objects():
    '''
    Return the list of all object types handled by the global tag update.
    '''
    all_objects = ['domain', 'item', 'pgp', 'cryptocurrency', 'decoded', 'image']
    return all_objects
def add_global_tag(tag, object_type=None):
'''
Create a set of all tags used in AIL (all + by object)
@ -422,6 +424,13 @@ def delete_tag(object_type, tag, object_id, obj_date=None):
else:
return ({'status': 'error', 'reason': 'object id or tag not found', 'value': tag}, 400)
# # TODO: move me
def get_obj_date(object_type, object_id):
    '''
    Return the date of an object as an int; only items have one.

    :return: int(Item.get_item_date(object_id)) for an item, None for any other type
    '''
    if object_type != "item":
        return None
    item_date = Item.get_item_date(object_id)
    return int(item_date)
# API QUERY
def api_delete_obj_tags(tags=[], object_id=None, object_type="item"):
if not object_id:
@ -559,10 +568,3 @@ def get_obj_by_tags(object_type, l_tags, date_from=None, date_to=None, nb_obj=50
l_tagged_obj = list(l_tagged_obj)
return {"tagged_obj":l_tagged_obj, "page":page, "nb_pages":nb_pages, "nb_first_elem":start+1, "nb_last_elem":stop, "nb_all_elem":nb_all_elem}
def get_obj_date(object_type, object_id): # # TODO: move me in another file
    '''
    Return the date of an object as an int; only items have one.

    :return: int(Item.get_item_date(object_id)) for an item, None for any other type
    '''
    if object_type != "item":
        return None
    item_date = Item.get_item_date(object_id)
    return int(item_date)

View File

@ -1,366 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import os
import sys
import redis
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages/'))
import Date
import Item
import Tag
config_loader = ConfigLoader.ConfigLoader()
r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata")
config_loader = None
def get_all_correlation_objects():
    '''
    Return the list of all correlated object types.
    '''
    correlation_objects = ['domain', 'paste']
    return correlation_objects
class Correlation(object):
    '''
    Access layer for one family of correlations stored in ``r_serv_metadata``.

    A family is identified by ``correlation_name`` (used as a prefix/infix of
    every key) and is split into subtypes listed in ``all_correlation_types``.
    '''

    def __init__(self, correlation_name, all_correlation_types):
        '''
        :param correlation_name: name of the correlation family, used to build the keys
        :param all_correlation_types: list of the valid subtypes of this family
        '''
        self.correlation_name = correlation_name
        self.all_correlation_types = all_correlation_types

    def _metadata_key(self, subtype, obj_id):
        '''Build the key of the metadata hash of one correlation object.'''
        return '{}_metadata_{}:{}'.format(self.correlation_name, subtype, obj_id)

    def _exist_corelation_field(self, correlation_type, field_name, item_type='paste'):
        '''
        Check if the set of items (item_type 'paste') or of domains (any other
        item_type) exists for this correlation value.
        '''
        if item_type == 'paste':
            return r_serv_metadata.exists('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
        else:
            return r_serv_metadata.exists('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))

    def exist_correlation(self, subtype, obj_id):
        '''
        Check if obj_id is registered in the sorted set of all objects of this subtype.

        :rtype: bool
        '''
        res = r_serv_metadata.zscore('{}_all:{}'.format(self.correlation_name, subtype), obj_id)
        # save_correlation() registers objects with a score of 0: a falsy
        # score is still an existing member, only None means "not found"
        return res is not None

    def _get_items(self, correlation_type, field_name):
        '''Return the list of item ids stored for this correlation value.'''
        res = r_serv_metadata.smembers('set_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
        if res:
            return list(res)
        else:
            return []

    def get_correlation_first_seen(self, subtype, obj_id, r_int=False):
        '''
        Return the 'first_seen' field of the object metadata.

        :param r_int: if True, return an int (99999999 when the field is missing)
        '''
        res = r_serv_metadata.hget(self._metadata_key(subtype, obj_id), 'first_seen')
        if r_int:
            if res:
                return int(res)
            else:
                return 99999999
        else:
            return res

    def get_correlation_last_seen(self, subtype, obj_id, r_int=False):
        '''
        Return the 'last_seen' field of the object metadata.

        :param r_int: if True, return an int (0 when the field is missing)
        '''
        res = r_serv_metadata.hget(self._metadata_key(subtype, obj_id), 'last_seen')
        if r_int:
            if res:
                return int(res)
            else:
                return 0
        else:
            return res

    def _get_metadata(self, subtype, obj_id):
        '''
        Return a dict with 'first_seen', 'last_seen' (raw stored values) and
        'nb_seen' (cardinality of the set of items of this object).
        '''
        meta_dict = {}
        meta_dict['first_seen'] = self.get_correlation_first_seen(subtype, obj_id)
        meta_dict['last_seen'] = self.get_correlation_last_seen(subtype, obj_id)
        meta_dict['nb_seen'] = r_serv_metadata.scard('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id))
        return meta_dict

    def get_metadata(self, correlation_type, field_name, date_format='str_date'):
        '''
        Return the object metadata; with date_format 'str_date' the dates are
        reformatted from their first 8 characters to 'YYYY/MM/DD'.
        '''
        meta_dict = self._get_metadata(correlation_type, field_name)
        if date_format == "str_date":
            for date_field in ('first_seen', 'last_seen'):
                raw_date = meta_dict[date_field]
                if raw_date:
                    meta_dict[date_field] = '{}/{}/{}'.format(raw_date[0:4], raw_date[4:6], raw_date[6:8])
        return meta_dict

    def get_nb_object_seen_by_date(self, correlation_type, field_name, date_day):
        '''Return the daily counter of this object for date_day (0 when missing).'''
        nb = r_serv_metadata.hget('{}:{}:{}'.format(self.correlation_name, correlation_type, date_day), field_name)
        if nb is None:
            return 0
        else:
            return int(nb)

    def get_list_nb_previous_correlation_object(self, correlation_type, field_name, numDay):
        '''Return the daily counters for each date of Date.get_previous_date_list(numDay).'''
        return [self.get_nb_object_seen_by_date(correlation_type, field_name, date_day)
                for date_day in Date.get_previous_date_list(numDay)]

    def _get_correlation_by_date(self, correlation_type, date):
        '''Return the objects of this subtype recorded in the daily hash of the given date.'''
        return r_serv_metadata.hkeys('{}:{}:{}'.format(self.correlation_name, correlation_type, date))

    def verify_correlation_field_request(self, request_dict, correlation_type, item_type='paste'):
        '''
        Validate an API request.

        :return: an (error dict, HTTP status) tuple on failure, None when the request is valid
        '''
        if not request_dict:
            return ({'status': 'error', 'reason': 'Malformed JSON'}, 400)

        field_name = request_dict.get(correlation_type, None)
        if not field_name:
            return ( {'status': 'error', 'reason': 'Mandatory parameter(s) not provided'}, 400 )
        if not self._exist_corelation_field(correlation_type, field_name, item_type=item_type):
            return ( {'status': 'error', 'reason': 'Item not found'}, 404 )

    def get_correlation(self, request_dict, correlation_type, field_name):
        '''
        Build the API response for a correlation value: its items and/or its
        metadata, depending on the 'items' and 'metadata' flags of request_dict.

        :return: (response dict, 200)
        '''
        dict_resp = {}

        if request_dict.get('items'):
            dict_resp['items'] = self._get_items(correlation_type, field_name)

        if request_dict.get('metadata'):
            dict_resp['metadata'] = self._get_metadata(correlation_type, field_name)

        dict_resp[correlation_type] = field_name

        return (dict_resp, 200)

    def get_all_correlation_types(self):
        '''
        Get all correlation types

        :return: A list of all the correlation types
        :rtype: list
        '''
        return self.all_correlation_types

    def get_correlation_obj_type(self):
        '''Return the object type name: 'pgp' for the 'pgpdump' family, else 'cryptocurrency'.'''
        if self.correlation_name == 'pgpdump':
            return 'pgp'
        else:
            return 'cryptocurrency'

    def sanythise_correlation_types(self, correlation_types, r_boolean=False):
        '''
        Check if all correlation types in the list are valid.

        :param correlation_types: list of correlation type
        :type correlation_types: list
        :param r_boolean: if True, return the validity as a boolean instead of a list
        :type r_boolean: bool

        :return: If a type is invalid, return the full list of correlation types else return the provided list
        :rtype: list
        '''
        if correlation_types is None:
            if r_boolean:
                return False
            else:
                return self.get_all_correlation_types()
        for correl in correlation_types: # # TODO: # OPTIMIZE:
            if correl not in self.get_all_correlation_types():
                if r_boolean:
                    return False
                else:
                    return self.get_all_correlation_types()
        if r_boolean:
            return True
        else:
            return correlation_types

    def _get_domain_correlation_obj(self, domain, correlation_type):
        '''
        Return correlation of a given domain.

        :param domain: crawled domain
        :type domain: str
        :param correlation_type: correlation type
        :type correlation_type: str

        :return: a list of correlation
        :rtype: list
        '''
        res = r_serv_metadata.smembers('domain_{}_{}:{}'.format(self.correlation_name, correlation_type, domain))
        if res:
            return list(res)
        else:
            return []

    def get_domain_correlation_dict(self, domain, correlation_type=None, get_nb=False):
        '''
        Return all correlation of a given domain.

        :param domain: crawled domain
        :param correlation_type: list of correlation types
        :type correlation_type: list, optional
        :param get_nb: if True, add the total number of correlations under the 'nb' key

        :return: a dictionary of all the requested correlations
        :rtype: dict
        '''
        correlation_type = self.sanythise_correlation_types(correlation_type)
        dict_correlation = {}
        for correl in correlation_type:
            res = self._get_domain_correlation_obj(domain, correl)
            if res:
                dict_correlation[correl] = res
                if get_nb:
                    dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl])
        return dict_correlation

    def _get_correlation_obj_domain(self, field_name, correlation_type):
        '''
        Return all domains that contain this correlation.

        :param field_name: correlation value
        :type field_name: str
        :param correlation_type: correlation type
        :type correlation_type: str

        :return: a list of domains
        :rtype: list
        '''
        res = r_serv_metadata.smembers('set_domain_{}_{}:{}'.format(self.correlation_name, correlation_type, field_name))
        if res:
            return list(res)
        else:
            return []

    def get_correlation_obj_domain(self, field_name, correlation_type=None):
        '''
        Return all domain correlation of a given correlation_value.

        The domains of the first correlation type with a non-empty result are returned.

        :param field_name: field_name
        :param correlation_type: list of correlation types
        :type correlation_type: list, optional

        :return: a list of domains
        :rtype: list
        '''
        correlation_type = self.sanythise_correlation_types(correlation_type)
        for correl in correlation_type:
            res = self._get_correlation_obj_domain(field_name, correl)
            if res:
                return res
        return []

    def _get_item_correlation_obj(self, item_id, correlation_type):
        '''
        Return correlation of a given item id.

        :param item_id: item id
        :type item_id: str
        :param correlation_type: correlation type
        :type correlation_type: str

        :return: a list of correlation
        :rtype: list
        '''
        res = r_serv_metadata.smembers('item_{}_{}:{}'.format(self.correlation_name, correlation_type, item_id))
        if res:
            return list(res)
        else:
            return []

    def get_item_correlation_dict(self, item_id, correlation_type=None, get_nb=False):
        '''
        Return all correlation of a given item id.

        :param item_id: item id
        :param correlation_type: list of correlation types
        :type correlation_type: list, optional
        :param get_nb: if True, add the total number of correlations under the 'nb' key

        :return: a dictionary of all the requested correlations
        :rtype: dict
        '''
        correlation_type = self.sanythise_correlation_types(correlation_type)
        dict_correlation = {}
        for correl in correlation_type:
            res = self._get_item_correlation_obj(item_id, correl)
            if res:
                dict_correlation[correl] = res
                if get_nb:
                    dict_correlation['nb'] = dict_correlation.get('nb', 0) + len(dict_correlation[correl])
        return dict_correlation

    def get_correlation_all_object(self, correlation_type, correlation_value, correlation_objects=[]):
        '''
        Return the objects correlated with a correlation value, by object type.

        :param correlation_objects: object types to look up ('paste', 'domain');
            None selects get_all_correlation_objects()

        :return: dict mapping each object type with a non-empty result to its list
        :rtype: dict
        '''
        # NOTE(review): the default is an empty list, so the None branch only
        # runs when a caller passes None explicitly - confirm this is intended
        if correlation_objects is None:
            correlation_objects = get_all_correlation_objects()
        correlation_obj = {}
        for correlation_object in correlation_objects:
            if correlation_object == 'paste':
                res = self._get_items(correlation_type, correlation_value)
            elif correlation_object == 'domain':
                # NOTE(review): correlation_type is forwarded as-is although
                # get_correlation_obj_domain() documents a list - verify against callers
                res = self.get_correlation_obj_domain(correlation_value, correlation_type=correlation_type)
            else:
                res = None
            if res:
                correlation_obj[correlation_object] = res
        return correlation_obj

    def update_correlation_daterange(self, subtype, obj_id, date):
        '''
        Extend the first_seen/last_seen range of an object so that it includes date.

        :param date: date, converted with int() before comparison
        '''
        date = int(date)
        metadata_key = self._metadata_key(subtype, obj_id)
        # obj_id doesn't exist yet: the range is this single date
        if not r_serv_metadata.exists(metadata_key):
            r_serv_metadata.hset(metadata_key, 'first_seen', date)
            r_serv_metadata.hset(metadata_key, 'last_seen', date)
        else:
            # each bound is read from its own field (they were swapped before,
            # which overwrote both bounds for any date inside the range)
            first_seen = self.get_correlation_first_seen(subtype, obj_id, r_int=True)
            last_seen = self.get_correlation_last_seen(subtype, obj_id, r_int=True)
            if date < first_seen:
                r_serv_metadata.hset(metadata_key, 'first_seen', date)
            if date > last_seen:
                r_serv_metadata.hset(metadata_key, 'last_seen', date)

    def save_item_correlation(self, subtype, date, obj_id, item_id, item_date):
        '''
        Record that obj_id was seen in item_id: update the date range, the
        global/daily/all-type counters and the item (and domain) mappings.

        :param date: unused here; item_date is the date that gets recorded
        '''
        # must be called as a method: there is no module-level function of this name
        self.update_correlation_daterange(subtype, obj_id, item_date)

        # global set
        r_serv_metadata.sadd('set_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), item_id)

        # daily
        r_serv_metadata.hincrby('{}:{}:{}'.format(self.correlation_name, subtype, item_date), obj_id, 1)

        # all type
        r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, 1)

        ## object_metadata
        # item
        r_serv_metadata.sadd('item_{}_{}:{}'.format(self.correlation_name, subtype, item_id), obj_id)

        # domain
        if Item.is_crawled(item_id):
            domain = Item.get_item_domain(item_id)
            self.save_domain_correlation(domain, subtype, obj_id)

    def save_domain_correlation(self, domain, subtype, obj_id):
        '''Store the domain <-> object mapping in both directions.'''
        r_serv_metadata.sadd('domain_{}_{}:{}'.format(self.correlation_name, subtype, domain), obj_id)
        r_serv_metadata.sadd('set_domain_{}_{}:{}'.format(self.correlation_name, subtype, obj_id), domain)

    def save_correlation(self, subtype, obj_id, date_range):
        '''
        Register an object (incrementing its score by 0) and update its date range.

        :param date_range: dict providing 'date_from' and 'date_to'
        :return: True
        '''
        r_serv_metadata.zincrby('{}_all:{}'.format(self.correlation_name, subtype), obj_id, 0)
        self.update_correlation_daterange(subtype, obj_id, date_range['date_from'])
        if date_range['date_from'] != date_range['date_to']:
            self.update_correlation_daterange(subtype, obj_id, date_range['date_to'])
        return True

    def create_correlation(self, subtype, obj_id, obj_meta):
        '''
        Create a correlation object from imported metadata and tag it.

        :param obj_meta: dict; 'first_seen', 'last_seen' and 'tags' are read when present
        :return: False if the subtype is invalid, True otherwise
        :rtype: bool
        '''
        res = self.sanythise_correlation_types([subtype], r_boolean=True)
        if not res:
            print('invalid subtype')
            return False
        first_seen = obj_meta.get('first_seen', None)
        last_seen = obj_meta.get('last_seen', None)
        date_range = Date.sanitise_date_range(first_seen, last_seen, separator='', date_type='datetime')
        print(date_range)
        res = self.save_correlation(subtype, obj_id, date_range)
        if res and 'tags' in obj_meta:
            # # TODO: handle mixed tags: taxonomies and Galaxies
            Tag.api_add_obj_tags(tags=obj_meta['tags'], object_id=obj_id, object_type=self.get_correlation_obj_type())
        return True
######## API EXPOSED ########
######## ########