diff --git a/bin/lib/Correlate_object.py b/bin/lib/Correlate_object.py index df483bbc..590698cd 100755 --- a/bin/lib/Correlate_object.py +++ b/bin/lib/Correlate_object.py @@ -23,6 +23,9 @@ config_loader = ConfigLoader.ConfigLoader() r_serv_metadata = config_loader.get_redis_conn("ARDB_Metadata") config_loader = None +def get_all_objects(): + return ['domain', 'paste', 'pgp', 'cryptocurrency', 'decoded', 'screenshot'] + def get_all_correlation_names(): ''' Return a list of all available correlations diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py index 1cc56fcd..2b131b75 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -11,7 +11,7 @@ import Item sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) import ConfigLoader -import Domain +import Correlate_object from pytaxonomies import Taxonomies from pymispgalaxies import Galaxies, Clusters @@ -150,163 +150,6 @@ def unpack_str_tags_list(str_tags_list): else: return [] - -# TEMPLATE + API QUERY # # TODO: # REVIEW: -def add_items_tag(tags=[], galaxy_tags=[], item_id=None): ## TODO: remove me - res_dict = {} - if item_id == None: - return ({'status': 'error', 'reason': 'Item id not found'}, 404) - if not tags and not galaxy_tags: - return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400) - - res_dict['tags'] = [] - for tag in tags: - taxonomie = get_taxonomie_from_tag(tag) - if is_taxonomie_tag_enabled(taxonomie, tag): - add_item_tag(tag, item_id) - res_dict['tags'].append(tag) - else: - return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled'}, 400) - - for tag in galaxy_tags: - galaxy = get_galaxy_from_tag(tag) - if is_galaxy_tag_enabled(galaxy, tag): - add_item_tag(tag, item_id) - res_dict['tags'].append(tag) - else: - return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled'}, 400) - - res_dict['id'] = item_id - return (res_dict, 200) - -def api_add_obj_tags(tags=[], galaxy_tags=[], object_id=None, object_type="item"): - pass - -# TEMPLATE + API QUERY 
-def add_items_tags(tags=[], galaxy_tags=[], item_id=None, item_type="paste"): - res_dict = {} - if item_id == None: - return ({'status': 'error', 'reason': 'Item id not found'}, 404) - if not tags and not galaxy_tags: - return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400) - if item_type not in ('paste', 'domain'): - return ({'status': 'error', 'reason': 'Incorrect item_type'}, 400) - - res_dict['tags'] = [] - for tag in tags: - if tag: - taxonomie = get_taxonomie_from_tag(tag) - if is_taxonomie_tag_enabled(taxonomie, tag): - add_item_tag(tag, item_id, item_type=item_type) - res_dict['tags'].append(tag) - else: - return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled'}, 400) - - for tag in galaxy_tags: - if tag: - galaxy = get_galaxy_from_tag(tag) - if is_galaxy_tag_enabled(galaxy, tag): - add_item_tag(tag, item_id, item_type=item_type) - res_dict['tags'].append(tag) - else: - return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled'}, 400) - - res_dict['id'] = item_id - res_dict['type'] = item_type - return (res_dict, 200) - -def add_domain_tag(tag, domain, item_date): - r_serv_tags.sadd('list_tags:domain', tag) - r_serv_metadata.sadd('tag:{}'.format(domain), tag) - r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain) - -def add_item_tag(tag, item_path, item_type="paste", tag_date=None): - - if item_type=="paste": - item_date = int(Item.get_item_date(item_path)) - - #add tag - r_serv_metadata.sadd('tag:{}'.format(item_path), tag) - r_serv_tags.sadd('{}:{}'.format(tag, item_date), item_path) - - if Item.is_crawled(item_path): - domain = Item.get_item_domain(item_path) - r_serv_metadata.sadd('tag:{}'.format(domain), tag) - r_serv_tags.sadd('domain:{}:{}'.format(tag, item_date), domain) - # domain item - else: - item_date = int(Domain.get_domain_last_check(item_path, r_format="int")) - add_domain_tag(tag, item_path, item_date) - - r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, 1) - - tag_first_seen = 
r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - if tag_first_seen is None: - tag_first_seen = 99999999 - else: - tag_first_seen = int(tag_first_seen) - tag_last_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - if tag_last_seen is None: - tag_last_seen = 0 - else: - tag_last_seen = int(tag_last_seen) - - #add new tag in list of all used tags - r_serv_tags.sadd('list_tags', tag) - - # update fisrt_seen/last_seen - if item_date < tag_first_seen: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date) - - # update metadata last_seen - if item_date > tag_last_seen: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date) - -# API QUERY -def remove_item_tags(tags=[], item_id=None): - if item_id == None: - return ({'status': 'error', 'reason': 'Item id not found'}, 404) - if not tags: - return ({'status': 'error', 'reason': 'No Tag(s) specified'}, 400) - - dict_res = {} - dict_res['tags'] = [] - for tag in tags: - res = remove_item_tag(tag, item_id) - if res[1] != 200: - return res - else: - dict_res['tags'].append(tag) - dict_res['id'] = item_id - return (dict_res, 200) - -# TEMPLATE + API QUERY -def remove_item_tag(tag, item_id): - item_date = int(Item.get_item_date(item_id)) - - #remove tag - r_serv_metadata.srem('tag:{}'.format(item_id), tag) - res = r_serv_tags.srem('{}:{}'.format(tag, item_date), item_id) - - if res ==1: - # no tag for this day - if int(r_serv_tags.hget('daily_tags:{}'.format(item_date), tag)) == 1: - r_serv_tags.hdel('daily_tags:{}'.format(item_date), tag) - else: - r_serv_tags.hincrby('daily_tags:{}'.format(item_date), tag, -1) - - tag_first_seen = int(r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')) - tag_last_seen = int(r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')) - # update fisrt_seen/last_seen - if item_date == tag_first_seen: - update_tag_first_seen(tag, tag_first_seen, tag_last_seen) - if item_date == tag_last_seen: - 
update_tag_last_seen(tag, tag_first_seen, tag_last_seen) - return ({'status': 'success'}, 200) - else: - return ({'status': 'error', 'reason': 'Item id or tag not found'}, 400) - - # used by modal def get_modal_add_tags(item_id, object_type='item'): ''' @@ -386,7 +229,6 @@ def update_tag_first_seen(tag, tag_first_seen, tag_last_seen): r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', tag_first_seen) # no tag in db else: - r_serv_tags.srem('list_tags', tag) r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'first_seen') r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'last_seen') else: @@ -402,7 +244,6 @@ def update_tag_last_seen(tag, tag_first_seen, tag_last_seen): r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', tag_last_seen) # no tag in db else: - r_serv_tags.srem('list_tags', tag) r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'first_seen') r_serv_tags.hdel('tag_metadata:{}'.format(tag), 'last_seen') else: @@ -432,10 +273,28 @@ def update_tag_metadata(tag, tag_date, object_type=None, add_tag=True): ## REMOVE tag ## else: if tag_date == tag_metadata['first_seen']: - update_tag_first_seen(tag, tag_metadata['first_seen'], tag_metadata['last_seen']) + update_tag_first_seen(object_type, tag, tag_metadata['first_seen'], tag_metadata['last_seen']) if tag_date == tag_metadata['last_seen']: update_tag_last_seen(tag, tag_metadata['first_seen'], tag_metadata['last_seen']) +def update_tag_global_by_obj_type(object_type, tag): + tag_deleted = False + if object_type=='item': + if not r_serv_tags.exists('tag_metadata:{}'.format(tag)): + tag_deleted = True + else: + if not r_serv_tags.exists('{}:{}'.format(object_type, tag)): + tag_deleted = True + if tag_deleted: + # update object global tags + r_serv_tags.srem('list_tags:{}'.format(object_type), tag) + # update global tags + for obj_type in Correlate_object.get_all_objects(): + if r_serv_tags.exists('{}:{}'.format(obj_type, tag)): + tag_deleted = False + if tag_deleted: + r_serv_tags.srem('list_tags', 
def add_obj_tags(object_id, object_type, tags=[], galaxy_tags=[]):
    '''
    Tag an object with taxonomy tags and galaxy tags.

    Empty tag strings are skipped. Processing stops at the first tag whose
    taxonomy/galaxy is not enabled; tags handled before that point stay applied.

    :param object_id: id of the object to tag
    :param object_type: object type, forwarded to get_obj_date() and add_tag()
    :param tags: taxonomy tags (only iterated, never mutated)
    :param galaxy_tags: galaxy tags (only iterated, never mutated)

    :return: None on success, (error dict, 400) for the first disabled tag
    '''
    # resolve the date once and reuse it for every tag of this object
    obj_date = get_obj_date(object_type, object_id)
    for tag in tags:
        if tag:
            taxonomie = get_taxonomie_from_tag(tag)
            if is_taxonomie_tag_enabled(taxonomie, tag):
                add_tag(object_type, tag, object_id, obj_date=obj_date)
            else:
                return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400)

    for tag in galaxy_tags:
        if tag:
            galaxy = get_galaxy_from_tag(tag)
            if is_galaxy_tag_enabled(galaxy, tag):
                add_tag(object_type, tag, object_id, obj_date=obj_date)
            else:
                return ({'status': 'error', 'reason': 'Tags or Galaxy not enabled', 'value': tag}, 400)

# TEMPLATE + API QUERY
def api_add_obj_tags(tags=[], galaxy_tags=[], object_id=None, object_type="item"):
    '''
    Validate a tagging request and apply it to one object.

    :param tags: taxonomy tags to add
    :param galaxy_tags: galaxy tags to add
    :param object_id: id of the object to tag
    :param object_type: 'item' or 'domain' ('paste' is accepted as 'item')

    :return: (dict, http status code) tuple:
             ({'tags': [...], 'id': object_id, 'type': object_type}, 200) on success,
             ({'status': 'error', 'reason': ...}, 400 or 404) on error
    '''
    if object_id is None:
        return ({'status': 'error', 'reason': 'object_id id not found'}, 404)

    # Drop empty entries first: callers that build these lists with
    # str.split(',') send [''] when the user submitted nothing.
    tags = [tag for tag in (tags or []) if tag]
    galaxy_tags = [tag for tag in (galaxy_tags or []) if tag]
    if not tags and not galaxy_tags:
        return ({'status': 'error', 'reason': 'Tags or Galaxy not specified'}, 400)

    # NOTE(review): the previous tagging API named items 'paste'; it is mapped
    # to 'item' here so older callers keep working - confirm that no caller
    # relies on a distinct 'paste' object type.
    if object_type == 'paste':
        object_type = 'item'
    if object_type not in ('item', 'domain'): # # TODO: put me in another file
        return ({'status': 'error', 'reason': 'Incorrect object_type'}, 400)

    # forward the requested tags (not empty lists), otherwise nothing is tagged
    res = add_obj_tags(object_id, object_type, tags=tags, galaxy_tags=galaxy_tags)
    if res:
        return res

    res_dict = {}
    res_dict['tags'] = tags + galaxy_tags
    res_dict['id'] = object_id
    res_dict['type'] = object_type
    return (res_dict, 200)
def delete_tag(object_type, tag, object_id, obj_date=None):
    '''
    Remove one tag from one object and refresh the tag bookkeeping.

    :param object_type: object type
    :param tag: tag to remove
    :param object_id: id of the tagged object
    :param obj_date: object date; resolved with get_obj_date() when not given

    :return: None on success, (error dict, 400) when the object does not carry the tag
    '''
    # tag exist
    if is_obj_tagged(object_id, tag):
        if not obj_date:
            obj_date = get_obj_date(object_type, object_id)
        delete_obj_tag(object_type, object_id, tag, obj_date)
        update_tag_metadata(tag, obj_date, object_type=object_type, add_tag=False)
        update_tag_global_by_obj_type(object_type, tag)
    else:
        return ({'status': 'error', 'reason': 'object id or tag not found', 'value': tag}, 400)

# API QUERY
def api_delete_obj_tags(tags=[], object_id=None, object_type="item"):
    '''
    Validate a tag removal request and apply it to one object.

    :param tags: list of tags to remove; a single tag string is also accepted
    :param object_id: id of the tagged object
    :param object_type: object type, forwarded to delete_obj_tags()

    :return: (dict, http status code) tuple:
             ({'tags': [...], 'id': object_id}, 200) on success,
             ({'status': 'error', 'reason': ...}, 400 or 404) on error
    '''
    if not object_id:
        return ({'status': 'error', 'reason': 'object id not found'}, 404)

    # A bare string would otherwise be iterated character by character.
    if isinstance(tags, str):
        tags = [tags]
    # ignore empty entries
    tags = [tag for tag in (tags or []) if tag]
    if not tags:
        return ({'status': 'error', 'reason': 'No Tag(s) specified'}, 400)

    # forward the requested tags (not an empty list), otherwise nothing is removed
    res = delete_obj_tags(object_id, object_type, tags=tags)
    if res:
        return res

    dict_res = {}
    dict_res['tags'] = tags
    dict_res['id'] = object_id
    return (dict_res, 200)

def delete_obj_tags(object_id, object_type, tags=[]):
    '''
    Remove several tags from one object.

    Stops at the first tag that cannot be removed; tags removed before that
    point stay removed.

    :param object_id: id of the tagged object
    :param object_type: object type
    :param tags: tags to remove (only iterated, never mutated)

    :return: None on success, else the error tuple returned by delete_tag()
    '''
    # resolve the date once and reuse it for every tag of this object
    obj_date = get_obj_date(object_type, object_id)
    for tag in tags:
        res = delete_tag(object_type, tag, object_id, obj_date=obj_date)
        if res:
            return res
tag, 1) - - tag_first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - if tag_first_seen is None: - tag_first_seen = 99999999 - else: - tag_first_seen = int(tag_first_seen) - tag_last_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - if tag_last_seen is None: - tag_last_seen = 0 - else: - tag_last_seen = int(tag_last_seen) - - #add new tag in list of all used tags - r_serv_tags.sadd('list_tags', tag) - - # update fisrt_seen/last_seen - if item_date < tag_first_seen: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', item_date) - - # update metadata last_seen - if item_date > tag_last_seen: - r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', item_date) - def verify_extention_filename(filename): if not '.' in filename: return True diff --git a/update/v2.4/Update_domain.py b/update/v2.4/Update_domain.py index 8554460c..addaedb6 100755 --- a/update/v2.4/Update_domain.py +++ b/update/v2.4/Update_domain.py @@ -30,7 +30,7 @@ def update_domain_by_item(domain_obj, item_id): # update domain tags for tag in Tag.get_obj_tag(item_id): if tag != 'infoleak:submission="crawler"' and tag != 'infoleak:submission="manual"': - Tag.add_domain_tag(tag, domain_name, Item.get_item_date(item_id)) + Tag.add_tag("domain", tag, domain_name, obj_date=Item.get_item_date(item_id)) # update domain correlation item_correlation = Item.get_item_all_correlation(item_id) diff --git a/var/www/blueprints/tags.py b/var/www/blueprints/tags.py index 2e7d42b4..9f38e860 100644 --- a/var/www/blueprints/tags.py +++ b/var/www/blueprints/tags.py @@ -43,6 +43,28 @@ tags_ui = Blueprint('tags', __name__, template_folder=os.path.join(os.environ['A # ============= ROUTES ============== +# @Tags.route("/Tags/addTags") # REVIEW: # used in showPaste +# @login_required +# @login_analyst +# def addTags(): +# +# tags = request.args.get('tags') +# tagsgalaxies = request.args.get('tagsgalaxies') +# path = request.args.get('path') +# +# list_tag = 
tags.split(',') +# list_tag_galaxies = tagsgalaxies.split(',') +# +# res = Tag.add_items_tags(list_tag, list_tag_galaxies, item_id=path) +# print(res) +# # error +# if res[1] != 200: +# return str(res[0]) +# # success +# return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) + + + @tags_ui.route('/tag/add_tags') @login_required @login_analyst @@ -56,12 +78,14 @@ def add_tags(): list_tag = tags.split(',') list_tag_galaxies = tagsgalaxies.split(',') - res = Tag.add_items_tags(tags=list_tag, galaxy_tags=list_tag_galaxies, item_id=object_id, item_type=item_type) + res = Tag.api_add_obj_tags(tags=list_tag, galaxy_tags=list_tag_galaxies, object_id=object_id, object_type=object_type) # error if res[1] != 200: return str(res[0]) # success + # # TODO: use object FUNCTIONS + if object_type=='domain': return redirect(url_for('crawler_splash.showDomain', domain=object_id)) else: diff --git a/var/www/modules/Tags/Flask_Tags.py b/var/www/modules/Tags/Flask_Tags.py index 9d4026c3..d9060912 100644 --- a/var/www/modules/Tags/Flask_Tags.py +++ b/var/www/modules/Tags/Flask_Tags.py @@ -375,9 +375,9 @@ def remove_tag(): path = request.args.get('paste') tag = request.args.get('tag') - res = Tag.remove_item_tag(tag, path) + res = Tag.api_delete_obj_tags(tags=tag, object_id=path, object_type="item") if res[1] != 200: - str(res[0]) + return str(res[0]) return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) @Tags.route("/Tags/confirm_tag") @@ -390,7 +390,7 @@ def confirm_tag(): tag = request.args.get('tag') if(tag[9:28] == 'automatic-detection'): - Tag.remove_item_tag(tag, path) + Tag.api_delete_obj_tags(tags=tag, object_id=path, object_type="item") tag = tag.replace('automatic-detection','analyst-detection', 1) #add analyst tag @@ -422,26 +422,6 @@ def tag_validation(): else: return 'input error' -@Tags.route("/Tags/addTags") # REVIEW: # used in showPaste -@login_required -@login_analyst -def addTags(): - - tags = request.args.get('tags') - tagsgalaxies = 
request.args.get('tagsgalaxies') - path = request.args.get('path') - - list_tag = tags.split(',') - list_tag_galaxies = tagsgalaxies.split(',') - - res = Tag.add_items_tags(list_tag, list_tag_galaxies, item_id=path) - print(res) - # error - if res[1] != 200: - return str(res[0]) - # success - return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) - @Tags.route("/Tags/taxonomies") @login_required @login_read_only diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py index 8f777790..308e8146 100644 --- a/var/www/modules/restApi/Flask_restApi.py +++ b/var/www/modules/restApi/Flask_restApi.py @@ -246,11 +246,11 @@ def add_item_tags(): if not data: return Response(json.dumps({'status': 'error', 'reason': 'Malformed JSON'}, indent=2, sort_keys=True), mimetype='application/json'), 400 - item_id = data.get('id', None) + object_id = data.get('id', None) tags = data.get('tags', []) galaxy = data.get('galaxy', []) - res = Tag.add_items_tag(tags, galaxy, item_id) + res = Tag.api_add_obj_tags(tags=tags, galaxy_tags=galaxy, object_id=object_id, object_type="item") return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # @@ -275,10 +275,10 @@ def delete_item_tags(): if not data: return Response(json.dumps({'status': 'error', 'reason': 'Malformed JSON'}, indent=2, sort_keys=True), mimetype='application/json'), 400 - item_id = data.get('id', None) + object_id = data.get('id', None) tags = data.get('tags', []) - res = Tag.remove_item_tags(tags, item_id) + res = Tag.api_delete_obj_tags(tags=tags, object_id=object_id, object_type="item") return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1] # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #