diff --git a/OVERVIEW.md b/OVERVIEW.md index 40eefa41..f677da42 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -198,8 +198,6 @@ Redis and ARDB overview ##### Hset: | Key | Field | Value | | ------ | ------ | ------ | -| per_paste_**epoch** | **term** | **nb_seen** | -| | | | tag_metadata:**tag** | first_seen | **date** | | tag_metadata:**tag** | last_seen | **date** | @@ -207,13 +205,20 @@ Redis and ARDB overview | Key | Value | | ------ | ------ | | list_tags | **tag** | +| list_tags:**object_type** | **tag** | +| list_tags:domain | **tag** | +|| | active_taxonomies | **taxonomie** | | active_galaxies | **galaxie** | | active_tag_**taxonomie or galaxy** | **tag** | | synonym_tag_misp-galaxy:**galaxy** | **tag synonym** | | list_export_tags | **user_tag** | +|| | **tag**:**date** | **paste** | - +| **object_type**:**tag** | **object_id** | +|| +| DB7 | +| tag:**object_id** | **tag** | ##### old: | Key | Value | diff --git a/bin/Tags.py b/bin/Tags.py index a707d259..3f0dc44f 100755 --- a/bin/Tags.py +++ b/bin/Tags.py @@ -8,29 +8,11 @@ The Tags Module This module create tags. """ -import redis - import time -import datetime from pubsublogger import publisher from Helper import Process -from packages import Paste -from packages import Item - - -def get_item_date(item_filename): - l_directory = item_filename.split('/') - return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2]) - -def set_tag_metadata(tag, date): - # First time we see this tag ## TODO: filter paste from the paste ? 
- if not server.hexists('tag_metadata:{}'.format(tag), 'first_seen'): - server.hset('tag_metadata:{}'.format(tag), 'first_seen', date) - # Check and Set tag last_seen - last_seen = server.hget('tag_metadata:{}'.format(tag), 'last_seen') - if last_seen is None or date > last_seen: - server.hset('tag_metadata:{}'.format(tag), 'last_seen', date) +from packages import Tag if __name__ == '__main__': @@ -45,18 +27,6 @@ if __name__ == '__main__': # Setup the I/O queues p = Process(config_section) - server = redis.StrictRedis( - host=p.config.get("ARDB_Tags", "host"), - port=p.config.get("ARDB_Tags", "port"), - db=p.config.get("ARDB_Tags", "db"), - decode_responses=True) - - server_metadata = redis.StrictRedis( - host=p.config.get("ARDB_Metadata", "host"), - port=p.config.get("ARDB_Metadata", "port"), - db=p.config.get("ARDB_Metadata", "db"), - decode_responses=True) - # Sent to the logging a description of the module publisher.info("Tags module started") @@ -71,27 +41,7 @@ if __name__ == '__main__': continue else: - tag, path = message.split(';') - # add the tag to the tags word_list - res = server.sadd('list_tags', tag) - if res == 1: - print("new tags added : {}".format(tag)) - # add the path to the tag set - #curr_date = datetime.date.today().strftime("%Y%m%d") - item_date = get_item_date(path) - res = server.sadd('{}:{}'.format(tag, item_date), path) - if res == 1: - print("new paste: {}".format(path)) - print(" tagged: {}".format(tag)) - set_tag_metadata(tag, item_date) - server_metadata.sadd('tag:{}'.format(path), tag) + tag, item_id = message.split(';') - # Domain Object - if Item.is_crawled(path) and tag!='infoleak:submission="crawler"': - domain = Item.get_item_domain(path) - server_metadata.sadd('tag:{}'.format(domain), tag) - server.sadd('domain:{}:{}'.format(tag, item_date), domain) - - curr_date = datetime.date.today().strftime("%Y%m%d") - server.hincrby('daily_tags:{}'.format(item_date), tag, 1) + Tag.add_tag("item", tag, item_id) p.populate_set_out(message, 
'MISP_The_Hive_feeder') diff --git a/bin/lib/Domain.py b/bin/lib/Domain.py index c2ab508e..e6cd5472 100755 --- a/bin/lib/Domain.py +++ b/bin/lib/Domain.py @@ -292,7 +292,7 @@ def get_domain_items_crawled(domain, domain_type, port, epoch=None, items_link=F if item_screenshot: dict_item['screenshot'] = Item.get_item_screenshot(item) if item_tag: - dict_item['tags'] = Tag.get_item_tags_minimal(item) + dict_item['tags'] = Tag.get_obj_tags_minimal(item) item_crawled['items'].append(dict_item) return item_crawled @@ -365,7 +365,7 @@ def get_domain_tags(domain): :param domain: crawled domain ''' - return Tag.get_item_tags(domain) + return Tag.get_obj_tag(domain) def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False): ''' diff --git a/bin/lib/Screenshot.py b/bin/lib/Screenshot.py index fa85c5f2..d646d839 100755 --- a/bin/lib/Screenshot.py +++ b/bin/lib/Screenshot.py @@ -43,13 +43,16 @@ def get_screenshot_items_list(sha256_string): else: return [] +def get_item_screenshot(item_id): + return r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot') + def get_item_screenshot_list(item_id): ''' Retun all decoded item of a given item id. 
:param item_id: item id ''' - screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot') + screenshot = get_item_screenshot(item_id) if screenshot: return [screenshot] else: diff --git a/bin/packages/Item.py b/bin/packages/Item.py index 156115b0..b1722209 100755 --- a/bin/packages/Item.py +++ b/bin/packages/Item.py @@ -104,7 +104,7 @@ def get_item(request_dict): dict_item['date'] = get_item_date(item_id, add_separator=add_separator) tags = request_dict.get('tags', True) if tags: - dict_item['tags'] = Tag.get_item_tags(item_id) + dict_item['tags'] = Tag.get_obj_tag(item_id) size = request_dict.get('size', False) if size: @@ -242,7 +242,7 @@ def get_item_pgp_correlation(item_id): def get_item_list_desc(list_item_id): desc_list = [] for item_id in list_item_id: - desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_item_tags(item_id)} ) + desc_list.append( {'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_obj_tag(item_id)} ) return desc_list # # TODO: add an option to check the tag diff --git a/bin/packages/Tag.py b/bin/packages/Tag.py index d523bb9e..1cc56fcd 100755 --- a/bin/packages/Tag.py +++ b/bin/packages/Tag.py @@ -4,6 +4,7 @@ import os import sys import redis +import datetime import Date import Item @@ -35,6 +36,19 @@ def build_unsafe_tags(): # set of unsafe tags unsafe_tags = build_unsafe_tags() +def is_tags_safe(ltags): + ''' + Check if a list of tags contain an unsafe tag (CE, ...) 
+ + :param ltags: list of tags + :type ltags: list + :return: is a tag in the unsafe set + :rtype: boolean + ''' + return unsafe_tags.isdisjoint(ltags) + +#### Taxonomies - Galaxies #### + def get_taxonomie_from_tag(tag): return tag.split(':')[0] @@ -105,21 +119,7 @@ def is_valid_tags_taxonomies_galaxy(list_tags, list_tags_galaxy): return False return True -def get_tag_metadata(tag): - first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'first_seen') - last_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') - return {'tag': tag, 'first_seen': first_seen, 'last_seen': last_seen} - -def is_tags_safe(ltags): - ''' - Check if a list of tags contain an unsafe tag (CE, ...) - - :param ltags: list of tags - :type ltags: list - :return: is a tag in the unsafe set - :rtype: boolean - ''' - return unsafe_tags.isdisjoint(ltags) +#### #### def is_tag_in_all_tag(tag): if r_serv_tags.sismember('list_tags', tag): @@ -127,20 +127,6 @@ def is_tag_in_all_tag(tag): else: return False -def get_all_tags(): - return list(r_serv_tags.smembers('list_tags')) - -def get_item_tags(item_id): - ''' - Retun all the tags of a given item. 
- :param item_id: (Paste or domain) - ''' - tags = r_serv_metadata.smembers('tag:{}'.format(item_id)) - if tags: - return list(tags) - else: - return [] - def get_min_tag(tag): tag = tag.split('=') if len(tag) > 1: @@ -154,8 +140,8 @@ def get_min_tag(tag): tag = tag[0] return tag -def get_item_tags_minimal(item_id): - return [ {"tag": tag, "min_tag": get_min_tag(tag)} for tag in get_item_tags(item_id) ] +def get_obj_tags_minimal(item_id): + return [ {"tag": tag, "min_tag": get_min_tag(tag)} for tag in get_obj_tag(item_id) ] def unpack_str_tags_list(str_tags_list): str_tags_list = str_tags_list.replace('"','\"') @@ -165,7 +151,7 @@ def unpack_str_tags_list(str_tags_list): return [] -# TEMPLATE + API QUERY +# TEMPLATE + API QUERY # # TODO: # REVIEW: def add_items_tag(tags=[], galaxy_tags=[], item_id=None): ## TODO: remove me res_dict = {} if item_id == None: @@ -193,6 +179,8 @@ def add_items_tag(tags=[], galaxy_tags=[], item_id=None): ## TODO: remove me res_dict['id'] = item_id return (res_dict, 200) +def api_add_obj_tags(tags=[], galaxy_tags=[], object_id=None, object_type="item"): + pass # TEMPLATE + API QUERY def add_items_tags(tags=[], galaxy_tags=[], item_id=None, item_type="paste"): @@ -318,6 +306,80 @@ def remove_item_tag(tag, item_id): else: return ({'status': 'error', 'reason': 'Item id or tag not found'}, 400) + +# used by modal +def get_modal_add_tags(item_id, object_type='item'): + ''' + Modal: add tags to domain or Paste + ''' + return {"active_taxonomies": get_active_taxonomies(), "active_galaxies": get_active_galaxies(), + "object_id": item_id, "object_type": object_type} + +######## NEW VERSION ######## +def get_tag_first_seen(tag, r_int=False): + ''' + Get tag first seen (current: item only) + ''' + res = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'first_seen') + if r_int: + if res is None: + return 99999999 + else: + return int(res) + return res + +def get_tag_last_seen(tag, r_int=False): + ''' + Get tag last seen (current: item only) + ''' + res 
= r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen') + if r_int: + if res is None: + return 0 + else: + return int(res) + return res + +def get_tag_metadata(tag, r_int=False): + ''' + Get tag metadata (current: item only) + ''' + tag_metadata = {"tag": tag} + tag_metadata['first_seen'] = get_tag_first_seen(tag) + tag_metadata['last_seen'] = get_tag_last_seen(tag) + return tag_metadata + +def is_obj_tagged(object_id, tag): + ''' + Check if a object is tagged + + :param object_id: object id + :type object_id: str + :param tag: tag + :type tag: str + + :return: is object tagged + :rtype: boolean + ''' + return r_serv_metadata.sismember('tag:{}'.format(object_id), tag) + +def get_all_tags(): + return list(r_serv_tags.smembers('list_tags')) + +def get_all_obj_tags(object_type): + return list(r_serv_tags.smembers('list_tags:{}'.format(object_type))) + +def get_obj_tag(object_id): + ''' + Return all the tags of a given object. + :param object_id: (item_id, domain, ...) + ''' + res = r_serv_metadata.smembers('tag:{}'.format(object_id)) + if res: + return list(res) + else: + return [] + def update_tag_first_seen(tag, tag_first_seen, tag_last_seen): if tag_first_seen == tag_last_seen: if r_serv_tags.scard('{}:{}'.format(tag, tag_first_seen)) > 0: @@ -350,11 +412,88 @@ def update_tag_last_seen(tag, tag_first_seen, tag_last_seen): tag_last_seen = Date.date_substract_day(tag_last_seen) update_tag_last_seen(tag, tag_first_seen, tag_last_seen) +def update_tag_metadata(tag, tag_date, object_type=None, add_tag=True): + ''' + Update tag metadata (current: item only) + ''' + if object_type=="item": + # get tag metadata (values as returned by redis, None when the tag has no metadata yet) + tag_metadata = get_tag_metadata(tag) + ############# + ## ADD tag ## + if add_tag: + # update first_seen (None: first time this tag is seen) + if tag_metadata['first_seen'] is None or tag_date < tag_metadata['first_seen']: + r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', tag_date) + # update last_seen (None: first time this tag is seen) + if tag_metadata['last_seen'] is None or tag_date > tag_metadata['last_seen']: + r_serv_tags.hset('tag_metadata:{}'.format(tag), 
'last_seen', tag_date) + ################ + ## REMOVE tag ## + else: + if tag_date == tag_metadata['first_seen']: + update_tag_first_seen(tag, tag_metadata['first_seen'], tag_metadata['last_seen']) + if tag_date == tag_metadata['last_seen']: + update_tag_last_seen(tag, tag_metadata['first_seen'], tag_metadata['last_seen']) -# used by modal -def get_modal_add_tags(item_id, tag_type='paste'): +def add_global_tag(tag, object_type=None): ''' - Modal: add tags to domain or Paste + Create a set of all tags used in AIL (all + by object) + + :param tag: tag + :type domain: str + :param object_type: object type + :type domain: str ''' - return {"active_taxonomies": get_active_taxonomies(), "active_galaxies": get_active_galaxies(), - "item_id": item_id, "type": tag_type} + r_serv_tags.sadd('list_tags', tag) + if object_type: + r_serv_tags.sadd('list_tags:{}'.format(object_type), tag) + +def add_obj_tag(object_type, object_id, tag, obj_date): + if object_type=="item": # # TODO: # FIXME: # REVIEW: rename me !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ # add tag + r_serv_metadata.sadd('tag:{}'.format(object_id), tag) + r_serv_tags.sadd('{}:{}'.format(tag, obj_date), object_id) + + # add domain tag + if Item.is_crawled(object_id) and tag!='infoleak:submission="crawler"': + domain = Item.get_item_domain(object_id) + add_tag("domain", tag, domain) + else: + r_serv_metadata.sadd('tag:{}'.format(object_id), tag) + r_serv_tags.sadd('{}:{}'.format(object_type, tag), object_id) + +def add_tag(object_type, tag, object_id): + # new tag + if not is_obj_tagged(object_id, tag): + # # TODO: # FIXME: sanitize object_type + obj_date = get_obj_date(object_type, object_id) + add_global_tag(tag, object_type=object_type) + add_obj_tag(object_type, object_id, tag, obj_date) + update_tag_metadata(tag, obj_date, object_type=object_type) + + # create tags stats # # TODO: put me in cache + r_serv_tags.hincrby('daily_tags:{}'.format(datetime.date.today().strftime("%Y%m%d")), tag, 1) + +def delete_obj_tag(object_type, object_id, tag, obj_date): + if object_type=="item": # # TODO: # FIXME: # REVIEW: rename me !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+ obj_date = get_obj_date(object_type, object_id) + r_serv_metadata.srem('tag:{}'.format(object_id), tag) + r_serv_tags.srem('{}:{}'.format(tag, obj_date), object_id) + else: + r_serv_metadata.srem('tag:{}'.format(object_id), tag) + r_serv_tags.srem('{}:{}'.format(object_type, tag), object_id) + +def delete_tag(object_type, tag, object_id): + # tag exist + if is_obj_tagged(object_id, tag): + obj_date = get_obj_date(object_type, object_id) + delete_obj_tag(object_type, object_id, tag, obj_date) + update_tag_metadata(tag, obj_date, object_type=object_type, add_tag=False) + + +def get_obj_date(object_type, object_id): # # TODO: move me in another file + REVIEW + if object_type == "item": + return Item.get_item_date(object_id) + else: + return None diff --git a/tests/testApi.py b/tests/testApi.py index db65cdec..6d9fc191 100644 --- a/tests/testApi.py +++ b/tests/testApi.py @@ -128,7 +128,7 @@ class TestApiV1(unittest.TestCase): # POST api/v1/add/item/tag def test_0007_api_add_item_tag(self): tags_to_add = ["infoleak:analyst-detection=\"api-key\""] - current_item_tag = Tag.get_item_tags(self.__class__.item_id) + current_item_tag = Tag.get_obj_tag(self.__class__.item_id) current_item_tag.append(tags_to_add[0]) #galaxy_to_add = ["misp-galaxy:stealer=\"Vidar\""] @@ -138,7 +138,7 @@ class TestApiV1(unittest.TestCase): item_tags = req_json['tags'] self.assertEqual(item_tags, tags_to_add) - new_item_tag = Tag.get_item_tags(self.__class__.item_id) + new_item_tag = Tag.get_obj_tag(self.__class__.item_id) self.assertCountEqual(new_item_tag, current_item_tag) # DELETE api/v1/delete/item/tag @@ -149,7 +149,7 @@ class TestApiV1(unittest.TestCase): req_json = parse_response(self, req) item_tags = req_json['tags'] self.assertCountEqual(item_tags, tags_to_delete) - current_item_tag = Tag.get_item_tags(self.__class__.item_id) + current_item_tag = Tag.get_obj_tag(self.__class__.item_id) if tags_to_delete[0] in current_item_tag: self.fail('Tag no deleted') diff --git 
a/update/v2.4/Update_domain.py b/update/v2.4/Update_domain.py index 94d3407a..8554460c 100755 --- a/update/v2.4/Update_domain.py +++ b/update/v2.4/Update_domain.py @@ -28,7 +28,7 @@ def update_update_stats(): def update_domain_by_item(domain_obj, item_id): domain_name = domain_obj.get_domain_name() # update domain tags - for tag in Tag.get_item_tags(item_id): + for tag in Tag.get_obj_tag(item_id): if tag != 'infoleak:submission="crawler"' and tag != 'infoleak:submission="manual"': Tag.add_domain_tag(tag, domain_name, Item.get_item_date(item_id)) diff --git a/var/www/blueprints/crawler_splash.py b/var/www/blueprints/crawler_splash.py index bb728a65..07f9ab60 100644 --- a/var/www/blueprints/crawler_splash.py +++ b/var/www/blueprints/crawler_splash.py @@ -73,4 +73,4 @@ def showDomain(): dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items']) return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label, - modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], tag_type="domain")) + modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], object_type="domain")) diff --git a/var/www/blueprints/tags.py b/var/www/blueprints/tags.py new file mode 100644 index 00000000..2e7d42b4 --- /dev/null +++ b/var/www/blueprints/tags.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +# -*-coding:UTF-8 -* + +''' + Blueprint Flask: crawler splash endpoints: dashboard, onion crawler ... 
+''' + +import os +import sys +import json +import random + +from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response +from flask_login import login_required, current_user, login_user, logout_user + +sys.path.append('modules') +import Flask_config + +# Import Role_Manager +from Role_Manager import create_user_db, check_password_strength, check_user_role_integrity +from Role_Manager import login_admin, login_analyst + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) +import Date +import Tag + +sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib')) +import Domain + +r_cache = Flask_config.r_cache +r_serv_db = Flask_config.r_serv_db +r_serv_tags = Flask_config.r_serv_tags +bootstrap_label = Flask_config.bootstrap_label + +# ============ BLUEPRINT ============ +tags_ui = Blueprint('tags', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/tags')) + +# ============ VARIABLES ============ + + + +# ============ FUNCTIONS ============ + + +# ============= ROUTES ============== +@tags_ui.route('/tag/add_tags') +@login_required +@login_analyst +def add_tags(): + + tags = request.args.get('tags') + tagsgalaxies = request.args.get('tagsgalaxies') + object_id = request.args.get('object_id') # old: item_id + object_type = request.args.get('object_type') # old type + + list_tag = tags.split(',') + list_tag_galaxies = tagsgalaxies.split(',') + + res = Tag.add_items_tags(tags=list_tag, galaxy_tags=list_tag_galaxies, item_id=object_id, item_type=object_type) + # error + if res[1] != 200: + return str(res[0]) + # success + + if object_type=='domain': + return redirect(url_for('crawler_splash.showDomain', domain=object_id)) + else: + return redirect(url_for('showsavedpastes.showsavedpaste', paste=object_id)) + + + + +# # add route : /crawlers/show_domain +# @tags_ui.route('/tags/search/domain') +# @login_required +# @login_analyst +# def showDomain(): +# date_from = request.args.get('date_from') +# 
date_to = request.args.get('date_to') +# tags = request.args.get('ltags') +# +# print(date_from) +# print(date_to) +# +# dates = Date.sanitise_date_range(date_from, date_to) +# +# if tags is None: +# return 'tags_none' +# #return render_template("Tags.html", date_from=dates['date_from'], date_to=dates['date_to']) +# else: +# tags = Tag.unpack_str_tags_list(tags) +# +# +# +# +# return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label, +# tag_type="domain")) diff --git a/var/www/modules/Tags/Flask_Tags.py b/var/www/modules/Tags/Flask_Tags.py index cd97e99d..9d4026c3 100644 --- a/var/www/modules/Tags/Flask_Tags.py +++ b/var/www/modules/Tags/Flask_Tags.py @@ -422,7 +422,7 @@ def tag_validation(): else: return 'input error' -@Tags.route("/Tags/addTags") +@Tags.route("/Tags/addTags") # REVIEW: # used in showPaste @login_required @login_analyst def addTags(): @@ -442,29 +442,6 @@ def addTags(): # success return redirect(url_for('showsavedpastes.showsavedpaste', paste=path)) -@Tags.route("/Tags/add_item_tags") -@login_required -@login_analyst -def add_item_tags(): - - tags = request.args.get('tags') - tagsgalaxies = request.args.get('tagsgalaxies') - item_id = request.args.get('item_id') - item_type = request.args.get('type') - - list_tag = tags.split(',') - list_tag_galaxies = tagsgalaxies.split(',') - - res = Tag.add_items_tags(tags=list_tag, galaxy_tags=list_tag_galaxies, item_id=item_id, item_type=item_type) - # error - if res[1] != 200: - return str(res[0]) - # success - if item_type=='domain': - return redirect(url_for('crawler_splash.showDomain', domain=item_id)) - else: - return redirect(url_for('showsavedpastes.showsavedpaste', paste=item_id)) - @Tags.route("/Tags/taxonomies") @login_required @login_read_only diff --git a/var/www/templates/modals/add_tags.html b/var/www/templates/modals/add_tags.html index 98cb5479..e2a4f7a0 100644 --- a/var/www/templates/modals/add_tags.html +++ b/var/www/templates/modals/add_tags.html @@ 
-126,6 +126,6 @@ jQuery("#all-tags-galaxies").click(function(e){ function addTags() { var tags = ltags.getValue() var tagsgalaxy = ltagsgalaxies.getValue() - window.location.replace("{{ url_for('Tags.add_item_tags') }}?tags=" + tags + "&tagsgalaxies=" + tagsgalaxy + "&item_id={{ modal_add_tags['item_id'] }}&type={{ modal_add_tags['type'] }}"); + window.location.replace("{{ url_for('tags.add_tags') }}?tags=" + tags + "&tagsgalaxies=" + tagsgalaxy + "&object_id={{ modal_add_tags['object_id'] }}&object_type={{ modal_add_tags['object_type'] }}"); }