chg: [Tag core] objects tagging, Part 1/2

pull/449/head
Terrtia 2020-01-06 17:07:52 +01:00
parent 354a4fef7d
commit 2be06973ee
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
12 changed files with 299 additions and 128 deletions

View File

@ -198,8 +198,6 @@ Redis and ARDB overview
##### Hset:
| Key | Field | Value |
| ------ | ------ | ------ |
| per_paste_**epoch** | **term** | **nb_seen** |
| | |
| tag_metadata:**tag** | first_seen | **date** |
| tag_metadata:**tag** | last_seen | **date** |
@ -207,13 +205,20 @@ Redis and ARDB overview
| Key | Value |
| ------ | ------ |
| list_tags | **tag** |
| list_tags:**object_type** | **tag** |
| list_tags:domain | **tag** |
||
| active_taxonomies | **taxonomie** |
| active_galaxies | **galaxie** |
| active_tag_**taxonomie or galaxy** | **tag** |
| synonym_tag_misp-galaxy:**galaxy** | **tag synonym** |
| list_export_tags | **user_tag** |
||
| **tag**:**date** | **paste** |
| **object_type**:**tag** | **object_id** |
||
| DB7 |
| tag:**object_id** | **tag** |
##### old:
| Key | Value |

View File

@ -8,29 +8,11 @@ The Tags Module
This module creates tags.
"""
import redis
import time
import datetime
from pubsublogger import publisher
from Helper import Process
from packages import Paste
from packages import Item
def get_item_date(item_filename):
l_directory = item_filename.split('/')
return '{}{}{}'.format(l_directory[-4], l_directory[-3], l_directory[-2])
def set_tag_metadata(tag, date):
# First time we see this tag ## TODO: filter paste from the paste ?
if not server.hexists('tag_metadata:{}'.format(tag), 'first_seen'):
server.hset('tag_metadata:{}'.format(tag), 'first_seen', date)
# Check and Set tag last_seen
last_seen = server.hget('tag_metadata:{}'.format(tag), 'last_seen')
if last_seen is None or date > last_seen:
server.hset('tag_metadata:{}'.format(tag), 'last_seen', date)
from packages import Tag
if __name__ == '__main__':
@ -45,18 +27,6 @@ if __name__ == '__main__':
# Setup the I/O queues
p = Process(config_section)
server = redis.StrictRedis(
host=p.config.get("ARDB_Tags", "host"),
port=p.config.get("ARDB_Tags", "port"),
db=p.config.get("ARDB_Tags", "db"),
decode_responses=True)
server_metadata = redis.StrictRedis(
host=p.config.get("ARDB_Metadata", "host"),
port=p.config.get("ARDB_Metadata", "port"),
db=p.config.get("ARDB_Metadata", "db"),
decode_responses=True)
# Sent to the logging a description of the module
publisher.info("Tags module started")
@ -71,27 +41,7 @@ if __name__ == '__main__':
continue
else:
tag, path = message.split(';')
# add the tag to the tags word_list
res = server.sadd('list_tags', tag)
if res == 1:
print("new tags added : {}".format(tag))
# add the path to the tag set
#curr_date = datetime.date.today().strftime("%Y%m%d")
item_date = get_item_date(path)
res = server.sadd('{}:{}'.format(tag, item_date), path)
if res == 1:
print("new paste: {}".format(path))
print(" tagged: {}".format(tag))
set_tag_metadata(tag, item_date)
server_metadata.sadd('tag:{}'.format(path), tag)
tag, item_id = message.split(';')
# Domain Object
if Item.is_crawled(path) and tag!='infoleak:submission="crawler"':
domain = Item.get_item_domain(path)
server_metadata.sadd('tag:{}'.format(domain), tag)
server.sadd('domain:{}:{}'.format(tag, item_date), domain)
curr_date = datetime.date.today().strftime("%Y%m%d")
server.hincrby('daily_tags:{}'.format(item_date), tag, 1)
Tag.add_tag("item", tag, item_id)
p.populate_set_out(message, 'MISP_The_Hive_feeder')

View File

@ -292,7 +292,7 @@ def get_domain_items_crawled(domain, domain_type, port, epoch=None, items_link=F
if item_screenshot:
dict_item['screenshot'] = Item.get_item_screenshot(item)
if item_tag:
dict_item['tags'] = Tag.get_item_tags_minimal(item)
dict_item['tags'] = Tag.get_obj_tags_minimal(item)
item_crawled['items'].append(dict_item)
return item_crawled
@ -365,7 +365,7 @@ def get_domain_tags(domain):
:param domain: crawled domain
'''
return Tag.get_item_tags(domain)
return Tag.get_obj_tag(domain)
def get_domain_metadata(domain, domain_type, first_seen=True, last_ckeck=True, status=True, ports=True, tags=False):
'''

View File

@ -43,13 +43,16 @@ def get_screenshot_items_list(sha256_string):
else:
return []
def get_item_screenshot(item_id):
    '''
    Return the 'screenshot' field stored in the paste_metadata hash of an item.

    :param item_id: item id
    '''
    metadata_key = 'paste_metadata:{}'.format(item_id)
    return r_serv_metadata.hget(metadata_key, 'screenshot')
def get_item_screenshot_list(item_id):
'''
Retun all decoded item of a given item id.
:param item_id: item id
'''
screenshot = r_serv_metadata.hget('paste_metadata:{}'.format(item_id), 'screenshot')
screenshot = get_item_screenshot(item_id)
if screenshot:
return [screenshot]
else:

View File

@ -104,7 +104,7 @@ def get_item(request_dict):
dict_item['date'] = get_item_date(item_id, add_separator=add_separator)
tags = request_dict.get('tags', True)
if tags:
dict_item['tags'] = Tag.get_item_tags(item_id)
dict_item['tags'] = Tag.get_obj_tag(item_id)
size = request_dict.get('size', False)
if size:
@ -242,7 +242,7 @@ def get_item_pgp_correlation(item_id):
def get_item_list_desc(list_item_id):
    '''
    Build a short description (id, date, tags) for each item id.

    :param list_item_id: iterable of item ids

    :return: list of dicts with the keys 'id', 'date' and 'tags', in input order
    :rtype: list
    '''
    # exactly one entry per item id, tags are read through Tag.get_obj_tag
    return [{'id': item_id, 'date': get_item_date(item_id), 'tags': Tag.get_obj_tag(item_id)}
            for item_id in list_item_id]
# # TODO: add an option to check the tag

View File

@ -4,6 +4,7 @@
import os
import sys
import redis
import datetime
import Date
import Item
@ -35,6 +36,19 @@ def build_unsafe_tags():
# set of unsafe tags
unsafe_tags = build_unsafe_tags()
def is_tags_safe(ltags):
    '''
    Check that a list of tags contains no unsafe tag (CE, ...)

    :param ltags: list of tags
    :type ltags: list

    :return: True if none of the tags is in the unsafe set, False otherwise
    :rtype: boolean
    '''
    return unsafe_tags.isdisjoint(ltags)
#### Taxonomies - Galaxies ####
def get_taxonomie_from_tag(tag):
    '''
    Return the part of a tag located before its first ':'.

    :param tag: tag
    :type tag: str
    '''
    taxonomie, _, _ = tag.partition(':')
    return taxonomie
@ -105,21 +119,7 @@ def is_valid_tags_taxonomies_galaxy(list_tags, list_tags_galaxy):
return False
return True
def get_tag_metadata(tag):
first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'first_seen')
last_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')
return {'tag': tag, 'first_seen': first_seen, 'last_seen': last_seen}
def is_tags_safe(ltags):
'''
Check if a list of tags contain an unsafe tag (CE, ...)
:param ltags: list of tags
:type ltags: list
:return: is a tag in the unsafe set
:rtype: boolean
'''
return unsafe_tags.isdisjoint(ltags)
#### ####
def is_tag_in_all_tag(tag):
if r_serv_tags.sismember('list_tags', tag):
@ -127,20 +127,6 @@ def is_tag_in_all_tag(tag):
else:
return False
def get_all_tags():
return list(r_serv_tags.smembers('list_tags'))
def get_item_tags(item_id):
'''
Retun all the tags of a given item.
:param item_id: (Paste or domain)
'''
tags = r_serv_metadata.smembers('tag:{}'.format(item_id))
if tags:
return list(tags)
else:
return []
def get_min_tag(tag):
tag = tag.split('=')
if len(tag) > 1:
@ -154,8 +140,8 @@ def get_min_tag(tag):
tag = tag[0]
return tag
def get_item_tags_minimal(item_id):
return [ {"tag": tag, "min_tag": get_min_tag(tag)} for tag in get_item_tags(item_id) ]
def get_obj_tags_minimal(item_id):
return [ {"tag": tag, "min_tag": get_min_tag(tag)} for tag in get_obj_tag(item_id) ]
def unpack_str_tags_list(str_tags_list):
str_tags_list = str_tags_list.replace('"','\"')
@ -165,7 +151,7 @@ def unpack_str_tags_list(str_tags_list):
return []
# TEMPLATE + API QUERY
# TEMPLATE + API QUERY # # TODO: # REVIEW:
def add_items_tag(tags=[], galaxy_tags=[], item_id=None): ## TODO: remove me
res_dict = {}
if item_id == None:
@ -193,6 +179,8 @@ def add_items_tag(tags=[], galaxy_tags=[], item_id=None): ## TODO: remove me
res_dict['id'] = item_id
return (res_dict, 200)
def api_add_obj_tags(tags=[], galaxy_tags=[], object_id=None, object_type="item"):
    # Placeholder: no behaviour yet, the arguments are ignored and None is returned.
    pass
# TEMPLATE + API QUERY
def add_items_tags(tags=[], galaxy_tags=[], item_id=None, item_type="paste"):
@ -318,6 +306,80 @@ def remove_item_tag(tag, item_id):
else:
return ({'status': 'error', 'reason': 'Item id or tag not found'}, 400)
# used by modal
def get_modal_add_tags(item_id, object_type='item'):
    '''
    Modal: build the context used to add tags to an object (item, domain, ...)

    :param item_id: id of the object to tag
    :param object_type: type of the object to tag
    :type object_type: str

    :return: dict with the keys active_taxonomies, active_galaxies, object_id, object_type
    :rtype: dict
    '''
    return {"active_taxonomies": get_active_taxonomies(), "active_galaxies": get_active_galaxies(),
            "object_id": item_id, "object_type": object_type}
######## NEW VERSION ########
def get_tag_first_seen(tag, r_int=False):
    '''
    Get tag first seen (current: item only)

    :param tag: tag
    :param r_int: return an integer; a missing date is then reported as 99999999

    :return: the stored 'first_seen' value, or its integer form when r_int is set
    '''
    first_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'first_seen')
    if not r_int:
        return first_seen
    # no date stored: highest sentinel
    return 99999999 if first_seen is None else int(first_seen)
def get_tag_last_seen(tag, r_int=False):
    '''
    Get tag last seen (current: item only)

    :param tag: tag
    :param r_int: return an integer; a missing date is then reported as 0

    :return: the stored 'last_seen' value, or its integer form when r_int is set
    '''
    last_seen = r_serv_tags.hget('tag_metadata:{}'.format(tag), 'last_seen')
    if not r_int:
        return last_seen
    # no date stored: lowest sentinel
    return 0 if last_seen is None else int(last_seen)
def get_tag_metadata(tag, r_int=False):
    '''
    Get tag metadata (current: item only)

    :param tag: tag
    :param r_int: accepted, see the review note below

    :return: dict with the keys 'tag', 'first_seen' and 'last_seen'
    :rtype: dict
    '''
    # NOTE(review): r_int is not forwarded to the two getters, so both dates are
    # returned exactly as stored - confirm whether callers expect integers here.
    return {
        "tag": tag,
        "first_seen": get_tag_first_seen(tag),
        "last_seen": get_tag_last_seen(tag),
    }
def is_obj_tagged(object_id, tag):
    '''
    Check if an object is tagged with a given tag.

    :param object_id: object id
    :type object_id: str
    :param tag: tag
    :type tag: str

    :return: is object tagged
    :rtype: boolean
    '''
    # 'tag:<object_id>' sets are written and read through r_serv_metadata
    # (add_obj_tag, get_obj_tag), so membership has to be checked there as well.
    return r_serv_metadata.sismember('tag:{}'.format(object_id), tag)
def get_all_tags():
    '''
    Return the members of the 'list_tags' set as a list.
    '''
    all_tags = r_serv_tags.smembers('list_tags')
    return list(all_tags)
def get_all_obj_tags(object_type):
    '''
    Return the members of the 'list_tags:<object_type>' set as a list.

    :param object_type: object type
    '''
    set_name = 'list_tags:{}'.format(object_type)
    return list(r_serv_tags.smembers(set_name))
def get_obj_tag(object_id):
    '''
    Return all the tags of a given object.

    :param object_id: (item_id, domain, ...)

    :return: list of tags, empty when the object has none
    :rtype: list
    '''
    obj_tags = r_serv_metadata.smembers('tag:{}'.format(object_id))
    return list(obj_tags) if obj_tags else []
def update_tag_first_seen(tag, tag_first_seen, tag_last_seen):
if tag_first_seen == tag_last_seen:
if r_serv_tags.scard('{}:{}'.format(tag, tag_first_seen)) > 0:
@ -350,11 +412,88 @@ def update_tag_last_seen(tag, tag_first_seen, tag_last_seen):
tag_last_seen = Date.date_substract_day(tag_last_seen)
update_tag_last_seen(tag, tag_first_seen, tag_last_seen)
def update_tag_metadata(tag, tag_date, object_type=None, add_tag=True):
    '''
    Update tag metadata (current: item only)

    :param tag: tag
    :param tag_date: date of the tagged object, convertible to an integer
    :param object_type: object type, nothing is done unless it is "item"
    :param add_tag: True when the tag was added, False when it was removed
    '''
    if object_type != "item" or tag_date is None:
        return

    # get object metadata
    tag_metadata = get_tag_metadata(tag, r_int=True)
    first_seen = tag_metadata['first_seen']
    last_seen = tag_metadata['last_seen']

    # Dates are compared as integers whatever type was handed over (str, int or
    # None), a missing date falls back to the same sentinels as the tag getters.
    date_int = int(tag_date)
    first_seen_int = 99999999 if first_seen is None else int(first_seen)
    last_seen_int = 0 if last_seen is None else int(last_seen)

    #############
    ## ADD tag ##
    if add_tag:
        # update first_seen
        if date_int < first_seen_int:
            r_serv_tags.hset('tag_metadata:{}'.format(tag), 'first_seen', tag_date)
        # update last_seen
        if date_int > last_seen_int:
            r_serv_tags.hset('tag_metadata:{}'.format(tag), 'last_seen', tag_date)
    ################
    ## REMOVE tag ##
    else:
        # the stored values are handed over untouched to the range updaters
        if date_int == first_seen_int:
            update_tag_first_seen(tag, first_seen, last_seen)
        if date_int == last_seen_int:
            update_tag_last_seen(tag, first_seen, last_seen)
# used by modal
def get_modal_add_tags(item_id, tag_type='paste'):
def add_global_tag(tag, object_type=None):
    '''
    Create a set of all tags used in AIL (all + by object)

    :param tag: tag
    :type tag: str
    :param object_type: object type
    :type object_type: str
    '''
    r_serv_tags.sadd('list_tags', tag)
    # per object type set, only when a type is given
    if object_type:
        r_serv_tags.sadd('list_tags:{}'.format(object_type), tag)
def add_obj_tag(object_type, object_id, tag, obj_date):
    '''
    Store a tag on an object and index the object under this tag.

    :param object_type: object type ("item" objects are indexed by tag and date)
    :param object_id: object id
    :param tag: tag
    :param obj_date: object date, used for "item" objects only
    '''
    # every object type first records the tag in its own 'tag:<object_id>' set
    r_serv_metadata.sadd('tag:{}'.format(object_id), tag)

    if object_type != "item":
        r_serv_tags.sadd('{}:{}'.format(object_type, tag), object_id)
        return

    # items are indexed by tag and date
    r_serv_tags.sadd('{}:{}'.format(tag, obj_date), object_id)

    # add domain tag
    if Item.is_crawled(object_id) and tag != 'infoleak:submission="crawler"':
        item_domain = Item.get_item_domain(object_id)
        add_tag("domain", tag, item_domain)
def add_tag(object_type, tag, object_id):
    '''
    Tag an object: register the tag, attach it to the object, update the tag
    metadata and increment the daily tag counter.

    :param object_type: object type
    :param tag: tag
    :param object_id: object id
    '''
    # new tag
    if not is_obj_tagged(object_id, tag):
        # # TODO: # FIXME: sanityze object_type
        obj_date = get_obj_date(object_type, object_id)
        add_global_tag(tag, object_type=object_type)
        add_obj_tag(object_type, object_id, tag, obj_date)
        # NOTE(review): object_type is not forwarded, and update_tag_metadata only
        # acts when object_type == "item" - confirm whether this is intended.
        update_tag_metadata(tag, obj_date)
    # create tags stats # # TODO: put me in cache
    # NOTE(review): assumed to run for every call, tagged or not - confirm nesting.
    r_serv_tags.hincrby('daily_tags:{}'.format(datetime.date.today().strftime("%Y%m%d")), tag, 1)
def delete_obj_tag(object_type, object_id, tag, obj_date):
    '''
    Remove a tag from an object and remove the object from the tag index.

    :param object_type: object type ("item" objects are indexed by tag and date)
    :param object_id: object id
    :param tag: tag
    :param obj_date: object date, looked up from the object when None ("item" only)
    '''
    r_serv_metadata.srem('tag:{}'.format(object_id), tag)
    if object_type == "item":
        # use the date supplied by the caller, only look it up when it is missing
        if obj_date is None:
            obj_date = get_obj_date(object_type, object_id)
        r_serv_tags.srem('{}:{}'.format(tag, obj_date), object_id)
    else:
        r_serv_tags.srem('{}:{}'.format(object_type, tag), object_id)
def delete_tag(object_type, tag, object_id):
    '''
    Remove a tag from an object and update the tag metadata.
    Nothing is done when the object does not carry the tag.

    :param object_type: object type
    :param tag: tag
    :param object_id: object id
    '''
    # tag does not exist on this object
    if not is_obj_tagged(object_id, tag):
        return
    date = get_obj_date(object_type, object_id)
    delete_obj_tag(object_type, object_id, tag, date)
    update_tag_metadata(tag, date, object_type=object_type, add_tag=False)
def get_obj_date(object_type, object_id): # # TODO: move me in another file + REVIEW
    '''
    Return the date of an object: the item date for "item" objects, None otherwise.

    :param object_type: object type
    :param object_id: object id
    '''
    return Item.get_item_date(object_id) if object_type == "item" else None

View File

@ -128,7 +128,7 @@ class TestApiV1(unittest.TestCase):
# POST api/v1/add/item/tag
def test_0007_api_add_item_tag(self):
tags_to_add = ["infoleak:analyst-detection=\"api-key\""]
current_item_tag = Tag.get_item_tags(self.__class__.item_id)
current_item_tag = Tag.get_obj_tag(self.__class__.item_id)
current_item_tag.append(tags_to_add[0])
#galaxy_to_add = ["misp-galaxy:stealer=\"Vidar\""]
@ -138,7 +138,7 @@ class TestApiV1(unittest.TestCase):
item_tags = req_json['tags']
self.assertEqual(item_tags, tags_to_add)
new_item_tag = Tag.get_item_tags(self.__class__.item_id)
new_item_tag = Tag.get_obj_tag(self.__class__.item_id)
self.assertCountEqual(new_item_tag, current_item_tag)
# DELETE api/v1/delete/item/tag
@ -149,7 +149,7 @@ class TestApiV1(unittest.TestCase):
req_json = parse_response(self, req)
item_tags = req_json['tags']
self.assertCountEqual(item_tags, tags_to_delete)
current_item_tag = Tag.get_item_tags(self.__class__.item_id)
current_item_tag = Tag.get_obj_tag(self.__class__.item_id)
if tags_to_delete[0] in current_item_tag:
self.fail('Tag no deleted')

View File

@ -28,7 +28,7 @@ def update_update_stats():
def update_domain_by_item(domain_obj, item_id):
domain_name = domain_obj.get_domain_name()
# update domain tags
for tag in Tag.get_item_tags(item_id):
for tag in Tag.get_obj_tag(item_id):
if tag != 'infoleak:submission="crawler"' and tag != 'infoleak:submission="manual"':
Tag.add_domain_tag(tag, domain_name, Item.get_item_date(item_id))

View File

@ -73,4 +73,4 @@ def showDomain():
dict_domain['crawler_history']['random_item'] = random.choice(dict_domain['crawler_history']['items'])
return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], tag_type="domain"))
modal_add_tags=Tag.get_modal_add_tags(dict_domain['domain'], object_type="domain"))

View File

@ -0,0 +1,97 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
'''
Blueprint Flask: tags endpoints: add tags to an object (item, domain) ...
'''
import os
import sys
import json
import random
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response
from flask_login import login_required, current_user, login_user, logout_user
sys.path.append('modules')
import Flask_config
# Import Role_Manager
from Role_Manager import create_user_db, check_password_strength, check_user_role_integrity
from Role_Manager import login_admin, login_analyst
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Date
import Tag
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
import Domain
r_cache = Flask_config.r_cache
r_serv_db = Flask_config.r_serv_db
r_serv_tags = Flask_config.r_serv_tags
bootstrap_label = Flask_config.bootstrap_label
# ============ BLUEPRINT ============
tags_ui = Blueprint('tags', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/tags'))
# ============ VARIABLES ============
# ============ FUNCTIONS ============
# ============= ROUTES ==============
@tags_ui.route('/tag/add_tags')
@login_required
@login_analyst
def add_tags():
    '''
    Add taxonomy and galaxy tags to an object, then redirect to the object page.

    Query parameters: tags and tagsgalaxies (comma separated lists),
    object_id and object_type.

    :return: the error description as a string when tagging fails,
             otherwise a redirect to the domain or item page
    '''
    tags = request.args.get('tags')
    tagsgalaxies = request.args.get('tagsgalaxies')
    object_id = request.args.get('object_id') # old: item_id
    object_type = request.args.get('object_type') # old type

    list_tag = tags.split(',')
    list_tag_galaxies = tagsgalaxies.split(',')

    # add_items_tags still uses the item_* keyword names
    res = Tag.add_items_tags(tags=list_tag, galaxy_tags=list_tag_galaxies, item_id=object_id, item_type=object_type)
    # error
    if res[1] != 200:
        return str(res[0])
    # success
    if object_type=='domain':
        return redirect(url_for('crawler_splash.showDomain', domain=object_id))
    else:
        return redirect(url_for('showsavedpastes.showsavedpaste', paste=object_id))
# # add route : /crawlers/show_domain
# @tags_ui.route('/tags/search/domain')
# @login_required
# @login_analyst
# def showDomain():
# date_from = request.args.get('date_from')
# date_to = request.args.get('date_to')
# tags = request.args.get('ltags')
#
# print(date_from)
# print(date_to)
#
# dates = Date.sanitise_date_range(date_from, date_to)
#
# if tags is None:
# return 'tags_none'
# #return render_template("Tags.html", date_from=dates['date_from'], date_to=dates['date_to'])
# else:
# tags = Tag.unpack_str_tags_list(tags)
#
#
#
#
# return render_template("showDomain.html", dict_domain=dict_domain, bootstrap_label=bootstrap_label,
# tag_type="domain"))

View File

@ -422,7 +422,7 @@ def tag_validation():
else:
return 'input error'
@Tags.route("/Tags/addTags")
@Tags.route("/Tags/addTags") # REVIEW: # used in showPaste
@login_required
@login_analyst
def addTags():
@ -442,29 +442,6 @@ def addTags():
# success
return redirect(url_for('showsavedpastes.showsavedpaste', paste=path))
@Tags.route("/Tags/add_item_tags")
@login_required
@login_analyst
def add_item_tags():
tags = request.args.get('tags')
tagsgalaxies = request.args.get('tagsgalaxies')
item_id = request.args.get('item_id')
item_type = request.args.get('type')
list_tag = tags.split(',')
list_tag_galaxies = tagsgalaxies.split(',')
res = Tag.add_items_tags(tags=list_tag, galaxy_tags=list_tag_galaxies, item_id=item_id, item_type=item_type)
# error
if res[1] != 200:
return str(res[0])
# success
if item_type=='domain':
return redirect(url_for('crawler_splash.showDomain', domain=item_id))
else:
return redirect(url_for('showsavedpastes.showsavedpaste', paste=item_id))
@Tags.route("/Tags/taxonomies")
@login_required
@login_read_only

View File

@ -126,6 +126,6 @@ jQuery("#all-tags-galaxies").click(function(e){
function addTags() {
  // Read the selected taxonomy and galaxy tags, then redirect once to the
  // add_tags view of the 'tags' blueprint with the object id and type.
  var tags = ltags.getValue()
  var tagsgalaxy = ltagsgalaxies.getValue()
  window.location.replace("{{ url_for('tags.add_tags') }}?tags=" + tags + "&tagsgalaxies=" + tagsgalaxy + "&object_id={{ modal_add_tags['object_id'] }}&object_type={{ modal_add_tags['object_type'] }}");
}
</script>