diff --git a/OVERVIEW.md b/OVERVIEW.md index f4ee12ec..77339321 100644 --- a/OVERVIEW.md +++ b/OVERVIEW.md @@ -109,8 +109,56 @@ Redis and ARDB overview | **uuid**:ltags | **tag** | | **uuid**:ltagsgalaxies | **tag** | +## DB2 - New TermFreq: + +##### Term Tracker metadata: +| Hset - Key | Field | Value | +| ------ | ------ | ------ | +| tracked_term:**uuid** | tracked | **tracked word/set/regex** | +| | type | **word/set/regex** | +| | date | **date added** | +| | user_id | **created by user_id** | +| | dashboard | **0/1 Display alert on dashboard** | +| | level | **0/1 Tracker visibility (0: user only, 1: all users)** | + +##### Term Tracked by user_id (visibility level: user only): +| Set - Key | Value | +| ------ | ------ | +| user:tracked_term:**user_id** | **uuid - tracked term uuid** | + +##### Global Term Tracked (visibility level: all users): +| Set - Key | Value | +| ------ | ------ | +| gobal:tracked_term | **uuid - tracked term uuid** | + +##### All Term Tracked by type: +| Set - Key | Value | +| ------ | ------ | +| all:tracked_term:**word/set/regex - term type** | **tracked term** | + +| Set - Key | Value | +| ------ | ------ | +| all:tracked_term_uuid:**tracked term** | **uuid - tracked term uuid** | + +##### All Term Tracked items: +| Set - Key | Value | +| ------ | ------ | +| tracked_term:item:**uuid** | **item_id** | + +##### All Term Tracked tags: +| Set - Key | Value | +| ------ | ------ | +| tracked_term:tags:**uuid** | **tag** | + +##### All Term Tracked mails: +| Set - Key | Value | +| ------ | ------ | +| tracked_term:mail:**uuid** | **mail** | + ## DB2 - TermFreq: +##### Set: + ##### Set: | Key | Value | | ------ | ------ | @@ -118,6 +166,17 @@ Redis and ARDB overview | TrackedSetSet | **tracked_set** | | TrackedRegexSet | **tracked_regex** | | | | +| | | +| global:TrackedSetTermSet | **tracked_term** | +| global:TrackedSetSet | **tracked_set** | +| global:TrackedRegexSet | **tracked_regex** | +| | | +| | | +| user:**user_id**:TrackedSetTermSet | **tracked_term** | +| 
user:**user_id**:TrackedSetSet | **tracked_set** |
+| user:**user_id**:TrackedRegexSet | **tracked_regex** |
+| | |
+| | |
 | tracked_**tracked_term** | **item_path** |
 | set_**tracked_set** | **item_path** |
 | regex_**tracked_regex** | **item_path** |
diff --git a/bin/TermTrackerMod.py b/bin/TermTrackerMod.py
new file mode 100755
index 00000000..2d0458b5
--- /dev/null
+++ b/bin/TermTrackerMod.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+"""
+The TermTracker Module
+===================
+
+"""
+import os
+import sys
+import time
+
+from packages import Paste
+from packages import Term
+
+sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
+import Flask_config
+
+r_serv_term = Flask_config.r_serv_term
+
+# loads tracked words
+list_tracked_words = Term.get_tracked_words_list()
+set_tracked_words_list = Term.get_set_tracked_words_list()
+
+def new_term_found(term, term_type):
+    """Look up the trackers registered for a term (work in progress).
+
+    :param term: tracked term, as stored by Term.add_tracked_term
+    :param term_type: tracker type (not used yet)
+    """
+    uuid_list = Term.get_term_uuid_list(term)
+    email_notification = []
+    tags = []
+
+    # TODO: collect the tags and mails of each tracker
+    for term_uuid in uuid_list:
+        pass
+
+
+if __name__ == "__main__":
+
+    item_id = 'submitted/2019/08/02/cc1900ed-6051-473a-ba7a-850a17d0cc02.gz'
+    #item_id = 'submitted/2019/08/02/0a52d82d-a89d-4004-9535-8a0bc9c1ce49.gz'
+    paste = Paste.Paste(item_id)
+    res = Term.parse_tracked_term_to_add('test zorro meroio apple weert', 'word')
+
+    '''
+    dict_words_freq = Term.get_text_word_frequency(paste.get_p_content())
+
+    # check solo words
+    for word in list_tracked_words:
+        if word in dict_words_freq:
+            pass
+            # tag + get uuids ...
+
+    # check words set
+    for list_words, nb_words_threshold in set_tracked_words_list:
+        nb_uniq_word = 0
+        for word in list_words:
+            if word in dict_words_freq:
+                nb_uniq_word += 1
+        if nb_uniq_word > nb_words_threshold:
+            # tag + get uuid
+            pass
+    '''
diff --git a/bin/packages/Term.py b/bin/packages/Term.py
new file mode 100755
index 00000000..0902d56f
--- /dev/null
+++ b/bin/packages/Term.py
@@ -0,0 +1,218 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+import os
+import re
+import sys
+import uuid
+import redis
+import datetime
+
+from collections import defaultdict
+
+from nltk.tokenize import RegexpTokenizer
+from textblob import TextBlob
+
+sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
+import Flask_config
+
+r_serv_term = Flask_config.r_serv_term
+
+# characters refused in 'word' and 'set' trackers
+special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
+special_characters.add('\\s')
+
+# NLTK tokenizer
+tokenizer = RegexpTokenizer(r'[\&\~\:\;\,\.\(\)\{\}\|\[\]\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
+                            gaps=True, discard_empty=True)
+
+def get_text_word_frequency(item_content, filtering=True):
+    """Count the occurrences of each token of a text.
+
+    :param item_content: text to analyse, lower-cased before tokenisation
+    :param filtering: use the module tokenizer if True, the TextBlob default one otherwise
+    :return: defaultdict(int), token -> number of occurrences
+    """
+    item_content = item_content.lower()
+    words_dict = defaultdict(int)
+
+    if filtering:
+        blob = TextBlob(item_content, tokenizer=tokenizer)
+    else:
+        blob = TextBlob(item_content)
+    for word in blob.tokens:
+        words_dict[word] += 1
+    return words_dict
+
+# # TODO: create all tracked words
+def get_tracked_words_list():
+    """Return the members of the 'all:tracked_term:word' set as a list."""
+    return list(r_serv_term.smembers('all:tracked_term:word'))
+
+def get_set_tracked_words_list():
+    """Return the tracked sets as a list of (words, nb_words) tuples.
+
+    Each member of 'all:tracked_term:set' is parsed as 'word1,word2;nb_words'.
+    """
+    all_set_list = []
+    for elem in r_serv_term.smembers('all:tracked_term:set'):
+        elem = elem.split(';')
+        num_words = int(elem[1])
+        ter_set = elem[0].split(',')
+        all_set_list.append((ter_set, num_words))
+    return all_set_list
+
+def parse_json_term_to_add(dict_input, user_id=None):
+    """Validate a tracker request and create the tracker.
+
+    :param dict_input: request fields (term, type, nb_words, tags, mails, level)
+    :param user_id: tracker creator, read from dict_input['user_id'] when not given
+    :return: (response dict, HTTP status code)
+    """
+    term = dict_input.get('term', None)
+    if not term:
+        return ({"status": "error", "reason": "Term not provided"}, 400)
+    term_type = dict_input.get('type', None)
+    if not term_type:
+        return ({"status": "error", "reason": "Term type not provided"}, 400)
+    nb_words = dict_input.get('nb_words', 1)
+
+    # res is a (dict, status code) tuple
+    res = parse_tracked_term_to_add(term, term_type, nb_words=nb_words)
+    if res[0]['status'] == 'error':
+        return res
+    # store the normalised term and its final type
+    term = res[0]['term']
+    term_type = res[0]['type']
+
+    # NOTE(review): user_id was never defined here, confirm how callers provide it
+    if user_id is None:
+        user_id = dict_input.get('user_id', None)
+    if not user_id:
+        return ({"status": "error", "reason": "User id not provided"}, 400)
+
+    tags = dict_input.get('tags', [])
+    mails = dict_input.get('mails', [])
+    ## TODO: verify mail integrity
+
+    ## TODO: add dashboard key
+    level = dict_input.get('level', 1)
+    try:
+        level = int(level)
+    except (TypeError, ValueError):
+        level = 1
+    # 0: user only, 1: all users
+    if level not in (0, 1):
+        level = 1
+
+    term_uuid = add_tracked_term(term, term_type, user_id, level, tags, mails)
+
+    return ({'term': term, 'uuid': term_uuid}, 200)
+
+
+def parse_tracked_term_to_add(term, term_type, nb_words=1):
+    """Validate a term and convert it to its storage format.
+
+    A 'word' made of several words becomes a 'set'. A set is stored as its
+    sorted unique words followed by nb_words: term1,term2,term3;2
+
+    :return: (result dict, HTTP status code); on success the dict holds the
+             normalised 'term' and its final 'type'
+    """
+    if term_type == 'regex':
+        try:
+            re.compile(term)
+        except (re.error, TypeError):
+            return ({"status": "error", "reason": "Invalid regex"}, 400)
+
+    elif term_type == 'word' or term_type == 'set':
+        # force lowercase
+        term = term.lower()
+        if special_characters.intersection(term):
+            return ({"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}, 400)
+        words = term.split()
+        if not words:
+            return ({"status": "error", "reason": "Term not provided"}, 400)
+        # several words are not a word: track them as a set
+        if term_type == 'word' and len(words) > 1:
+            term_type = 'set'
+
+        # output format: term1,term2,term3;2
+        if term_type == 'set':
+            try:
+                nb_words = int(nb_words)
+            except (TypeError, ValueError):
+                nb_words = 1
+            term = "{};{}".format(",".join(sorted(set(words))), nb_words)
+        else:
+            term = words[0]
+
+    else:
+        return ({"status": "error", "reason": "Incorrect type"}, 400)
+
+    return ({"status": "success", "term": term, "type": term_type}, 200)
+
+def add_tracked_term(term, term_type, user_id, level, tags, mails, dashboard=0):
+    """Save a new tracker and return its uuid.
+
+    :param term: term to track, in its storage format
+    :param term_type: tracker type, also used in the 'all:tracked_term:<type>' key
+    :param user_id: tracker creator
+    :param level: 0 = user only, 1 = global
+    :param tags: tags saved in 'tracked_term:tags:<uuid>'
+    :param mails: mails saved in 'tracked_term:mail:<uuid>'
+    :param dashboard: value of the 'dashboard' metadata field
+    :return: uuid4 string identifying the tracker
+    """
+    term_uuid = str(uuid.uuid4())
+    metadata_key = 'tracked_term:{}'.format(term_uuid)
+
+    # create metadata
+    r_serv_term.hset(metadata_key, 'tracked', term)
+    r_serv_term.hset(metadata_key, 'type', term_type)
+    r_serv_term.hset(metadata_key, 'date', datetime.date.today().strftime("%Y%m%d"))
+    r_serv_term.hset(metadata_key, 'user_id', user_id)
+    r_serv_term.hset(metadata_key, 'level', level)
+    r_serv_term.hset(metadata_key, 'dashboard', dashboard)
+
+    # create all term set
+    r_serv_term.sadd('all:tracked_term:{}'.format(term_type), term)
+
+    # create term - uuid map
+    r_serv_term.sadd('all:tracked_term_uuid:{}'.format(term), term_uuid)
+
+    # add display level set
+    if level == 0: # user only
+        r_serv_term.sadd('user:tracked_term:{}'.format(user_id), term_uuid)
+    elif level == 1: # global
+        # NOTE(review): key is spelled 'gobal' (sic), as in OVERVIEW.md
+        r_serv_term.sadd('gobal:tracked_term', term_uuid)
+
+    # create term tags list
+    for tag in tags:
+        r_serv_term.sadd('tracked_term:tags:{}'.format(term_uuid), tag)
+
+    # create term mail notification list
+    for mail in mails:
+        r_serv_term.sadd('tracked_term:mail:{}'.format(term_uuid), mail)
+
+    return term_uuid
+
+def get_term_uuid_list(term):
+    """Return the uuids of the trackers registered for a term."""
+    return list(r_serv_term.smembers('all:tracked_term_uuid:{}'.format(term)))
+
+
+def get_global_tracked_term():
+    """Return the global tracked terms, sets and regexes, grouped by type."""
+    tracked_set = list(r_serv_term.smembers('global:TrackedSetSet'))
+    tracked_regex = list(r_serv_term.smembers('global:TrackedRegexSet'))
+    tracked_terms = list(r_serv_term.smembers('global:TrackedSetTermSet'))
+    return {'term': tracked_terms, 'set': tracked_set, 'regex': tracked_regex}
+
+def get_user_tracked_term(user_id):
+    """Return the terms, sets and regexes tracked by a user, grouped by type."""
+    tracked_set = list(r_serv_term.smembers('user:{}:TrackedSetSet'.format(user_id)))
+    tracked_regex = list(r_serv_term.smembers('user:{}:TrackedRegexSet'.format(user_id)))
+    tracked_terms = list(r_serv_term.smembers('user:{}:TrackedSetTermSet'.format(user_id)))
+    return {'term': tracked_terms, 'set': tracked_set, 'regex': tracked_regex}
diff --git a/doc/README.md b/doc/README.md
index a466c681..52768cd0 100644
--- 
a/doc/README.md +++ b/doc/README.md @@ -583,6 +583,94 @@ curl https://127.0.0.1:7000/api/v1/get/tag/metadata --header "Authorization: iHc + + +## Tracker + + + +### Add term tracker: `api/v1/add/tracker/term` + +#### Description +Add term tracker + +**Method** : `POST` + +#### Parameters +- `term` + - term to add + - *str - word(s)* + - mandatory +- `nb_words` + - number of words in set + - *int* + - default: `1` +- `type` + - term type + - *str* + - mandatory: `word`, `set`, `regex` +- `tags` + - list of tags + - *list* + - default: `[]` +- `mails` + - list of mails to notify + - *list* + - default: `[]` +- `level` + - tracker visibility + - *int - 0: user only, 1: all users* + - default: `1` + +#### JSON response +- `uuid` + - tracker uuid + - *uuid4* + +#### Example +``` +curl https://127.0.0.1:7000/api/v1/add/tracker/term --header "Authorization: iHc1_ChZxj1aXmiFiF1mkxxQkzawwriEaZpPqyTQj " -H "Content-Type: application/json" --data @input.json -X POST +``` + +#### input.json Example +```json + { + "type": "word", + "tags": [ + "infoleak:analyst-detection=\"private-key\"" + ], + "term": "term to track" + } +``` + +#### Expected Success Response +**HTTP Status Code** : `200` + +```json + { + "uuid": "0c3d7b34-936e-4f01-9cdf-2070184b6016" + } +``` + +#### Expected Fail Response +**HTTP Status Code** : `400` + +```json + {"status": "error", "reason": "Malformed JSON"} + {"status": "error", "reason": "Term not provided"} + {"status": "error", "reason": "Term type not provided"} + {"status": "error", "reason": "Incorrect type"} +``` + + + + + + + + + + ## Import management diff --git a/var/www/modules/restApi/Flask_restApi.py b/var/www/modules/restApi/Flask_restApi.py index 6ea8dd69..f951ef9f 100644 --- a/var/www/modules/restApi/Flask_restApi.py +++ b/var/www/modules/restApi/Flask_restApi.py @@ -307,6 +307,15 @@ def get_all_tags(): res = {'tags': Tag.get_all_tags()} return Response(json.dumps(res, indent=2, sort_keys=True), mimetype='application/json'), 200 
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+# # # # # # # # # # # # # #       TRACKER       # # # # # # # # # # # # # # # # #
+# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
+@restApi.route("api/v1/add/tracker/term", methods=['POST'])
+#@token_required('analyst')
+def add_tracker_term():
+    data = request.get_json()
+    return Response(json.dumps({"status": "error", "reason": "Not implemented"}), mimetype='application/json'), 501  # TODO: create the tracker from data; a view must not return None
+
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 # # # # # # # # # # # # # #        IMPORT       # # # # # # # # # # # # # # # # #
 # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #