mirror of https://github.com/CIRCL/AIL-framework
chg: [Term tracker] add term tracker module (word + set) + API: add new term to track (word + set + regex)
parent
28320a32a6
commit
bb6d3a6a26
|
@ -20,13 +20,6 @@ configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
|
|||
publisher.port = 6380
|
||||
publisher.channel = "Script"
|
||||
|
||||
# notifications enabled/disabled
|
||||
TrackedTermsNotificationEnabled_Name = "TrackedNotifications"
|
||||
|
||||
# associated notification email addresses for a specific term`
|
||||
# Keys will be e.g. TrackedNotificationEmails<TERMNAME>
|
||||
TrackedTermsNotificationEmailsPrefix_Name = "TrackedNotificationEmails_"
|
||||
|
||||
def sendEmailNotification(recipient, alert_name, content):
|
||||
|
||||
if not os.path.exists(configfile):
|
||||
|
|
|
@ -9,50 +9,84 @@ import os
|
|||
import sys
|
||||
import time
|
||||
|
||||
from Helper import Process
|
||||
from pubsublogger import publisher
|
||||
|
||||
import NotificationHelper
|
||||
|
||||
from packages import Paste
|
||||
from packages import Term
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
||||
import Flask_config
|
||||
|
||||
r_serv_term = Flask_config.r_serv_term
|
||||
full_item_url = "/showsavedpaste/?paste="
|
||||
|
||||
mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}"
|
||||
|
||||
# loads tracked words
|
||||
list_tracked_words = Term.get_tracked_words_list()
|
||||
set_tracked_words_list = Term.get_set_tracked_words_list()
|
||||
|
||||
def new_term_found(term, term_type):
|
||||
uuid_list = get_term_uuid_list()
|
||||
email_notification = []
|
||||
tags = []
|
||||
def new_term_found(term, term_type, item_id):
    """Record a match of a tracked term in an item and fan out its side effects.

    For every tracker UUID registered for ``term`` this:
      * stores ``item_id`` in the tracker's item set,
      * pushes one ``"<tag>;<item_id>"`` message per configured tag to the
        'Tags' queue,
      * e-mails every address configured for the tracker.

    :param term: the tracked term that matched (a word, or the serialized set).
    :param term_type: kind of term ('word' or 'set' at the call sites);
        currently not used by this function.
    :param item_id: identifier of the item in which the term was found.
    """
    for tracker_uuid in Term.get_term_uuid_list(term):
        Term.add_tracked_item(tracker_uuid, item_id)

        # Ask the tagging module to tag the item with each tracker tag.
        for tag in Term.get_term_tags(tracker_uuid):
            p.populate_set_out('{};{}'.format(tag, item_id), 'Tags')

        recipients = Term.get_term_mails(tracker_uuid)
        if not recipients:
            continue

        # The body is identical for all recipients of this tracker.
        body = mail_body_template.format(term, item_id, full_item_url, item_id)
        for recipient in recipients:
            NotificationHelper.sendEmailNotification(recipient, 'Term Tracker', body)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Entry point of the term tracker module: consume item ids from the
    # module queue and report every tracked word / word set found in them.

    publisher.port = 6380
    publisher.channel = "Script"
    publisher.info("Script TermTrackerMod started")

    # NOTE(review): the module still reads the 'Curve' section; the dedicated
    # 'TermTrackerMod' section was left disabled in the original code.
    config_section = 'Curve'
    p = Process(config_section)

    # Prefix the relative item URL with the configured AIL domain so that
    # notification mails contain an absolute link.
    full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url

    while True:
        item_id = p.get_from_set()

        # Nothing queued: back off instead of busy-looping.
        # (The original tested an undefined name `message` and overwrote
        # item_id with a hard-coded debug value.)
        if item_id is None:
            time.sleep(5)
            continue

        paste = Paste.Paste(item_id)
        dict_words_freq = Term.get_text_word_frequency(paste.get_p_content())

        # check solo words
        for word in list_tracked_words:
            if word in dict_words_freq:
                new_term_found(word, 'word', item_id)

        # check words set: each entry is (words, threshold, serialized set)
        for list_words, nb_words_threshold, word_set in set_tracked_words_list:
            nb_uniq_word = sum(1 for word in list_words if word in dict_words_freq)
            if nb_uniq_word >= nb_words_threshold:
                new_term_found(word_set, 'set', item_id)
|
||||
|
|
|
@ -2,6 +2,7 @@
|
|||
# -*-coding:UTF-8 -*
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import uuid
|
||||
import redis
|
||||
|
@ -16,6 +17,7 @@ sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
|
|||
import Flask_config
|
||||
|
||||
r_serv_term = Flask_config.r_serv_term
|
||||
email_regex = Flask_config.email_regex
|
||||
|
||||
special_characters = set('[<>~!?@#$%^&*|()_-+={}":;,.\'\n\r\t]/\\')
|
||||
special_characters.add('\\s')
|
||||
|
@ -24,6 +26,26 @@ special_characters.add('\\s')
|
|||
tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+',
|
||||
gaps=True, discard_empty=True)
|
||||
|
||||
def is_valid_mail(email):
    """Return True when ``email`` matches the module-level ``email_regex``.

    :param email: address to check.
    :return: True if ``email_regex.match`` finds a match, False otherwise.
    """
    # A match object is always truthy, None is falsy.
    return bool(email_regex.match(email))
|
||||
|
||||
def verify_mail_list(mail_list):
    """Validate every address of ``mail_list``.

    :param mail_list: iterable of e-mail addresses.
    :return: ``None`` when all addresses are valid, otherwise an
        ``(error_dict, 400)`` tuple describing the first invalid address.
    """
    for candidate in mail_list:
        if is_valid_mail(candidate):
            continue
        # Stop at the first invalid address and report it to the caller.
        return ({'status': 'error', 'reason': 'Invalid email', 'value': candidate}, 400)
    return None
|
||||
|
||||
def is_valid_regex(term_regex):
    """Tell whether ``term_regex`` can be compiled by the ``re`` module.

    :param term_regex: candidate regular expression (normally a str).
    :return: True if ``re.compile`` accepts it, False otherwise.
    """
    try:
        re.compile(term_regex)
    # Only catch what compiling a pattern can raise: syntax errors (re.error),
    # wrong input types (TypeError), oversized repetition counts
    # (OverflowError/ValueError) and pathological nesting (RecursionError).
    # A bare `except:` would also hide KeyboardInterrupt / SystemExit.
    except (re.error, TypeError, ValueError, OverflowError, RecursionError):
        return False
    return True
|
||||
|
||||
def get_text_word_frequency(item_content, filtering=True):
|
||||
item_content = item_content.lower()
|
||||
words_dict = defaultdict(int)
|
||||
|
@ -34,7 +56,6 @@ def get_text_word_frequency(item_content, filtering=True):
|
|||
blob = TextBlob(item_content)
|
||||
for word in blob.tokens:
|
||||
words_dict[word] += 1
|
||||
print(words_dict)
|
||||
return words_dict
|
||||
|
||||
# # TODO: create all tracked words
|
||||
|
@ -45,28 +66,40 @@ def get_set_tracked_words_list():
|
|||
set_list = r_serv_term.smembers('all:tracked_term:set')
|
||||
all_set_list = []
|
||||
for elem in set_list:
|
||||
elem = elem.split(';')
|
||||
num_words = int(elem[1])
|
||||
ter_set = elem[0].split(',')
|
||||
all_set_list.append((ter_set, num_words))
|
||||
res = elem.split(';')
|
||||
num_words = int(res[1])
|
||||
ter_set = res[0].split(',')
|
||||
all_set_list.append((ter_set, num_words, elem))
|
||||
return all_set_list
|
||||
|
||||
def parse_json_term_to_add(dict_input):
|
||||
def is_term_tracked_in_global_level(term):
    """Check whether ``term`` already has a tracker whose level is '1'.

    Looks up every tracker UUID stored for the term and inspects the
    'level' field of its metadata hash.

    :param term: tracked term (word, serialized set or regex).
    :return: True if at least one tracker of this term has level '1'.
    """
    tracker_uuids = r_serv_term.smembers('all:tracked_term_uuid:{}'.format(term))
    # `or ()` keeps the original behaviour of treating a falsy reply as "no tracker".
    return any(
        r_serv_term.hget('tracked_term:{}'.format(tracker_uuid), 'level') == '1'
        for tracker_uuid in (tracker_uuids or ())
    )
|
||||
|
||||
def parse_json_term_to_add(dict_input, user_id):
|
||||
term = dict_input.get('term', None)
|
||||
if not term:
|
||||
return ({"status": "error", "reason": "Term not provided"}, 400)
|
||||
term_type = dict_input.get('term', None)
|
||||
term_type = dict_input.get('type', None)
|
||||
if not term_type:
|
||||
return ({"status": "error", "reason": "Term type not provided"}, 400)
|
||||
nb_words = dict_input.get('nb_words', 1)
|
||||
|
||||
res = parse_tracked_term_to_add(term , term_type, nb_words=nb_words)
|
||||
if res['status']=='error':
|
||||
if res[1]!=200:
|
||||
return res
|
||||
term = res[0]['term']
|
||||
term_type = res[0]['type']
|
||||
|
||||
# get user_id
|
||||
tags = dict_input.get('tags', [])
|
||||
mails = dict_input.get('mails', [])
|
||||
## TODO: verify mail integrity
|
||||
res = verify_mail_list(mails)
|
||||
if res:
|
||||
return res
|
||||
|
||||
## TODO: add dashboard key
|
||||
level = dict_input.get('level', 1)
|
||||
|
@ -77,17 +110,20 @@ def parse_json_term_to_add(dict_input):
|
|||
except:
|
||||
level = 1
|
||||
|
||||
# check if term already tracked in global
|
||||
if level==1:
|
||||
if is_term_tracked_in_global_level(term):
|
||||
return ({"status": "error", "reason": "Term already tracked"}, 409)
|
||||
|
||||
term_uuid = add_tracked_term(term , term_type, user_id, level, tags, mails)
|
||||
|
||||
return ({'term': term, 'uuid': term_uuid}, 200)
|
||||
return ({'term': term, 'type': term_type, 'uuid': term_uuid}, 200)
|
||||
|
||||
|
||||
def parse_tracked_term_to_add(term , term_type, nb_words=1):
|
||||
|
||||
# todo verify regex format
|
||||
if term_type=='regex':
|
||||
# TODO: verify regex integrity
|
||||
pass
|
||||
if not is_valid_regex(term):
|
||||
return ({"status": "error", "reason": "Invalid regex"}, 400)
|
||||
elif term_type=='word' or term_type=='set':
|
||||
# force lowercase
|
||||
term = term.lower()
|
||||
|
@ -97,7 +133,7 @@ def parse_tracked_term_to_add(term , term_type, nb_words=1):
|
|||
return ({"status": "error", "reason": "special character not allowed", "message": "Please use a regex or remove all special characters"}, 400)
|
||||
words = term.split()
|
||||
# not a word
|
||||
if term_type=='word' and words:
|
||||
if term_type=='word' and len(words)>1:
|
||||
term_type = 'set'
|
||||
|
||||
# ouput format: term1,term2,term3;2
|
||||
|
@ -106,19 +142,21 @@ def parse_tracked_term_to_add(term , term_type, nb_words=1):
|
|||
nb_words = int(nb_words)
|
||||
except:
|
||||
nb_words = 1
|
||||
if nb_words==0:
|
||||
nb_words = 1
|
||||
|
||||
words_set = set(words)
|
||||
words_set = sorted(words_set)
|
||||
|
||||
term = ",".join(words_set)
|
||||
term = "{};{}".format(term, nb_words)
|
||||
|
||||
print(term)
|
||||
print(term_type)
|
||||
|
||||
return ({"status": "success", "term": term, "type": term_type}, 200)
|
||||
if nb_words > len(words_set):
|
||||
nb_words = len(words_set)
|
||||
|
||||
else:
|
||||
return ({"status": "error", "reason": "Incorrect type"}, 400)
|
||||
return ({"status": "success", "term": term, "type": term_type}, 200)
|
||||
|
||||
def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0):
|
||||
|
||||
|
@ -154,9 +192,44 @@ def add_tracked_term(term , term_type, user_id, level, tags, mails, dashboard=0)
|
|||
|
||||
return term_uuid
|
||||
|
||||
def delete_term(term_uuid):
    """Delete a tracker and every Redis key that belongs to it.

    Removes the tracker UUID from the term -> UUID map, from the user or
    global listing (depending on the stored level), and deletes its
    metadata hash, tag set, mail set and matched-item set.

    :param term_uuid: UUID of the tracker to delete.
    """
    meta_key = 'tracked_term:{}'.format(term_uuid)
    term = r_serv_term.hget(meta_key, 'tracked')
    term_type = r_serv_term.hget(meta_key, 'type')
    term_level = r_serv_term.hget(meta_key, 'level')
    user_id = r_serv_term.hget(meta_key, 'user_id')

    uuid_map_key = 'all:tracked_term_uuid:{}'.format(term)
    r_serv_term.srem(uuid_map_key, term_uuid)

    # The per-type set stores the terms themselves (it is read back as a list
    # of terms by get_set_tracked_words_list), so drop the term -- not the
    # uuid -- and only once no other tracker still references it.
    if not r_serv_term.exists(uuid_map_key):
        r_serv_term.srem('all:tracked_term:{}'.format(term_type), term)

    # The level comes back from Redis as a string (or None when the tracker
    # does not exist); normalise it before comparing.
    try:
        level = int(term_level)
    except (TypeError, ValueError):
        level = None

    if level == 0:  # user only
        r_serv_term.srem('user:tracked_term:{}'.format(user_id), term_uuid)
    elif level == 1:  # global
        # NOTE(review): key spelled 'gobal' as in the original; confirm it
        # matches the key written when the tracker is created.
        r_serv_term.srem('gobal:tracked_term', term_uuid)

    # delete metadata
    r_serv_term.delete(meta_key)

    # remove tags
    r_serv_term.delete('tracked_term:tags:{}'.format(term_uuid))

    # remove mails
    r_serv_term.delete('tracked_term:mail:{}'.format(term_uuid))

    # remove item set
    r_serv_term.delete('tracked_term:item:{}'.format(term_uuid))
|
||||
|
||||
def get_term_uuid_list(term):
    """Return, as a list, the tracker UUIDs stored for ``term``."""
    uuid_map_key = 'all:tracked_term_uuid:{}'.format(term)
    tracker_uuids = r_serv_term.smembers(uuid_map_key)
    return list(tracker_uuids)
|
||||
|
||||
def get_term_tags(term_uuid):
    """Return, as a list, the tags stored for the tracker ``term_uuid``."""
    tags_key = 'tracked_term:tags:{}'.format(term_uuid)
    tracker_tags = r_serv_term.smembers(tags_key)
    return list(tracker_tags)
|
||||
|
||||
def get_term_mails(term_uuid):
    """Return, as a list, the e-mail addresses stored for the tracker ``term_uuid``."""
    mails_key = 'tracked_term:mail:{}'.format(term_uuid)
    tracker_mails = r_serv_term.smembers(mails_key)
    return list(tracker_mails)
|
||||
|
||||
def add_tracked_item(term_uuid, item_id):
    """Add ``item_id`` to the set of items matched by the tracker ``term_uuid``."""
    items_key = 'tracked_term:item:{}'.format(term_uuid)
    r_serv_term.sadd(items_key, item_id)
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon
|
|||
|
||||
##### Notifications ######
|
||||
[Notifications]
|
||||
ail_domain = http://localhost:7000
|
||||
ail_domain = https://localhost:7000
|
||||
sender = sender@example.com
|
||||
sender_host = smtp.example.com
|
||||
sender_port = 1337
|
||||
|
|
|
@ -600,7 +600,7 @@ Add term tracker
|
|||
- `term`
|
||||
- term to add
|
||||
- *str - word(s)*
|
||||
- default: `text`
|
||||
- mandatory
|
||||
- `nb_words`
|
||||
- number of words in set
|
||||
- *int*
|
||||
|
|
|
@ -17,6 +17,7 @@ import Import_helper
|
|||
import Item
|
||||
import Paste
|
||||
import Tag
|
||||
import Term
|
||||
|
||||
from flask import Flask, render_template, jsonify, request, Blueprint, redirect, url_for, Response
|
||||
from flask_login import login_required
|
||||
|
@ -55,8 +56,11 @@ def verify_token(token):
|
|||
else:
|
||||
return False
|
||||
|
||||
def get_user_from_token(token):
    """Return the value stored for ``token`` in the 'user:tokens' hash.

    :param token: API token taken from the request.
    :return: whatever ``hget`` returns for that field (the user id, or a
        falsy value when the token is unknown).
    """
    user_id = r_serv_db.hget('user:tokens', token)
    return user_id
|
||||
|
||||
def verify_user_role(role, token):
|
||||
user_id = r_serv_db.hget('user:tokens', token)
|
||||
user_id = get_user_from_token(token)
|
||||
if user_id:
|
||||
if is_in_role(user_id, role):
|
||||
return True
|
||||
|
@ -308,13 +312,17 @@ def get_all_tags():
|
|||
return Response(json.dumps(res, indent=2, sort_keys=True), mimetype='application/json'), 200
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # TAGS # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # TRACKER # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
@restApi.route("api/v1/add/tracker/term", methods=['POST'])
@token_required('analyst')
def add_tracker_term():
    """REST endpoint: register a new tracked term for the calling user.

    Reads the JSON object sent in the request body, resolves the user from
    the authentication token and delegates validation and creation to
    ``Term.parse_json_term_to_add``.

    :return: a JSON response built from the ``(payload, status_code)`` tuple
        returned by ``Term.parse_json_term_to_add``, or a 400 error when the
        body is not a JSON object.
    """
    # Use the caller's payload (the original shipped a hard-coded debug dict
    # on a GET route, so the request content was ignored).
    data = request.get_json()
    if not isinstance(data, dict):
        error = {"status": "error", "reason": "Malformed JSON"}
        return Response(json.dumps(error, indent=2, sort_keys=True), mimetype='application/json'), 400

    user_token = get_auth_from_header()
    user_id = get_user_from_token(user_token)
    res = Term.parse_json_term_to_add(data, user_id)
    return Response(json.dumps(res[0], indent=2, sort_keys=True), mimetype='application/json'), res[1]
|
||||
|
||||
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
|
||||
# # # # # # # # # # # # # IMPORT # # # # # # # # # # # # # # # # # #
|
||||
|
|
Loading…
Reference in New Issue