AIL-framework/bin/TermTrackerMod.py

93 lines
2.6 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The TermTracker Module
===================
"""
import os
import sys
import time
from Helper import Process
from pubsublogger import publisher
import NotificationHelper
from packages import Paste
from packages import Term
sys.path.append(os.path.join(os.environ['AIL_FLASK'], 'modules'))
import Flask_config
full_item_url = "/showsavedpaste/?paste="
mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}"
# loads tracked words
list_tracked_words = Term.get_tracked_words_list()
set_tracked_words_list = Term.get_set_tracked_words_list()
def new_term_found(term, term_type, item_id):
uuid_list = Term.get_term_uuid_list(term)
for term_uuid in uuid_list:
Term.add_tracked_item(term_uuid, item_id)
tags_to_add = Term.get_term_tags(term_uuid)
for tag in tags_to_add:
msg = '{};{}'.format(tag, item_id)
p.populate_set_out(msg, 'Tags')
mail_to_notify = Term.get_term_mails(term_uuid)
if mail_to_notify:
mail_body = mail_body_template.format(term, item_id, full_item_url, item_id)
for mail in mail_to_notify:
NotificationHelper.sendEmailNotification(mail, 'Term Tracker', mail_body)
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
publisher.info("Script TermTrackerMod started")
#config_section = 'TermTrackerMod'
config_section = 'TermTrackerMod'
p = Process(config_section)
full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url
while True:
item_id = p.get_from_set()
item_id = 'submitted/2019/08/02/cc1900ed-6051-473a-ba7a-850a17d0cc02.gz'
#item_id = 'submitted/2019/08/02/0a52d82d-a89d-4004-9535-8a0bc9c1ce49.gz'
if message is not None:
paste = Paste.Paste(item_id)
dict_words_freq = Term.get_text_word_frequency(paste.get_p_content())
# check solo words
for word in list_tracked_words:
if word in dict_words_freq:
new_term_found(word, 'word', item_id)
# check words set
for elem in set_tracked_words_list:
list_words = elem[0]
nb_words_threshold = elem[1]
word_set = elem[2]
nb_uniq_word = 0
for word in list_words:
if word in dict_words_freq:
nb_uniq_word += 1
if nb_uniq_word >= nb_words_threshold:
new_term_found(word_set, 'set', item_id)
else:
time.sleep(5)