AIL-framework/bin/TermTrackerMod.py

125 lines
3.8 KiB
Python
Executable File

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The TermTracker Module
===================
"""
import os
import sys
import time
import signal
from Helper import Process
from pubsublogger import publisher
import NotificationHelper
from packages import Item
from packages import Term
from lib import Tracker
full_item_url = "/object/item?id="
mail_body_template = "AIL Framework,\nNew occurrence for term tracked term: {}\nitem id: {}\nurl: {}{}"
# loads tracked words
list_tracked_words = Term.get_tracked_words_list()
last_refresh_word = time.time()
set_tracked_words_list = Term.get_set_tracked_words_list()
last_refresh_set = time.time()
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
def new_term_found(term, term_type, item_id, item_date):
uuid_list = Term.get_term_uuid_list(term, term_type)
print('new tracked term found: {} in {}'.format(term, item_id))
for term_uuid in uuid_list:
Term.add_tracked_item(term_uuid, item_id, item_date)
tags_to_add = Term.get_term_tags(term_uuid)
for tag in tags_to_add:
msg = '{};{}'.format(tag, item_id)
p.populate_set_out(msg, 'Tags')
mail_to_notify = Term.get_term_mails(term_uuid)
if mail_to_notify:
mail_subject = Tracker.get_email_subject(term_uuid)
mail_body = mail_body_template.format(term, item_id, full_item_url, item_id)
for mail in mail_to_notify:
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
publisher.info("Script TermTrackerMod started")
config_section = 'TermTrackerMod'
p = Process(config_section)
max_execution_time = p.config.getint(config_section, "max_execution_time")
full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url
while True:
item_id = p.get_from_set()
if item_id is not None:
item_date = Item.get_item_date(item_id)
item_content = Item.get_item_content(item_id)
signal.alarm(max_execution_time)
try:
dict_words_freq = Term.get_text_word_frequency(item_content)
except TimeoutException:
print ("{0} processing timeout".format(item_id))
continue
else:
signal.alarm(0)
# create token statistics
#for word in dict_words_freq:
# Term.create_token_statistics(item_date, word, dict_words_freq[word])
# check solo words
for word in list_tracked_words:
if word in dict_words_freq:
new_term_found(word, 'word', item_id, item_date)
# check words set
for elem in set_tracked_words_list:
list_words = elem[0]
nb_words_threshold = elem[1]
word_set = elem[2]
nb_uniq_word = 0
for word in list_words:
if word in dict_words_freq:
nb_uniq_word += 1
if nb_uniq_word >= nb_words_threshold:
new_term_found(word_set, 'set', item_id, item_date)
else:
time.sleep(5)
# refresh Tracked term
if last_refresh_word < Term.get_tracked_term_last_updated_by_type('word'):
list_tracked_words = Term.get_tracked_words_list()
last_refresh_word = time.time()
print('Tracked word refreshed')
if last_refresh_set < Term.get_tracked_term_last_updated_by_type('set'):
set_tracked_words_list = Term.get_set_tracked_words_list()
last_refresh_set = time.time()
print('Tracked set refreshed')