2019-08-06 17:03:49 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
2021-06-02 16:04:52 +02:00
|
|
|
The Tracker_Term Module
|
2019-08-06 17:03:49 +02:00
|
|
|
=======================
|
|
|
|
|
|
|
|
"""
|
2021-04-02 09:52:05 +02:00
|
|
|
|
|
|
|
##################################
|
|
|
|
# Import External packages
|
|
|
|
##################################
|
2019-08-06 17:03:49 +02:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import time
|
2019-08-09 14:20:13 +02:00
|
|
|
import signal
|
2019-08-06 17:03:49 +02:00
|
|
|
|
2021-06-02 16:04:52 +02:00
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
2021-04-02 09:52:05 +02:00
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
2021-06-02 16:04:52 +02:00
|
|
|
from modules.abstract_module import AbstractModule
|
2019-08-07 12:08:24 +02:00
|
|
|
import NotificationHelper
|
2021-06-08 16:46:36 +02:00
|
|
|
from packages.Item import Item
|
2019-08-06 17:03:49 +02:00
|
|
|
from packages import Term
|
2020-07-10 15:54:14 +02:00
|
|
|
from lib import Tracker
|
|
|
|
|
2019-08-06 17:03:49 +02:00
|
|
|
|
2019-08-09 14:20:13 +02:00
|
|
|
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when the processing time budget runs out."""
|
|
|
|
def timeout_handler(signum, frame):
    """Signal handler: abort the interrupted code by raising TimeoutException.

    :param signum: number of the delivered signal (unused)
    :param frame: stack frame that was interrupted (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException()
|
|
|
|
# Register timeout_handler for SIGALRM at import time, so that an expired
# signal.alarm() interrupts the running code with a TimeoutException.
signal.signal(signal.SIGALRM, timeout_handler)
|
|
|
|
|
2019-08-07 12:08:24 +02:00
|
|
|
|
2021-06-02 16:04:52 +02:00
|
|
|
class Tracker_Term(AbstractModule):
    """
    Tracker_Term module for AIL framework

    For every item id received by compute(), the item content is split into a
    word-frequency mapping (Term.get_text_word_frequency) and compared with
    the tracked single words and the tracked word sets. Each match is handed
    to new_term_found(), which records the item on the matching trackers,
    queues their tags and sends their notification e-mails.
    """

    # Positional fields: term, item id, base item url, item id (url suffix).
    mail_body_template = "AIL Framework,\nNew occurrence for tracked term: {}\nitem id: {}\nurl: {}{}"

    def __init__(self):
        """Read the module configuration and load the tracked words and word sets."""
        super().__init__()

        # NOTE(review): presumably consumed by AbstractModule (not visible here).
        self.pending_seconds = 5

        # Seconds allowed for the word-frequency computation of one item.
        self.max_execution_time = self.process.config.getint('Tracker_Term', "max_execution_time")

        # Prefix to which an item id is appended in the notification e-mail.
        self.full_item_url = self.process.config.get("Notifications", "ail_domain") + "/object/item?id="

        # loads tracked words
        self.list_tracked_words = Term.get_tracked_words_list()
        self.last_refresh_word = time.time()
        self.set_tracked_words_list = Term.get_set_tracked_words_list()
        self.last_refresh_set = time.time()

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, item_id):
        """Search one item for tracked words and tracked word sets.

        :param item_id: id of the item to analyse; it is wrapped in an Item
        """
        # Reload the in-memory lists when the stored trackers are newer than
        # the last local refresh.
        if self.last_refresh_word < Term.get_tracked_term_last_updated_by_type('word'):
            self.list_tracked_words = Term.get_tracked_words_list()
            self.last_refresh_word = time.time()
            self.redis_logger.debug('Tracked word refreshed')
            print('Tracked word refreshed')

        if self.last_refresh_set < Term.get_tracked_term_last_updated_by_type('set'):
            self.set_tracked_words_list = Term.get_set_tracked_words_list()
            self.last_refresh_set = time.time()
            self.redis_logger.debug('Tracked set refreshed')
            print('Tracked set refreshed')

        # Cast message as Item
        item = Item(item_id)
        obj_id = item.get_id()
        item_content = item.get_content()

        # Bound the tokenisation time: when the alarm expires, the SIGALRM
        # handler raises TimeoutException inside the try block.
        signal.alarm(self.max_execution_time)
        dict_words_freq = None
        try:
            dict_words_freq = Term.get_text_word_frequency(item_content)
        except TimeoutException:
            self.redis_logger.warning(f"{obj_id} processing timeout")
        finally:
            # Always cancel the alarm, including when an unexpected exception
            # propagates; otherwise a stale SIGALRM would fire later in
            # unrelated code.
            signal.alarm(0)

        if not dict_words_freq:
            return

        item_source = item.get_source()

        # check solo words
        ####### # TODO: check if source needed #######
        for word in self.list_tracked_words:
            if word in dict_words_freq:
                self.new_term_found(word, 'word', obj_id, item_source)

        # check words set
        # Each entry is indexed as (words, minimum number of distinct words
        # that must be present, tracked set term).
        for elem in self.set_tracked_words_list:
            list_words, nb_words_threshold, word_set = elem[0], elem[1], elem[2]
            nb_uniq_word = sum(1 for set_word in list_words if set_word in dict_words_freq)
            if nb_uniq_word >= nb_words_threshold:
                self.new_term_found(word_set, 'set', obj_id, item_source)

    def new_term_found(self, term, term_type, item_id, item_source):
        """Record a match and trigger the tags and e-mails of its trackers.

        :param term: tracked word, or tracked word set, that matched
        :param term_type: 'word' or 'set'
        :param item_id: id of the item in which the term was found
        :param item_source: source of the item, compared with the sources
            configured on each tracker
        """
        uuid_list = Term.get_term_uuid_list(term, term_type)

        self.redis_logger.info(f'new tracked term found: {term} in {item_id}')
        print(f'new tracked term found: {term} in {item_id}')

        for term_uuid in uuid_list:
            tracker_sources = Tracker.get_tracker_uuid_sources(term_uuid)
            # A tracker without sources accepts every item; otherwise the
            # item source must be one of the tracker sources.
            if tracker_sources and item_source not in tracker_sources:
                continue

            Tracker.add_tracked_item(term_uuid, item_id)

            for tag in Term.get_term_tags(term_uuid):
                self.send_message_to_queue(f'{tag};{item_id}', 'Tags')

            mail_to_notify = Term.get_term_mails(term_uuid)
            if mail_to_notify:
                mail_subject = Tracker.get_email_subject(term_uuid)
                mail_body = Tracker_Term.mail_body_template.format(term, item_id, self.full_item_url, item_id)
                for mail in mail_to_notify:
                    self.redis_logger.debug(f'Send Mail {mail_subject}')
                    print(f'Send Mail {mail_subject}')
                    NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
|
2021-04-02 09:52:05 +02:00
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: build the module and hand control to its run loop.
    Tracker_Term().run()
|