2019-08-09 14:20:13 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
|
|
|
This Module is used for regex tracking.
|
|
|
|
It processes every paste coming from the global module and test the regexs
|
|
|
|
supplied in the term webpage.
|
|
|
|
|
|
|
|
"""
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
import time
|
|
|
|
|
|
|
|
from Helper import Process
|
|
|
|
from pubsublogger import publisher
|
|
|
|
|
|
|
|
import NotificationHelper
|
|
|
|
|
|
|
|
from packages import Item
|
|
|
|
from packages import Term
|
|
|
|
|
2020-06-24 15:07:45 +02:00
|
|
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
2020-07-10 15:54:14 +02:00
|
|
|
import Tracker
|
2020-06-24 15:07:45 +02:00
|
|
|
import regex_helper
|
|
|
|
|
2020-10-13 16:02:30 +02:00
|
|
|
full_item_url = "/object/item?id="
|
2019-08-09 14:20:13 +02:00
|
|
|
mail_body_template = "AIL Framework,\nNew occurrence for term tracked regex: {}\nitem id: {}\nurl: {}{}"
|
|
|
|
|
|
|
|
dict_regex_tracked = Term.get_regex_tracked_words_dict()
|
|
|
|
last_refresh = time.time()
|
|
|
|
|
|
|
|
def new_term_found(term, term_type, item_id, item_date):
|
|
|
|
uuid_list = Term.get_term_uuid_list(term, 'regex')
|
|
|
|
print('new tracked term found: {} in {}'.format(term, item_id))
|
|
|
|
|
|
|
|
for term_uuid in uuid_list:
|
|
|
|
Term.add_tracked_item(term_uuid, item_id, item_date)
|
|
|
|
|
|
|
|
tags_to_add = Term.get_term_tags(term_uuid)
|
|
|
|
for tag in tags_to_add:
|
|
|
|
msg = '{};{}'.format(tag, item_id)
|
|
|
|
p.populate_set_out(msg, 'Tags')
|
|
|
|
|
|
|
|
mail_to_notify = Term.get_term_mails(term_uuid)
|
|
|
|
if mail_to_notify:
|
2020-07-10 16:01:41 +02:00
|
|
|
mail_subject = Tracker.get_email_subject(term_uuid)
|
2019-08-09 14:20:13 +02:00
|
|
|
mail_body = mail_body_template.format(term, item_id, full_item_url, item_id)
|
|
|
|
for mail in mail_to_notify:
|
2020-07-10 15:54:14 +02:00
|
|
|
NotificationHelper.sendEmailNotification(mail, mail_subject, mail_body)
|
2019-08-09 14:20:13 +02:00
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
|
publisher.port = 6380
|
|
|
|
publisher.channel = "Script"
|
|
|
|
publisher.info("Script RegexTracker started")
|
|
|
|
|
|
|
|
config_section = 'RegexTracker'
|
2020-06-24 15:07:45 +02:00
|
|
|
module_name = "RegexTracker"
|
2019-08-09 14:20:13 +02:00
|
|
|
p = Process(config_section)
|
|
|
|
max_execution_time = p.config.getint(config_section, "max_execution_time")
|
|
|
|
|
2020-09-09 15:06:34 +02:00
|
|
|
full_item_url = p.config.get("Notifications", "ail_domain") + full_item_url
|
2019-08-09 14:20:13 +02:00
|
|
|
|
2020-06-24 15:07:45 +02:00
|
|
|
redis_cache_key = regex_helper.generate_redis_cache_key(module_name)
|
|
|
|
|
2019-08-09 14:20:13 +02:00
|
|
|
# Regex Frequency
|
|
|
|
while True:
|
|
|
|
|
|
|
|
item_id = p.get_from_set()
|
|
|
|
|
|
|
|
if item_id is not None:
|
|
|
|
|
|
|
|
item_date = Item.get_item_date(item_id)
|
|
|
|
item_content = Item.get_item_content(item_id)
|
|
|
|
|
|
|
|
for regex in dict_regex_tracked:
|
2020-06-24 15:07:45 +02:00
|
|
|
matched = regex_helper.regex_search(module_name, redis_cache_key, dict_regex_tracked[regex], item_id, item_content, max_time=max_execution_time)
|
2019-08-09 14:20:13 +02:00
|
|
|
if matched:
|
|
|
|
new_term_found(regex, 'regex', item_id, item_date)
|
|
|
|
|
|
|
|
else:
|
|
|
|
time.sleep(5)
|
|
|
|
|
|
|
|
# refresh Tracked term
|
|
|
|
if last_refresh < Term.get_tracked_term_last_updated_by_type('regex'):
|
|
|
|
dict_regex_tracked = Term.get_regex_tracked_words_dict()
|
|
|
|
last_refresh = time.time()
|
|
|
|
print('Tracked set refreshed')
|