AIL-framework/bin/trackers/Tracker_Term.py

154 lines
4.8 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The Tracker_Term Module
===================
"""
2021-04-02 09:52:05 +02:00
##################################
# Import External packages
##################################
import os
import sys
import time
import signal
sys.path.append(os.environ['AIL_BIN'])
2021-04-02 09:52:05 +02:00
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from lib.objects import ail_objects
from lib import Tracker
2023-03-30 14:58:55 +02:00
from exporter.MailExporter import MailExporterTracker
from exporter.WebHookExporter import WebHookExporterTracker
class TimeoutException(Exception):
    """Raised by the SIGALRM handler when processing exceeds its time budget."""
def timeout_handler(signum, frame):
    """Signal handler: abort the interrupted code by raising TimeoutException.

    :param signum: number of the received signal (unused)
    :param frame: stack frame that was interrupted (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException()
# Install timeout_handler for SIGALRM at import time, so that an expired
# signal.alarm() surfaces as a TimeoutException in the running code.
signal.signal(signal.SIGALRM, timeout_handler)
class Tracker_Term(AbstractModule):
    """
    Tracker_Term module for AIL framework

    Looks for tracked words and tracked word sets in the content of the
    objects it receives. Every match is recorded on the tracker, tagged and,
    when the tracker asks for it, exported by mail and/or webhook.
    """

    def __init__(self, queue=True):
        """
        Load the module configuration, the tracked terms and the exporters.

        :param queue: forwarded unchanged to AbstractModule.__init__
        """
        super(Tracker_Term, self).__init__(queue=queue)

        config_loader = ConfigLoader()

        # NOTE(review): presumably the idle wait used by the AbstractModule
        # run loop - confirm against the base class.
        self.pending_seconds = 5

        # Time budget, in seconds (passed to signal.alarm), for the word
        # frequency extraction of a single object.
        self.max_execution_time = config_loader.get_config_int('Tracker_Term', "max_execution_time")

        # loads tracked words
        self.tracked_words = Tracker.get_tracked_words()
        self.last_refresh_word = time.time()
        self.tracked_sets = Tracker.get_tracked_sets()
        self.last_refresh_set = time.time()

        # Exporter
        self.exporters = {'mail': MailExporterTracker(),
                          'webhook': WebHookExporterTracker()}

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, obj_id, obj_type='item', subtype=''):
        """
        Search one object for tracked words and tracked word sets.

        :param obj_id: id of the object to analyse
        :param obj_type: type of the object, 'item' by default
        :param subtype: subtype of the object, empty by default
        :return: None
        """
        # refresh Tracked term
        if self.last_refresh_word < Tracker.get_tracker_last_updated_by_type('word'):
            self.tracked_words = Tracker.get_tracked_words()
            self.last_refresh_word = time.time()
            self.redis_logger.debug('Tracked word refreshed')
            print('Tracked word refreshed')

        if self.last_refresh_set < Tracker.get_tracker_last_updated_by_type('set'):
            self.tracked_sets = Tracker.get_tracked_sets()
            self.last_refresh_set = time.time()
            self.redis_logger.debug('Tracked set refreshed')
            print('Tracked set refreshed')

        obj = ail_objects.get_object(obj_type, subtype, obj_id)
        obj_type = obj.get_type()

        # Object Filter: nothing is tracked for this type of object
        if obj_type not in self.tracked_words and obj_type not in self.tracked_sets:
            return None

        content = obj.get_content()

        dict_words_freq = None
        signal.alarm(self.max_execution_time)
        try:
            dict_words_freq = Tracker.get_text_word_frequency(content)
        except TimeoutException:
            self.redis_logger.warning(f"{obj.get_id()} processing timeout")
        finally:
            # Always disarm the alarm: if another exception escapes, a pending
            # SIGALRM would otherwise fire later in unrelated code.
            signal.alarm(0)

        if not dict_words_freq:
            return None

        # check solo words
        # The filter above only guarantees that ONE of the two mappings knows
        # this object type, so each lookup is guarded separately.
        if obj_type in self.tracked_words:
            for word in self.tracked_words[obj_type]:
                if word in dict_words_freq:
                    self.new_tracker_found(word, 'word', obj)

        # check words set
        if obj_type in self.tracked_sets:
            for tracked_set in self.tracked_sets[obj_type]:
                # number of words of the set that are present in the content
                nb_uniq_word = sum(1 for word in tracked_set['words'] if word in dict_words_freq)
                if nb_uniq_word >= tracked_set['nb']:
                    self.new_tracker_found(tracked_set['tracked'], 'set', obj)

    def new_tracker_found(self, tracker_name, tracker_type, obj):  # TODO FILTER
        """
        Register a match on every tracker following this term for this type
        of object, then tag the object and run the configured exports.

        :param tracker_name: tracked term that matched (word or set)
        :param tracker_type: 'word' or 'set'
        :param obj: object in which the term was found
        """
        obj_id = obj.get_id()
        for tracker_uuid in Tracker.get_trackers_by_tracked_obj_type(tracker_type, obj.get_type(), tracker_name):
            tracker = Tracker.Tracker(tracker_uuid)

            # Filter Object
            filters = tracker.get_filters()
            if ail_objects.is_filtered(obj, filters):
                continue

            print(f'new tracked term {tracker_uuid} found: {tracker_name} in {obj_id}')
            self.redis_logger.warning(f'new tracked term found: {tracker_name} in {obj_id}')

            tracker.add(obj.get_type(), obj.get_subtype(), obj_id)

            # Tags
            for tag in tracker.get_tags():
                if obj.get_type() == 'item':
                    # items are tagged through the 'Tags' queue
                    msg = f'{tag};{obj_id}'
                    self.add_message_to_queue(msg, 'Tags')
                else:
                    obj.add_tag(tag)

            # Mail
            if tracker.mail_export():
                # TODO add matches + custom subjects
                self.exporters['mail'].export(tracker, obj)

            # Webhook
            if tracker.webhook_export():
                self.exporters['webhook'].export(tracker, obj)
2021-04-02 09:52:05 +02:00
if __name__ == '__main__':
    # Script entry point: build the module and start its processing loop.
    module = Tracker_Term()
    module.run()
    # Manual test on a single object:
    # module.compute('submitted/2023/05/02/submitted_b1e518f1-703b-40f6-8238-d1c22888197e.gz')