2019-08-06 17:03:49 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
2021-06-02 16:04:52 +02:00
|
|
|
The Tracker_Term Module
|
2019-08-06 17:03:49 +02:00
|
|
|
=======================
|
|
|
|
|
|
|
|
"""
|
2021-04-02 09:52:05 +02:00
|
|
|
|
|
|
|
##################################
|
|
|
|
# Import External packages
|
|
|
|
##################################
|
2019-08-06 17:03:49 +02:00
|
|
|
import os
|
|
|
|
import sys
|
|
|
|
import time
|
2019-08-09 14:20:13 +02:00
|
|
|
import signal
|
2021-09-28 21:11:20 +02:00
|
|
|
|
2019-08-06 17:03:49 +02:00
|
|
|
|
2021-06-02 16:04:52 +02:00
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
2021-04-02 09:52:05 +02:00
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
2021-06-02 16:04:52 +02:00
|
|
|
from modules.abstract_module import AbstractModule
|
2023-04-13 14:25:02 +02:00
|
|
|
from lib.ConfigLoader import ConfigLoader
|
2023-05-04 16:35:56 +02:00
|
|
|
from lib.objects import ail_objects
|
2020-07-10 15:54:14 +02:00
|
|
|
from lib import Tracker
|
|
|
|
|
2023-03-30 14:58:55 +02:00
|
|
|
from exporter.MailExporter import MailExporterTracker
|
|
|
|
from exporter.WebHookExporter import WebHookExporterTracker
|
|
|
|
|
2019-08-09 14:20:13 +02:00
|
|
|
class TimeoutException(Exception):
    """Signals that the SIGALRM-based processing time limit was exceeded."""
|
2021-09-28 21:11:20 +02:00
|
|
|
|
|
|
|
|
2019-08-09 14:20:13 +02:00
|
|
|
def timeout_handler(signum, frame):
    """Signal handler that aborts the current work by raising TimeoutException.

    :param signum: signal number passed by the interpreter (unused)
    :param frame: interrupted stack frame passed by the interpreter (unused)
    :raises TimeoutException: always
    """
    raise TimeoutException()
|
2021-09-28 21:11:20 +02:00
|
|
|
|
|
|
|
|
2019-08-09 14:20:13 +02:00
|
|
|
# Install the handler at import time: whenever SIGALRM is delivered to this
# process, timeout_handler runs and raises TimeoutException.
signal.signal(signal.SIGALRM, timeout_handler)
|
|
|
|
|
2019-08-07 12:08:24 +02:00
|
|
|
|
2021-06-02 16:04:52 +02:00
|
|
|
class Tracker_Term(AbstractModule):
    """
    Tracker_Term module for AIL framework

    Matches the words found in an object's content against the tracked
    words and tracked word sets returned by the Tracker library, and
    records / tags / exports every match.
    """

    def __init__(self, queue=True):
        """Load the configuration, the tracked terms and the exporters.

        :param queue: forwarded unchanged to AbstractModule.__init__
        """
        super(Tracker_Term, self).__init__(queue=queue)

        config_loader = ConfigLoader()

        # NOTE(review): not read in this class; presumably consumed by
        # AbstractModule -- confirm.
        self.pending_seconds = 5

        # Passed to signal.alarm() in compute(), hence expressed in seconds.
        self.max_execution_time = config_loader.get_config_int('Tracker_Term', "max_execution_time")

        # loads tracked words
        self.tracked_words = Tracker.get_tracked_words()
        self.last_refresh_word = time.time()
        self.tracked_sets = Tracker.get_tracked_sets()
        self.last_refresh_set = time.time()

        # Exporter
        self.exporters = {
            'mail': MailExporterTracker(),
            'webhook': WebHookExporterTracker(),
        }

        self.redis_logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message):
        """Search the current object for tracked words and word sets.

        The object is obtained through self.get_obj(); ``message`` itself is
        not read by this method.

        :param message: queue message that triggered the call (unused here)
        :return: None
        """
        # refresh Tracked term
        if self.last_refresh_word < Tracker.get_tracker_last_updated_by_type('word'):
            self.tracked_words = Tracker.get_tracked_words()
            self.last_refresh_word = time.time()
            self.redis_logger.debug('Tracked word refreshed')
            print('Tracked word refreshed')

        if self.last_refresh_set < Tracker.get_tracker_last_updated_by_type('set'):
            self.tracked_sets = Tracker.get_tracked_sets()
            self.last_refresh_set = time.time()
            self.redis_logger.debug('Tracked set refreshed')
            print('Tracked set refreshed')

        obj = self.get_obj()
        obj_type = obj.get_type()

        # Object Filter
        if obj_type not in self.tracked_words and obj_type not in self.tracked_sets:
            return None

        content = obj.get_content()

        # Bound the word-frequency computation with a SIGALRM timer.
        signal.alarm(self.max_execution_time)
        dict_words_freq = None
        try:
            dict_words_freq = Tracker.get_text_word_frequency(content)
        except TimeoutException:
            self.redis_logger.warning(f"{self.obj.get_global_id()} processing timeout")
        finally:
            # Always disarm the timer: if any other exception propagates, a
            # still-pending alarm would fire later outside this try block.
            signal.alarm(0)

        if not dict_words_freq:
            return None

        # check solo words
        # .get(): the filter above only guarantees the type is present in at
        # least one of the two mappings, not in both.
        for word in self.tracked_words.get(obj_type, ()):
            if word in dict_words_freq:
                self.new_tracker_found(word, 'word', obj)

        # check words set
        for tracked_set in self.tracked_sets.get(obj_type, ()):
            nb_uniq_word = 0
            for word in tracked_set['words']:
                if word in dict_words_freq:
                    nb_uniq_word += 1
            # A set matches once enough of its distinct words are present.
            if nb_uniq_word >= tracked_set['nb']:
                self.new_tracker_found(tracked_set['tracked'], 'set', obj)

    def new_tracker_found(self, tracker_name, tracker_type, obj):  # TODO FILTER
        """Record a match for every tracker registered on ``tracker_name``.

        For each matching tracker that does not filter out ``obj``: log the
        match, add the object to the tracker, apply the tracker's tags and
        run the mail / webhook exporters when the tracker enables them.

        :param tracker_name: tracked value that matched (a word, or the
            'tracked' entry of a word set)
        :param tracker_type: 'word' or 'set', as passed by compute()
        :param obj: object in which the match was found
        """
        obj_id = obj.get_id()

        for tracker_uuid in Tracker.get_trackers_by_tracked_obj_type(tracker_type, obj.get_type(), tracker_name):
            tracker = Tracker.Tracker(tracker_uuid)

            # Filter Object
            filters = tracker.get_filters()
            if ail_objects.is_filtered(obj, filters):
                continue

            print(f'new tracked term {tracker_uuid} found: {tracker_name} in {self.obj.get_global_id()}')
            self.redis_logger.warning(f'new tracked term found: {tracker_name} in {self.obj.get_global_id()}')

            tracker.add(obj.get_type(), obj.get_subtype(), obj_id)

            # Tags
            for tag in tracker.get_tags():
                if obj.get_type() == 'item':
                    # Items are tagged through the 'Tags' queue rather than directly.
                    self.add_message_to_queue(message=tag, queue='Tags')
                else:
                    obj.add_tag(tag)

            # Mail
            if tracker.mail_export():
                # TODO add matches + custom subjects
                self.exporters['mail'].export(tracker, obj)

            # Webhook
            if tracker.webhook_export():
                self.exporters['webhook'].export(tracker, obj)
|
2021-10-04 12:55:40 +02:00
|
|
|
|
2021-04-02 09:52:05 +02:00
|
|
|
|
2021-09-28 21:11:20 +02:00
|
|
|
if __name__ == '__main__':
    # Script entry point: instantiate the module and call its run() method.
    Tracker_Term().run()
|
2023-05-04 16:35:56 +02:00
|
|
|
# module.compute('submitted/2023/05/02/submitted_b1e518f1-703b-40f6-8238-d1c22888197e.gz')
|