2018-05-04 13:53:29 +02:00
|
|
|
#!/usr/bin/env python3
|
2014-08-06 11:43:40 +02:00
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
|
|
|
The ZMQ_PubSub_Categ Module
|
|
|
|
============================
|
|
|
|
|
|
|
|
Each word file created under /files/ represents a category.
|
|
|
|
This module takes these files and compares them to
|
2021-05-19 16:57:20 +02:00
|
|
|
the content of an item.
|
2014-08-06 11:43:40 +02:00
|
|
|
|
2021-05-19 16:57:20 +02:00
|
|
|
When a word from an item matches one or more of these word files, the filename of
|
|
|
|
the item / the item id is published/forwarded to the next modules.
|
2014-08-06 11:43:40 +02:00
|
|
|
|
|
|
|
Each category (each file) represents a dynamic channel.
|
|
|
|
This means that if you create 1000 files under /files/ you'll have 1000 channels
|
2021-05-19 16:57:20 +02:00
|
|
|
where every time there is a matching word to a category, the item containing
|
2014-08-06 11:43:40 +02:00
|
|
|
this word will be pushed to this specific channel.
|
|
|
|
|
|
|
|
.. note:: The channel will have the name of the file created.
|
|
|
|
|
|
|
|
Implementing modules can start here, create your own category file,
|
2014-08-14 14:11:07 +02:00
|
|
|
and then create your own module to treat the specific paste matching this
|
|
|
|
category.
|
2014-08-06 11:43:40 +02:00
|
|
|
|
|
|
|
Requirements
|
|
|
|
------------
|
|
|
|
|
|
|
|
*Need running Redis instances. (Redis)
|
|
|
|
*Category word files in /files/ need to be created
|
|
|
|
|
|
|
|
"""
|
2021-04-02 09:52:05 +02:00
|
|
|
|
|
|
|
##################################
|
|
|
|
# Import External packages
|
|
|
|
##################################
|
2014-08-14 14:11:07 +02:00
|
|
|
import argparse
|
2021-06-02 14:42:23 +02:00
|
|
|
import os
|
2014-09-05 17:05:45 +02:00
|
|
|
import re
|
2021-06-02 14:42:23 +02:00
|
|
|
import sys
|
2021-04-02 09:52:05 +02:00
|
|
|
|
2021-06-02 14:42:23 +02:00
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
2021-04-02 09:52:05 +02:00
|
|
|
##################################
|
|
|
|
# Import Project packages
|
|
|
|
##################################
|
2021-06-02 14:42:23 +02:00
|
|
|
from modules.abstract_module import AbstractModule
|
2023-04-13 14:25:02 +02:00
|
|
|
from lib.ConfigLoader import ConfigLoader
|
2022-10-25 16:25:19 +02:00
|
|
|
from lib.objects.Items import Item
|
2014-08-06 11:43:40 +02:00
|
|
|
|
2014-08-19 19:07:07 +02:00
|
|
|
|
2021-04-02 09:52:05 +02:00
|
|
|
class Categ(AbstractModule):
    """
    Categ module for AIL framework

    Loads one word list per category from ``categ_files_dir``, compiles each
    list into a single case-insensitive regex and, for every processed object,
    sends a message to the queue of each category whose regex matches at least
    ``matchingThreshold`` distinct strings in the object's content.
    """

    # Regex that can never match (empty negative lookahead). Used for a
    # category whose word file contains no usable word, so that the category
    # stays present in ``categ_words`` without matching every item.
    _NEVER_MATCH = re.compile(r'(?!)')

    def __init__(self, categ_files_dir=os.path.join(os.environ['AIL_HOME'], 'files')):
        """
        Init Categ

        :param categ_files_dir: directory containing one word file per
            category (defaults to ``$AIL_HOME/files``).
        """
        super().__init__()

        self.categ_files_dir = categ_files_dir

        config_loader = ConfigLoader()

        # Minimum number of distinct matched strings required to flag a
        # category (original note: default = 1 string)
        self.matchingThreshold = config_loader.get_config_int("Categ", "matchingThreshold")

        self.reload_categ_words()
        self.redis_logger.info("Script Categ started")

    # # TODO: trigger reload on change ( save last reload time, ...)
    def reload_categ_words(self):
        """
        (Re)build the per-category regexes from the word files.

        Each non-blank line of a category file is stripped, escaped and used
        as one literal alternative of a case-insensitive regex. The result is
        stored in ``self.categ_words`` as ``(category, compiled_regex)`` pairs.

        :raises OSError: if a category file cannot be opened.
        """
        categories = ['CreditCards', 'Mail', 'Onion', 'Urls', 'Credential', 'Cve', 'ApiKey']
        tmp_dict = {}
        for filename in categories:
            bname = os.path.basename(filename)
            with open(os.path.join(self.categ_files_dir, filename), 'r') as f:
                words = (line.strip() for line in f)
                # Blank lines must be dropped: an empty alternative makes the
                # regex match the empty string at every position, which would
                # flag this category for every single item.
                patterns = [re.escape(word) for word in words if word]
            if patterns:
                tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
            else:
                # Empty word file: '|'.join([]) == '' would match everything.
                tmp_dict[bname] = self._NEVER_MATCH
        # Assign once at the end so categ_words is never partially built.
        self.categ_words = tmp_dict.items()

    def compute(self, message, r_result=False):
        """
        Match the current object's content against every category regex.

        For each category with at least ``matchingThreshold`` distinct
        matches, the match count (as a string) is sent to the queue named
        after the category.

        :param message: queue message; not read here, the object is obtained
            through ``self.get_obj()``.
        :param r_result: when true, return the list of matched categories.
        :return: list of matched category names if ``r_result`` is true,
            otherwise ``None``.
        """
        # Create Item Object
        item = self.get_obj()
        # Get item content
        content = item.get_content()
        categ_found = []

        # Search for pattern categories in item content
        for categ, pattern in self.categ_words:
            # Distinct matched strings only: repeated hits count once.
            found = set(pattern.findall(content))
            lenfound = len(found)
            if lenfound < self.matchingThreshold:
                continue

            categ_found.append(categ)
            msg = str(lenfound)

            # Export message to categ queue
            print(msg, categ)
            self.add_message_to_queue(message=msg, queue=categ)

            self.redis_logger.debug(
                f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}')

        if r_result:
            return categ_found
|
2021-04-02 09:52:05 +02:00
|
|
|
|
2023-04-21 14:53:33 +02:00
|
|
|
|
2021-05-19 16:57:20 +02:00
|
|
|
if __name__ == '__main__':
    # Command-line entry point: read the category directory from -d
    # (defaulting to $AIL_HOME/files), then start the Categ module.
    default_categ_dir = os.path.join(os.environ['AIL_HOME'], 'files')

    cli = argparse.ArgumentParser(description='Start Categ module on files.')
    cli.add_argument(
        '-d',
        action='store',
        type=str,
        default=default_categ_dir,
        help='Path to the directory containing the category files.',
    )
    options = cli.parse_args()

    Categ(categ_files_dir=options.d).run()
|