mirror of https://github.com/CIRCL/AIL-framework
chg: [Categ] tests + docs
parent
4a9bda2ee8
commit
20727fff77
|
@ -43,7 +43,7 @@ class ApiKey(AbstractModule):
|
|||
# Send module state to logs
|
||||
self.redis_logger.info(f"Module {self.module_name} initialized")
|
||||
|
||||
def compute(self, message, r_match=False):
|
||||
def compute(self, message, r_result=False):
|
||||
id, score = message.split()
|
||||
item = Item(id)
|
||||
item_content = item.get_content()
|
||||
|
@ -82,7 +82,7 @@ class ApiKey(AbstractModule):
|
|||
# Send to duplicate
|
||||
self.send_message_to_queue('Duplicate', item.get_id())
|
||||
|
||||
if r_match:
|
||||
if r_result:
|
||||
return (google_api_key, aws_access_key, aws_secret_key)
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
95
bin/Categ.py
95
bin/Categ.py
|
@ -4,19 +4,16 @@
|
|||
The ZMQ_PubSub_Categ Module
|
||||
============================
|
||||
|
||||
This module is consuming the Redis-list created by the ZMQ_PubSub_Tokenize_Q
|
||||
Module.
|
||||
|
||||
Each word file created under /files/ represents a category.
|
||||
This module takes these files and compares them to
|
||||
the stream of data given by the ZMQ_PubSub_Tokenize_Q Module.
|
||||
the content of an item.
|
||||
|
||||
When a word from a paste match one or more of these words file, the filename of
|
||||
the paste is published/forwarded to the next modules.
|
||||
When a word from an item matches one or more of these word files, the filename of
|
||||
the item / the item id is published/forwarded to the next modules.
|
||||
|
||||
Each category (each file) represents a dynamic channel.
|
||||
This means that if you create 1000 files under /files/ you'll have 1000 channels
|
||||
where every time there is a matching word to a category, the paste containing
|
||||
where every time there is a matching word to a category, the item containing
|
||||
this word will be pushed to this specific channel.
|
||||
|
||||
..note:: The channel will have the name of the file created.
|
||||
|
@ -25,15 +22,11 @@ Implementing modules can start here, create your own category file,
|
|||
and then create your own module to treat the specific paste matching this
|
||||
category.
|
||||
|
||||
..note:: Module ZMQ_Something_Q and ZMQ_Something are closely bound, always put
|
||||
the same Subscriber name in both of them.
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
*Need running Redis instances. (Redis)
|
||||
*Categories files of words in /files/ need to be created
|
||||
*Need the ZMQ_PubSub_Tokenize_Q Module running to be able to work properly.
|
||||
|
||||
"""
|
||||
|
||||
|
@ -42,16 +35,13 @@ Requirements
|
|||
##################################
|
||||
import os
|
||||
import argparse
|
||||
import time
|
||||
import re
|
||||
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from pubsublogger import publisher
|
||||
from packages import Paste
|
||||
from Helper import Process
|
||||
from packages.Item import Item
|
||||
|
||||
|
||||
class Categ(AbstractModule):
|
||||
|
@ -59,73 +49,66 @@ class Categ(AbstractModule):
|
|||
Categ module for AIL framework
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
def __init__(self, categ_files_dir='../files/'):
|
||||
"""
|
||||
Init Categ
|
||||
"""
|
||||
super(Categ, self).__init__()
|
||||
|
||||
self.categ_files_dir = categ_files_dir
|
||||
|
||||
# default = 1 string
|
||||
self.matchingThreshold = self.process.config.getint("Categ", "matchingThreshold")
|
||||
|
||||
# SCRIPT PARSER #
|
||||
parser = argparse.ArgumentParser(description='Start Categ module on files.')
|
||||
|
||||
parser.add_argument(
|
||||
'-d', type=str, default="../files/",
|
||||
help='Path to the directory containing the category files.',
|
||||
action='store')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
self.reload_categ_words()
|
||||
self.redis_logger.info("Script Categ started")
|
||||
|
||||
# # TODO: trigger reload on change ( save last reload time, ...)
|
||||
def reload_categ_words(self):
|
||||
categories = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve', 'ApiKey']
|
||||
tmp_dict = {}
|
||||
for filename in categories:
|
||||
bname = os.path.basename(filename)
|
||||
tmp_dict[bname] = []
|
||||
with open(os.path.join(args.d, filename), 'r') as f:
|
||||
with open(os.path.join(self.categ_files_dir, filename), 'r') as f:
|
||||
patterns = [r'%s' % ( re.escape(s.strip()) ) for s in f]
|
||||
tmp_dict[bname] = re.compile('|'.join(patterns), re.IGNORECASE)
|
||||
self.categ_words = tmp_dict.items()
|
||||
|
||||
self.categ_items = tmp_dict.items()
|
||||
def compute(self, message, r_result=False):
|
||||
# Create Item Object
|
||||
item = Item(message)
|
||||
# Get item content
|
||||
content = item.get_content()
|
||||
categ_found = []
|
||||
|
||||
prec_filename = None
|
||||
|
||||
|
||||
def compute(self, message):
|
||||
# Cast message as paste
|
||||
paste = Paste.Paste(message)
|
||||
# Get paste content
|
||||
content = paste.get_p_content()
|
||||
|
||||
# init categories found
|
||||
is_categ_found = False
|
||||
|
||||
# Search for pattern categories in paste content
|
||||
for categ, pattern in self.categ_items:
|
||||
# Search for pattern categories in item content
|
||||
for categ, pattern in self.categ_words:
|
||||
|
||||
found = set(re.findall(pattern, content))
|
||||
lenfound = len(found)
|
||||
if lenfound >= self.matchingThreshold:
|
||||
is_categ_found = True
|
||||
msg = '{} {}'.format(paste.p_rel_path, lenfound)
|
||||
categ_found.append(categ)
|
||||
msg = f'{item.get_id()} {lenfound}'
|
||||
|
||||
self.redis_logger.debug('%s;%s %s'%(self.module_name, msg, categ))
|
||||
|
||||
# Export message to categ queue
|
||||
self.process.populate_set_out(msg, categ)
|
||||
print(msg, categ)
|
||||
self.send_message_to_queue(categ, msg)
|
||||
|
||||
self.redis_logger.info(
|
||||
'Categ;{};{};{};Detected {} as {};{}'.format(
|
||||
paste.p_source, paste.p_date, paste.p_name,
|
||||
lenfound, categ, paste.p_rel_path))
|
||||
|
||||
if not is_categ_found:
|
||||
self.redis_logger.debug('No %s found in this paste: %s'%(self.module_name, paste.p_name))
|
||||
|
||||
f'Categ;{item.get_source()};{item.get_date()};{item.get_basename()};Detected {lenfound} as {categ};{item.get_id()}')
|
||||
if r_result:
|
||||
return categ_found
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = Categ()
|
||||
|
||||
# SCRIPT PARSER #
|
||||
parser = argparse.ArgumentParser(description='Start Categ module on files.')
|
||||
parser.add_argument(
|
||||
'-d', type=str, default="../files/",
|
||||
help='Path to the directory containing the category files.',
|
||||
action='store')
|
||||
args = parser.parse_args()
|
||||
|
||||
module = Categ(categ_files_dir=args.d)
|
||||
module.run()
|
||||
|
|
|
@ -9,6 +9,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
|
||||
# Modules Classes
|
||||
from ApiKey import ApiKey
|
||||
from Categ import Categ
|
||||
from Onion import Onion
|
||||
|
||||
# project packages
|
||||
|
@ -25,11 +26,23 @@ class Test_Module_ApiKey(unittest.TestCase):
|
|||
aws_access_key = 'AKIAIOSFODNN7EXAMPLE'
|
||||
aws_secret_key = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY'
|
||||
|
||||
matches = self.module_obj.compute(f'{item_id} 3', r_match=True)
|
||||
matches = self.module_obj.compute(f'{item_id} 3', r_result=True)
|
||||
self.assertCountEqual(matches[0], [google_api_key])
|
||||
self.assertCountEqual(matches[1], [aws_access_key])
|
||||
self.assertCountEqual(matches[2], [aws_secret_key])
|
||||
|
||||
class Test_Module_Categ(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.module_obj = Categ()
|
||||
|
||||
def test_module(self):
|
||||
item_id = 'tests/2021/01/01/categ.gz'
|
||||
test_categ = ['CreditCards', 'Mail', 'Onion', 'Web', 'Credential', 'Cve']
|
||||
|
||||
result = self.module_obj.compute(item_id, r_result=True)
|
||||
self.assertCountEqual(result, test_categ)
|
||||
|
||||
class Test_Module_Onion(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
|
|
Loading…
Reference in New Issue