mirror of https://github.com/CIRCL/AIL-framework
chg: [modules + tests] fix modules + test modules on samples
parent
90b6f43468
commit
42a23da182
|
@ -12,15 +12,14 @@ import time
|
|||
import datetime
|
||||
import redis
|
||||
import os
|
||||
import sys
|
||||
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages #
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Date import Date
|
||||
from pubsublogger import publisher
|
||||
from Helper import Process
|
||||
from packages import Paste
|
||||
import ConfigLoader
|
||||
|
||||
|
|
|
@ -10,14 +10,16 @@ import sys
|
|||
import time
|
||||
import datetime
|
||||
|
||||
from pubsublogger import publisher
|
||||
|
||||
import NotificationHelper
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from packages import Date
|
||||
from packages import Item
|
||||
from packages import Term
|
||||
|
||||
from pubsublogger import publisher
|
||||
|
||||
def clean_term_db_stat_token():
|
||||
all_stat_date = Term.get_all_token_stat_history()
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ class Categ(AbstractModule):
|
|||
Categ module for AIL framework
|
||||
"""
|
||||
|
||||
def __init__(self, categ_files_dir='../files/'):
|
||||
def __init__(self, categ_files_dir=os.path.join(os.environ['AIL_HOME'], 'files')):
|
||||
"""
|
||||
Init Categ
|
||||
"""
|
||||
|
@ -107,7 +107,7 @@ if __name__ == '__main__':
|
|||
# SCRIPT PARSER #
|
||||
parser = argparse.ArgumentParser(description='Start Categ module on files.')
|
||||
parser.add_argument(
|
||||
'-d', type=str, default="../files/",
|
||||
'-d', type=str, default=os.path.join(os.environ['AIL_HOME'], 'files'),
|
||||
help='Path to the directory containing the category files.',
|
||||
action='store')
|
||||
args = parser.parse_args()
|
||||
|
|
|
@ -104,6 +104,7 @@ class Global(AbstractModule):
|
|||
# Incorrect filename
|
||||
if not os.path.commonprefix([filename, self.PASTES_FOLDER]) == self.PASTES_FOLDER:
|
||||
self.redis_logger.warning(f'Global; Path traversal detected {filename}')
|
||||
print(f'Global; Path traversal detected {filename}')
|
||||
|
||||
else:
|
||||
# Decode compressed base64
|
||||
|
@ -134,6 +135,7 @@ class Global(AbstractModule):
|
|||
|
||||
else:
|
||||
self.redis_logger.debug(f"Empty Item: {message} not processed")
|
||||
print(f"Empty Item: {message} not processed")
|
||||
|
||||
|
||||
def check_filename(self, filename, new_file_content):
|
||||
|
@ -145,6 +147,7 @@ class Global(AbstractModule):
|
|||
# check if file exist
|
||||
if os.path.isfile(filename):
|
||||
self.redis_logger.warning(f'File already exist {filename}')
|
||||
print(f'File already exist {filename}')
|
||||
|
||||
# Check that file already exists but content differs
|
||||
curr_file_content = self.gunzip_file(filename)
|
||||
|
@ -165,11 +168,13 @@ class Global(AbstractModule):
|
|||
if os.path.isfile(filename):
|
||||
# Ignore duplicate
|
||||
self.redis_logger.debug(f'ignore duplicated file {filename}')
|
||||
print(f'ignore duplicated file {filename}')
|
||||
filename = None
|
||||
|
||||
else:
|
||||
# Ignore duplicate checksum equals
|
||||
self.redis_logger.debug(f'ignore duplicated file {filename}')
|
||||
print(f'ignore duplicated file {filename}')
|
||||
filename = None
|
||||
|
||||
else:
|
||||
|
@ -192,10 +197,12 @@ class Global(AbstractModule):
|
|||
curr_file_content = f.read()
|
||||
except EOFError:
|
||||
self.redis_logger.warning(f'Global; Incomplete file: {filename}')
|
||||
print(f'Global; Incomplete file: {filename}')
|
||||
# save daily stats
|
||||
self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
|
||||
except OSError:
|
||||
self.redis_logger.warning(f'Global; Not a gzipped file: {filename}')
|
||||
print(f'Global; Not a gzipped file: {filename}')
|
||||
# save daily stats
|
||||
self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
|
||||
|
||||
|
@ -213,6 +220,7 @@ class Global(AbstractModule):
|
|||
gunzipped_bytes_obj = fo.read()
|
||||
except Exception as e:
|
||||
self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
|
||||
print(f'Global; Invalid Gzip file: {filename}, {e}')
|
||||
|
||||
return gunzipped_bytes_obj
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages import Paste
|
||||
from packages.Item import Item
|
||||
|
||||
|
||||
class Indexer(AbstractModule):
|
||||
|
@ -98,19 +98,23 @@ class Indexer(AbstractModule):
|
|||
|
||||
|
||||
def compute(self, message):
|
||||
try:
|
||||
PST = Paste.Paste(message)
|
||||
docpath = message.split(" ", -1)[-1]
|
||||
paste = PST.get_p_content()
|
||||
self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
|
||||
print(f"Indexing - {self.indexname}: {docpath}")
|
||||
docpath = message.split(" ", -1)[-1]
|
||||
|
||||
item = Item(message)
|
||||
item_id = item.get_id()
|
||||
item_content = item.get_content()
|
||||
|
||||
self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
|
||||
print(f"Indexing - {self.indexname}: {docpath}")
|
||||
|
||||
try:
|
||||
# Avoid calculating the index's size at each message
|
||||
if(time.time() - self.last_refresh > self.TIME_WAIT):
|
||||
self.last_refresh = time.time()
|
||||
if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000):
|
||||
timestamp = int(time.time())
|
||||
self.redis_logger.debug(f"Creating new index {timestamp}")
|
||||
print(f"Creating new index {timestamp}")
|
||||
self.indexpath = join(self.baseindexpath, str(timestamp))
|
||||
self.indexname = str(timestamp)
|
||||
# update all_index
|
||||
|
@ -125,13 +129,13 @@ class Indexer(AbstractModule):
|
|||
indexwriter.update_document(
|
||||
title=docpath,
|
||||
path=docpath,
|
||||
content=paste)
|
||||
content=item_content)
|
||||
indexwriter.commit()
|
||||
|
||||
except IOError:
|
||||
self.redis_logger.debug(f"CRC Checksum Failed on: {PST.p_path}")
|
||||
self.redis_logger.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
|
||||
PST.p_source, PST.p_date, PST.p_name))
|
||||
self.redis_logger.debug(f"CRC Checksum Failed on: {item_id}")
|
||||
print(f"CRC Checksum Failed on: {item_id}")
|
||||
self.redis_logger.error(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};CRC Checksum Failed')
|
||||
|
||||
def check_index_size(self):
|
||||
"""
|
||||
|
|
|
@ -123,7 +123,7 @@ class SentimentAnalysis(AbstractModule):
|
|||
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
|
||||
neg_line = 0
|
||||
pos_line = 0
|
||||
sid = SentimentIntensityAnalyzer(sentiment_lexicon_file)
|
||||
sid = SentimentIntensityAnalyzer(self.sentiment_lexicon_file)
|
||||
for sentence in sentences:
|
||||
ss = sid.polarity_scores(sentence)
|
||||
for k in sorted(ss):
|
||||
|
|
|
@ -45,10 +45,11 @@ class Tags(AbstractModule):
|
|||
if len(mess_split) == 2:
|
||||
tag = mess_split[0]
|
||||
item = Item(mess_split[1])
|
||||
item_id = item.get_id()
|
||||
|
||||
# Create a new tag
|
||||
Tag.add_tag('item', tag, item.get_id())
|
||||
print(f'{item.get_id(): Tagged {tag}}')
|
||||
print(f'{item_id}: Tagged {tag}')
|
||||
|
||||
# Forward message to channel
|
||||
self.send_message_to_queue(message, 'MISP_The_Hive_feeder')
|
||||
|
|
|
@ -5,21 +5,21 @@ The Template Module
|
|||
======================
|
||||
|
||||
This module is a template for Template for new modules
|
||||
|
||||
|
||||
"""
|
||||
|
||||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pubsublogger import publisher
|
||||
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from Helper import Process
|
||||
from modules.abstract_module import AbstractModule
|
||||
|
||||
|
||||
class Template(AbstractModule):
|
||||
|
@ -30,12 +30,12 @@ class Template(AbstractModule):
|
|||
def __init__(self):
|
||||
super(Template, self).__init__()
|
||||
|
||||
# Send module state to logs
|
||||
self.redis_logger.info("Module %s initialized"%(self.module_name))
|
||||
|
||||
# Pending time between two computation in seconds
|
||||
# Pending time between two computation (computeNone) in seconds
|
||||
self.pending_seconds = 10
|
||||
|
||||
# Send module state to logs
|
||||
self.redis_logger.info(f'Module {self.module_name} initialized')
|
||||
|
||||
|
||||
def computeNone(self):
|
||||
"""
|
||||
|
@ -52,6 +52,6 @@ class Template(AbstractModule):
|
|||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
|
||||
module = Template()
|
||||
module.run()
|
||||
|
|
|
@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
|||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
import NotificationHelper
|
||||
from packages import Item
|
||||
from packages.Item import Item
|
||||
from packages import Term
|
||||
from lib import Tracker
|
||||
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -7,6 +7,7 @@ import unittest
|
|||
|
||||
import gzip
|
||||
from base64 import b64encode
|
||||
from distutils.dir_util import copy_tree
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
||||
|
@ -20,9 +21,20 @@ from modules.Keys import Keys
|
|||
from modules.Onion import Onion
|
||||
|
||||
# project packages
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
import lib.crawlers as crawlers
|
||||
import packages.Item as Item
|
||||
|
||||
#### COPY SAMPLES ####
|
||||
config_loader = ConfigLoader()
|
||||
# # TODO:move me in new Item package
|
||||
ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
|
||||
ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')
|
||||
TESTS_ITEMS_FOLDER = os.path.join(ITEMS_FOLDER, 'tests')
|
||||
sample_dir = os.path.join(os.environ['AIL_HOME'], 'samples')
|
||||
copy_tree(sample_dir, TESTS_ITEMS_FOLDER)
|
||||
#### ---- ####
|
||||
|
||||
class Test_Module_ApiKey(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
|
@ -91,29 +103,31 @@ class Test_Module_Global(unittest.TestCase):
|
|||
|
||||
item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
|
||||
item_content_1 = b64encode(gzip.compress(item_content)).decode()
|
||||
item_content_2 = b64encode(gzip.compress(item_content + b' more text')).decode()
|
||||
item_content_2 = b64encode(gzip.compress(item_content + b' more text ...')).decode()
|
||||
message = f'{item_id} {item_content_1}'
|
||||
|
||||
# Test new item
|
||||
result = self.module_obj.compute(message, r_result=True)
|
||||
print(result)
|
||||
print(f'test new item: {result}')
|
||||
self.assertEqual(result, item_id)
|
||||
|
||||
# Test duplicate
|
||||
result = self.module_obj.compute(message, r_result=True)
|
||||
print(result)
|
||||
print(f'test duplicate {result}')
|
||||
self.assertIsNone(result)
|
||||
|
||||
# Test same id with != content
|
||||
item = Item.Item('tests/2021/01/01/global_831875da824fc86ab5cc0e835755b520.gz')
|
||||
item.delete()
|
||||
message = f'{item_id} {item_content_2}'
|
||||
result = self.module_obj.compute(message, r_result=True)
|
||||
print(result)
|
||||
print(f'test same id with != content: {result}')
|
||||
self.assertIn(item_id[:-3], result)
|
||||
self.assertNotEqual(result, item_id)
|
||||
|
||||
# cleanup
|
||||
item = Item.Item(result)
|
||||
item.delete()
|
||||
# item = Item.Item(result)
|
||||
# item.delete()
|
||||
# # TODO: remove from queue
|
||||
|
||||
class Test_Module_Keys(unittest.TestCase):
|
||||
|
|
|
@ -31,7 +31,7 @@ if __name__ == '__main__':
|
|||
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
||||
config_loader = None
|
||||
|
||||
r_serv.set('ail:current_background_script', 'domain languages update')
|
||||
r_serv_db.set('ail:current_background_script', 'domain languages update')
|
||||
|
||||
nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
|
||||
if not nb_elem_to_update:
|
||||
|
|
Loading…
Reference in New Issue