mirror of https://github.com/CIRCL/AIL-framework
chg: [modules + tests] fix modules + test modules on samples
parent
90b6f43468
commit
42a23da182
|
@ -12,15 +12,14 @@ import time
|
||||||
import datetime
|
import datetime
|
||||||
import redis
|
import redis
|
||||||
import os
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
# Import Project packages #
|
# Import Project packages #
|
||||||
##################################
|
##################################
|
||||||
from module.abstract_module import AbstractModule
|
from modules.abstract_module import AbstractModule
|
||||||
from packages.Date import Date
|
from packages.Date import Date
|
||||||
from pubsublogger import publisher
|
|
||||||
from Helper import Process
|
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
import ConfigLoader
|
import ConfigLoader
|
||||||
|
|
||||||
|
|
|
@ -10,14 +10,16 @@ import sys
|
||||||
import time
|
import time
|
||||||
import datetime
|
import datetime
|
||||||
|
|
||||||
from pubsublogger import publisher
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
##################################
|
||||||
import NotificationHelper
|
# Import Project packages
|
||||||
|
##################################
|
||||||
from packages import Date
|
from packages import Date
|
||||||
from packages import Item
|
from packages import Item
|
||||||
from packages import Term
|
from packages import Term
|
||||||
|
|
||||||
|
from pubsublogger import publisher
|
||||||
|
|
||||||
def clean_term_db_stat_token():
|
def clean_term_db_stat_token():
|
||||||
all_stat_date = Term.get_all_token_stat_history()
|
all_stat_date = Term.get_all_token_stat_history()
|
||||||
|
|
||||||
|
|
|
@ -51,7 +51,7 @@ class Categ(AbstractModule):
|
||||||
Categ module for AIL framework
|
Categ module for AIL framework
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, categ_files_dir='../files/'):
|
def __init__(self, categ_files_dir=os.path.join(os.environ['AIL_HOME'], 'files')):
|
||||||
"""
|
"""
|
||||||
Init Categ
|
Init Categ
|
||||||
"""
|
"""
|
||||||
|
@ -107,7 +107,7 @@ if __name__ == '__main__':
|
||||||
# SCRIPT PARSER #
|
# SCRIPT PARSER #
|
||||||
parser = argparse.ArgumentParser(description='Start Categ module on files.')
|
parser = argparse.ArgumentParser(description='Start Categ module on files.')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-d', type=str, default="../files/",
|
'-d', type=str, default=os.path.join(os.environ['AIL_HOME'], 'files'),
|
||||||
help='Path to the directory containing the category files.',
|
help='Path to the directory containing the category files.',
|
||||||
action='store')
|
action='store')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
|
@ -104,6 +104,7 @@ class Global(AbstractModule):
|
||||||
# Incorrect filename
|
# Incorrect filename
|
||||||
if not os.path.commonprefix([filename, self.PASTES_FOLDER]) == self.PASTES_FOLDER:
|
if not os.path.commonprefix([filename, self.PASTES_FOLDER]) == self.PASTES_FOLDER:
|
||||||
self.redis_logger.warning(f'Global; Path traversal detected {filename}')
|
self.redis_logger.warning(f'Global; Path traversal detected {filename}')
|
||||||
|
print(f'Global; Path traversal detected {filename}')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Decode compressed base64
|
# Decode compressed base64
|
||||||
|
@ -134,6 +135,7 @@ class Global(AbstractModule):
|
||||||
|
|
||||||
else:
|
else:
|
||||||
self.redis_logger.debug(f"Empty Item: {message} not processed")
|
self.redis_logger.debug(f"Empty Item: {message} not processed")
|
||||||
|
print(f"Empty Item: {message} not processed")
|
||||||
|
|
||||||
|
|
||||||
def check_filename(self, filename, new_file_content):
|
def check_filename(self, filename, new_file_content):
|
||||||
|
@ -145,6 +147,7 @@ class Global(AbstractModule):
|
||||||
# check if file exist
|
# check if file exist
|
||||||
if os.path.isfile(filename):
|
if os.path.isfile(filename):
|
||||||
self.redis_logger.warning(f'File already exist {filename}')
|
self.redis_logger.warning(f'File already exist {filename}')
|
||||||
|
print(f'File already exist {filename}')
|
||||||
|
|
||||||
# Check that file already exists but content differs
|
# Check that file already exists but content differs
|
||||||
curr_file_content = self.gunzip_file(filename)
|
curr_file_content = self.gunzip_file(filename)
|
||||||
|
@ -165,11 +168,13 @@ class Global(AbstractModule):
|
||||||
if os.path.isfile(filename):
|
if os.path.isfile(filename):
|
||||||
# Ignore duplicate
|
# Ignore duplicate
|
||||||
self.redis_logger.debug(f'ignore duplicated file {filename}')
|
self.redis_logger.debug(f'ignore duplicated file {filename}')
|
||||||
|
print(f'ignore duplicated file {filename}')
|
||||||
filename = None
|
filename = None
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Ignore duplicate checksum equals
|
# Ignore duplicate checksum equals
|
||||||
self.redis_logger.debug(f'ignore duplicated file {filename}')
|
self.redis_logger.debug(f'ignore duplicated file {filename}')
|
||||||
|
print(f'ignore duplicated file {filename}')
|
||||||
filename = None
|
filename = None
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
@ -192,10 +197,12 @@ class Global(AbstractModule):
|
||||||
curr_file_content = f.read()
|
curr_file_content = f.read()
|
||||||
except EOFError:
|
except EOFError:
|
||||||
self.redis_logger.warning(f'Global; Incomplete file: {filename}')
|
self.redis_logger.warning(f'Global; Incomplete file: {filename}')
|
||||||
|
print(f'Global; Incomplete file: {filename}')
|
||||||
# save daily stats
|
# save daily stats
|
||||||
self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
|
self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
|
||||||
except OSError:
|
except OSError:
|
||||||
self.redis_logger.warning(f'Global; Not a gzipped file: {filename}')
|
self.redis_logger.warning(f'Global; Not a gzipped file: {filename}')
|
||||||
|
print(f'Global; Not a gzipped file: {filename}')
|
||||||
# save daily stats
|
# save daily stats
|
||||||
self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
|
self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1)
|
||||||
|
|
||||||
|
@ -213,6 +220,7 @@ class Global(AbstractModule):
|
||||||
gunzipped_bytes_obj = fo.read()
|
gunzipped_bytes_obj = fo.read()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
|
self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}')
|
||||||
|
print(f'Global; Invalid Gzip file: {filename}, {e}')
|
||||||
|
|
||||||
return gunzipped_bytes_obj
|
return gunzipped_bytes_obj
|
||||||
|
|
||||||
|
|
|
@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
# Import Project packages
|
# Import Project packages
|
||||||
##################################
|
##################################
|
||||||
from modules.abstract_module import AbstractModule
|
from modules.abstract_module import AbstractModule
|
||||||
from packages import Paste
|
from packages.Item import Item
|
||||||
|
|
||||||
|
|
||||||
class Indexer(AbstractModule):
|
class Indexer(AbstractModule):
|
||||||
|
@ -98,19 +98,23 @@ class Indexer(AbstractModule):
|
||||||
|
|
||||||
|
|
||||||
def compute(self, message):
|
def compute(self, message):
|
||||||
try:
|
docpath = message.split(" ", -1)[-1]
|
||||||
PST = Paste.Paste(message)
|
|
||||||
docpath = message.split(" ", -1)[-1]
|
|
||||||
paste = PST.get_p_content()
|
|
||||||
self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
|
|
||||||
print(f"Indexing - {self.indexname}: {docpath}")
|
|
||||||
|
|
||||||
|
item = Item(message)
|
||||||
|
item_id = item.get_id()
|
||||||
|
item_content = item.get_content()
|
||||||
|
|
||||||
|
self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
|
||||||
|
print(f"Indexing - {self.indexname}: {docpath}")
|
||||||
|
|
||||||
|
try:
|
||||||
# Avoid calculating the index's size at each message
|
# Avoid calculating the index's size at each message
|
||||||
if(time.time() - self.last_refresh > self.TIME_WAIT):
|
if(time.time() - self.last_refresh > self.TIME_WAIT):
|
||||||
self.last_refresh = time.time()
|
self.last_refresh = time.time()
|
||||||
if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000):
|
if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000):
|
||||||
timestamp = int(time.time())
|
timestamp = int(time.time())
|
||||||
self.redis_logger.debug(f"Creating new index {timestamp}")
|
self.redis_logger.debug(f"Creating new index {timestamp}")
|
||||||
|
print(f"Creating new index {timestamp}")
|
||||||
self.indexpath = join(self.baseindexpath, str(timestamp))
|
self.indexpath = join(self.baseindexpath, str(timestamp))
|
||||||
self.indexname = str(timestamp)
|
self.indexname = str(timestamp)
|
||||||
# update all_index
|
# update all_index
|
||||||
|
@ -125,13 +129,13 @@ class Indexer(AbstractModule):
|
||||||
indexwriter.update_document(
|
indexwriter.update_document(
|
||||||
title=docpath,
|
title=docpath,
|
||||||
path=docpath,
|
path=docpath,
|
||||||
content=paste)
|
content=item_content)
|
||||||
indexwriter.commit()
|
indexwriter.commit()
|
||||||
|
|
||||||
except IOError:
|
except IOError:
|
||||||
self.redis_logger.debug(f"CRC Checksum Failed on: {PST.p_path}")
|
self.redis_logger.debug(f"CRC Checksum Failed on: {item_id}")
|
||||||
self.redis_logger.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
|
print(f"CRC Checksum Failed on: {item_id}")
|
||||||
PST.p_source, PST.p_date, PST.p_name))
|
self.redis_logger.error(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};CRC Checksum Failed')
|
||||||
|
|
||||||
def check_index_size(self):
|
def check_index_size(self):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -123,7 +123,7 @@ class SentimentAnalysis(AbstractModule):
|
||||||
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
|
avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0}
|
||||||
neg_line = 0
|
neg_line = 0
|
||||||
pos_line = 0
|
pos_line = 0
|
||||||
sid = SentimentIntensityAnalyzer(sentiment_lexicon_file)
|
sid = SentimentIntensityAnalyzer(self.sentiment_lexicon_file)
|
||||||
for sentence in sentences:
|
for sentence in sentences:
|
||||||
ss = sid.polarity_scores(sentence)
|
ss = sid.polarity_scores(sentence)
|
||||||
for k in sorted(ss):
|
for k in sorted(ss):
|
||||||
|
|
|
@ -45,10 +45,11 @@ class Tags(AbstractModule):
|
||||||
if len(mess_split) == 2:
|
if len(mess_split) == 2:
|
||||||
tag = mess_split[0]
|
tag = mess_split[0]
|
||||||
item = Item(mess_split[1])
|
item = Item(mess_split[1])
|
||||||
|
item_id = item.get_id()
|
||||||
|
|
||||||
# Create a new tag
|
# Create a new tag
|
||||||
Tag.add_tag('item', tag, item.get_id())
|
Tag.add_tag('item', tag, item.get_id())
|
||||||
print(f'{item.get_id(): Tagged {tag}}')
|
print(f'{item_id}: Tagged {tag}')
|
||||||
|
|
||||||
# Forward message to channel
|
# Forward message to channel
|
||||||
self.send_message_to_queue(message, 'MISP_The_Hive_feeder')
|
self.send_message_to_queue(message, 'MISP_The_Hive_feeder')
|
||||||
|
|
|
@ -11,15 +11,15 @@ This module is a template for Template for new modules
|
||||||
##################################
|
##################################
|
||||||
# Import External packages
|
# Import External packages
|
||||||
##################################
|
##################################
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
import time
|
import time
|
||||||
from pubsublogger import publisher
|
|
||||||
|
|
||||||
|
|
||||||
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
# Import Project packages
|
# Import Project packages
|
||||||
##################################
|
##################################
|
||||||
from module.abstract_module import AbstractModule
|
from modules.abstract_module import AbstractModule
|
||||||
from Helper import Process
|
|
||||||
|
|
||||||
|
|
||||||
class Template(AbstractModule):
|
class Template(AbstractModule):
|
||||||
|
@ -30,12 +30,12 @@ class Template(AbstractModule):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super(Template, self).__init__()
|
super(Template, self).__init__()
|
||||||
|
|
||||||
# Send module state to logs
|
# Pending time between two computation (computeNone) in seconds
|
||||||
self.redis_logger.info("Module %s initialized"%(self.module_name))
|
|
||||||
|
|
||||||
# Pending time between two computation in seconds
|
|
||||||
self.pending_seconds = 10
|
self.pending_seconds = 10
|
||||||
|
|
||||||
|
# Send module state to logs
|
||||||
|
self.redis_logger.info(f'Module {self.module_name} initialized')
|
||||||
|
|
||||||
|
|
||||||
def computeNone(self):
|
def computeNone(self):
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN'])
|
||||||
##################################
|
##################################
|
||||||
from modules.abstract_module import AbstractModule
|
from modules.abstract_module import AbstractModule
|
||||||
import NotificationHelper
|
import NotificationHelper
|
||||||
from packages import Item
|
from packages.Item import Item
|
||||||
from packages import Term
|
from packages import Term
|
||||||
from lib import Tracker
|
from lib import Tracker
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -7,6 +7,7 @@ import unittest
|
||||||
|
|
||||||
import gzip
|
import gzip
|
||||||
from base64 import b64encode
|
from base64 import b64encode
|
||||||
|
from distutils.dir_util import copy_tree
|
||||||
|
|
||||||
sys.path.append(os.environ['AIL_BIN'])
|
sys.path.append(os.environ['AIL_BIN'])
|
||||||
|
|
||||||
|
@ -20,9 +21,20 @@ from modules.Keys import Keys
|
||||||
from modules.Onion import Onion
|
from modules.Onion import Onion
|
||||||
|
|
||||||
# project packages
|
# project packages
|
||||||
|
from lib.ConfigLoader import ConfigLoader
|
||||||
import lib.crawlers as crawlers
|
import lib.crawlers as crawlers
|
||||||
import packages.Item as Item
|
import packages.Item as Item
|
||||||
|
|
||||||
|
#### COPY SAMPLES ####
|
||||||
|
config_loader = ConfigLoader()
|
||||||
|
# # TODO:move me in new Item package
|
||||||
|
ITEMS_FOLDER = os.path.join(os.environ['AIL_HOME'], config_loader.get_config_str("Directories", "pastes")) + '/'
|
||||||
|
ITEMS_FOLDER = os.path.join(os.path.realpath(ITEMS_FOLDER), '')
|
||||||
|
TESTS_ITEMS_FOLDER = os.path.join(ITEMS_FOLDER, 'tests')
|
||||||
|
sample_dir = os.path.join(os.environ['AIL_HOME'], 'samples')
|
||||||
|
copy_tree(sample_dir, TESTS_ITEMS_FOLDER)
|
||||||
|
#### ---- ####
|
||||||
|
|
||||||
class Test_Module_ApiKey(unittest.TestCase):
|
class Test_Module_ApiKey(unittest.TestCase):
|
||||||
|
|
||||||
def setUp(self):
|
def setUp(self):
|
||||||
|
@ -91,29 +103,31 @@ class Test_Module_Global(unittest.TestCase):
|
||||||
|
|
||||||
item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
|
item_content = b'Lorem ipsum dolor sit amet, consectetur adipiscing elit'
|
||||||
item_content_1 = b64encode(gzip.compress(item_content)).decode()
|
item_content_1 = b64encode(gzip.compress(item_content)).decode()
|
||||||
item_content_2 = b64encode(gzip.compress(item_content + b' more text')).decode()
|
item_content_2 = b64encode(gzip.compress(item_content + b' more text ...')).decode()
|
||||||
message = f'{item_id} {item_content_1}'
|
message = f'{item_id} {item_content_1}'
|
||||||
|
|
||||||
# Test new item
|
# Test new item
|
||||||
result = self.module_obj.compute(message, r_result=True)
|
result = self.module_obj.compute(message, r_result=True)
|
||||||
print(result)
|
print(f'test new item: {result}')
|
||||||
self.assertEqual(result, item_id)
|
self.assertEqual(result, item_id)
|
||||||
|
|
||||||
# Test duplicate
|
# Test duplicate
|
||||||
result = self.module_obj.compute(message, r_result=True)
|
result = self.module_obj.compute(message, r_result=True)
|
||||||
print(result)
|
print(f'test duplicate {result}')
|
||||||
self.assertIsNone(result)
|
self.assertIsNone(result)
|
||||||
|
|
||||||
# Test same id with != content
|
# Test same id with != content
|
||||||
|
item = Item.Item('tests/2021/01/01/global_831875da824fc86ab5cc0e835755b520.gz')
|
||||||
|
item.delete()
|
||||||
message = f'{item_id} {item_content_2}'
|
message = f'{item_id} {item_content_2}'
|
||||||
result = self.module_obj.compute(message, r_result=True)
|
result = self.module_obj.compute(message, r_result=True)
|
||||||
print(result)
|
print(f'test same id with != content: {result}')
|
||||||
self.assertIn(item_id[:-3], result)
|
self.assertIn(item_id[:-3], result)
|
||||||
self.assertNotEqual(result, item_id)
|
self.assertNotEqual(result, item_id)
|
||||||
|
|
||||||
# cleanup
|
# cleanup
|
||||||
item = Item.Item(result)
|
# item = Item.Item(result)
|
||||||
item.delete()
|
# item.delete()
|
||||||
# # TODO: remove from queue
|
# # TODO: remove from queue
|
||||||
|
|
||||||
class Test_Module_Keys(unittest.TestCase):
|
class Test_Module_Keys(unittest.TestCase):
|
||||||
|
|
|
@ -31,7 +31,7 @@ if __name__ == '__main__':
|
||||||
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
r_serv_onion = config_loader.get_redis_conn("ARDB_Onion")
|
||||||
config_loader = None
|
config_loader = None
|
||||||
|
|
||||||
r_serv.set('ail:current_background_script', 'domain languages update')
|
r_serv_db.set('ail:current_background_script', 'domain languages update')
|
||||||
|
|
||||||
nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
|
nb_elem_to_update = r_serv_db.get('update:nb_elem_to_convert')
|
||||||
if not nb_elem_to_update:
|
if not nb_elem_to_update:
|
||||||
|
|
Loading…
Reference in New Issue