From 42a23da182e66e201add87a18c63b624beffe232 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Tue, 8 Jun 2021 16:46:36 +0200 Subject: [PATCH] chg: [modules + tests] fix modules + test modules on samples --- bin/ModuleStats.py | 7 +++---- bin/core/DbCleaner.py | 10 +++++---- bin/modules/Categ.py | 4 ++-- bin/modules/Global.py | 8 ++++++++ bin/modules/Indexer.py | 26 ++++++++++++++---------- bin/modules/SentimentAnalysis.py | 2 +- bin/modules/Tags.py | 3 ++- bin/template.py | 20 +++++++++--------- bin/trackers/Tracker_Term.py | 2 +- samples/2021/01/01/api_keys.gz | Bin 0 -> 386 bytes samples/2021/01/01/categ.gz | Bin 0 -> 391 bytes samples/2021/01/01/credit_cards.gz | Bin 0 -> 420 bytes samples/2021/01/01/domain_classifier.gz | Bin 0 -> 335 bytes samples/2021/01/01/keys.gz | Bin 0 -> 77 bytes tests/test_modules.py | 26 ++++++++++++++++++------ update/v3.4/Update_domain.py | 2 +- 16 files changed, 69 insertions(+), 41 deletions(-) create mode 100644 samples/2021/01/01/api_keys.gz create mode 100644 samples/2021/01/01/categ.gz create mode 100644 samples/2021/01/01/credit_cards.gz create mode 100644 samples/2021/01/01/domain_classifier.gz create mode 100644 samples/2021/01/01/keys.gz diff --git a/bin/ModuleStats.py b/bin/ModuleStats.py index 49992166..240c315b 100755 --- a/bin/ModuleStats.py +++ b/bin/ModuleStats.py @@ -12,15 +12,14 @@ import time import datetime import redis import os +import sys - +sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages # ################################## -from module.abstract_module import AbstractModule +from modules.abstract_module import AbstractModule from packages.Date import Date -from pubsublogger import publisher -from Helper import Process from packages import Paste import ConfigLoader diff --git a/bin/core/DbCleaner.py b/bin/core/DbCleaner.py index e0cf6512..0fdb4228 100755 --- a/bin/core/DbCleaner.py +++ b/bin/core/DbCleaner.py @@ -10,14 +10,16 @@ import sys import time import datetime -from pubsublogger import publisher - -import NotificationHelper - +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## from packages import Date from packages import Item from packages import Term +from pubsublogger import publisher + def clean_term_db_stat_token(): all_stat_date = Term.get_all_token_stat_history() diff --git a/bin/modules/Categ.py b/bin/modules/Categ.py index 4e8b6205..ddd35ace 100755 --- a/bin/modules/Categ.py +++ b/bin/modules/Categ.py @@ -51,7 +51,7 @@ class Categ(AbstractModule): Categ module for AIL framework """ - def __init__(self, categ_files_dir='../files/'): + def __init__(self, categ_files_dir=os.path.join(os.environ['AIL_HOME'], 'files')): """ Init Categ """ @@ -107,7 +107,7 @@ if __name__ == '__main__': # SCRIPT PARSER # parser = argparse.ArgumentParser(description='Start Categ module on files.') parser.add_argument( - '-d', type=str, default="../files/", + '-d', type=str, default=os.path.join(os.environ['AIL_HOME'], 'files'), help='Path to the directory containing the category files.', action='store') args = parser.parse_args() diff --git a/bin/modules/Global.py b/bin/modules/Global.py index c203d6c8..ff14051c 100755 --- a/bin/modules/Global.py +++ b/bin/modules/Global.py @@ -104,6 +104,7 @@ class Global(AbstractModule): # Incorrect filename if not os.path.commonprefix([filename, self.PASTES_FOLDER]) == self.PASTES_FOLDER: self.redis_logger.warning(f'Global; Path traversal detected {filename}') + print(f'Global; Path traversal detected {filename}') else: # Decode compressed base64 @@ -134,6 +135,7 @@ class Global(AbstractModule): else: self.redis_logger.debug(f"Empty Item: {message} not processed") + print(f"Empty Item: {message} not processed") def check_filename(self, filename, new_file_content): @@ -145,6 +147,7 @@ class Global(AbstractModule): # check if file exist if os.path.isfile(filename): self.redis_logger.warning(f'File already exist {filename}') + print(f'File already exist {filename}') # Check that file already exists but content differs curr_file_content = self.gunzip_file(filename) @@ -165,11 +168,13 @@ class Global(AbstractModule): if os.path.isfile(filename): # Ignore duplicate self.redis_logger.debug(f'ignore duplicated file {filename}') + print(f'ignore duplicated file {filename}') filename = None else: # Ignore duplicate checksum equals self.redis_logger.debug(f'ignore duplicated file {filename}') + print(f'ignore duplicated file {filename}') filename = None else: @@ -192,10 +197,12 @@ class Global(AbstractModule): curr_file_content = f.read() except EOFError: self.redis_logger.warning(f'Global; Incomplete file: {filename}') + print(f'Global; Incomplete file: {filename}') # save daily stats self.r_stats.zincrby('module:Global:incomplete_file', datetime.datetime.now().strftime('%Y%m%d'), 1) except OSError: self.redis_logger.warning(f'Global; Not a gzipped file: {filename}') + print(f'Global; Not a gzipped file: {filename}') # save daily stats self.r_stats.zincrby('module:Global:invalid_file', datetime.datetime.now().strftime('%Y%m%d'), 1) @@ -213,6 +220,7 @@ class Global(AbstractModule): gunzipped_bytes_obj = fo.read() except Exception as e: self.redis_logger.warning(f'Global; Invalid Gzip file: {filename}, {e}') + print(f'Global; Invalid Gzip file: {filename}, {e}') return gunzipped_bytes_obj diff --git a/bin/modules/Indexer.py b/bin/modules/Indexer.py index f7218ae1..cdb65f16 100755 --- a/bin/modules/Indexer.py +++ b/bin/modules/Indexer.py @@ -26,7 +26,7 @@ sys.path.append(os.environ['AIL_BIN']) # Import Project packages ################################## from modules.abstract_module import AbstractModule -from packages import Paste +from packages.Item import Item class Indexer(AbstractModule): @@ -98,19 +98,23 @@ class Indexer(AbstractModule): def compute(self, message): - try: - PST = Paste.Paste(message) - docpath = message.split(" ", -1)[-1] - paste = PST.get_p_content() - self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}") - print(f"Indexing - {self.indexname}: {docpath}") + docpath = message.split(" ", -1)[-1] + item = Item(message) + item_id = item.get_id() + item_content = item.get_content() + + self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}") + print(f"Indexing - {self.indexname}: {docpath}") + + try: # Avoid calculating the index's size at each message if(time.time() - self.last_refresh > self.TIME_WAIT): self.last_refresh = time.time() if self.check_index_size() >= self.INDEX_SIZE_THRESHOLD*(1000*1000): timestamp = int(time.time()) self.redis_logger.debug(f"Creating new index {timestamp}") + print(f"Creating new index {timestamp}") self.indexpath = join(self.baseindexpath, str(timestamp)) self.indexname = str(timestamp) # update all_index @@ -125,13 +129,13 @@ class Indexer(AbstractModule): indexwriter.update_document( title=docpath, path=docpath, - content=paste) + content=item_content) indexwriter.commit() except IOError: - self.redis_logger.debug(f"CRC Checksum Failed on: {PST.p_path}") - self.redis_logger.error('Duplicate;{};{};{};CRC Checksum Failed'.format( - PST.p_source, PST.p_date, PST.p_name)) + self.redis_logger.debug(f"CRC Checksum Failed on: {item_id}") + print(f"CRC Checksum Failed on: {item_id}") + self.redis_logger.error(f'Duplicate;{item.get_source()};{item.get_date()};{item.get_basename()};CRC Checksum Failed') def check_index_size(self): """ diff --git a/bin/modules/SentimentAnalysis.py b/bin/modules/SentimentAnalysis.py index ba8032a7..ff0b8142 100755 --- a/bin/modules/SentimentAnalysis.py +++ b/bin/modules/SentimentAnalysis.py @@ -123,7 +123,7 @@ class SentimentAnalysis(AbstractModule): avg_score = {'neg': 0.0, 'neu': 0.0, 'pos': 0.0, 'compoundPos': 0.0, 'compoundNeg': 0.0} neg_line = 0 pos_line = 0 - sid = SentimentIntensityAnalyzer(sentiment_lexicon_file) + sid = SentimentIntensityAnalyzer(self.sentiment_lexicon_file) for sentence in sentences: ss = sid.polarity_scores(sentence) for k in sorted(ss): diff --git a/bin/modules/Tags.py b/bin/modules/Tags.py index f21fa858..2a42bfca 100755 --- a/bin/modules/Tags.py +++ b/bin/modules/Tags.py @@ -45,10 +45,11 @@ class Tags(AbstractModule): if len(mess_split) == 2: tag = mess_split[0] item = Item(mess_split[1]) + item_id = item.get_id() # Create a new tag Tag.add_tag('item', tag, item.get_id()) - print(f'{item.get_id(): Tagged {tag}}') + print(f'{item_id}: Tagged {tag}') # Forward message to channel self.send_message_to_queue(message, 'MISP_The_Hive_feeder') diff --git a/bin/template.py b/bin/template.py index 0e1a0a8f..88f17cbd 100755 --- a/bin/template.py +++ b/bin/template.py @@ -5,21 +5,21 @@ The Template Module ====================== This module is a template for Template for new modules - + """ ################################## # Import External packages ################################## +import os +import sys import time -from pubsublogger import publisher - +sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## -from module.abstract_module import AbstractModule -from Helper import Process +from modules.abstract_module import AbstractModule class Template(AbstractModule): @@ -30,12 +30,12 @@ class Template(AbstractModule): def __init__(self): super(Template, self).__init__() - # Send module state to logs - self.redis_logger.info("Module %s initialized"%(self.module_name)) - - # Pending time between two computation in seconds + # Pending time between two computation (computeNone) in seconds self.pending_seconds = 10 + # Send module state to logs + self.redis_logger.info(f'Module {self.module_name} initialized') + def computeNone(self): """ @@ -52,6 +52,6 @@ class Template(AbstractModule): if __name__ == '__main__': - + module = Template() module.run() diff --git a/bin/trackers/Tracker_Term.py b/bin/trackers/Tracker_Term.py index 89791e22..9878835e 100755 --- a/bin/trackers/Tracker_Term.py +++ b/bin/trackers/Tracker_Term.py @@ -20,7 +20,7 @@ sys.path.append(os.environ['AIL_BIN']) ################################## from modules.abstract_module import AbstractModule import NotificationHelper -from packages import Item +from packages.Item import Item from packages import Term from lib import Tracker diff --git a/samples/2021/01/01/api_keys.gz b/samples/2021/01/01/api_keys.gz new file mode 100644 index 0000000000000000000000000000000000000000..6ed5164b0a2a10d31f5791d6542304fb4b5af9ad GIT binary patch literal 386 zcmV-|0e$`-iwFpY1f^gA17UD!Uu$J~a{!Hz%TA*)42AbR#d!cI?7EsoMSoB!EocxD ztdL3Ej-}j4;!efWk28hcEGDa5oa1vo-{;B^Qkq8i9CC%$@ME$1q^p0gH$GAQ^Da^l5n>d=lr?`8$uPc|5l z_K842K9pv7?%*WRv=L=&4tAt?jt`ZFlv}Ur!AGu6I(5kj8UjU^Ps*c6nv70=C8by~ zn9O)IPx%zUTJ_o4a!8VPhEz2tCoYTiYO#8Fx_MY`xAW;`v3b0oECVW+%D*^yu13Qy z4moHs4~*Y;6za=!PdC$Mb-A8zmecn3^8NAnb0^oYUmAnsVQ>Ps#&8nC_bd#DVyAnd zjx^$+P_={B8K$G-(%En~F@o7SVrO+ui+JkehcPHMi{~_EFgX9WA&cQO6dHJucRO)H g@Z7i#nV7@yOJfdV)Z5qiG|q7L2NyC~lTrcz0Qma3GXMYp literal 0 HcmV?d00001 diff --git a/samples/2021/01/01/categ.gz b/samples/2021/01/01/categ.gz new file mode 100644 index 0000000000000000000000000000000000000000..35466908a7c364cff6729594dc88047c51c7fe8b GIT binary patch literal 391 zcmV;20eJo&iwFqNB&A>g17l%yWoH02k-JhOK@3H6zCs@W12&j|h$-Oc50&5VkE zO-r)P->(`kSwAG*t8+hVrwrW0GQ^r{2Vn)wq_+@i5eiCb9WZiZ30xMS#QrH1y#X?c zQV-Z85S!#0K`A$%!zFO!Xhp#k5*}R%NUtj7jY~nm#GBy+e-x;2b{WV#4ky22zJ<+L zpj6Rru5fWFR#dJ9EG5jLUMG(t79L7JBe{7okepixOFvw`>q&5(=Pz29M4PT1Wo~mk zrt?vrl4yKhsK*eq{xs%0n;I)dyg)hh~jsuR2qN2_X+m-~CVU~Y*fuLCqC;BLO$ zl_VGF)HI-0dHB`@0_z$628D%8|+lawO!=f~AIo lH`ICNzzC6q`uNQ?4OcjVVUI!n=V^Jm`3H(LIPgIN005vBuy_Cf literal 0 HcmV?d00001 diff --git a/samples/2021/01/01/credit_cards.gz b/samples/2021/01/01/credit_cards.gz new file mode 100644 index 0000000000000000000000000000000000000000..53d094a19a1ce20f3fc732f523784484f8f73ce1 GIT binary patch literal 420 zcmV;V0bBkbiwFqug{NQu17mV!WNCC?V_|Y+a{x7w%aYSD5JmU-iu(bnl;vl<1cnU- zOV~G3Cv=q_9z9a|d|DIOxou1L^tsij{Z=d#Pu<>69Lvu@1(pX4;2k8FsU-DC{&1EUkp( zaH<>*jh1M|iEFXq+n3LnmU%Oa*~~nc>$+@c6`p=8F%<8EmJ#S43FPIy(FH#%I7$FH z1nCbC9Vwjg-lQO=Qgd_QCub{_s-%b?RH}}Y9hP|?_p2F)bs4v58qmC_VACtLUm!&~ z#YdbRIPnuq_$7rf592%xo0-Pzys!JBa)K_GFfY@%jLUBJ?P^}il(k;))|sePhUBKJ zy!=ZnPkE(^X!LZ`}|JKw!$gDq^R`L0N~EFIu1FqjKxj zs@Y^MvDN=OB?xEJ8RJr-4TvT*OZ6&Q(DmI-U(FoE9k(7L%O!qM$^6c&MmbUe1$G_ zTRx*ePMNGuuGSH?Qe%56?;>5gJN-)P$YKa3cuOXG8pu|D;B`wjS(IO!C!mVmX^|Af*|;1+i!N8dmgG; z$8nzLkz|V`w4YUDElOrbYTrey5wD9Cgsn3U<3G hE1}4exo?}*jZQmM%?Q`uUqm}_`2oOzS@H7$008$wqS^oe literal 0 HcmV?d00001 diff --git a/samples/2021/01/01/keys.gz b/samples/2021/01/01/keys.gz new file mode 100644 index 0000000000000000000000000000000000000000..559b40064a11028f369a7f915b82fc6b6322f37c GIT binary patch literal 77 zcmV-T0J8rdiwFq#p|D^818Ze@a{$xT1p+5mcTYbBXV;(*Pd86z#}HSLpe|P;PQV3L j;Oggssfi0B2Qf0hJwU