From ab7b2bdbabc7a19e979a4a597f7cd07bba76efa3 Mon Sep 17 00:00:00 2001 From: Terrtia Date: Fri, 14 Apr 2023 14:43:07 +0200 Subject: [PATCH] chg: [pystemon importer] create new PystemonImporter module, refactor pystemon feeder --- bin/LAUNCH.sh | 2 +- bin/feeder/pystemon-feeder.py | 83 ---------------------------- bin/importer/PystemonImporter.py | 90 +++++++++++++++++++++++++++++++ configs/core.cfg.sample | 8 ++- configs/modules.cfg | 3 ++ {bin/feeder => tests}/test-zmq.py | 0 6 files changed, 100 insertions(+), 86 deletions(-) delete mode 100755 bin/feeder/pystemon-feeder.py create mode 100755 bin/importer/PystemonImporter.py rename {bin/feeder => tests}/test-zmq.py (100%) diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index 52a211a4..adb1444f 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -483,7 +483,7 @@ function launch_feeder { screen -dmS "Feeder_Pystemon" sleep 0.1 echo -e $GREEN"\t* Launching Pystemon feeder"$DEFAULT - screen -S "Feeder_Pystemon" -X screen -t "Pystemon_feeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./feeder/pystemon-feeder.py; read x" + screen -S "Feeder_Pystemon" -X screen -t "Pystemon_feeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./importer/PystemonImporter.py; read x" sleep 0.1 screen -S "Feeder_Pystemon" -X screen -t "Pystemon" bash -c "cd ${AIL_HOME}/../pystemon; ${ENV_PY} ./pystemon.py; read x" else diff --git a/bin/feeder/pystemon-feeder.py b/bin/feeder/pystemon-feeder.py deleted file mode 100755 index 5ac6fd31..00000000 --- a/bin/feeder/pystemon-feeder.py +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# This file is part of AIL framework - Analysis Information Leak framework -# -# This a simple feeder script feeding data from pystemon to AIL. -# -# Don't forget to set your pystemonpath and ensure that the -# configuration matches this script. Default is Redis DB 10. -# -# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16 -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be - -import os -import sys - -import zmq -import random -import time -import redis -import base64 - -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader - -config_loader = ConfigLoader.ConfigLoader() - -if config_loader.has_option("ZMQ_Global", "bind"): - zmq_url = config_loader.get_config_str("ZMQ_Global", "bind") -else: - zmq_url = "tcp://127.0.0.1:5556" - -pystemonpath = config_loader.get_config_str("Directories", "pystemonpath") -pastes_directory = config_loader.get_config_str("Directories", "pastes") -pastes_directory = os.path.join(os.environ['AIL_HOME'], pastes_directory) -base_sleeptime = 0.01 -sleep_inc = 0 - -config_loader = None - -context = zmq.Context() -socket = context.socket(zmq.PUB) -socket.bind(zmq_url) - -# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16 -r = redis.StrictRedis(host='localhost', db=10, decode_responses=True) - -# 101 pastes processed feed -# 102 raw pastes feed -topic = '102' - -while True: - time.sleep(base_sleeptime + sleep_inc) - item_id = r.lpop("pastes") - if item_id is None: - continue - try: - print(item_id) - full_item_path = os.path.join(pystemonpath, item_id) - if not os.path.isfile(full_item_path): - print('Error: {}, file not found'.format(full_item_path)) - sleep_inc = 1 - continue - - with open(full_item_path, 'rb') as f: #.read() - messagedata = f.read() - path_to_send = os.path.join(pastes_directory, item_id) - path_to_send = 'pystemon>>' + path_to_send - - s = b' '.join( [ topic.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] ) - socket.send(s) - sleep_inc = sleep_inc-0.01 if sleep_inc-0.01 > 0 else 0 - except IOError as e: - # file not found, could be a buffering issue -> increase sleeping time - print('IOError: Increasing sleep time') - sleep_inc += 0.5 - continue diff --git a/bin/importer/PystemonImporter.py b/bin/importer/PystemonImporter.py new file mode 100755 index 00000000..d17dfc8a --- /dev/null +++ b/bin/importer/PystemonImporter.py @@ -0,0 +1,90 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# This file is part of AIL framework - Analysis Information Leak framework +# +# This a simple feeder script feeding data from pystemon to AIL. +# +# Don't forget to set your pystemonpath and ensure that the +# configuration matches this script. Default is Redis DB 10. +# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L52 +# + +import base64 +import os +import gzip +import sys +import redis + +sys.path.append(os.environ['AIL_BIN']) +################################## +# Import Project packages +################################## +from importer.abstract_importer import AbstractImporter +from modules.abstract_module import AbstractModule +from lib.ConfigLoader import ConfigLoader + +class PystemonImporter(AbstractImporter): + def __init__(self, pystemon_dir, host='localhost', port=6379, db=10): + super().__init__() + # Check Pystemon Redis Config: + # https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L54 + self.r_pystemon = redis.StrictRedis(host=host, port=port, db=db, decode_responses=True) + self.dir_pystemon = pystemon_dir + + # # TODO: add exception + def encode_and_compress_data(self, content): + return base64.b64encode(gzip.compress(content)).decode() + + def importer(self): + item_id = self.r_pystemon.lpop("pastes") + print(item_id) + if item_id: + print(item_id) + full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH + # Check if pystemon file exists + if not os.path.isfile(full_item_path): + print(f'Error: {full_item_path}, file not found') + return None + # Get Item Content + try: + with open(full_item_path, 'rb') as f: + content = f.read() + if not content: + return None + + b64_gzipped_content = self.encode_and_compress_data(content) + print(item_id, b64_gzipped_content) + return f'{item_id} {b64_gzipped_content}' + except IOError as e: + print(f'Error: {full_item_path}, IOError') + return None + + +class PystemonModuleImporter(AbstractModule): + + def __init__(self): + super().__init__() + self.pending_seconds = 10 + config_loader = ConfigLoader() + # TODO MIGRATE OLD CONFIG + # dir_pystemon = config_loader.get_config_str("Directories", "pystemonpath") + # Check Pystemon Redis Config: + # https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L54 + dir_pystemon = config_loader.get_config_str("Pystemon", "dir") + host = config_loader.get_config_str("Pystemon", "redis_host") + port = config_loader.get_config_str("Pystemon", "redis_port") + db = config_loader.get_config_str("Pystemon", "redis_db") + self.importer = PystemonImporter(dir_pystemon, host=host, port=port, db=db) + + def get_message(self): + return self.importer.importer() + + def compute(self, message): + relay_message = f'pystemon {message}' + self.add_message_to_queue(relay_message) + + +if __name__ == '__main__': + module = PystemonModuleImporter() + module.run() diff --git a/configs/core.cfg.sample b/configs/core.cfg.sample index 475f7220..8f26637e 100644 --- a/configs/core.cfg.sample +++ b/configs/core.cfg.sample @@ -18,10 +18,14 @@ tldsfile = faup/src/data/mozilla.tlds domainstrending_csv = var/www/static/csv/domainstrendingdata -pystemonpath = /home/pystemon/pystemon/ - sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt +[Pystemon] +dir = /home/pystemon/pystemon/ +redis_host = localhost +redis_port = 6379 +redis_db = 10 + ##### Logs ###### [Logs] # activate syslog diff --git a/configs/modules.cfg b/configs/modules.cfg index dc71dac8..fc169227 100644 --- a/configs/modules.cfg +++ b/configs/modules.cfg @@ -7,6 +7,9 @@ publish = Importers [Importer_Json] publish = Importers,Tags +[PystemonModuleImporter] +publish = Importers + #################################################### [Mixer] diff --git a/bin/feeder/test-zmq.py b/tests/test-zmq.py similarity index 100% rename from bin/feeder/test-zmq.py rename to tests/test-zmq.py