chg: [pystemon importer] create new PystemonImporter module, refactor pystemon feeder

pull/594/head
Terrtia 2023-04-14 14:43:07 +02:00
parent ccf935700a
commit ab7b2bdbab
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
6 changed files with 100 additions and 86 deletions

View File

@ -483,7 +483,7 @@ function launch_feeder {
screen -dmS "Feeder_Pystemon"
sleep 0.1
echo -e $GREEN"\t* Launching Pystemon feeder"$DEFAULT
screen -S "Feeder_Pystemon" -X screen -t "Pystemon_feeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./feeder/pystemon-feeder.py; read x"
screen -S "Feeder_Pystemon" -X screen -t "Pystemon_feeder" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./importer/PystemonImporter.py; read x"
sleep 0.1
screen -S "Feeder_Pystemon" -X screen -t "Pystemon" bash -c "cd ${AIL_HOME}/../pystemon; ${ENV_PY} ./pystemon.py; read x"
else

View File

@ -1,83 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# This file is part of AIL framework - Analysis Information Leak framework
#
# This a simple feeder script feeding data from pystemon to AIL.
#
# Don't forget to set your pystemonpath and ensure that the
# configuration matches this script. Default is Redis DB 10.
#
# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Copyright (c) 2014 Alexandre Dulaunoy - a@foo.be
import os
import sys
import zmq
import random
import time
import redis
import base64
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
import ConfigLoader
config_loader = ConfigLoader.ConfigLoader()
if config_loader.has_option("ZMQ_Global", "bind"):
zmq_url = config_loader.get_config_str("ZMQ_Global", "bind")
else:
zmq_url = "tcp://127.0.0.1:5556"
pystemonpath = config_loader.get_config_str("Directories", "pystemonpath")
pastes_directory = config_loader.get_config_str("Directories", "pastes")
pastes_directory = os.path.join(os.environ['AIL_HOME'], pastes_directory)
base_sleeptime = 0.01
sleep_inc = 0
config_loader = None
context = zmq.Context()
socket = context.socket(zmq.PUB)
socket.bind(zmq_url)
# check https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L16
r = redis.StrictRedis(host='localhost', db=10, decode_responses=True)
# 101 pastes processed feed
# 102 raw pastes feed
topic = '102'
while True:
time.sleep(base_sleeptime + sleep_inc)
item_id = r.lpop("pastes")
if item_id is None:
continue
try:
print(item_id)
full_item_path = os.path.join(pystemonpath, item_id)
if not os.path.isfile(full_item_path):
print('Error: {}, file not found'.format(full_item_path))
sleep_inc = 1
continue
with open(full_item_path, 'rb') as f: #.read()
messagedata = f.read()
path_to_send = os.path.join(pastes_directory, item_id)
path_to_send = 'pystemon>>' + path_to_send
s = b' '.join( [ topic.encode(), path_to_send.encode(), base64.b64encode(messagedata) ] )
socket.send(s)
sleep_inc = sleep_inc-0.01 if sleep_inc-0.01 > 0 else 0
except IOError as e:
# file not found, could be a buffering issue -> increase sleeping time
print('IOError: Increasing sleep time')
sleep_inc += 0.5
continue

View File

@ -0,0 +1,90 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# This file is part of AIL framework - Analysis Information Leak framework
#
# This a simple feeder script feeding data from pystemon to AIL.
#
# Don't forget to set your pystemonpath and ensure that the
# configuration matches this script. Default is Redis DB 10.
# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L52
#
import base64
import os
import gzip
import sys
import redis
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from importer.abstract_importer import AbstractImporter
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
class PystemonImporter(AbstractImporter):
def __init__(self, pystemon_dir, host='localhost', port=6379, db=10):
super().__init__()
# Check Pystemon Redis Config:
# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L54
self.r_pystemon = redis.StrictRedis(host=host, port=port, db=db, decode_responses=True)
self.dir_pystemon = pystemon_dir
# # TODO: add exception
def encode_and_compress_data(self, content):
return base64.b64encode(gzip.compress(content)).decode()
def importer(self):
item_id = self.r_pystemon.lpop("pastes")
print(item_id)
if item_id:
print(item_id)
full_item_path = os.path.join(self.dir_pystemon, item_id) # TODO SANITIZE PATH
# Check if pystemon file exists
if not os.path.isfile(full_item_path):
print(f'Error: {full_item_path}, file not found')
return None
# Get Item Content
try:
with open(full_item_path, 'rb') as f:
content = f.read()
if not content:
return None
b64_gzipped_content = self.encode_and_compress_data(content)
print(item_id, b64_gzipped_content)
return f'{item_id} {b64_gzipped_content}'
except IOError as e:
print(f'Error: {full_item_path}, IOError')
return None
class PystemonModuleImporter(AbstractModule):
def __init__(self):
super().__init__()
self.pending_seconds = 10
config_loader = ConfigLoader()
# TODO MIGRATE OLD CONFIG
# dir_pystemon = config_loader.get_config_str("Directories", "pystemonpath")
# Check Pystemon Redis Config:
# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L54
dir_pystemon = config_loader.get_config_str("Pystemon", "dir")
host = config_loader.get_config_str("Pystemon", "redis_host")
port = config_loader.get_config_str("Pystemon", "redis_port")
db = config_loader.get_config_str("Pystemon", "redis_db")
self.importer = PystemonImporter(dir_pystemon, host=host, port=port, db=db)
def get_message(self):
return self.importer.importer()
def compute(self, message):
relay_message = f'pystemon {message}'
self.add_message_to_queue(relay_message)
if __name__ == '__main__':
module = PystemonModuleImporter()
module.run()

View File

@ -18,10 +18,14 @@ tldsfile = faup/src/data/mozilla.tlds
domainstrending_csv = var/www/static/csv/domainstrendingdata
pystemonpath = /home/pystemon/pystemon/
sentiment_lexicon_file = sentiment/vader_lexicon.zip/vader_lexicon/vader_lexicon.txt
[Pystemon]
dir = /home/pystemon/pystemon/
redis_host = localhost
redis_port = 6379
redis_db = 10
##### Logs ######
[Logs]
# activate syslog

View File

@ -7,6 +7,9 @@ publish = Importers
[Importer_Json]
publish = Importers,Tags
[PystemonModuleImporter]
publish = Importers
####################################################
[Mixer]