chg: [importers] improve abstract class and logs

pull/594/head
Terrtia 2023-06-02 11:03:32 +02:00
parent 550ca3abe6
commit 7c7799564f
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
6 changed files with 83 additions and 54 deletions

View File

@ -642,7 +642,7 @@ def domain_migration():
domain = Domains.Domain(dom) domain = Domains.Domain(dom)
domain.update_daterange(first_seen) domain.update_daterange(first_seen)
domain.update_daterange(last_check) domain.update_daterange(last_check)
domain._set_ports(ports) # TODO ############################################################################ # domain._set_ports(ports)
if last_origin: if last_origin:
domain.set_last_origin(last_origin) domain.set_last_origin(last_origin)
for language in languages: for language in languages:

View File

@ -24,37 +24,29 @@ from lib import ail_files # TODO RENAME ME
logging.config.dictConfig(ail_logger.get_config(name='modules')) logging.config.dictConfig(ail_logger.get_config(name='modules'))
# TODO Clean queue one object destruct
class FileImporter(AbstractImporter): class FileImporter(AbstractImporter):
def __init__(self, feeder='file_import'): def __init__(self, feeder='file_import'):
super().__init__() super().__init__(queue=True)
self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger = logging.getLogger(f'{self.__class__.__name__}')
self.feeder_name = feeder # TODO sanityze feeder name self.feeder_name = feeder # TODO sanityze feeder name
# Setup the I/O queues
self.queue = AILQueue('FileImporter', 'manual')
def importer(self, path): def importer(self, path):
if os.path.isfile(path): if os.path.isfile(path):
with open(path, 'rb') as f: with open(path, 'rb') as f:
content = f.read() content = f.read()
if content:
mimetype = ail_files.get_mimetype(content) mimetype = ail_files.get_mimetype(content)
if ail_files.is_text(mimetype):
item_id = ail_files.create_item_id(self.feeder_name, path) item_id = ail_files.create_item_id(self.feeder_name, path)
content = ail_files.create_gzipped_b64(content) gzipped = False
if content: if mimetype == 'application/gzip':
message = f'dir_import {item_id} {content}' gzipped = True
self.logger.info(message) elif not ail_files.is_text(mimetype):
self.queue.send_message(message) return None
elif mimetype == 'application/gzip':
item_id = ail_files.create_item_id(self.feeder_name, path) message = self.create_message(item_id, content, gzipped=gzipped, source='dir_import')
content = ail_files.create_b64(content) if message:
if content: self.add_message_to_queue(message)
message = f'dir_import {item_id} {content}'
self.logger.info(message)
self.queue.send_message(message)
class DirImporter(AbstractImporter): class DirImporter(AbstractImporter):
def __init__(self): def __init__(self):

View File

@ -10,9 +10,7 @@
# https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L52 # https://github.com/cvandeplas/pystemon/blob/master/pystemon.yaml#L52
# #
import base64
import os import os
import gzip
import sys import sys
import redis import redis
@ -32,10 +30,6 @@ class PystemonImporter(AbstractImporter):
self.r_pystemon = redis.StrictRedis(host=host, port=port, db=db, decode_responses=True) self.r_pystemon = redis.StrictRedis(host=host, port=port, db=db, decode_responses=True)
self.dir_pystemon = pystemon_dir self.dir_pystemon = pystemon_dir
# # TODO: add exception
def encode_and_compress_data(self, content):
return base64.b64encode(gzip.compress(content)).decode()
def importer(self): def importer(self):
item_id = self.r_pystemon.lpop("pastes") item_id = self.r_pystemon.lpop("pastes")
print(item_id) print(item_id)
@ -53,9 +47,8 @@ class PystemonImporter(AbstractImporter):
if not content: if not content:
return None return None
b64_gzipped_content = self.encode_and_compress_data(content) return self.create_message(item_id, content, source='pystemon')
print(item_id, b64_gzipped_content)
return f'{item_id} {b64_gzipped_content}'
except IOError as e: except IOError as e:
print(f'Error: {full_item_path}, IOError') print(f'Error: {full_item_path}, IOError')
return None return None
@ -81,8 +74,7 @@ class PystemonModuleImporter(AbstractModule):
return self.importer.importer() return self.importer.importer()
def compute(self, message): def compute(self, message):
relay_message = f'pystemon {message}' self.add_message_to_queue(message)
self.add_message_to_queue(relay_message)
if __name__ == '__main__': if __name__ == '__main__':

View File

@ -7,26 +7,41 @@ Importer Class
Import Content Import Content
""" """
import base64
import gzip
import logging
import logging.config
import os import os
import sys import sys
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
# sys.path.append(os.environ['AIL_BIN']) sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
# from ConfigLoader import ConfigLoader # from ConfigLoader import ConfigLoader
from lib import ail_logger
from lib.ail_queues import AILQueue
class AbstractImporter(ABC): logging.config.dictConfig(ail_logger.get_config(name='modules'))
def __init__(self):
# TODO Clean queue one object destruct
class AbstractImporter(ABC): # TODO ail queues
def __init__(self, queue=False):
""" """
Init Module AIL Importer
importer_name: str; set the importer name if different from the instance ClassName :param queue: Allow to push messages to other modules
""" """
# Module name if provided else instance className # Module name if provided else instance className
self.name = self._name() self.name = self._name()
self.logger = logging.getLogger(f'{self.__class__.__name__}')
# Setup the I/O queues for one shot importers
if queue:
self.queue = AILQueue(self.name, 'importer_manual')
@abstractmethod @abstractmethod
def importer(self, *args, **kwargs): def importer(self, *args, **kwargs):
@ -39,4 +54,49 @@ class AbstractImporter(ABC):
""" """
return self.__class__.__name__ return self.__class__.__name__
def add_message_to_queue(self, message, queue_name=None):
"""
Add message to queue
:param message: message to send in queue
:param queue_name: queue or module name
ex: add_message_to_queue(item_id, 'Mail')
"""
if message:
self.queue.send_message(message, queue_name)
@staticmethod
def b64(content):
if isinstance(content, str):
content = content.encode()
return base64.b64encode(content).decode()
@staticmethod
def create_gzip(content):
if isinstance(content, str):
content = content.encode()
return gzip.compress(content)
def b64_gzip(self, content):
try:
gziped = self.create_gzip(content)
return self.b64(gziped)
except Exception as e:
self.logger.warning(e)
return ''
def create_message(self, obj_id, content, b64=False, gzipped=False, source=None):
if not gzipped:
content = self.b64_gzip(content)
elif not b64:
content = self.b64(gzipped)
if not content:
return None
if isinstance(content, bytes):
content = content.decode()
if not source:
source = self.name
self.logger.info(f'{source} {obj_id}')
# self.logger.debug(f'{source} {obj_id} {content}')
return f'{source} {obj_id} {content}'

View File

@ -1,9 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
import base64
import datetime import datetime
import gzip
import logging.config import logging.config
import magic import magic
import os import os
@ -181,15 +179,3 @@ def create_item_id(feeder_name, path):
item_id = os.path.join(feeder_name, date, basename) item_id = os.path.join(feeder_name, date, basename)
# TODO check if already exists # TODO check if already exists
return item_id return item_id
def create_b64(b_content):
return base64.standard_b64encode(b_content).decode()
def create_gzipped_b64(b_content):
try:
gzipencoded = gzip.compress(b_content)
gzip64encoded = create_b64(gzipencoded)
return gzip64encoded
except Exception as e:
logger.warning(e)
return ''

View File

@ -33,10 +33,9 @@ class AbstractModule(ABC):
def __init__(self, module_name=None, queue=True): def __init__(self, module_name=None, queue=True):
""" """
Init Module AIL Module,
module_name: str; set the module name if different from the instance ClassName module_name: str; set the module name if different from the instance ClassName
queue_name: str; set the queue name if different from the instance ClassName :param queue: Allow to push messages to other modules
logger_channel: str; set the logger channel name, 'Script' by default
""" """
self.logger = logging.getLogger(f'{self.__class__.__name__}') self.logger = logging.getLogger(f'{self.__class__.__name__}')