#!/usr/bin/env python3 # -*-coding:UTF-8 -* """ The Submit paste module ================ This module is taking paste in redis queue ARDB_DB and submit to global """ ################################## # Import External packages ################################## import os import sys import gzip import io import redis import base64 import datetime import time # from sflock.main import unpack # import sflock sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## from modules.abstract_module import AbstractModule from packages import Tag from lib import ConfigLoader class SubmitPaste(AbstractModule): """ SubmitPaste module for AIL framework """ expire_time = 120 # Text max size TEXT_MAX_SIZE = ConfigLoader.ConfigLoader().get_config_int("SubmitPaste", "TEXT_MAX_SIZE") # File max size FILE_MAX_SIZE = ConfigLoader.ConfigLoader().get_config_int("SubmitPaste", "FILE_MAX_SIZE") # Allowed file type ALLOWED_EXTENSIONS = ConfigLoader.ConfigLoader().get_config_str("SubmitPaste", "FILE_ALLOWED_EXTENSIONS").split(',') def __init__(self): """ init """ super(SubmitPaste, self).__init__(queue_name='submit_paste') self.r_serv_db = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_DB") self.r_serv_log_submit = ConfigLoader.ConfigLoader().get_redis_conn("Redis_Log_submit") self.r_serv_tags = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Tags") self.r_serv_metadata = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Metadata") self.serv_statistics = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Statistics") self.pending_seconds = 3 self.PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], ConfigLoader.ConfigLoader().get_config_str("Directories", "pastes")) + '/' def compute(self, uuid): """ Main method of the Module to implement """ self.redis_logger.debug(f'compute UUID {uuid}') # get temp value save on disk ltags = self.r_serv_db.smembers(f'{uuid}:ltags') ltagsgalaxies = self.r_serv_db.smembers(f'{uuid}:ltagsgalaxies') paste_content = self.r_serv_db.get(f'{uuid}:paste_content') isfile = self.r_serv_db.get(f'{uuid}:isfile') password = self.r_serv_db.get(f'{uuid}:password') source = self.r_serv_db.get(f'{uuid}:source') if source in ['crawled', 'tests']: source = 'submitted' self.redis_logger.debug(f'isfile UUID {isfile}') self.redis_logger.debug(f'source UUID {source}') self.redis_logger.debug(f'paste_content UUID {paste_content}') # needed if redis is restarted self.r_serv_log_submit.set(f'{uuid}:end', 0) self.r_serv_log_submit.set(f'{uuid}:processing', 0) self.r_serv_log_submit.set(f'{uuid}:nb_total', -1) self.r_serv_log_submit.set(f'{uuid}:nb_end', 0) self.r_serv_log_submit.set(f'{uuid}:nb_sucess', 0) self.r_serv_log_submit.set(f'{uuid}:processing', 1) if isfile == 'True': # file input self._manage_file(uuid, paste_content, ltags, ltagsgalaxies, source) else: # textarea input paste self._manage_text(uuid, paste_content, ltags, ltagsgalaxies, source) # new paste created from file, remove uuid ref self.remove_submit_uuid(uuid) def run(self): """ Run Module endless process """ # Endless loop processing messages from the input queue while self.proceed: # Get one message (paste) from the QueueIn (copy of Redis_Global publish) nb_submit = self.r_serv_db.scard('submitted:uuid') if nb_submit > 0: try: uuid = self.r_serv_db.srandmember('submitted:uuid') # Module processing with the message from the queue self.redis_logger.debug(uuid) self.compute(uuid) except Exception as err: self.redis_logger.error(f'Error in module {self.module_name}: {err}') # Remove uuid ref self.remove_submit_uuid(uuid) else: # Wait before next process self.redis_logger.debug(f'{self.module_name}, waiting for new message, Idling {self.pending_seconds}s') time.sleep(self.pending_seconds) def _manage_text(self, uuid, paste_content, ltags, ltagsgalaxies, source): """ Create a paste for given text """ if sys.getsizeof(paste_content) < SubmitPaste.TEXT_MAX_SIZE: self.r_serv_log_submit.set(f'{uuid}:nb_total', 1) self.create_paste(uuid, paste_content.encode(), ltags, ltagsgalaxies, uuid, source) time.sleep(0.5) else: self.abord_file_submission(uuid, f'Text size is over {SubmitPaste.TEXT_MAX_SIZE} bytes') def _manage_file(self, uuid, file_full_path, ltags, ltagsgalaxies, source): """ Create a paste for given file """ self.redis_logger.debug('manage') if os.path.exists(file_full_path): self.redis_logger.debug(f'file exists {file_full_path}') file_size = os.stat(file_full_path).st_size self.redis_logger.debug(f'file size {file_size}') # Verify file length if file_size < SubmitPaste.FILE_MAX_SIZE: # TODO sanitize filename filename = file_full_path.split('/')[-1] self.redis_logger.debug(f'sanitize filename {filename}') self.redis_logger.debug('file size allowed') if not '.' in filename: self.redis_logger.debug('no extension for filename') try: # Read file with open(file_full_path,'r') as f: content = f.read() self.r_serv_log_submit.set(uuid + ':nb_total', 1) self.create_paste(uuid, content.encode(), ltags, ltagsgalaxies, uuid, source) except: self.abord_file_submission(uuid, "file error") else: file_type = filename.rsplit('.', 1)[1] file_type = file_type.lower() self.redis_logger.debug(f'file ext {file_type}') if file_type in SubmitPaste.ALLOWED_EXTENSIONS: self.redis_logger.debug('Extension allowed') # TODO enum of possible file extension ? # TODO verify file hash with virus total ? if not self._is_compressed_type(file_type): self.redis_logger.debug('Plain text file') # plain txt file with open(file_full_path,'r') as f: content = f.read() self.r_serv_log_submit.set(uuid + ':nb_total', 1) self.create_paste(uuid, content.encode(), ltags, ltagsgalaxies, uuid, source) else: # Compressed file self.abord_file_submission(uuid, "file decompression should be implemented") # TODO add compress file management # #decompress file # try: # if password == None: # files = unpack(file_full_path.encode()) # #print(files.children) # else: # try: # files = unpack(file_full_path.encode(), password=password.encode()) # #print(files.children) # except sflock.exception.IncorrectUsageException: # self.abord_file_submission(uuid, "Wrong Password") # raise # except: # self.abord_file_submission(uuid, "file decompression error") # raise # self.redis_logger.debug('unpacking {} file'.format(files.unpacker)) # if(not files.children): # self.abord_file_submission(uuid, "Empty compressed file") # raise # # set number of files to submit # self.r_serv_log_submit.set(uuid + ':nb_total', len(files.children)) # n = 1 # for child in files.children: # if self.verify_extention_filename(child.filename.decode()): # self.create_paste(uuid, child.contents, ltags, ltagsgalaxies, uuid+'_'+ str(n) , source) # n = n + 1 # else: # self.redis_logger.error("Error in module %s: bad extention"%(self.module_name)) # self.addError(uuid, 'Bad file extension: {}'.format(child.filename.decode()) ) # except FileNotFoundError: # self.redis_logger.error("Error in module %s: file not found"%(self.module_name)) # self.addError(uuid, 'File not found: {}'.format(file_full_path), uuid ) else: self.abord_file_submission(uuid, f'File :{file_full_path} too large, over {SubmitPaste.FILE_MAX_SIZE} bytes') else: self.abord_file_submission(uuid, "Server Error, the archive can't be found") def _is_compressed_type(self, file_type): """ Check if file type is in the list of compressed file extensions format """ compressed_type = ['zip', 'gz', 'tar.gz'] return file_type in compressed_type def remove_submit_uuid(self, uuid): # save temp value on disk self.r_serv_db.delete(f'{uuid}:ltags') self.r_serv_db.delete(f'{uuid}:ltagsgalaxies') self.r_serv_db.delete(f'{uuid}:paste_content') self.r_serv_db.delete(f'{uuid}:isfile') self.r_serv_db.delete(f'{uuid}:password') self.r_serv_db.delete(f'{uuid}:source') self.r_serv_log_submit.expire(f'{uuid}:end', SubmitPaste.expire_time) self.r_serv_log_submit.expire(f'{uuid}:processing', SubmitPaste.expire_time) self.r_serv_log_submit.expire(f'{uuid}:nb_total', SubmitPaste.expire_time) self.r_serv_log_submit.expire(f'{uuid}:nb_sucess', SubmitPaste.expire_time) self.r_serv_log_submit.expire(f'{uuid}:nb_end', SubmitPaste.expire_time) self.r_serv_log_submit.expire(f'{uuid}:error', SubmitPaste.expire_time) self.r_serv_log_submit.expire(f'{uuid}:paste_submit_link', SubmitPaste.expire_time) # delete uuid self.r_serv_db.srem('submitted:uuid', uuid) self.redis_logger.debug(f'{uuid} all file submitted') print(f'{uuid} all file submitted') def create_paste(self, uuid, paste_content, ltags, ltagsgalaxies, name, source=None): # # TODO: Use Item create result = False now = datetime.datetime.now() source = source if source else 'submitted' save_path = source + '/' + now.strftime("%Y") + '/' + now.strftime("%m") + '/' + now.strftime("%d") + '/submitted_' + name + '.gz' full_path = filename = os.path.join(os.environ['AIL_HOME'], self.process.config.get("Directories", "pastes"), save_path) self.redis_logger.debug(f'file path of the paste {full_path}') if not os.path.isfile(full_path): # file not exists in AIL paste directory self.redis_logger.debug(f"new paste {paste_content}") gzip64encoded = self._compress_encode_content(paste_content) if gzip64encoded: # use relative path rel_item_path = save_path.replace(self.PASTES_FOLDER, '', 1) self.redis_logger.debug(f"relative path {rel_item_path}") # send paste to Global module relay_message = f"{rel_item_path} {gzip64encoded}" self.process.populate_set_out(relay_message, 'Mixer') # increase nb of paste by feeder name self.r_serv_log_submit.hincrby("mixer_cache:list_feeder", source, 1) # add tags for tag in ltags: Tag.add_tag('item', tag, rel_item_path) for tag in ltagsgalaxies: Tag.add_tag('item', tag, rel_item_path) self.r_serv_log_submit.incr(f'{uuid}:nb_end') self.r_serv_log_submit.incr(f'{uuid}:nb_sucess') if self.r_serv_log_submit.get(f'{uuid}:nb_end') == self.r_serv_log_submit.get(f'{uuid}:nb_total'): self.r_serv_log_submit.set(f'{uuid}:end', 1) self.redis_logger.debug(f' {rel_item_path} send to Global') print(f' {rel_item_path} send to Global') self.r_serv_log_submit.sadd(f'{uuid}:paste_submit_link', rel_item_path) curr_date = datetime.date.today() self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_paste', 1) self.redis_logger.debug("paste submitted") else: self.addError(uuid, f'File: {save_path} already exist in submitted pastes') return result def _compress_encode_content(self, content): gzip64encoded = None try: gzipencoded = gzip.compress(content) gzip64encoded = base64.standard_b64encode(gzipencoded).decode() except: self.abord_file_submission(uuid, "file error") return gzip64encoded def addError(self, uuid, errorMessage): self.redis_logger.debug(errorMessage) print(errorMessage) error = self.r_serv_log_submit.get(f'{uuid}:error') if error != None: self.r_serv_log_submit.set(f'{uuid}:error', error + '

' + errorMessage) self.r_serv_log_submit.incr(f'{uuid}:nb_end') def abord_file_submission(self, uuid, errorMessage): self.redis_logger.debug(f'abord {uuid}, {errorMessage}') self.addError(uuid, errorMessage) self.r_serv_log_submit.set(f'{uuid}:end', 1) curr_date = datetime.date.today() self.serv_statistics.hincrby(curr_date.strftime("%Y%m%d"),'submit_abord', 1) self.remove_submit_uuid(uuid) # # TODO: use Item function def get_item_date(self, item_filename): l_directory = item_filename.split('/') return f'{l_directory[-4]}{l_directory[-3]}{l_directory[-2]}' def verify_extention_filename(self, filename): if not '.' in filename: return True else: file_type = filename.rsplit('.', 1)[1] #txt file if file_type in SubmitPaste.ALLOWED_EXTENSIONS: return True else: return False if __name__ == '__main__': module = SubmitPaste() module.run()