AIL-framework/bin/Global.py

115 lines
3.8 KiB
Python
Raw Normal View History

2018-05-04 13:53:29 +02:00
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The Global Module
=================
This module consumes the Redis list created by the ZMQ_Feed_Q module
and saves each paste to disk so that other modules can work on it.
.. todo:: Be able to choose whether or not to delete the saved paste after processing.
.. todo:: Store the empty (unprocessed) pastes somewhere in Redis.
.. note:: The ZMQ_Something_Q and ZMQ_Something modules are closely bound; always use
   the same Subscriber name in both of them.
Requirements
------------
* Needs running Redis instances.
* Needs the ZMQ_Feed_Q module to be running in order to work properly.
"""
import base64
import os
import time
import uuid
from pubsublogger import publisher
from Helper import Process
2018-04-20 10:42:19 +02:00
import magic
def rreplace(s: str, old: str, new: str, occurrence: int) -> str:
    """Replace occurrences of `old` in `s` with `new`, starting from the right.

    Args:
        s: The string to operate on.
        old: The substring to look for.
        new: The replacement substring.
        occurrence: Maximum number of right-most matches to replace. It is
            passed to `str.rsplit` as `maxsplit`, so `0` replaces nothing and
            a negative value replaces every match.

    Returns:
        A new string with up to `occurrence` right-most matches replaced.
        `s` is returned unchanged when `old` does not occur in it.
    """
    # Splitting from the right on `old` and re-joining with `new` swaps only
    # the right-most `occurrence` separators.
    li = s.rsplit(old, occurrence)
    return new.join(li)
2018-04-20 10:42:19 +02:00
if __name__ == '__main__':
    # Entry point of the module: read "<item path> <base64 payload>" messages
    # from the module's input queue, write each decoded payload to disk under
    # PASTES_FOLDER, then forward the item path to the output queue.

    # Configure the pubsublogger publisher used for this module's log messages.
    publisher.port = 6380
    publisher.channel = 'Script'

    # Number of items handled since the last statistics print-out.
    processed_paste = 0
    time_1 = time.time()

    config_section = 'Global'

    p = Process(config_section)

    PASTES_FOLDER = os.path.join(os.environ['AIL_HOME'], p.config.get("Directories", "pastes"))
    PASTES_FOLDERS = PASTES_FOLDER + '/'
    # Canonical form of the storage root; only used for the containment check.
    pastes_root = os.path.realpath(PASTES_FOLDER)

    # LOGGING #
    publisher.info("Feed Script started to receive & publish.")

    while True:

        message = p.get_from_set()

        # Nothing queued: print statistics at most every 30 seconds and wait.
        if message is None:
            print("Empty Queues: Waiting...")
            if int(time.time() - time_1) > 30:
                to_print = 'Global; ; ; ;glob Processed {0} paste(s)'.format(processed_paste)
                print(to_print)
                #publisher.info(to_print)
                time_1 = time.time()
                processed_paste = 0
            time.sleep(1)
            continue

        # Recovering the streamed message informations.
        # A valid message is exactly two whitespace-separated tokens.
        splitted = message.split()
        if len(splitted) != 2:
            # TODO Store the name of the empty paste inside a Redis-list.
            print("Empty Paste: not processed")
            publisher.debug("Empty Paste: {0} not processed".format(message))
            continue
        paste, gzip64encoded = splitted

        # Decode before touching the filesystem so that a malformed payload
        # neither creates directories nor crashes the module.
        # binascii.Error, raised on invalid base64, is a ValueError subclass.
        try:
            decoded = base64.standard_b64decode(gzip64encoded)
        except ValueError:
            print("Invalid base64 content: {0} not processed".format(paste))
            publisher.warning("Global; Invalid base64 content: {0} not processed".format(paste))
            continue

        # Keep the file name within the 255-character limit: keep its first
        # 215 characters and append a UUID (36 chars) plus '.gz' (3 chars).
        file_name_paste = paste.split('/')[-1]
        if len(file_name_paste) > 255:
            new_file_name_paste = '{}{}.gz'.format(file_name_paste[:215], str(uuid.uuid4()))
            paste = rreplace(paste, file_name_paste, new_file_name_paste, 1)

        # Creating the full filepath
        # NOTE: os.path.join discards PASTES_FOLDER when `paste` is absolute,
        # and '..' segments can climb out of it, so the resolved path must be
        # checked before writing. realpath also resolves symlinks, so a
        # symlinked sub-directory pointing outside the root is rejected too.
        filename = os.path.realpath(os.path.join(PASTES_FOLDER, paste))
        if filename == pastes_root or os.path.commonpath([pastes_root, filename]) != pastes_root:
            print("Path traversal detected: {0} not processed".format(filename))
            publisher.warning("Global; Path traversal detected: {0} not processed".format(paste))
            continue

        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(os.path.dirname(filename), exist_ok=True)

        # The payload is stored as received after base64 decoding (it is not
        # decompressed here).
        with open(filename, 'wb') as f:
            f.write(decoded)

        # remove PASTES_FOLDER from item path (crawled item + submited)
        if PASTES_FOLDERS in paste:
            paste = paste.replace(PASTES_FOLDERS, '', 1)

        p.populate_set_out(paste)
        processed_paste += 1