fix: [Global: already saved filename] save updated + filter duplicated items

pull/486/head
Terrtia 2020-02-03 14:51:51 +01:00
parent e808840f95
commit 4d8db3fcc4
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
1 changed file with 53 additions and 18 deletions

View File

@ -21,14 +21,24 @@ Requirements
""" """
import base64 import base64
import hashlib
import io
import gzip
import os import os
import sys
import time import time
import uuid import uuid
from pubsublogger import publisher from pubsublogger import publisher
from Helper import Process from Helper import Process
def gunzip_bytes_obj(bytes_obj):
    """Decompress an in-memory gzip payload and return its raw content.

    :param bytes_obj: gzip-compressed data as a bytes-like object
    :return: the decompressed content as ``bytes``
    :raises OSError: if ``bytes_obj`` is not valid gzip data
    """
    # GzipFile needs a file-like object; BytesIO gives it a readable stream
    # positioned at offset 0, so nothing has to be written to disk.
    with gzip.GzipFile(fileobj=io.BytesIO(bytes_obj), mode='rb') as fo:
        return fo.read()
def rreplace(s, old, new, occurrence): def rreplace(s, old, new, occurrence):
li = s.rsplit(old, occurrence) li = s.rsplit(old, occurrence)
@ -67,9 +77,9 @@ if __name__ == '__main__':
publisher.debug("Empty Paste: {0} not processed".format(message)) publisher.debug("Empty Paste: {0} not processed".format(message))
continue continue
else: else:
print("Empty Queues: Waiting...") #print("Empty Queues: Waiting...")
if int(time.time() - time_1) > 30: if int(time.time() - time_1) > 30:
to_print = 'Global; ; ; ;glob Processed {0} paste(s)'.format(processed_paste) to_print = 'Global; ; ; ;glob Processed {0} paste(s) in {1} s'.format(processed_paste, time.time() - time_1)
print(to_print) print(to_print)
#publisher.info(to_print) #publisher.info(to_print)
time_1 = time.time() time_1 = time.time()
@ -95,28 +105,53 @@ if __name__ == '__main__':
print('Path traversal detected {}'.format(filename)) print('Path traversal detected {}'.format(filename))
publisher.warning('Global; Path traversal detected') publisher.warning('Global; Path traversal detected')
else: else:
# decode compressed base64
decoded = base64.standard_b64decode(gzip64encoded)
# check if file exist
if os.path.isfile(filename):
print('File already exist {}'.format(filename))
publisher.warning('Global; File already exist')
with gzip.open(filename, 'rb') as f:
curr_file_content = f.read()
curr_file_md5 = hashlib.md5(curr_file_content).hexdigest()
new_file_content = gunzip_bytes_obj(decoded)
new_file_md5 = hashlib.md5(new_file_content).hexdigest()
if new_file_md5 != curr_file_md5:
if filename.endswith('.gz'):
filename = '{}_{}.gz'.format(filename[:-3], new_file_md5)
else:
filename = '{}_{}'.format(filename, new_file_md5)
# continue if new file already exist
if os.path.isfile(filename):
print('ignore duplicated file')
continue
print('new file: {}'.format(filename))
# ignore duplicate
else:
print('ignore duplicated file')
continue
# create subdir
dirname = os.path.dirname(filename) dirname = os.path.dirname(filename)
if not os.path.exists(dirname): if not os.path.exists(dirname):
os.makedirs(dirname) os.makedirs(dirname)
decoded = base64.standard_b64decode(gzip64encoded)
with open(filename, 'wb') as f: with open(filename, 'wb') as f:
f.write(decoded) f.write(decoded)
'''try:
decoded2 = gunzip_bytes_obj(decoded)
except:
decoded2 =''
type = magic.from_buffer(decoded2, mime=True) paste = filename
# remove PASTES_FOLDER from
if type!= 'text/x-c++' and type!= 'text/html' and type!= 'text/x-c' and type!= 'text/x-python' and type!= 'text/x-php' and type!= 'application/xml' and type!= 'text/x-shellscript' and type!= 'text/plain' and type!= 'text/x-diff' and type!= 'text/x-ruby': if PASTES_FOLDERS in paste:
paste = paste.replace(PASTES_FOLDERS, '', 1)
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
print(filename)
print(type)
print('-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------')
'''
p.populate_set_out(paste) p.populate_set_out(paste)
processed_paste+=1 processed_paste+=1