From cf645299299bafe3537fed776eb69f3016a812c4 Mon Sep 17 00:00:00 2001 From: Jean-Louis Huynen Date: Tue, 16 Feb 2021 16:13:53 +0100 Subject: [PATCH 1/7] add: [filerwatcher] initial json file worker --- .../filewatcherjson/filewatcherjson.py | 39 +++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100755 server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py diff --git a/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py b/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py new file mode 100755 index 0000000..9962dc6 --- /dev/null +++ b/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 + +from meta_types_modules.MetaTypesDefault import MetaTypesDefault +import hashlib +import time +import os +import datetime + +class TypeHandler(MetaTypesDefault): + + def __init__(self, uuid, json_file): + super().__init__(uuid, json_file) + self.set_rotate_file_mode(False) + self.saved_dir = '' + + def process_data(self, data): + self.reconstruct_data(data) + + # pushing the filepath instead of the file content to the analyzer + def handle_reconstructed_data(self, data): + m = hashlib.sha256() + decoded_data = data.decode() + self.set_last_time_saved(time.time()) + self.set_last_saved_date(datetime.datetime.now().strftime("%Y%m%d%H%M%S")) + + # Create folder + jsons_save_dir = os.path.join(self.get_save_dir(), 'files') + if not os.path.isdir(jsons_save_dir): + os.makedirs(jsons_save_dir) + # write json file to disk + m.update(data) + jsons_path = os.path.join(jsons_save_dir, m.hexdigest()+'.json') + with open(jsons_path, 'w') as j: + j.write(decoded_data) + # Send data to Analyszer + self.send_to_analyzers(jsons_path) + + def test(self): + print('Class: filewatcherjson') From d74d2fb71a932f1f035adcf7365c4942255fe091 Mon Sep 17 00:00:00 2001 From: Jean-Louis Huynen Date: Wed, 17 Feb 2021 16:46:33 +0100 Subject: [PATCH 2/7] add: [filerwatcher] +base64 worker --- .../filewatcher/filewatcher.py | 47 +++++++++++++++++++ .../filewatcherjson/filewatcherjson.py | 5 +- 2 files changed, 49 insertions(+), 3 deletions(-) create mode 100755 server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py diff --git a/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py b/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py new file mode 100755 index 0000000..778cc5c --- /dev/null +++ b/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py @@ -0,0 +1,47 @@ +#!/usr/bin/env python3 + +from meta_types_modules.MetaTypesDefault import MetaTypesDefault +import hashlib +import time +import os +import datetime +import base64 + +class TypeHandler(MetaTypesDefault): + + def __init__(self, uuid, json_file): + super().__init__(uuid, json_file) + self.set_rotate_file_mode(False) + self.saved_dir = '' + + def process_data(self, data): + self.reconstruct_data(data) + + # pushing the filepath instead of the file content to the analyzer + def handle_reconstructed_data(self, data): + m = hashlib.sha256() + self.set_last_time_saved(time.time()) + self.set_last_saved_date(datetime.datetime.now().strftime("%Y%m%d%H%M%S")) + + # Create folder + save_dir = os.path.join(self.get_save_dir(), 'files') + #debug_dir = os.path.join(self.get_save_dir(), 'debug') + if not os.path.isdir(save_dir): + os.makedirs(save_dir) + #if not os.path.isdir(debug_dir): + # os.makedirs(debug_dir) + # write binary file to disk + decodeddata = base64.b64decode(data) + m.update(decodeddata) + path = os.path.join(save_dir, m.hexdigest()) + #pathd = os.path.join(debug_dir, m.hexdigest()) + with open(path, 'wb') as p: + p.write(decodeddata) + + #with open(pathd, 'wb') as p: + # p.write(data) + # Send data to Analyszer + self.send_to_analyzers(path) + + def test(self): + print('Class: filewatcher') diff --git a/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py b/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py index 9962dc6..1cb1fa4 100755 --- a/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py +++ b/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py @@ -19,7 +19,6 @@ class TypeHandler(MetaTypesDefault): # pushing the filepath instead of the file content to the analyzer def handle_reconstructed_data(self, data): m = hashlib.sha256() - decoded_data = data.decode() self.set_last_time_saved(time.time()) self.set_last_saved_date(datetime.datetime.now().strftime("%Y%m%d%H%M%S")) @@ -30,8 +29,8 @@ class TypeHandler(MetaTypesDefault): # write json file to disk m.update(data) jsons_path = os.path.join(jsons_save_dir, m.hexdigest()+'.json') - with open(jsons_path, 'w') as j: - j.write(decoded_data) + with open(jsons_path, 'wb') as j: + j.write(data) # Send data to Analyszer self.send_to_analyzers(jsons_path) From 5a3e299332fea3bf467c67383434589a7e1b9e2c Mon Sep 17 00:00:00 2001 From: Jean-Louis Huynen Date: Thu, 18 Feb 2021 14:37:43 +0100 Subject: [PATCH 3/7] add: [filerwatcher] enable by_uuid / date filing --- .../workers_2/meta_types_modules/filewatcher/filewatcher.py | 2 +- .../meta_types_modules/filewatcherjson/filewatcherjson.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py b/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py index 778cc5c..bb204b2 100755 --- a/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py +++ b/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py @@ -24,7 +24,7 @@ class TypeHandler(MetaTypesDefault): self.set_last_saved_date(datetime.datetime.now().strftime("%Y%m%d%H%M%S")) # Create folder - save_dir = os.path.join(self.get_save_dir(), 'files') + save_dir = os.path.join(self.get_save_dir(save_by_uuid=True), 'files') #debug_dir = os.path.join(self.get_save_dir(), 'debug') if not os.path.isdir(save_dir): os.makedirs(save_dir) diff --git a/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py b/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py index 1cb1fa4..a387160 100755 --- a/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py +++ b/server/workers/workers_2/meta_types_modules/filewatcherjson/filewatcherjson.py @@ -23,7 +23,7 @@ class TypeHandler(MetaTypesDefault): self.set_last_saved_date(datetime.datetime.now().strftime("%Y%m%d%H%M%S")) # Create folder - jsons_save_dir = os.path.join(self.get_save_dir(), 'files') + jsons_save_dir = os.path.join(self.get_save_dir(save_by_uuid=True), 'files') if not os.path.isdir(jsons_save_dir): os.makedirs(jsons_save_dir) # write json file to disk From ef6e87f3c509fb6c5e65133b679e9d7edb746bab Mon Sep 17 00:00:00 2001 From: Jean-Louis Huynen Date: Thu, 18 Feb 2021 17:00:55 +0100 Subject: [PATCH 4/7] chg: [filerwatcher] compression, ext from MH + remove buffer limits --- .../filewatcher/filewatcher.py | 50 +++++++++++++++---- 1 file changed, 39 insertions(+), 11 deletions(-) diff --git a/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py b/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py index bb204b2..5eaf55c 100755 --- a/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py +++ b/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py @@ -6,15 +6,22 @@ import time import os import datetime import base64 +import shutil +import gzip class TypeHandler(MetaTypesDefault): def __init__(self, uuid, json_file): super().__init__(uuid, json_file) + if "compress" in json_file: + self.compress = json_file['compress'] + if "extension" in json_file: + self.extension = json_file['extension'] self.set_rotate_file_mode(False) self.saved_dir = '' def process_data(self, data): + # Unpack the thing self.reconstruct_data(data) # pushing the filepath instead of the file content to the analyzer @@ -25,23 +32,44 @@ class TypeHandler(MetaTypesDefault): # Create folder save_dir = os.path.join(self.get_save_dir(save_by_uuid=True), 'files') - #debug_dir = os.path.join(self.get_save_dir(), 'debug') if not os.path.isdir(save_dir): os.makedirs(save_dir) - #if not os.path.isdir(debug_dir): - # os.makedirs(debug_dir) - # write binary file to disk + # write file to disk decodeddata = base64.b64decode(data) + m.update(decodeddata) path = os.path.join(save_dir, m.hexdigest()) - #pathd = os.path.join(debug_dir, m.hexdigest()) + path = '{}.{}'.format(path, self.extension) with open(path, 'wb') as p: p.write(decodeddata) - - #with open(pathd, 'wb') as p: - # p.write(data) - # Send data to Analyszer + if self.compress: + compressed_filename = '{}.gz'.format(path) + with open(path, 'rb') as f_in: + with gzip.open(compressed_filename, 'wb') as f_out: + shutil.copyfileobj(f_in, f_out) + os.remove(path) + self.send_to_analyzers(compressed_filename) + # Send data to Analyzer self.send_to_analyzers(path) - def test(self): - print('Class: filewatcher') + def reconstruct_data(self, data): + # save data in buffer + self.add_to_buffer(data) + data = self.get_buffer() + + # end of element found in data + if self.get_file_separator() in data: + # empty buffer + self.reset_buffer() + all_line = data.split(self.get_file_separator()) + for reconstructed_data in all_line[:-1]: + if reconstructed_data != b'': + self.handle_reconstructed_data(reconstructed_data) + + # save incomplete element in buffer + if all_line[-1] != b'': + self.add_to_buffer(all_line[-1]) + + +def test(self): + print('Class: filewatcher') \ No newline at end of file From 36a771ea2dc36dc0713c54f7ac5d86efd03a2d76 Mon Sep 17 00:00:00 2001 From: Jean-Louis Huynen Date: Thu, 18 Feb 2021 18:04:44 +0100 Subject: [PATCH 5/7] chg: [filerwatcher] define segragation from MH --- .../meta_types_modules/filewatcher/filewatcher.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py b/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py index 5eaf55c..1b2c676 100755 --- a/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py +++ b/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py @@ -13,10 +13,15 @@ class TypeHandler(MetaTypesDefault): def __init__(self, uuid, json_file): super().__init__(uuid, json_file) + self.compress = False + self.extension = '' + self.segregate = True if "compress" in json_file: self.compress = json_file['compress'] if "extension" in json_file: self.extension = json_file['extension'] + if "segregate" in json_file: + self.segregate = json_file['segregate'] self.set_rotate_file_mode(False) self.saved_dir = '' @@ -31,7 +36,7 @@ class TypeHandler(MetaTypesDefault): self.set_last_saved_date(datetime.datetime.now().strftime("%Y%m%d%H%M%S")) # Create folder - save_dir = os.path.join(self.get_save_dir(save_by_uuid=True), 'files') + save_dir = os.path.join(self.get_save_dir(save_by_uuid=self.segregate), 'files') if not os.path.isdir(save_dir): os.makedirs(save_dir) # write file to disk From a6d5a3d22c00c820879add21b38cc39747fa7bd8 Mon Sep 17 00:00:00 2001 From: Jean-Louis Huynen Date: Fri, 19 Feb 2021 09:45:47 +0100 Subject: [PATCH 6/7] chg: [filerwatcher] fix double queue logging for compressed files --- .../workers_2/meta_types_modules/filewatcher/filewatcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py b/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py index 1b2c676..a77fc05 100755 --- a/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py +++ b/server/workers/workers_2/meta_types_modules/filewatcher/filewatcher.py @@ -54,8 +54,8 @@ class TypeHandler(MetaTypesDefault): shutil.copyfileobj(f_in, f_out) os.remove(path) self.send_to_analyzers(compressed_filename) - # Send data to Analyzer - self.send_to_analyzers(path) + else: + self.send_to_analyzers(path) def reconstruct_data(self, data): # save data in buffer From 6c3c9f99548d93ca1cc1b1140235cb5f10a00d3c Mon Sep 17 00:00:00 2001 From: Jean-Louis Huynen Date: Tue, 23 Feb 2021 19:54:36 +0100 Subject: [PATCH 7/7] fix #41 --- server/configs/server.conf.sample | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/configs/server.conf.sample b/server/configs/server.conf.sample index ea4662a..1a71c14 100644 --- a/server/configs/server.conf.sample +++ b/server/configs/server.conf.sample @@ -6,7 +6,7 @@ save_directory = None [D4_Server] # registration or shared-secret server_mode = registration -analyzer_queues_max_size = 10000 +analyzer_queues_max_size = 100000000 [Flask_Server] # UI port number