From 9d0d0b4303747ddac540b1d4a0bec5fa580288cf Mon Sep 17 00:00:00 2001 From: Sami Mokaddem Date: Mon, 11 Dec 2017 17:28:34 +0100 Subject: [PATCH] update: Moved filtering operation (thresholds, number of matching in the categ file) in the configuration file. It permits to better control the flow of pastes. Also set default mixer duplicate filtering to 3 (Do not filter) --- bin/Categ.py | 3 ++- bin/Credential.py | 12 +++++++----- bin/Mixer.py | 2 +- bin/packages/config.cfg.sample | 16 ++++++++++++++-- 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/bin/Categ.py b/bin/Categ.py index 986080d4..3bf68664 100755 --- a/bin/Categ.py +++ b/bin/Categ.py @@ -52,6 +52,7 @@ if __name__ == "__main__": config_section = 'Categ' p = Process(config_section) + matchingThreshold = p.config.getint("Categ", "matchingThreshold") # SCRIPT PARSER # parser = argparse.ArgumentParser(description='Start Categ module on files.') @@ -90,7 +91,7 @@ if __name__ == "__main__": for categ, pattern in tmp_dict.items(): found = set(re.findall(pattern, content)) - if len(found) > 0: + if len(found) >= matchingThreshold: msg = '{} {}'.format(paste.p_path, len(found)) print msg, categ p.populate_set_out(msg, categ) diff --git a/bin/Credential.py b/bin/Credential.py index bb52f311..29f80f88 100755 --- a/bin/Credential.py +++ b/bin/Credential.py @@ -41,7 +41,6 @@ REDIS_KEY_ALL_CRED_SET_REV = 'AllCredentialsRev' REDIS_KEY_ALL_PATH_SET = 'AllPath' REDIS_KEY_ALL_PATH_SET_REV = 'AllPathRev' REDIS_KEY_MAP_CRED_TO_PATH = 'CredToPathMapping' -MINIMUMSIZETHRESHOLD = 3 if __name__ == "__main__": publisher.port = 6380 @@ -49,6 +48,8 @@ if __name__ == "__main__": config_section = "Credential" p = Process(config_section) publisher.info("Find credentials") + + minimumLengthThreshold = p.config.getint("Credential", "minimumLengthThreshold") faup = Faup() server_cred = redis.StrictRedis( @@ -56,7 +57,8 @@ if __name__ == "__main__": port=p.config.get("Redis_Level_DB_TermCred", "port"), 
db=p.config.get("Redis_Level_DB_TermCred", "db")) - critical = 8 + criticalNumberToAlert = p.config.getint("Credential", "criticalNumberToAlert") + minTopPassList = p.config.getint("Credential", "minTopPassList") regex_web = "((?:https?:\/\/)[-_0-9a-zA-Z]+\.[0-9a-zA-Z]+)" regex_cred = "[a-zA-Z0-9._-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}:[a-zA-Z0-9\_\-]+" @@ -71,7 +73,7 @@ if __name__ == "__main__": filepath, count = message.split() - if count < 5: + if count < minTopPassList: # Less than 5 matches from the top password list, false positive. print("false positive:", count) continue @@ -94,7 +96,7 @@ if __name__ == "__main__": print('\n '.join(creds)) #num of creds above tresh, publish an alert - if len(creds) > critical: + if len(creds) > criticalNumberToAlert: print("========> Found more than 10 credentials in this file : {}".format(filepath)) publisher.warning(to_print) #Send to duplicate @@ -154,6 +156,6 @@ if __name__ == "__main__": #Add the split to redis, each split point towards its initial credential unique number splitedCred = re.findall(REGEX_CRED, cred) for partCred in splitedCred: - if len(partCred) > MINIMUMSIZETHRESHOLD: + if len(partCred) > minimumLengthThreshold: server_cred.sadd(partCred, uniq_num_cred) diff --git a/bin/Mixer.py b/bin/Mixer.py index f2988a01..83475ac4 100755 --- a/bin/Mixer.py +++ b/bin/Mixer.py @@ -20,7 +20,7 @@ Depending on the configuration, this module will process the feed as follow: - Else, do not process it but keep track for statistics on duplicate operation_mode 3: "Don't look if duplicate" - - SImply do not bother to check if it is a duplicate + - Simply do not bother to check if it is a duplicate Note that the hash of the content is defined as the sha1(gzip64encoded). 
diff --git a/bin/packages/config.cfg.sample b/bin/packages/config.cfg.sample index da50932f..158c0491 100644 --- a/bin/packages/config.cfg.sample +++ b/bin/packages/config.cfg.sample @@ -30,6 +30,18 @@ default_display = 10 minute_processed_paste = 10 #### Modules #### +[Categ] +#Minimum number of matches between the paste and the category file +matchingThreshold=1 + +[Credential] +#Minimum length that a credential must have to be considered as such +minimumLengthThreshold=3 +#Will be pushed as alert if the number of credentials is greater than that number +criticalNumberToAlert=8 +#Will be considered as false positive if fewer than X matches from the top password list +minTopPassList=5 + [Modules_Duplicates] #Number of month to look back maximum_month_range = 3 @@ -45,8 +57,8 @@ min_paste_size = 0.3 threshold_stucked_module=600 [Module_Mixer] -#Define the configuration of the mixer, possible value: 1 or 2 -operation_mode = 1 +#Define the configuration of the mixer, possible values: 1, 2 or 3 +operation_mode = 3 #Define the time that a paste will be considerate duplicate. in seconds (1day = 86400) ttl_duplicate = 86400