mirror of https://github.com/CIRCL/AIL-framework
fix Duplicate, save list of duplicates on disk + prevent empty hash creation
parent
225fe76c96
commit
f66a528bc2
|
@ -158,6 +158,10 @@ if __name__ == "__main__":
|
|||
# Adding hashes in Redis
|
||||
for hash_type, paste_hash in paste_hashes.items():
    # An empty hash is reported and skipped entirely. Previously the
    # `set` below ran before this check, so a key named '' was still
    # written to Redis for every bad hash.
    if paste_hash == '':
        print('bad Hash: ' + hash_type)
        continue
    # Map the hash to the current index, then register it in the
    # per-hash-type set.
    r_serv1.set(paste_hash, index)
    r_serv1.sadd("HASHS_"+hash_type, paste_hash)
|
||||
|
||||
##################### Similarity found #######################
|
||||
|
@ -174,10 +178,11 @@ if __name__ == "__main__":
|
|||
if dupl != []:
|
||||
dupl = list(dupl)
|
||||
PST.__setattr__("p_duplicate", dupl)
|
||||
PST.save_attribute_redis("p_duplicate", dupl)
|
||||
PST.save_others_pastes_attribute_duplicate("p_duplicate", dupl)
|
||||
PST.save_attribute_duplicate(dupl)
|
||||
PST.save_others_pastes_attribute_duplicate(dupl)
|
||||
publisher.info('{}Detected {};{}'.format(to_print, len(dupl), PST.p_path))
|
||||
print('{}Detected {}'.format(to_print, len(dupl)))
|
||||
print('')
|
||||
|
||||
y = time.time()
|
||||
|
||||
|
|
|
@ -110,8 +110,6 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Attributes" bash -c './Attributes.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Lines" bash -c './Lines.py; read x'
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
|
||||
|
|
|
@ -76,6 +76,11 @@ class Paste(object):
|
|||
port=cfg.getint("Redis_Data_Merging", "port"),
|
||||
db=cfg.getint("Redis_Data_Merging", "db"),
|
||||
decode_responses=True)
|
||||
self.store_duplicate = redis.StrictRedis(
|
||||
host=cfg.get("ARDB_Metadata", "host"),
|
||||
port=cfg.getint("ARDB_Metadata", "port"),
|
||||
db=cfg.getint("ARDB_Metadata", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
self.p_path = p_path
|
||||
self.p_name = os.path.basename(self.p_path)
|
||||
|
@ -272,9 +277,9 @@ class Paste(object):
|
|||
return False, var
|
||||
|
||||
def _get_p_duplicate(self):
|
||||
self.p_duplicate = self.store.hget(self.p_path, "p_duplicate")
|
||||
self.p_duplicate = self.store_duplicate.smembers('dup:'+self.p_path)
|
||||
if self.p_duplicate is not None:
|
||||
return self.p_duplicate
|
||||
return list(self.p_duplicate)
|
||||
else:
|
||||
return '[]'
|
||||
|
||||
|
@ -323,27 +328,20 @@ class Paste(object):
|
|||
else:
|
||||
self.store.hset(self.p_path, attr_name, json.dumps(value))
|
||||
|
||||
def save_others_pastes_attribute_duplicate(self, attr_name, list_value):
|
||||
def save_attribute_duplicate(self, value):
    """
    Save the duplicates of this paste.

    Every entry of `value` is added, in its str() form, to the set
    'dup:' + self.p_path in self.store_duplicate.

    :param value: iterable of duplicate entries (the caller passes the
                  list of detected duplicates)
    """
    key = 'dup:' + self.p_path
    for duplicate in value:
        # Serialise explicitly: older redis-py releases applied str() to
        # non-string members implicitly, newer ones reject them.
        self.store_duplicate.sadd(key, str(duplicate))
|
||||
|
||||
def save_others_pastes_attribute_duplicate(self, list_value):
    """
    Save a new duplicate on the other pastes.

    Each item of `list_value` is unpacked as
    (hash_type, path, percent, date). For every item, the entry
    [hash_type, self.p_path, percent, date] is added, in its str() form,
    to the set 'dup:' + path in self.store_duplicate.

    Storing into a set replaces the former JSON read-modify-write on a
    hash field: adding a member that is already present leaves the set
    unchanged, so no explicit "already recorded" check is needed.

    :param list_value: iterable of (hash_type, path, percent, date)
    """
    for hash_type, path, percent, date in list_value:
        # Always build the entry before use; it is the reverse link that
        # points from the other paste back to this one.
        to_add = [hash_type, self.p_path, percent, date]
        # Serialise explicitly: older redis-py releases applied str() to
        # non-string members implicitly, newer ones reject them.
        self.store_duplicate.sadd('dup:' + path, str(to_add))
|
||||
|
||||
def _get_from_redis(self, r_serv):
|
||||
ans = {}
|
||||
|
|
|
@ -152,6 +152,11 @@ host = localhost
|
|||
port = 6382
|
||||
db = 6
|
||||
|
||||
[ARDB_Metadata]
|
||||
host = localhost
|
||||
port = 6382
|
||||
db = 7
|
||||
|
||||
[Url]
|
||||
cc_critical = DE
|
||||
|
||||
|
|
|
@ -65,6 +65,18 @@ r_serv_pasteName = redis.StrictRedis(
|
|||
db=cfg.getint("Redis_Paste_Name", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
r_serv_tags = redis.StrictRedis(
|
||||
host=cfg.get("ARDB_Tags", "host"),
|
||||
port=cfg.getint("ARDB_Tags", "port"),
|
||||
db=cfg.getint("ARDB_Tags", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
r_serv_metadata = redis.StrictRedis(
|
||||
host=cfg.get("ARDB_Metadata", "host"),
|
||||
port=cfg.getint("ARDB_Metadata", "port"),
|
||||
db=cfg.getint("ARDB_Metadata", "db"),
|
||||
decode_responses=True)
|
||||
|
||||
# VARIABLES #
|
||||
max_preview_char = int(cfg.get("Flask", "max_preview_char")) # Maximum number of character to display in the tooltip
|
||||
max_preview_modal = int(cfg.get("Flask", "max_preview_modal")) # Maximum number of character to display in the modal
|
||||
|
|
|
@ -18,6 +18,7 @@ import Flask_config
|
|||
app = Flask_config.app
|
||||
cfg = Flask_config.cfg
|
||||
r_serv_pasteName = Flask_config.r_serv_pasteName
|
||||
r_serv_metadata = Flask_config.r_serv_metadata
|
||||
max_preview_char = Flask_config.max_preview_char
|
||||
max_preview_modal = Flask_config.max_preview_modal
|
||||
DiffMaxLineLength = Flask_config.DiffMaxLineLength
|
||||
|
@ -38,20 +39,22 @@ def showpaste(content_range):
|
|||
p_mime = paste.p_mime
|
||||
p_lineinfo = paste.get_lines_info()
|
||||
p_content = paste.get_p_content()
|
||||
p_duplicate_full_list = json.loads(paste._get_p_duplicate())
|
||||
p_duplicate_str_full_list = paste._get_p_duplicate()
|
||||
|
||||
p_duplicate_full_list = []
|
||||
p_duplicate_list = []
|
||||
p_simil_list = []
|
||||
p_date_list = []
|
||||
p_hashtype_list = []
|
||||
|
||||
|
||||
for dup_list in p_duplicate_full_list:
|
||||
for dup_list in p_duplicate_str_full_list:
|
||||
dup_list = dup_list[1:-1].replace('\'', '').replace(' ', '').split(',')
|
||||
if dup_list[0] == "tlsh":
|
||||
dup_list[2] = 100 - int(dup_list[2])
|
||||
else:
|
||||
print('dup_list')
|
||||
print(dup_list)
|
||||
dup_list[2] = int(dup_list[2])
|
||||
p_duplicate_full_list.append(dup_list)
|
||||
|
||||
#p_duplicate_full_list.sort(lambda x,y: cmp(x[2], y[2]), reverse=True)
|
||||
|
||||
|
@ -69,8 +72,8 @@ def showpaste(content_range):
|
|||
comp_vals.append(p_duplicate_full_list[i][2])
|
||||
dup_list_removed.append(i)
|
||||
|
||||
hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
|
||||
comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)
|
||||
#hash_types = str(hash_types).replace("[","").replace("]","") if len(hash_types)==1 else str(hash_types)
|
||||
#comp_vals = str(comp_vals).replace("[","").replace("]","") if len(comp_vals)==1 else str(comp_vals)
|
||||
|
||||
if len(p_duplicate_full_list[dup_list_index]) > 3:
|
||||
try:
|
||||
|
@ -80,7 +83,7 @@ def showpaste(content_range):
|
|||
date_paste = str(p_duplicate_full_list[dup_list_index][3])
|
||||
else:
|
||||
date_paste = "No date available"
|
||||
new_dup_list.append([hash_types.replace("'", ""), p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste])
|
||||
new_dup_list.append([hash_types, p_duplicate_full_list[dup_list_index][1], comp_vals, date_paste])
|
||||
|
||||
# Create the list to pass to the webpage
|
||||
for dup_list in new_dup_list:
|
||||
|
|
Loading…
Reference in New Issue