mirror of https://github.com/MISP/misp-galaxy
				
				
				
			
		
			
				
	
	
		
			67 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			67 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| #!/usr/bin/env python3
 | |
| # coding=utf-8
 | |
| """
 | |
|     Tools to find duplicate in galaxies
 | |
| """
 | |
| import json
 | |
| import os
 | |
| import collections
 | |
| 
 | |
| 
 | |
| def loadjsons(path, return_paths=False):
 | |
|     """
 | |
|     Find all Jsons and load them in a dict
 | |
| 
 | |
|     Parameters:
 | |
|         path: string
 | |
|         return_names: boolean, if the name of the file should be returned,
 | |
|         default: False
 | |
| 
 | |
|     Returns:
 | |
|         List of parsed file contents.
 | |
|         If return_paths is True, then every list item is a tuple of the
 | |
|         file name and the file content
 | |
|     """
 | |
|     files = []
 | |
|     data = []
 | |
|     for name in os.listdir(path):
 | |
|         if os.path.isfile(os.path.join(path, name)) and name.endswith('.json'):
 | |
|             files.append(name)
 | |
|     for jfile in files:
 | |
|         filepath = os.path.join(path, jfile)
 | |
|         if return_paths:
 | |
|             data.append((filepath, json.load(open(filepath))))
 | |
|         else:
 | |
|             data.append(json.load(json.load(open(filepath))))
 | |
|     return data
 | |
| 
 | |
| 
 | |
| if __name__ == '__main__':
 | |
|     """
 | |
|         Iterate all name + synonyms
 | |
|         tell what is duplicated.
 | |
|     """
 | |
|     jsons = loadjsons("../clusters")
 | |
|     counter = collections.Counter()
 | |
|     namespace = []
 | |
|     for djson in jsons:
 | |
|         items = djson.get('values')
 | |
|         for entry in items:
 | |
|             name = entry.get('value').strip().lower()
 | |
|             counter[name] += 1
 | |
|             namespace.append([name, djson.get('name')])
 | |
|             try:
 | |
|                 for synonym in entry.get('meta').get('synonyms'):
 | |
|                     name = synonym.strip().lower()
 | |
|                     counter[name] += 1
 | |
|                     namespace.append([name, djson.get('name')])
 | |
|             except (AttributeError, TypeError):
 | |
|                 pass
 | |
|     counter = dict(counter)
 | |
|     for key, val in counter.items():
 | |
|         if val > 1:
 | |
|             print("Warning duplicate %s" % key)
 | |
|             for item in namespace:
 | |
|                 if item[0] == key:
 | |
|                     print(item)
 |