mirror of https://github.com/CIRCL/AIL-framework
				
				
				
			
		
			
				
	
	
		
			104 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
			
		
		
	
	
			104 lines
		
	
	
		
			3.6 KiB
		
	
	
	
		
			Python
		
	
	
| import redis
 | |
| import string
 | |
| 
 | |
| 
 | |
| def create_common_hash_file(r_serv, zmin, zmax, filename):
 | |
|     """ Create a "top100".txt file.
 | |
| 
 | |
|     :param r_serv: -- connexion to redis database
 | |
|     :param zmin: -- (int) Offset of the top list
 | |
|     :param zmax: -- (int) Number of element wanted to be in the top list.
 | |
|     :param filename: -- the pathname to the created file.
 | |
| 
 | |
|     This Function create a ranking list between zmin and zman of the most common
 | |
|     hashs.
 | |
|     Line are written as follow in the file:
 | |
|     hash:[md5hash]:[cardinality]:[line]
 | |
|     All hashes represent a full line which mean it can be one char or more...
 | |
| 
 | |
|     """
 | |
|     with open(filename, 'wb') as F:
 | |
| 
 | |
|         for h, num in r_serv.zrevrangebyscore("hash", "+inf", "-inf", zmin, zmax, True):
 | |
| 
 | |
|             F.write("hash:{0}:{1}:{2}\n".format(h, num, list(r_serv.smembers('L:'+h))))
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| def paste_searching(r_serv, filename, pastename, mincard, maxcard):
 | |
|     """Search similar hashs from a given file.
 | |
| 
 | |
|     :param r_serv: -- connexion to redis database
 | |
|     :param filename: -- the pathname to the created file.
 | |
|     :param pastename: -- the name of the paste used to search in redis database.
 | |
|     :param mincard: -- the minimum occurence needed of an hash to be taken in count.
 | |
|     :param maxcard: -- the maximum occurence needed of an hash to be taken in count.
 | |
| 
 | |
|     This function return a text file which is a kind of synthesis about
 | |
|     where (in the others pastes) the hash of the given pastename have been found.
 | |
| 
 | |
|     """
 | |
|     P = set([pastename])
 | |
|     tmp_h = str()
 | |
|     tmp_set = set([])
 | |
| 
 | |
|     with open(filename, 'wb') as F:
 | |
| 
 | |
|         F.write("Paste: {0}\nOptions used:\nMincard: {1}\nMaxcard: {2}\n\nContaining Following Hash:\n".format(pastename,mincard,maxcard))
 | |
| 
 | |
|         for h in r_serv.smembers("hash"):
 | |
| 
 | |
|             if (r_serv.smembers(h).intersection(P) and r_serv.scard(h) >= mincard and r_serv.scard(h) <= maxcard):
 | |
| 
 | |
|                 F.write(h+'\n')
 | |
|                 tmp_set = tmp_set.union(r_serv.smembers(h).union(r_serv.smembers(tmp_h)))
 | |
| 
 | |
|             tmp_h = h
 | |
| 
 | |
|         F.write("\nSimilar Files:\n")
 | |
| 
 | |
|         for n, s in enumerate(tmp_set):
 | |
| 
 | |
|             F.write(str(n) + ': ' + s + '\n')
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| def paste_searching2(r_serv, filename, pastename, mincard, maxcard):
 | |
|     """Search similar hashs from a given file.
 | |
|     (On another kind of redis data structure)
 | |
| 
 | |
|     :param r_serv: -- connexion to redis database
 | |
|     :param filename: -- the pathname to the created file.
 | |
|     :param pastename: -- the name of the paste used to search in redis database.
 | |
|     :param mincard: -- the minimum occurence needed of an hash to be taken in count.
 | |
|     :param maxcard: -- the maximum occurence needed of an hash to be taken in count.
 | |
| 
 | |
|     This function return a text file which is a kind of synthesis about
 | |
|     where (in the others pastes) the hash of the given pastename have been found.
 | |
| 
 | |
|     """
 | |
|     P = set([pastename])
 | |
|     tmp_h = str()
 | |
|     tmp_set = set([])
 | |
| 
 | |
|     with open(filename, 'wb') as F:
 | |
| 
 | |
|         F.write("Paste: {0}\nOptions used:\nMincard: {1}\nMaxcard: {2}\n\n###Containing Following Hash:### ###Occur### ###### Corresponding Line ######\n".format(pastename,mincard,maxcard))
 | |
| 
 | |
|         for h in r_serv.zrange("hash", 0, -1):
 | |
| 
 | |
|             if (r_serv.smembers(h).intersection(P) and r_serv.scard(h) >= mincard and r_serv.scard(h) <= maxcard):
 | |
| 
 | |
|                 F.write(h + ' -- ' + str(r_serv.zscore("hash",h)) + ' -- ' + str(list(r_serv.smembers('L:' + h))) + '\n')
 | |
|                 tmp_set = tmp_set.union(r_serv.smembers(h).union(r_serv.smembers(tmp_h)))
 | |
| 
 | |
|             tmp_h = h
 | |
| 
 | |
|         F.write("\nSimilar Files:\n")
 | |
| 
 | |
|         for n, s in enumerate(tmp_set):
 | |
| 
 | |
|             F.write(str(n) + ': ' + s + '\n')
 |