mirror of https://github.com/CIRCL/AIL-framework
maxi cleanup old code :'(
parent
7a3c216787
commit
04a8f1bdf2
|
@ -1,64 +0,0 @@
|
|||
import networkx as nx
|
||||
import xml.sax.saxutils as xlm
|
||||
import redis
|
||||
|
||||
def Gephi_Graph(r_serv, graphpath, mincard, maxcard, insert_type):
    """Build the hash graph and export it as a GEXF file.

    :param r_serv: -- connection to the redis database
    :param graphpath: -- path of the graph file to write
    :param mincard: -- minimum cardinality forwarded to Create_Graph
    :param maxcard: -- maximum cardinality forwarded to Create_Graph
    :param insert_type: -- layout of the redis "hash" key: 0 reads it as a
        plain set, 2 reads it as a sorted set

    Every hash listed under the "hash" key is handed to Create_Graph, which
    adds the edges. Any other insert_type leaves the graph empty. The graph
    is then written to graphpath and its summary is printed.

    """
    graph = nx.Graph()

    if insert_type == 0:
        hashes = r_serv.smembers("hash")
    elif insert_type == 2:
        hashes = r_serv.zrange("hash", 0, -1)
    else:
        hashes = ()

    for hashed_line in hashes:
        Create_Graph(r_serv, graph, hashed_line, graphpath, mincard, maxcard)

    nx.write_gexf(graph, graphpath)
    print(nx.info(graph))
|
||||
|
||||
|
||||
|
||||
|
||||
def Create_Graph(r_serv, graph, h, graphpath, mincard, maxcard):
    """Add to the graph the edges linking one hash to its pastes' lines.

    :param r_serv: -- connection to the redis database
    :param graph: -- graph object exposing add_edge (a networkx graph)
    :param h: -- (str) the hash, used both as redis key and as graph node
    :param graphpath: -- unused here; kept so existing callers keep working
    :param mincard: -- minimum cardinality of the set stored at h
    :param maxcard: -- maximum cardinality of the set stored at h

    When the cardinality of the redis set stored at h lies within
    [mincard, maxcard], every member of that set is read as a redis key in
    turn, and each member found there becomes a node
    "<line> -- <filename>" connected to h.

    """
    # Read the cardinality once so both bounds are checked against the same
    # value and only one round trip is made.
    cardinality = r_serv.scard(h)
    if not (mincard <= cardinality <= maxcard):
        return

    for filename in r_serv.smembers(h):
        for line in r_serv.smembers(filename):
            line = line.decode('UTF-8', errors='ignore')
            # NOTE(review): this entity map replaces each quote character
            # with itself, so it adds nothing to quoteattr's own escaping --
            # confirm whether HTML entities were intended here.
            line = xlm.quoteattr(line, {'"':'"', "'":"'"})
            graph.add_edge(h, line+" -- "+filename)
|
||||
|
||||
#OK
|
|
@ -1,151 +0,0 @@
|
|||
import redis, time, sys, os, inspect
|
||||
|
||||
from datetime import timedelta, date, datetime
|
||||
|
||||
from pubsublogger import publisher
|
||||
|
||||
def set_listof_pid(r_serv, filename, name):
    """Register the current process in redis.

    :param r_serv: -- Connection to redis.
    :param filename: -- path of the paste being processed; it must exist
        because its size is read.
    :param name: -- the traditional argv[0] (name of the launched script)

    The current pid is added to the "pid" set, and a hash keyed by that pid
    is written with the following fields:

    +------------+---------------------------------------------+
    | Field      | Value                                       |
    +============+=============================================+
    | startime   | current local time, e.g. 2014-05-09_11:44:17|
    +------------+---------------------------------------------+
    | prog       | name                                        |
    +------------+---------------------------------------------+
    | pid        | the pid as a string                         |
    +------------+---------------------------------------------+
    | paste      | filename                                    |
    +------------+---------------------------------------------+
    | Kb         | size of filename in KiB, rounded to 2 places|
    +------------+---------------------------------------------+

    Note that the size field is spelled "Kb" (capital K); readers such as
    format_display_listof_pid look it up under that exact name.

    """
    pid = os.getpid()
    size_kb = round(os.path.getsize(filename) / 1024.0, 2)

    r_serv.sadd("pid", pid)
    r_serv.hmset(pid,
                 {
                     "startime": time.strftime("%Y-%m-%d_%H:%M:%S"),
                     "prog": name,
                     "pid": str(pid),
                     "paste": filename,
                     "Kb": size_kb
                 })
|
||||
|
||||
|
||||
|
||||
|
||||
def update_listof_pid(r_serv):
    """Unregister the current process from redis.

    :param r_serv: -- Connection to redis.

    The current pid is removed from the "pid" set and the key named after
    that pid is deleted.

    """
    current_pid = os.getpid()
    r_serv.srem("pid", current_pid)
    r_serv.delete(current_pid)
|
||||
|
||||
|
||||
|
||||
|
||||
def flush_list_of_pid(r_serv):
    """Delete every registered pid entry from redis.

    :param r_serv: -- Connection to redis.

    Each member of the "pid" set names a key that is deleted; the "pid" set
    itself is deleted last.

    """
    registered = r_serv.smembers("pid")
    for pid_key in registered:
        r_serv.delete(pid_key)

    r_serv.delete("pid")
|
||||
|
||||
|
||||
|
||||
|
||||
def format_display_listof_pid(dico, arg):
    """Format one job description for display in a shell.

    :param dico: (dict) job fields: pid, uptime, Kb, paste, prog, startime
    :param arg: (str) selector of the field to show

    :returns: (str)

    The selectors 'pid', 'up', 'kb', 'paste', 'startime' and 'prg' return the
    single matching field; any other selector returns all fields on one
    comma-separated line.

    """
    # selector -> key of the field it displays
    field_for = {
        'pid': 'pid',
        'up': 'uptime',
        'kb': 'Kb',
        'paste': 'paste',
        'startime': 'startime',
        'prg': 'prog',
    }

    if arg in field_for:
        return "{0}".format(dico[field_for[arg]])

    ordered = [dico[key] for key in
               ('pid', 'uptime', 'Kb', 'paste', 'prog', 'startime')]
    return "PID:{0},uptime:{1},kb:{2},paste:{3},prog:{4},startime:{5}".format(*ordered)
|
||||
|
||||
|
||||
|
||||
|
||||
def display_listof_pid(r_serv, arg):
    """Print the registered processes, longest running first.

    :param r_serv: -- Connection to redis.
    :param arg: -- (str) selector forwarded to format_display_listof_pid;
        "remain" and "processed" additionally print the length of the
        "filelist" / "processed" redis lists.

    For every member of the "pid" set the matching hash is read, its uptime
    is computed from the "startime" field and one line is printed per job.
    A pid whose hash is missing or has no "startime" is skipped with a debug
    message instead of aborting the whole listing.

    """
    time_format = "%Y-%m-%d_%H:%M:%S"
    running = []

    try:
        # Same resolution as the stored start times (whole seconds).
        now = datetime.now().replace(microsecond=0)

        for job in r_serv.smembers("pid"):
            jobs = r_serv.hgetall(job)

            # A missing key yields an empty mapping, not None, so test the
            # content that is actually needed.
            if not jobs or jobs.get("startime") is None:
                publisher.debug("display_list_of_pid Aborted due to lack of Information in Redis")
                continue

            uptime = abs(datetime.strptime(jobs["startime"], time_format) - now)
            jobs['uptime'] = str(uptime)
            running.append((uptime, jobs))

        # Sort on the timedelta itself: the string form does not order
        # correctly ("10:00:00" < "9:00:00", "1 day, ..." < "2:00:00").
        running.sort(key=lambda pair: pair[0], reverse=True)

        for _uptime, jobs in running:
            print(format_display_listof_pid(jobs, arg))

        if arg == "remain":
            print("Remaining: {0}".format(r_serv.llen("filelist")))

        if arg == "processed":
            print("processed: {0}".format(r_serv.llen("processed")))

    except TypeError:
        publisher.error("TypeError for display_listof_pid")
|
|
@ -1,203 +0,0 @@
|
|||
import sys, hashlib, os, os.path, gzip, string, glob, itertools, copy, shutil
|
||||
import redis, crcmod, mmh3, time, fileinput
|
||||
import crcmod, mmh3
|
||||
|
||||
from operator import itemgetter, attrgetter
|
||||
from pubsublogger import publisher
|
||||
|
||||
|
||||
|
||||
|
||||
def listdirectory(path):
    """Collect the files found under a directory tree.

    :param path: -- The pathname of the directory to walk.

    :returns: (list) one entry per file, each being the walked directory
        joined with the file name, in os.walk order.

    """
    return [os.path.join(root, name)
            for root, _dirs, files in os.walk(path)
            for name in files]
|
||||
|
||||
|
||||
|
||||
|
||||
def clean(dirty):
    """Return dirty with every character outside string.printable removed."""
    return ''.join(char for char in dirty if char in string.printable)
|
||||
|
||||
|
||||
|
||||
def select_hash(hashkind, line):
    """Hash a line with the requested algorithm.

    :param hashkind: -- (str) one of "md5", "sha1", "crc" or "murmur"
    :param line: -- the data to hash

    :returns: (str) the digest; hexadecimal for md5, sha1 and crc, the
        decimal form of the integer returned by mmh3.hash for murmur.
    :raises ValueError: -- if hashkind is not one of the names above
        (previously this surfaced as an UnboundLocalError).

    """
    if hashkind == "md5":
        hashline = hashlib.md5(line).hexdigest()

    elif hashkind == "sha1":
        hashline = hashlib.sha1(line).hexdigest()

    elif hashkind == "crc":
        crc32 = crcmod.Crc(0x104c11db7, initCrc=0, xorOut=0xFFFFFFFF)
        crc32.update(line)
        hashline = crc32.hexdigest()

    elif hashkind == "murmur":
        hashline = mmh3.hash(line)

    else:
        raise ValueError("unknown hash kind: {0!r}".format(hashkind))

    return str(hashline)
|
||||
|
||||
|
||||
|
||||
|
||||
def redis_populate(pipe, folder, minline, hashkind, jmp, insert_type):
    """Feed the lines of gzipped files to insert_redis.

    :param pipe: -- Redis pipe
    :param folder: -- iterable of paths of the gzipped files to process
    :param minline: -- the minimum length of line to hash
    :param hashkind: -- the hash to use
    :param jmp: -- line step: with 1 (or True) every line is inserted; with
        a larger value N only lines 1, N+1, 2N+1, ... are inserted. 0, False
        and None are treated like 1 instead of failing on a division by
        zero.
    :param insert_type: -- which kind of datastructure to create in redis.

    Lines are numbered from 1. The pipe is executed once per file.

    """
    every_line = (not jmp) or jmp == 1

    for filename in folder:

        with gzip.open(filename, 'rb') as F:

            for num, line in enumerate(F, 1):

                if every_line or (num % jmp) == 1:
                    insert_redis(filename,
                                 line,
                                 pipe,
                                 minline,
                                 hashkind,
                                 num,
                                 insert_type)

        pipe.execute()
|
||||
|
||||
|
||||
|
||||
|
||||
def insert_redis(filename, line, pipe, minline, hashkind, num, insert_type):
    """Queue the redis commands recording one line of a paste.

    :param filename: -- path of the paste; only the part after the last
        '/' is stored
    :param line: -- the clear line which will be hashed.
    :param pipe: -- Redis pipe
    :param minline: -- the minimum length of line to hash
    :param hashkind: -- the hash to use
    :param num: -- (int) line number; the line numbered 1 is stored for
        human reading
    :param insert_type: -- (int) Choose the datastructure used in redis.

    Layouts, by insert_type (any other value queues nothing):

    2 -- ALLIN
    "hash"[hashedline] sorted set, score incremented per occurrence
    "hashedline"[paste name] set associating the paste to its hashed line
    "L:hashedline"[clear line] set keeping the cleaned line
    "paste name"[first line] first 80 characters of line 1, cleaned

    1 -- SORTED SET (for the ./top.py script)
    "hash"[hashedline] sorted set, score incremented per occurrence
    "hashedline"[clear line] set keeping the cleaned line

    0 -- BASIC SET (for ./Graph.py)
    "hash"[hashedline] plain set of all hashes (no score)
    "hashedline"[paste name] set associating the paste to its hashed line
    "paste name"[first line] first 80 characters of line 1, cleaned

    Lines shorter than minline are not hashed; the first-line entry is
    written regardless of the line length.

    """
    if insert_type not in (0, 1, 2):
        return

    if len(line) >= minline:
        # Hash once; the digest is used as member and as key below.
        hashed = select_hash(hashkind, line)

        if insert_type == 2:    # ALLIN
            pipe.zincrby("hash", hashed, 1)
            pipe.sadd(hashed, filename.split('/', 20)[-1])
            pipe.sadd("L:" + hashed, clean(line))

        elif insert_type == 1:  # SORTED SET FOR TOP100.py
            pipe.zincrby("hash", hashed, 1)
            pipe.sadd(hashed, clean(line))

        else:                   # SET FOR THE GRAPH
            pipe.sadd("hash", hashed)
            pipe.sadd(hashed, filename.split('/', 20)[-1])

    if num == 1 and insert_type != 1:
        pipe.sadd(filename.split('/', 20)[-1], clean(line[0:80]))
|
||||
|
||||
|
||||
|
||||
|
||||
def remove_pure_doppelganger(r_serv, nb):
    """Remove pastes whose content duplicates an earlier one.

    :param r_serv: -- Redis connection
    :param nb: -- (int) maximum number of files to pop from "filelist"

    Up to nb paths are popped from the redis list "filelist". The md5 of
    each whole file is compared with the digests already seen during this
    call: a file with a known digest is deleted from disk, otherwise its
    digest is remembered. Processing stops early when the list is empty.

    """
    seen = set()

    for _ in range(nb):
        filename = r_serv.lpop("filelist")

        # lpop gives None once the list is exhausted.
        if filename is None:
            break

        with open(filename, 'rb') as L:
            hashline = hashlib.md5(L.read()).hexdigest()

        print(len(seen))

        # The file is closed before it is removed.
        if hashline in seen:
            os.remove(filename)
            publisher.debug("{0} removed".format(filename))
            print("{0} removed".format(filename))
        else:
            seen.add(hashline)
|
|
@ -15,32 +15,6 @@ from datetime import date, timedelta
|
|||
from dateutil.rrule import rrule, DAILY
|
||||
|
||||
|
||||
|
||||
def create_graph_by_day_datastruct(r_serv, r_key, year, month):
    """Aggregate the per-file scores of a category into per-day scores.

    :param r_serv: -- Redis connection
    :param r_key: -- (str) The name of the key read in redis (often the name
        of the keywords category list)
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process

    The sorted set "<r_key>_by_day" first gets one member per day of the
    month (formatted YYYYMMDD) with score 0. Then, for each member of
    "<r_key>_occur", characters [-22:-12] of the member with '/' removed are
    used as the day and that day's score is incremented by the member's
    score.

    """
    by_day_key = r_key + '_by_day'
    last_day = cal.monthrange(year, month)[1]

    for day in range(1, last_day + 1):
        r_serv.zadd(by_day_key, 0, date(year, month, day).strftime("%Y%m%d"))

    for Tfilename in r_serv.zrange(r_key + '_occur', 0, -1, withscores = True):
        r_serv.zincrby(by_day_key,
                       Tfilename[0][-22:-12].replace('/', ''),
                       Tfilename[1])
|
||||
|
||||
|
||||
|
||||
|
||||
def is_luhn_valid(card_number):
|
||||
"""Apply the Luhn algorithm to validate credit card.
|
||||
|
||||
|
@ -156,155 +130,3 @@ def checking_A_record(r_serv, domains_set):
|
|||
|
||||
publisher.debug("URLs before: {0} after: {1} (valid)".format(num, score))
|
||||
return (num, WalidA)
|
||||
|
||||
|
||||
|
||||
|
||||
def refining_regex_dataset(r_serv, r_key, regex, min_match, year, month, luhn = True, dnscheck = True):
    """Refine the "raw dataset" of pastes with a regular expression.

    :param r_serv: -- Redis connection
    :param r_key: -- (str) The name of the key read in redis (often the name
        of the keywords category list)
    :param regex: -- Regular expression applied to every line of each paste
    :param min_match: -- (int) a paste with at most this many distinct
        matches is removed from r_key
    :param year: -- (integer) forwarded to create_graph_by_day_datastruct
    :param month: -- (integer) forwarded to create_graph_by_day_datastruct
    :param luhn: -- (bool) for "creditcard_categ", score by Luhn-valid cards
    :param dnscheck: -- (bool) for "mails_categ", score with
        checking_MX_record

    Each paste listed in the sorted set r_key is read line by line and the
    distinct non-empty matches shorter than 100 characters are collected.
    Pastes with too few matches are dropped from r_key; the others get a
    score in "<r_key>_occur":

    - "creditcard_categ" with luhn: +1 per Luhn-valid match;
    - "mails_categ" with dnscheck: the value of checking_MX_record;
    - anything else: the number of distinct matches.

    The three cases are exclusive, so the Luhn-based score is no longer
    overwritten by the plain match count. Finally the per-day structure is
    rebuilt.

    """
    occur_key = r_key + '_occur'

    for filename in r_serv.zrange(r_key, 0, -1):
        matchs = set()

        with gzip.open(filename, 'rb') as F:
            for kword in F:
                for y in re.findall(regex, kword):
                    if y != '' and len(y) < 100:
                        matchs.add(y)

        if len(matchs) <= min_match:
            # Too few matches: treated as a false positive.
            r_serv.zrem(r_key, filename)
            publisher.debug("{0} deleted".format(filename))

        elif r_key == "creditcard_categ" and luhn:
            for card_number in matchs:
                if is_luhn_valid(card_number):
                    r_serv.zincrby(occur_key, filename, 1)
                    publisher.info("{1} is valid in the file {0}".format(filename, card_number))
                else:
                    publisher.debug("{0} card is invalid".format(card_number))

        elif r_key == "mails_categ" and dnscheck:
            r_serv.zadd(occur_key, checking_MX_record(r_serv, matchs), filename)

        else:
            # LUHN NOT TRIGGERED (Other Categs)
            r_serv.zadd(occur_key, len(matchs), filename)

    create_graph_by_day_datastruct(r_serv, r_key, year, month)
|
||||
|
||||
|
||||
|
||||
|
||||
def graph_categ_by_day(r_serv, filename, year, month, r_key):
    """Create a bar graph of a category's score for each day of a month.

    :param r_serv: -- Redis connection
    :param filename: -- (str) path of the figure, without the ".png" suffix
        which is appended here
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process
    :param r_key: -- (str) The name of the key read in redis (often the name
        of the keywords category list)

    Scores are read from the sorted set "<r_key>_by_day"; a day without a
    score is drawn as 0.

    """
    adate = []
    categ_num = []
    rcParams['figure.figsize'] = 15, 10

    last_day = cal.monthrange(year, month)[1]

    for day in range(1, last_day + 1):
        dt = date(year, month, day)
        adate.append(dt.strftime("%d"))
        # zscore gives None for a missing member; plot it as 0.
        categ_num.append(r_serv.zscore(r_key+'_by_day', dt.strftime("%Y%m%d")) or 0)

    n_groups = len(categ_num)
    adress_scores = tuple(categ_num)

    index = np.arange(n_groups)
    bar_width = 0.5
    opacity = 0.6

    plt.bar(index, adress_scores, bar_width,
            alpha = opacity,
            color = 'b',
            label = r_key)

    plt.plot(tuple(categ_num), 'r--')
    #plt.yscale('log')
    plt.xlabel('Days')
    plt.ylabel('Amount')
    plt.title('Occurence of '+r_key+' by day')
    plt.xticks(index + bar_width/2 , tuple(adate))

    plt.legend()
    plt.grid()

    plt.tight_layout()

    plt.savefig(filename+".png", dpi=None, facecolor='w', edgecolor='b',
                orientation='portrait', papertype=None, format="png",
                transparent=False, bbox_inches=None, pad_inches=0.1,
                frameon=True)

    publisher.info(filename+".png"+" saved!")
|
||||
|
||||
|
||||
|
||||
|
||||
def create_tld_list(url = "https://mxr.mozilla.org/mozilla-central/source/netwerk/dns/effective_tld_names.dat?raw=1"):
    """Download a tld list and return its entries.

    :param url: -- The url of the tld list.
    :return: -- list

    The content fetched from url is saved to the file "ICCANdomain" in the
    current directory, then read back line by line. Lines starting with
    "//" and empty lines are skipped; for the others every '*' is removed
    and the trailing newline is dropped.

    """
    domains = []
    htmlSource = urllib.urlopen(url).read()
    with open("ICCANdomain", 'wb') as F:
        F.write(htmlSource)

    with open("ICCANdomain", 'rb') as F:

        for line in F:
            if re.match(r"^\/\/|\n", line) is None:
                # Strip only the newline so a final line without one keeps
                # its last character.
                domains.append(re.sub(r'\*', '', line.rstrip('\n')))
            else:
                publisher.info("Comment line ignored.")

    return domains
|
||||
|
|
|
@ -1,103 +0,0 @@
|
|||
import redis
|
||||
import string
|
||||
|
||||
|
||||
def create_common_hash_file(r_serv, zmin, zmax, filename):
    """Write a ranking of the most common hashes to a text file.

    :param r_serv: -- connection to the redis database
    :param zmin: -- (int) offset of the first ranked element
    :param zmax: -- (int) number of elements wanted in the ranking
    :param filename: -- the pathname of the created file.

    The sorted set "hash" is read from the highest score down. One line is
    written per hash:
    hash:[hash]:[score]:[members of the set "L:<hash>"]

    """
    with open(filename, 'wb') as F:

        ranking = r_serv.zrevrangebyscore("hash", "+inf", "-inf", zmin, zmax, True)

        for h, num in ranking:
            clear_lines = list(r_serv.smembers('L:'+h))
            F.write("hash:{0}:{1}:{2}\n".format(h, num, clear_lines))
|
||||
|
||||
|
||||
|
||||
|
||||
def paste_searching(r_serv, filename, pastename, mincard, maxcard):
    """Report the hashes of a paste and the pastes sharing them.

    :param r_serv: -- connection to the redis database
    :param filename: -- the pathname of the created file.
    :param pastename: -- the name of the paste searched in redis.
    :param mincard: -- minimum number of pastes a hash must belong to.
    :param maxcard: -- maximum number of pastes a hash may belong to.

    Every member of the set "hash" is a key holding the names of the pastes
    containing that hash. A hash is kept when pastename is among them and
    their number lies within [mincard, maxcard]. The report lists the kept
    hashes, then every paste found in any kept hash.

    """
    similar = set()

    with open(filename, 'wb') as F:

        F.write("Paste: {0}\nOptions used:\nMincard: {1}\nMaxcard: {2}\n\nContaining Following Hash:\n".format(pastename,mincard,maxcard))

        for h in r_serv.smembers("hash"):

            # One fetch per hash serves the membership test, the
            # cardinality test and the union.
            members = r_serv.smembers(h)

            if pastename in members and mincard <= len(members) <= maxcard:
                F.write(h+'\n')
                similar.update(members)

        F.write("\nSimilar Files:\n")

        for n, s in enumerate(similar):
            F.write(str(n) + ': ' + s + '\n')
|
||||
|
||||
|
||||
|
||||
|
||||
def paste_searching2(r_serv, filename, pastename, mincard, maxcard):
    """Report the hashes of a paste and the pastes sharing them.

    (Variant for the layout where "hash" is a sorted set.)

    :param r_serv: -- connection to the redis database
    :param filename: -- the pathname of the created file.
    :param pastename: -- the name of the paste searched in redis.
    :param mincard: -- minimum number of pastes a hash must belong to.
    :param maxcard: -- maximum number of pastes a hash may belong to.

    Every member of the sorted set "hash" is a key holding the names of the
    pastes containing that hash. A hash is kept when pastename is among
    them and their number lies within [mincard, maxcard]. For each kept
    hash the report shows the hash, its score in "hash" and the members of
    "L:<hash>", then lists every paste found in any kept hash.

    """
    similar = set()

    with open(filename, 'wb') as F:

        F.write("Paste: {0}\nOptions used:\nMincard: {1}\nMaxcard: {2}\n\n###Containing Following Hash:### ###Occur### ###### Corresponding Line ######\n".format(pastename,mincard,maxcard))

        for h in r_serv.zrange("hash", 0, -1):

            # One fetch per hash serves the membership test, the
            # cardinality test and the union.
            members = r_serv.smembers(h)

            if pastename in members and mincard <= len(members) <= maxcard:
                F.write(h + ' -- ' + str(r_serv.zscore("hash",h)) + ' -- ' + str(list(r_serv.smembers('L:' + h))) + '\n')
                similar.update(members)

        F.write("\nSimilar Files:\n")

        for n, s in enumerate(similar):
            F.write(str(n) + ': ' + s + '\n')
|
|
@ -19,316 +19,30 @@ from dateutil.rrule import rrule, DAILY
|
|||
|
||||
from packages import *
|
||||
|
||||
def redis_words_ranking(pipe, r_serv, nb, minlength, maxlength):
|
||||
"""Looping function
|
||||
|
||||
:param pipe: -- Redis pipe.
|
||||
:param nb: -- (int) Number of pastes proceeded by function
|
||||
:param minlength: -- (int) passed to the next function
|
||||
:param maxlength: -- (int) passed to the next function
|
||||
def listdirectory(path):
|
||||
"""Path Traversing Function.
|
||||
|
||||
:param path: -- The absolute pathname to a directory.
|
||||
|
||||
This function is returning all the absolute path of the files contained in
|
||||
the argument directory.
|
||||
|
||||
"""
|
||||
try:
|
||||
for n in xrange(0,nb):
|
||||
fichier=[]
|
||||
for root, dirs, files in os.walk(path):
|
||||
|
||||
path = r_serv.lpop("filelist")
|
||||
for i in files:
|
||||
|
||||
if path != None:
|
||||
set_listof_pid(r_serv, path, sys.argv[0])
|
||||
fichier.append(os.path.join(root, i))
|
||||
|
||||
redis_zincr_words(pipe, path, minlength, maxlength)
|
||||
return fichier
|
||||
|
||||
update_listof_pid(r_serv)
|
||||
|
||||
r_serv.lpush("processed",path)
|
||||
|
||||
publisher.debug(path)
|
||||
else:
|
||||
publisher.debug("Empty list")
|
||||
break
|
||||
except (KeyboardInterrupt, SystemExit) as e:
|
||||
flush_list_of_pid(r_serv)
|
||||
publisher.debug("Pid list flushed")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def redis_zincr_words(pipe, filename, minlength, maxlength):
    """Count the words of a paste in a redis sorted set.

    :param pipe: -- Redis pipe
    :param filename: -- The path to the file.gz to process.
    :param minlength: -- (int) Minimum length of the words inserted
    :param maxlength: -- (int) Maximum length of the words inserted

    The paste is cleaned, split on punctuation and whitespace, and every
    word whose length is within [minlength, maxlength] has its score
    incremented by 1 in the sorted set named after characters [-22:-12] of
    filename with '/' removed:

    +------------+------------+-----------+
    | Keys       | Members    | Scores    |
    +============+============+===========+
    | 20131001   | word1      | 142       |
    +------------+------------+-----------+
    | ...        | word2      | 120       |
    +------------+------------+-----------+
    | 20131002   | ...        | ...       |
    +------------+------------+-----------+

    Words strictly longer than maxlength are not stored but are reported
    through the publisher. The pipe is executed once at the end.

    """
    tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+', gaps = True, discard_empty = True)
    day_key = filename[-22:-12].replace('/', '')

    with gzip.open(filename, 'rb') as F:

        blob = TextBlob(clean(F.read()), tokenizer = tokenizer)

    for word in blob.tokens:
        length = len(word)

        if minlength <= length <= maxlength:
            pipe.zincrby(day_key, word, 1)

        # Strictly greater: a word of exactly maxlength was stored above
        # and is not "bigger".
        if length > maxlength:
            publisher.info("word bigger than {0} detected at {1}".format(maxlength, filename))
            publisher.info(word)

    pipe.execute()
|
||||
|
||||
|
||||
|
||||
|
||||
def classify_token_paste(r_serv, listname, choicedatastruct, nb, r_set):
    """Score pastes against keyword categories.

    :param r_serv: -- Redis connection
    :param listname: -- (str) path to the file listing, one per line, the
        paths of the category keyword files
    :param choicedatastruct: -- (bool) selects the redis layout (see below)
    :param nb: -- (int) maximum number of pastes processed by this call
    :param r_set: -- (str) name of the redis list the paste paths are
        popped from

    With choicedatastruct false, one sorted set per category:

    +---------------+------------+-----------+
    | Keys          | Members    | Scores    |
    +===============+============+===========+
    | mails_categ   | filename   | 25000     |
    +---------------+------------+-----------+
    | ...           | filename2  | 2400      |
    +---------------+------------+-----------+
    | web_categ     | ...        | ...       |
    +---------------+------------+-----------+

    With choicedatastruct true, one sorted set per paste:

    +--------------+-------------+-----------+
    | Keys         | Members     | Scores    |
    +==============+=============+===========+
    | filename     | mails_categ | 100000    |
    +--------------+-------------+-----------+
    | ...          | web_categ   | 24050     |
    +--------------+-------------+-----------+
    | filename2    | ...         | ...       |
    +--------------+-------------+-----------+

    Each paste is cleaned, split on punctuation and whitespace and
    lower-cased once; every token found in a category's keyword file adds 1
    to the matching score. The category name is the last path component of
    the keyword file. Processing stops when the list is empty. On
    KeyboardInterrupt or SystemExit the pid bookkeeping is flushed and the
    function returns.

    """
    tokenizer = RegexpTokenizer('[\&\~\:\;\,\.\(\)\{\}\|\[\]\\\\/\-/\=\'\"\%\$\?\@\+\#\_\^\<\>\!\*\n\r\t\s]+', gaps = True, discard_empty = True)

    try:
        for _ in range(nb):
            filename = r_serv.lpop(r_set)

            if filename is None:
                publisher.debug("Empty list")
                #r_serv.save()
                break

            set_listof_pid(r_serv, filename, sys.argv[0])

            # Tokenize the paste once; it does not depend on the category.
            with gzip.open(filename, 'rb') as F:
                blob = TextBlob(clean(F.read()),
                                tokenizer = tokenizer)
            words = blob.tokens.lower()

            with open(listname, 'rb') as L:
                # for each "categ" listed in the file
                for fname in L:
                    categ_name = fname.split('/')[-1][:-1]

                    # keywords of this category (last character, the
                    # newline, dropped from each line)
                    with open(fname[:-1], 'rb') as LS:
                        keywords = set(kword[:-1] for kword in LS)

                    # for each paste token
                    for word in words:
                        if word not in keywords:
                            continue

                        # choosing between two data structures.
                        if choicedatastruct:
                            r_serv.zincrby(filename, categ_name, 1)
                        else:
                            r_serv.zincrby(categ_name, filename, 1)

            update_listof_pid(r_serv)

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
|
||||
|
||||
|
||||
|
||||
|
||||
def dectect_longlines(r_serv, r_key, store = False, maxlength = 500):
    """Tag the pastes containing long lines.

    :param r_serv: -- The redis connection
    :param r_key: -- (str) name of the redis list the paste paths are
        popped from
    :param store: -- (bool) Store the line numbers or not.
    :param maxlength: -- The limit between "short lines" and "long lines"

    :returns: False once the list is empty; None when interrupted.

    Paste paths are popped from r_key until the list is empty. A paste with
    at least one line of length >= maxlength is pushed once onto the redis
    list "longlines"; when store is true the 0-based number of each long
    line is also added to the set named after the paste. On
    KeyboardInterrupt or SystemExit the pid bookkeeping is flushed.

    """
    try:
        while True:
            #r_key_list (categ)
            filename = r_serv.lpop(r_key)

            if filename is None:
                publisher.debug("Empty list")
                return False

            set_listof_pid(r_serv, filename, sys.argv[0])

            # for each pastes
            with gzip.open(filename, 'rb') as F:
                tagged = False

                for num, line in enumerate(F):

                    if len(line) < maxlength:
                        continue

                    #publisher.debug("Longline:{0}".format(line))
                    if not tagged:
                        r_serv.rpush("longlines", filename)
                        tagged = True

                    if store:
                        r_serv.sadd(filename, num)
                    else:
                        publisher.debug("Line numbers of longlines not stored")

            update_listof_pid(r_serv)

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
|
||||
|
||||
|
||||
|
||||
|
||||
# NOT USED RIGHT NOW #
|
||||
def recovering_longlines(r_serv, nb=None):
    """Walk the long lines recorded for the pastes in "longlines".

    :param r_serv: -- The redis connection
    :param nb: -- (int) maximum number of pastes to pop; None (the default)
        processes the list until it is empty. The name was previously read
        without being defined, which made every call fail.

    For each paste popped from the redis list "longlines", the line numbers
    stored in the set named after the paste are located in the file. No
    treatment is applied to them yet. When the list is empty the redis
    dataset is saved. On KeyboardInterrupt or SystemExit the pid
    bookkeeping is flushed.

    """
    try:
        processed = 0

        while nb is None or processed < nb:
            filename = r_serv.lpop("longlines")

            if filename is None:
                publisher.debug("Empty list")
                r_serv.save()
                break

            processed += 1

            # Line numbers recorded for this paste (longline's line number)
            wanted = set(int(numline) for numline in r_serv.smembers(filename))

            if not wanted:
                continue

            # One pass over the file instead of one pass per line number.
            with gzip.open(filename, 'rb') as F:

                for num, line in enumerate(F):
                    #When corresponding.
                    if num in wanted:
                        pass
                        # TREATMENT

    except (KeyboardInterrupt, SystemExit) as e:
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
|
||||
|
||||
|
||||
|
||||
|
||||
def remove_longline_from_categ(r_serv, r_key, delete, store, maxlength):
    """Remove from a sorted set the files which contain long lines.

    :param r_serv: -- The redis connexion database
    :param r_key: -- (str) The key name in redis (a sorted set of filenames)
    :param delete: -- (bool) If true, also delete the redis key named after
        each removed file (the set of line numbers created with store).
    :param store: -- (bool) Store the line numbers or not.
    :param maxlength: -- The limit between "short lines" and "long lines"

    The members of r_key are copied into the work list r_key + "_list", which
    is handed to dectect_longlines. Every filename found afterwards in the
    "longlines" list is removed from r_key.

    """
    publisher.info("Number of file before:{0}".format(r_serv.zcard(r_key)))

    # Create a list of file to proceed (1)
    worklist = r_key + "_list"
    for filename in r_serv.zrange(r_key, 0, -1):
        r_serv.rpush(worklist, filename)

    # detecting longlines in pastes
    dectect_longlines(r_serv, worklist, store, maxlength)

    # remove false positive members
    while True:
        fp_filename = r_serv.lpop("longlines")
        if fp_filename is None:
            break

        # The file is removed from the r_key zset in every case ...
        r_serv.zrem(r_key, fp_filename)

        # ... and, if wanted, the set with its line numbers goes too.
        if delete:
            r_serv.delete(fp_filename)

    publisher.info("Longline file removed from {0}, {1} Files remaining".format(r_key, r_serv.zcard(r_key)))
|
||||
|
||||
|
||||
|
||||
|
||||
def detect_longline_from_list(r_serv, nb):
    """Run the long-line detection on the "filelist" redis list.

    :param r_serv: -- The redis connexion database
    :param nb: -- (int) maximum number of detection rounds

    dectect_longlines is called (with line-number storing enabled) at most
    nb times; the first falsy result stops the rounds. On KeyboardInterrupt
    or SystemExit the pid list is flushed.

    """
    try:
        rounds_left = nb
        while rounds_left > 0:
            keep_going = dectect_longlines(r_serv, "filelist", True)
            if not keep_going:
                break
            rounds_left -= 1

    except (KeyboardInterrupt, SystemExit):
        flush_list_of_pid(r_serv)
        publisher.debug("Pid list flushed")
|
||||
def clean(dirty):
    """Return *dirty* with every character outside string.printable removed.

    :param dirty: -- (str) the string to filter

    """
    return ''.join(char for char in dirty if char in string.printable)
|
||||
|
||||
|
||||
|
||||
|
@ -369,182 +83,6 @@ def create_dirfile(r_serv, directory, overwrite):
|
|||
|
||||
|
||||
|
||||
def redis_interbargraph_set(r_serv, year, month, overwrite):
    """Create a Redis sorted set.

    :param r_serv: -- connexion to redis database
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process
    :param overwrite: -- (bool) trigger the overwrite mode

    This function create inside redis the intersection of all days in
    a month two by two.
    Example:
    For a month of 31days it will create 30 sorted set between day and
    day+1 until the last day.
    Each intersection is stored under the key "<day><day+1>" (both formatted
    as YYYYMMDD) and registered in the "InterSet" sorted set with score 1.
    The overwrite mode delete the intersets and re-create them; without it,
    an intersection which already holds members is left untouched and a
    warning is logged.

    """
    first_day = date(year, month, 1)
    last_day = date(year, month, cal.monthrange(year, month)[1])

    if overwrite:
        r_serv.delete("InterSet")

    # Stop one day before the end: the last day has no "day after" in the month.
    for dt in rrule(DAILY, dtstart=first_day, until=last_day - timedelta(1)):
        day = dt.strftime("%Y%m%d")
        dayafter = (dt + timedelta(1)).strftime("%Y%m%d")
        inter_key = day + dayafter

        if overwrite:
            r_serv.delete(inter_key)
        elif r_serv.zcard(inter_key) != 0:
            publisher.warning("Data already exist, operation aborted.")
            continue

        # Weighted intersection: weight 1 for the day, -1 for the day after.
        r_serv.zinterstore(inter_key, {day: 1, dayafter: -1})
        r_serv.zadd("InterSet", 1, inter_key)

        # Only the non-overwrite mode reported each creation originally.
        if not overwrite:
            publisher.info(inter_key + " Intersection Created")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
def word_bar_graph(r_serv, year, month, filename):
    """Create an histogram.

    :param r_serv: -- connexion to redis database
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process
    :param filename: -- The absolute path where to save the figure.png
        (".png" is appended to it)

    This function use matplotlib to create an histogram.
    The redis database need obviously to be populated first
    with functions: redis_words_ranking and redis_interbargraph_set.

    The intersection cardinalities are read from the keys
    "<day><day+1>" of the requested month rather than from the first members
    of the global "InterSet" set, so data of other months stored in the same
    database can no longer be plotted by mistake or break the bar count.

    """
    rcParams['figure.figsize'] = 15, 10

    first_day = date(year, month, 1)
    last_day = date(year, month, cal.monthrange(year, month)[1])

    lw = []
    adate = []
    # The first day of the month has no previous day to intersect with.
    inter = [0]

    previous = None
    for dt in rrule(DAILY, dtstart=first_day, until=last_day):
        day = dt.strftime("%Y%m%d")
        lw.append(r_serv.zcard(day))
        adate.append(dt.strftime("%d"))
        if previous is not None:
            # zcard of a missing key is 0, so unpopulated days plot as empty.
            inter.append(r_serv.zcard(previous + day))
        previous = day

    index = np.arange(len(lw))
    bar_width = 0.5
    opacity = 0.6

    plt.bar(index, tuple(lw), bar_width,
            alpha=opacity,
            color='g',
            label='Words/day')

    # Shifted half a unit to the left: drawn between a day and the previous one.
    plt.bar(index - 0.5, tuple(inter), bar_width,
            alpha=opacity,
            color='r',
            label='Intersection')

    plt.plot(tuple(inter), 'b--')
    plt.xlabel(str(year)+'/'+str(month)+' Days')
    plt.ylabel('Words')
    plt.title('Words Cardinality & Intersection Histogram')
    plt.xticks(index + bar_width/2, tuple(adate))

    plt.legend()
    plt.grid()

    plt.tight_layout()

    plt.savefig(filename+".png", dpi=None, facecolor='w', edgecolor='b',
                orientation='portrait', papertype=None, format="png",
                transparent=False, bbox_inches=None, pad_inches=0.1,
                frameon=True)

    publisher.info(filename+".png"+" saved!")
|
||||
|
||||
|
||||
|
||||
|
||||
def create_data_words_curve(r_serv, r_serv2, year, month, filename):
    """Create a Redis hashes.

    :param r_serv: -- connexion to redis database (read)
    :param r_serv2: -- connexion to redis database (write)
    :param year: -- (integer) The year to process
    :param month: -- (integer) The month to process
    :param filename: -- the path to the file which contain a list of words.


    The hashes of redis is created as follow:

    +------------+------------+-----------+
    | Keys       | Field      | Values    |
    +============+============+===========+
    | word1      | 20131001   | 150       |
    +------------+------------+-----------+
    | ...        | 20131002   | 145       |
    +------------+------------+-----------+
    | word2      | ...        | ...       |
    +------------+------------+-----------+

    The filename need to be a list of words, one per line. A final empty
    line is no longer required: the line ending is stripped instead of
    blindly dropping the last character, and blank lines are skipped.
    This function create datas which is used by the function
    create_curve_with_word_file which create a csv file.

    """
    first_day = date(year, month, 1)
    last_day = date(year, month, cal.monthrange(year, month)[1])

    # The days of the month do not depend on the word: compute them once.
    days = [dt.strftime("%Y%m%d")
            for dt in rrule(DAILY, dtstart=first_day, until=last_day)]

    with open(filename, 'rb') as F:
        for line in F:
            word = line.rstrip()
            if not word:
                continue

            for day in days:
                score = r_serv.zscore(day, word)
                if score is not None:
                    # TODO: optionally check whether the field already exists
                    # and log a WARNING.
                    r_serv2.hmset(word, {day: score})
|
||||
|
||||
|
||||
|
||||
|
||||
def create_curve_with_word_file(r_serv, csvfilename, feederfilename, year, month):
|
||||
"""Create a csv file used with dygraph.
|
||||
|
||||
|
|
|
@ -1,56 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_words import *
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and draw the words-per-day histogram.

    Connects to the local Redis server (localhost:6379) on the database
    selected with -db and hands over to word_bar_graph.
    """
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Analysis Information Leak
        framework. It create an histogram which display the occurency
        of the words per day but also the intersection of day and day-1 of these
        occurencies''',
        epilog='''The Redis database need to be populated by the script
        Wordsranking_Populate.py before using this one.''')

    parser.add_argument('-db', type=int, default=0, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB (default 0)')
    parser.add_argument('y', type=int, metavar="year",
                        help='The year processed.')
    parser.add_argument('m', type=int, metavar="month",
                        help='The month processed.')
    parser.add_argument('-f', type=str, metavar="filename", default="figure",
                        help='The absolute path name of the "figure.png"')

    args = parser.parse_args()

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)

    word_bar_graph(r, args.y, args.m, args.f)


if __name__ == "__main__":
    main()
|
|
@ -1,64 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_refine import *
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and draw the per-day histogram of a category.

    Connects to the local Redis server (localhost:6379) on the database
    selected with -db and hands over to graph_categ_by_day.
    """
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Analysis Information Leak
        framework. It create an histogram which display the occurency
        of the word category per days.''',
        epilog='''The Redis database need to be populated by the script
        Classify_Paste_Token.py before.
        It's also usefull to launch Remove_longline_fp.py and Refine_with_regex.py
        to create a more accurate histogram.
        example: ./Bargraph_categ_by_day.py 2013 12 mails_categ''')

    parser.add_argument('-db', type=int, default=0, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB (default 0)')
    parser.add_argument('-f', type=str, metavar="filename", default="figure",
                        help='The absolute path name of the "figure.png"')
    parser.add_argument('y', type=int, metavar="year",
                        help='The year processed')
    parser.add_argument('m', type=int, metavar="month",
                        help='The month processed')
    parser.add_argument('key', type=str,
                        help='name of the key to process in redis (the word_categ concerned)')

    args = parser.parse_args()

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)

    graph_categ_by_day(r, args.f, args.y, args.m, args.key)


if __name__ == "__main__":
    main()
|
|
@ -1,61 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_words import *
|
||||
from packages.imported import *
|
||||
from pubsublogger import publisher
|
||||
|
||||
def main():
    """Parse the command line and classify the tokens of the queued pastes.

    Connects to the local Redis server (localhost:6379) on the database
    selected with -db, sets the publisher channel and hands over to
    classify_token_paste.
    """
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Analysis Information Leak
        framework. It create sets in redis as much as category
        defined in the file given by the argument -l ''',
        epilog='''Example : seq 5000 | parallel -n0 -j 10
        ./classify_Paste_Token.py -nbp 200''')

    parser.add_argument('-l', type=str, default="../files/list_categ_files",
                        help='Path to the list_categ_files (../files/list_categ_files)')
    parser.add_argument('-db', type=int, default=0, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB (default 0)')
    parser.add_argument('-s', action='store_true',
                        help='Datastruct type, swapping between keys & members')
    parser.add_argument('-nbp', type=int, default=200,
                        help='Nbpaste')
    parser.add_argument('-set', type=str, default='filelist',
                        help='The name of the list in redis which contain the filename to tokenise')

    args = parser.parse_args()

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)

    publisher.channel = "youpi"

    classify_token_paste(r, args.l, args.s, args.nbp, args.set)


if __name__ == "__main__":
    main()
|
|
@ -1,46 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_words import *
|
||||
from packages.imported import *
|
||||
from pubsublogger import publisher
|
||||
|
||||
def main():
    """Parse the command line and display the monitored process list.

    Connects to the local Redis server (localhost:6379) on the database
    selected with -db, sets the publisher channel and hands over to
    display_listof_pid.
    """
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Analysis Information Leak
        framework. It's here to monitor some script which take time
        and lauched in parallel, You can display which process is running on which
        paste and how much time it spent processing it''',
        epilog='example : ./Display_pid -p pid -db 1 -d remain')

    parser.add_argument('-d', type=str, default='all',
                        choices=['paste', 'up', 'start', 'kb', 'all', 'pid', 'prg', 'remain', 'processed'],
                        help='Which info to display ?')
    parser.add_argument('-db', type=int, default=0, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB (default 0)')

    args = parser.parse_args()

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)

    publisher.channel = "youpi"

    display_listof_pid(r, args.d)


if __name__ == "__main__":
    main()
|
|
@ -1,65 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_gephi import *
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and export the pastes as a .gexf graph.

    The Redis connection is given both host/port and the unix socket path
    /tmp/redis.sock, exactly as before. The graph is written to
    "<path>.gexf" by Gephi_Graph and a confirmation is printed.
    """
    description = '''This script is a part of the Analysis Information
    Leak framework. It create a gephi graph to have a global
    view of the pastes but also which one are similar.'''
    epilog = '''The Redis database need to be populated by the script
    Populate.py before using this one.'''
    parser = argparse.ArgumentParser(description=description, epilog=epilog)

    parser.add_argument(
        '-t', action='store', type=int, default=0, choices=[0, 2],
        help='Type of the Redis population (Same arg than in Populate.py)')
    parser.add_argument(
        '-db', action='store', type=int, default=0, choices=[0, 1, 2, 3, 4],
        help='The name of the Redis DB')
    parser.add_argument(
        '-min', action='store', type=int, default=3,
        help='minimum linked nodes (default 3)')
    parser.add_argument(
        '-max', action='store', type=int, default=50,
        help='maximum linked nodes created (execute top.py before for more info)')
    parser.add_argument(
        '-p', action='store', type=str, default='../graph/out', metavar='path',
        help="pathname of the graph file created. ex: /home/graph")

    options = parser.parse_args()

    connection = redis.StrictRedis(
        host='localhost',
        port=6379,
        db=options.db,
        unix_socket_path='/tmp/redis.sock')

    Gephi_Graph(connection, options.p+".gexf", options.min, options.max, options.t)
    cprint("GRAPH CREATED AT:{0}.gexf".format(options.p), "green")


if __name__ == "__main__":
    main()
|
||||
|
||||
#OK
|
|
@ -1,52 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_words import *
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and build the day-to-day intersections.

    Connects to the local Redis server (localhost:6379) on the database
    selected with -db and hands over to redis_interbargraph_set.
    """
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Analysis Information
        Leak framework. It create in redis the intersection
        between all the days two by two of the date given in argument.''',
        epilog='''The Redis database need to be populated by the script
        Wordsranking_Populate.py before using this one.''')

    parser.add_argument('-db', type=int, default=0, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB (default 0)')
    parser.add_argument('y', type=int, metavar="year",
                        help='The year')
    parser.add_argument('m', type=int, metavar="month",
                        help='The month')
    parser.add_argument('-ow', action='store_true',
                        help='trigger the overwritting mode')

    args = parser.parse_args()

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)

    redis_interbargraph_set(r, args.y, args.m, args.ow)


if __name__ == "__main__":
    main()
|
|
@ -1,75 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_redis_insert import *
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and populate Redis with the hashed lines.

    A pipeline is created on the local Redis server (localhost:6379,
    database selected with -db) and passed to redis_populate together with
    the listing of the input folder.
    """
    description = '''This script is a part of the Analysis Information
    Leak framework. Is Populate the Redis database with
    the pastes names and theirs hashed line'''
    epilog = '''This script need to be run first in order to use the others:
    Graph.py, Search.py, Top.py ...'''
    parser = argparse.ArgumentParser(description=description, epilog=epilog)

    parser.add_argument(
        'input', action='store', type=str, metavar='pathfolder',
        help='Input folder')
    parser.add_argument(
        '-t', action='store', type=int, default=0, choices=[0, 1, 2],
        help='type of population wanted 0 = set 1 = zset 2 = mix')
    parser.add_argument(
        '-db', action='store', type=int, default=0, choices=[0, 1, 2, 3, 4],
        help='The name of the Redis DB (default 0)')
    parser.add_argument(
        '-H', action='store', type=str, default='md5', metavar='hash',
        choices=["md5", "sha1", "crc", "murmur"],
        help='The hash method (default md5)')
    parser.add_argument(
        '-jmp', action='store', type=int, default=10, metavar='jump',
        help='''Jumping line factor. 1 = All the line are taken. X = jump X line
        (default 10)''')
    parser.add_argument(
        '-ml', action='store', type=int, default=1, metavar='minlnline',
        help='''Length line factor. 1 = All the line are taken.
        X = each line >= X char (default 1)''')

    options = parser.parse_args()

    connection = redis.StrictRedis(host='localhost', port=6379, db=options.db)
    pipeline = connection.pipeline()

    pastes = listdirectory(options.input)
    redis_populate(pipeline, pastes, options.ml, options.H, options.jmp, options.t)


if __name__ == "__main__":
    main()
|
||||
|
||||
#OK
|
|
@ -1,78 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_refine import *
|
||||
from packages.imported import *
|
||||
from pubsublogger import publisher
|
||||
|
||||
def main():
    """Parse the command line and refine a category set with a regex.

    Connects to the local Redis server (localhost:6379) on the database
    selected with -db, sets the publisher channel and hands over to
    refining_regex_dataset with the pattern chosen by -regex.

    'url' is still accepted by the parser for compatibility, but no pattern
    was ever defined for it: it now ends with a usage error instead of an
    UnboundLocalError.
    """
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Analysis Information
        Leak framework. Is refining a redis set by
        re analysing set with regex and changing the score by the number of
        regex matching''',
        epilog='''example of use: ./Refine_with_regex.py 2013 12 -regex mail
        -key mails_categ''')

    parser.add_argument('-db', type=int, default=0, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB (default 0)')
    parser.add_argument('-nbm', type=int, default=1,
                        help='Minimum matching regex occurence per file to keep in redis (1)')
    parser.add_argument('-regex', type=str, default='mail',
                        choices=['mail', 'card', 'url', 'bitcoin'],
                        help='Which regex wanted to be use to match')
    parser.add_argument('-key', type=str, default="mails_categ",
                        help='Name of the key to process in redis (same name than the wordlist concerned)')
    parser.add_argument('y', type=int, metavar="year",
                        help='The year processed')
    parser.add_argument('m', type=int, metavar="month",
                        help='The month processed')

    args = parser.parse_args()

    # Raw strings: same patterns as before, without relying on "\." being
    # left alone as an unknown escape sequence.
    regexes = {
        'mail': r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}",
        'card': r"4[0-9]{12}(?:[0-9]{3})?",
        'bitcoin': r"[13][1-9A-HJ-NP-Za-km-z]{26,33}",
    }
    if args.regex not in regexes:
        parser.error("no regex is defined for '{0}'".format(args.regex))
    regex = regexes[args.regex]

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)

    publisher.channel = "youpi"

    refining_regex_dataset(r, args.key, regex, args.nbm, args.y, args.m)


if __name__ == "__main__":
    main()
|
|
@ -1,44 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_redis_insert import *
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and remove the exact duplicate files.

    Connects to the local Redis server (localhost:6379) on the database
    selected with -db and hands over to remove_pure_doppelganger.
    """
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Analysis Information
        Leak framework. It Add to a temporary list the hash
        of wholes files and compare the new hash to the element of this
        list. If the hash is already inside, the file is deleted
        otherwise the hash is added in the list.''',
        epilog='''This script need Redis to be populated before by
        ./Dir.py''')

    parser.add_argument('-db', type=int, default=0, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB (default 0)')
    parser.add_argument('-nbp', type=int, default=200,
                        help='nbpaste')

    args = parser.parse_args()

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)

    remove_pure_doppelganger(r, args.nbp)


if __name__ == "__main__":
    main()
|
|
@ -1,57 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_words import *
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and run the long-line detection.

    Connects to the local Redis server (localhost:6379) on the database
    selected with -db and calls detect_longline_from_list.
    """
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Analysis Information Leak
        framework. It removes the line which are in redis under
        the "key" name argument''',
        epilog='''This script is usually usefull launched after using
        ./Classify_Paste_Token.py example: ./Remove_longline_fp.py mails_categ''')

    parser.add_argument('-db', type=int, default=0, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB (default 0)')
    parser.add_argument('key', type=str,
                        help='Name of the key to process in redis ("")')
    parser.add_argument('-d', action='store_true',
                        help='Delete the set of longline created?')
    parser.add_argument('-s', action='store_true',
                        help='Store the longline numbers inside a set?')
    parser.add_argument('-max', type=int, default=500,
                        help='The limit between "short lines" and "long lines" (500)')

    args = parser.parse_args()

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)

    # NOTE(review): key, -d and -s are parsed but unused, and -max is passed
    # as the number of rounds rather than as a line length. The call matching
    # the help texts was disabled in the original and is kept for reference:
    # remove_longline_from_categ(r, args.key, args.d, args.s, args.max)
    detect_longline_from_list(r, args.max)


if __name__ == "__main__":
    main()
|
|
@ -1,72 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_search import *
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and search the pastes linked to a given one.

    The Redis connection is given both host/port and the unix socket path
    /tmp/redis.sock. The result is written to "<path><pastename>.txt" by
    paste_searching (-t 0) or paste_searching2 (-t 2) and a confirmation is
    printed.
    """
    parser = argparse.ArgumentParser(
        description='Analysis Information Leak framework',
        epilog='MSc Student Internship')

    parser.add_argument('-db', default=0, type=int, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB')
    parser.add_argument('name', type=str, metavar='pastename',
                        help='The name of the paste')
    parser.add_argument('-min', type=int, default=3,
                        help='minimum linked hashs (default 3)')
    parser.add_argument('-max', type=int, default=50,
                        help='maximum linked hash (execute top.py to be more aware)')
    parser.add_argument('-p', type=str, default='../graph/Search_',
                        metavar='path',
                        help="pathname of the file created.")
    parser.add_argument('-t', type=int, default=0, choices=[0, 2],
                        help='Type of the Redis population (Same arg than in Populate.py)')

    args = parser.parse_args()

    r = redis.StrictRedis(
        host='localhost',
        port=6379,
        db=args.db,
        unix_socket_path='/tmp/redis.sock')

    # -t is restricted to 0 or 2 by the parser, so exactly one search runs.
    search = paste_searching2 if args.t == 2 else paste_searching
    search(r, args.p+args.name+".txt", args.name, args.min, args.max)
    cprint("GRAPH CREATED AT:{0}{1}.txt".format(args.p, args.name), "green")


if __name__ == "__main__":
    main()
|
|
@ -1,58 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_search import Create_Common_Hash_File
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and write the list of the most common hashes.

    Connects to the local Redis server (localhost:6379) on the database
    selected with -db; the list is written to "<path><top>.top".
    """
    parser = argparse.ArgumentParser(
        description='''This script is a part of the Analysis Information Leak
        framework. It create a text file with the top common hash
        which are in the redis database''',
        epilog='''The Redis database need to be populated by the script
        Populate.py before using this one.''')

    parser.add_argument('-db', default=0, type=int, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB')
    parser.add_argument('-off', default=1, type=int, metavar='offset',
                        help='Starting point of the toplist')
    parser.add_argument('-top', default=100, type=int, metavar='100',
                        help='How many occurence? top 10-50-100 ?')
    parser.add_argument('-p', type=str, default='../graph/top', metavar='path',
                        help="pathname of the file created ex: /home/top")

    args = parser.parse_args()

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)

    # The script imports Create_Common_Hash_File from packages.lib_search;
    # the former call used the misspelled name create_common_cash_file.
    Create_Common_Hash_File(r, args.off, args.top, args.p+str(args.top)+".top")
    cprint("LIST CREATED", "green")


if __name__ == "__main__":
    main()
|
||||
|
||||
#OK
|
|
@ -1,64 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_words import *
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and build the per-word daily score hashes.

    Two connections to the local Redis server (localhost:6379) are opened:
    one on the database read from (-db) and one on the database written to
    (-db1); both are handed to create_data_words_curve.
    """
    parser = argparse.ArgumentParser(
        description='Analysis Information Leak framework',
        epilog='Thats drawing')

    parser.add_argument('-db', type=int, default=0, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB To get the info (0)')
    parser.add_argument('-db1', type=int, default=1, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB To store (1)')
    parser.add_argument('f', type=str, metavar="file",
                        help='Words filename')
    parser.add_argument('y', type=int, metavar="year",
                        help='The year')
    parser.add_argument('m', type=int, metavar="month",
                        help='The month')

    args = parser.parse_args()

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)
    r2 = redis.StrictRedis(host='localhost', port=6379, db=args.db1)

    create_data_words_curve(r, r2, args.y, args.m, args.f)


if __name__ == "__main__":
    main()
|
|
@ -1,57 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_words import *
|
||||
from packages.imported import *
|
||||
|
||||
def main():
    """Parse the command line and create the csv file of the words curves.

    Connects to the local Redis server (localhost:6379) on the database
    selected with -db and hands over to create_curve_with_word_file.
    """
    parser = argparse.ArgumentParser(
        description='Analysis Information Leak framework',
        epilog='Thats drawing')

    parser.add_argument('-db', type=int, default=0, choices=[0, 1, 2, 3, 4],
                        help='The name of the Redis DB (default 0)')
    parser.add_argument('-cvs', type=str, metavar="filename",
                        default="wordstrendingdata",
                        help='The name of the cvs file wanted to be created')
    parser.add_argument('f', type=str,
                        help='The file with the list of words')
    parser.add_argument('y', type=int, metavar="year",
                        help='The year')
    parser.add_argument('m', type=int, metavar="month",
                        help='The month')

    args = parser.parse_args()

    r = redis.StrictRedis(host='localhost', port=6379, db=args.db)

    create_curve_with_word_file(r, args.cvs, args.f, args.y, args.m)


if __name__ == "__main__":
    main()
|
|
@ -1,54 +0,0 @@
|
|||
#!/usr/bin/python2.7
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
from packages.lib_words import *
|
||||
from packages.imported import *
|
||||
from pubsublogger import publisher
|
||||
|
||||
def main():
    """Command-line entry point for the words-ranking population script.

    Reads ``-nbp``, ``-db``, ``-min`` and ``-max`` from the command line,
    connects to the Redis server on localhost:6379 using the selected
    database, creates a pipeline on that connection, sets the publisher
    channel to "youpi" and calls ``redis_words_ranking`` with the pipeline,
    the connection and the three numeric options.
    """
    # NOTE(review): argparse, redis and redis_words_ranking are not imported
    # by name in this script; presumably they come from the star imports at
    # the top of the file -- confirm.
    arg_parser = argparse.ArgumentParser(
        description='Analysis Information Leak framework',
        epilog='example : seq 2 | parallel ./Wordsranking_Populate.py -nbp 20')

    arg_parser.add_argument('-nbp', type=int, default=200,
                            help='nbpaste', action='store')
    arg_parser.add_argument('-db', type=int, default=0,
                            help='The name of the Redis DB (default 0)',
                            choices=[0, 1, 2, 3, 4], action='store')
    arg_parser.add_argument('-min', type=int, default=4,
                            help='Minimum length of the inserted words (default 4)',
                            action='store')
    arg_parser.add_argument('-max', type=int, default=200,
                            help='Maximum length of the inserted words (default 200)',
                            action='store')

    options = arg_parser.parse_args()

    connection_settings = {'host': 'localhost', 'port': 6379, 'db': options.db}
    redis_conn = redis.StrictRedis(**connection_settings)

    # In redis-py the first positional argument of pipeline() is the
    # `transaction` flag, so this requests a non-transactional pipeline.
    redis_pipe = redis_conn.pipeline(False)

    publisher.channel = "youpi"

    redis_words_ranking(redis_pipe, redis_conn,
                        options.nbp, options.min, options.max)


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue