mirror of https://github.com/CIRCL/AIL-framework
79 lines
2.0 KiB
Python
79 lines
2.0 KiB
Python
|
#!/usr/bin/python2.7
|
||
|
# -*-coding:UTF-8 -*
|
||
|
|
||
|
from packages.lib_refine import *
|
||
|
from packages.imported import *
|
||
|
from pubsublogger import publisher
|
||
|
|
||
|
def main():
|
||
|
"""Main Function"""
|
||
|
|
||
|
parser = argparse.ArgumentParser(
|
||
|
description = '''This script is a part of the Analysis Information
|
||
|
Leak framework. Is refining a redis set by
|
||
|
re analysing set with regex and changing the score by the number of
|
||
|
regex matching''',
|
||
|
epilog = '''example of use: ./Refine_with_regex.py 2013 12 -regex mail
|
||
|
-key mails_categ''')
|
||
|
|
||
|
parser.add_argument('-db',
|
||
|
type = int,
|
||
|
default = 0,
|
||
|
help = 'The name of the Redis DB (default 0)',
|
||
|
choices=[0, 1, 2, 3, 4],
|
||
|
action = 'store')
|
||
|
|
||
|
parser.add_argument('-nbm',
|
||
|
type = int,
|
||
|
default = 1,
|
||
|
help = 'Minimum matching regex occurence per file to keep in redis (1)',
|
||
|
action = 'store')
|
||
|
|
||
|
parser.add_argument('-regex',
|
||
|
type = str,
|
||
|
default = 'mail',
|
||
|
choices=['mail', 'card', 'url', 'bitcoin'],
|
||
|
help = 'Which regex wanted to be use to match',
|
||
|
action = 'store')
|
||
|
|
||
|
parser.add_argument('-key',
|
||
|
type = str,
|
||
|
default = "mails_categ",
|
||
|
help = 'Name of the key to process in redis (same name than the wordlist concerned)',
|
||
|
action = 'store')
|
||
|
|
||
|
parser.add_argument('y',
|
||
|
type = int,
|
||
|
metavar = "year",
|
||
|
help = 'The year processed',
|
||
|
action = 'store')
|
||
|
|
||
|
parser.add_argument('m',
|
||
|
type = int,
|
||
|
metavar = "month",
|
||
|
help = 'The month processed',
|
||
|
action = 'store')
|
||
|
|
||
|
args = parser.parse_args()
|
||
|
|
||
|
if args.regex == 'mail':
|
||
|
regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
|
||
|
elif args.regex == 'card':
|
||
|
regex = "4[0-9]{12}(?:[0-9]{3})?"
|
||
|
elif args.regex == 'bitcoin':
|
||
|
regex = "[13][1-9A-HJ-NP-Za-km-z]{26,33}"
|
||
|
|
||
|
r = redis.StrictRedis(
|
||
|
host='localhost',
|
||
|
port=6379,
|
||
|
db=args.db)
|
||
|
|
||
|
p = r.pipeline(False)
|
||
|
|
||
|
publisher.channel = "youpi"
|
||
|
|
||
|
refining_regex_dataset(r, args.key, regex, args.nbm, args.y, args.m)
|
||
|
|
||
|
if __name__ == "__main__":
|
||
|
main()
|