mirror of https://github.com/CIRCL/AIL-framework
chg: [modules] create new modules repository + small fixs
parent
d4829273c5
commit
ed37232a2d
|
@ -78,7 +78,7 @@ function helptext {
|
|||
[-k | --killAll] Kill DB + Scripts
|
||||
[-ks | --killscript] Scripts
|
||||
[-u | --update] Update AIL
|
||||
[-ut | --thirdpartyUpdate] Update Web
|
||||
[-ut | --thirdpartyUpdate] Update UI/Frontend
|
||||
[-t | --test] Launch Tests
|
||||
[-rp | --resetPassword] Reset Password
|
||||
[-f | --launchFeeder] LAUNCH Pystemon feeder
|
||||
|
@ -180,7 +180,7 @@ function launching_scripts {
|
|||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "ApiKey" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ApiKey.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Web" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Web.py; read x"
|
||||
screen -S "Script_AIL" -X screen -t "Urls" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Urls.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Credential" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Credential.py; read x"
|
||||
sleep 0.1
|
||||
|
@ -210,8 +210,6 @@ function launching_scripts {
|
|||
#sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "Cve" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Cve.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "WebStats" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./WebStats.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModuleStats.py; read x"
|
||||
sleep 0.1
|
||||
screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SQLInjectionDetection.py; read x"
|
||||
|
|
|
@ -0,0 +1,86 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
"""
|
||||
The Urls Module
|
||||
============================
|
||||
|
||||
This module extracts URLs from an item and sends them to other modules.
|
||||
|
||||
"""
|
||||
|
||||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import redis
|
||||
import pprint
|
||||
import time
|
||||
import os
|
||||
from pyfaup.faup import Faup
|
||||
import re
|
||||
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from packages import lib_refine
|
||||
from Helper import Process
|
||||
|
||||
|
||||
class Urls(AbstractModule):
    """
    Urls module for AIL framework.

    Extracts every URL found in the content of an item and forwards each
    one, paired with the item id, to the 'Url' queue.
    """

    def __init__(self):
        """
        Init Urls: build the URL regex from the configured protocols file.
        """
        # regex_helper is used by this module but is not part of the file's
        # import block, so it is imported locally.
        # NOTE(review): sibling modules import it as `from lib import
        # regex_helper` -- confirm the same path resolves from this file.
        from lib import regex_helper

        super(Urls, self).__init__()

        # FUNCTIONS #
        self.faup = Faup()

        # Cache key handed to regex_helper.regex_findall() in compute()
        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)

        # Protocol file path
        protocolsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolsfile"))

        # Get all uri schemes from protocolsfile (Used for Curve).
        # strip() instead of dropping the last character: the final line may
        # have no trailing newline, and blank lines must not become an empty
        # alternative in the regex.
        with open(protocolsfile_path, 'r') as scheme_file:
            schemes = [line.strip() for line in scheme_file]
        uri_scheme = "|".join(scheme for scheme in schemes if scheme)

        # Raw strings avoid invalid-escape warnings; the resulting pattern
        # text is the same as the former single non-raw literal.
        ipv4_first_octet = r"(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])"
        ipv4_middle_octet = r"(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)"
        ipv4_last_octet = r"(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])"
        self.url_regex = (
            "((?i:" + uri_scheme + r")\://"
            # optional user[:password]@ prefix
            + r"(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*"
            # host: IPv4 address, localhost, or a domain name
            + r"(?:"
            + ipv4_first_octet + r"\."
            + ipv4_middle_octet + r"\."
            + ipv4_middle_octet + r"\."
            + ipv4_last_octet
            + r"|localhost|"
            + r"(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2})"
            + r")"
            # optional port
            + r"(?:\:[0-9]+)*"
            # optional path segments
            + r"(?:/(?:$|[a-zA-Z0-9\.\,\?'\\+&%\$#\=~_\-]+))*"
            + r")"
        )

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def compute(self, message):
        """
        Search for Web links from given message

        :param message: queue message made of two whitespace-separated
                        fields; the first is the item id, the second is
                        not used here.
        """
        from lib import regex_helper

        # Extract item
        item_id, _score = message.split()

        item = Item(item_id)

        l_urls = regex_helper.regex_findall(self.module_name, self.redis_cache_key,
                                            self.url_regex, item.get_id(), item.get_content())
        if l_urls:
            to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
            self.redis_logger.info(f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')

            for url in l_urls:
                # # TODO: FIXME handle .foundation .dev onion? i2p?

                to_send = f"{url} {item.get_id()}"
                self.send_message_to_queue(to_send, 'Url')
                self.redis_logger.debug(f"url_parsed: {to_send}")
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = Urls()
|
||||
module.run()
|
184
bin/Web.py
184
bin/Web.py
|
@ -1,184 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
"""
|
||||
The Web Module
|
||||
============================
|
||||
|
||||
This module tries to parse URLs and warns if some defined country codes are present.
|
||||
|
||||
"""
|
||||
|
||||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import redis
|
||||
import pprint
|
||||
import time
|
||||
import os
|
||||
import dns.exception
|
||||
from pyfaup.faup import Faup
|
||||
import re
|
||||
# Country and ASN lookup
|
||||
from cymru.ip2asn.dns import DNSClient as ip2asn
|
||||
import socket
|
||||
import pycountry
|
||||
import ipaddress
|
||||
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from packages import Paste
|
||||
from packages import lib_refine
|
||||
from Helper import Process
|
||||
|
||||
|
||||
class Web(AbstractModule):
    """
    Web module for AIL framework.

    Extracts URLs from an item, forwards them to the 'Url' queue, resolves
    each distinct host once per item and logs its ASN / country code.
    """

    # Used to prevent concat with empty fields due to url parsing
    def avoidNone(self, a_string):
        """Return an empty string when a_string is None, else a_string itself."""
        return "" if a_string is None else a_string

    def __init__(self):
        """
        Init Web: open the Redis cache connection, read the critical country
        code and build the URL regex from the configured protocols file.
        """
        super(Web, self).__init__(logger_channel='script:web')

        # REDIS Cache
        self.r_serv2 = redis.StrictRedis(
            host=self.process.config.get("Redis_Cache", "host"),
            port=self.process.config.getint("Redis_Cache", "port"),
            db=self.process.config.getint("Redis_Cache", "db"),
            decode_responses=True)

        # Country to log as critical
        self.cc_critical = self.process.config.get("Url", "cc_critical")

        # FUNCTIONS #

        self.faup = Faup()

        # Protocol file path
        protocolsfile_path = os.path.join(
            os.environ['AIL_HOME'],
            self.process.config.get("Directories", "protocolsfile"))

        # Get all uri schemes from protocolsfile (Used for Curve).
        # strip() instead of dropping the last character: the final line may
        # have no trailing newline, and blank lines must not become an empty
        # alternative in the regex.
        with open(protocolsfile_path, 'r') as scheme_file:
            schemes = [line.strip() for line in scheme_file]
        uri_scheme = "|".join(scheme for scheme in schemes if scheme)

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

        # Name of the file handled by the previous compute() call
        self.prec_filename = None

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def compute(self, message):
        """
        Search for Web links from given message

        :param message: queue message made of two whitespace-separated
                        fields; the first is the item filename, the second
                        is not used here.
        """
        # Extract item
        filename, _score = message.split()

        # Only process the file when it differs from the previous message
        if self.prec_filename is None or filename != self.prec_filename:
            domains_list = set()
            hosts_list = set()

            PST = Paste.Paste(filename)
            client = ip2asn()

            detected_urls = PST.get_regex(self.url_regex)
            if len(detected_urls) > 0:
                to_print = f'Web;{PST.p_source};{PST.p_date};{PST.p_name};'
                self.redis_logger.info(f'{to_print}Detected {len(detected_urls)} URL;{PST.p_rel_path}')

            for url in detected_urls:

                if url.endswith(".on"):
                    # URL is an onion link skip
                    # TODO send to TOR crawler ?
                    # self.redis_logger.debug("Skip onion link")
                    continue

                self.redis_logger.debug(f"match regex: {url}")

                to_send = f"{url} {PST._get_p_date()} {filename}"
                self.process.populate_set_out(to_send, 'Url')
                self.redis_logger.debug(f"url_parsed: {to_send}")

                self.faup.decode(url)
                domain = self.faup.get_domain()
                subdomain = self.faup.get_subdomain()

                self.redis_logger.debug(f'{url} Published')

                domains_list.add(domain)

                hostl = f'{subdomain}.{domain}' if subdomain else domain

                if hostl not in hosts_list:
                    # test a host only once per paste
                    hosts_list.add(hostl)
                    self._log_host_origin(hostl, client, PST)

            A_values = lib_refine.checking_A_record(self.r_serv2,
                                                    domains_list)

            if A_values[0] >= 1:

                pprint.pprint(A_values)
                # self.redis_logger.info('Url;{};{};{};Checked {} URL;{}'.format(
                #     PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))

        self.prec_filename = filename

    def _log_host_origin(self, hostl, client, paste):
        """
        Resolve hostl, look up its ASN / country code and log the result.

        Resolution or lookup failures are logged at debug level and end the
        check for this host; they never interrupt the processing of the item.

        :param hostl: host name to resolve
        :param client: ip2asn client used for the ASN / country lookup
        :param paste: Paste object the host was found in (used in log lines)
        """
        try:
            socket.setdefaulttimeout(1)
            ip = socket.gethostbyname(hostl)
            # If the resolver is not giving any IPv4 address,
            # ASN/CC lookup is skipped.
            asn_info = client.lookup(ip, qType='IP')
        except ipaddress.AddressValueError:
            self.redis_logger.debug(
                f'ASN/CC lookup failed for IP {ip}')
            return
        except Exception:
            # Best effort: any resolver/lookup error skips this host, but
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            self.redis_logger.debug(
                f'Resolver IPv4 address failed for host {hostl}')
            return

        cc = asn_info.cc
        asn = ''
        if asn_info.asn is not None:
            asn = asn_info.asn[2:]  # remove b'

        # EU is not an official ISO 3166 code (but used by RIPE
        # IP allocation)
        if cc is not None and cc != "EU":
            # NOTE(review): depending on the pycountry version an unknown
            # code yields None or raises KeyError -- both are handled here.
            try:
                country = pycountry.countries.get(alpha_2=cc)
            except KeyError:
                country = None

            if country is None:
                self.redis_logger.debug(f'{hostl};{asn};{cc}')
            else:
                self.redis_logger.debug(f'{hostl};{asn};{cc};{country.name}')

            if cc == self.cc_critical:
                to_print = f'Url;{paste.p_source};{paste.p_date};{paste.p_name};Detected {hostl} {cc}'
                self.redis_logger.info(to_print)
        else:
            self.redis_logger.debug(f'{hostl};{asn};{cc}')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = Web()
|
||||
module.run()
|
207
bin/WebStats.py
207
bin/WebStats.py
|
@ -1,207 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
"""
|
||||
The WebStats Module
|
||||
======================
|
||||
|
||||
This module computes statistics on the URLs collected by the Web module.
|
||||
It considers the TLD, domain and protocol.
|
||||
|
||||
"""
|
||||
|
||||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import time
|
||||
import datetime
|
||||
import redis
|
||||
import os
|
||||
from pubsublogger import publisher
|
||||
from pyfaup.faup import Faup
|
||||
|
||||
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from packages import lib_words
|
||||
from packages.Date import Date
|
||||
from Helper import Process
|
||||
|
||||
|
||||
class WebStats(AbstractModule):
    """
    WebStats module for AIL framework.

    Counts, per day, the scheme / TLD / domain of every URL received and
    maintains a "top progression" sorted set for each of these fields.
    """

    # Config Var
    THRESHOLD_TOTAL_SUM = 200  # Above this value, a keyword is eligible for a progression
    THRESHOLD_INCREASE = 1.0  # The percentage representing the keyword occurrence since num_day_to_look
    MAX_SET_CARDINALITY = 10  # The cardinality of the progression set
    NUM_DAY_TO_LOOK = 5  # the detection of the progression starts num_day_to_look in the past

    def __init__(self):
        """
        Init WebStats: open the trending database and resolve the CSV / word
        file paths from the configuration.
        """
        super(WebStats, self).__init__()

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")
        # Send a description of the module to the logs
        self.redis_logger.info("Makes statistics about valid URL")

        self.pending_seconds = 5*60

        # REDIS #
        # port and db are read as integers, as the Web module does for its
        # own Redis connection.
        self.r_serv_trend = redis.StrictRedis(
            host=self.process.config.get("ARDB_Trending", "host"),
            port=self.process.config.getint("ARDB_Trending", "port"),
            db=self.process.config.getint("ARDB_Trending", "db"),
            decode_responses=True)

        # FILE CURVE SECTION #
        ail_home = os.environ['AIL_HOME']
        directories = "Directories"

        self.csv_path_proto = os.path.join(
            ail_home, self.process.config.get(directories, "protocolstrending_csv"))
        self.protocolsfile_path = os.path.join(
            ail_home, self.process.config.get(directories, "protocolsfile"))

        self.csv_path_tld = os.path.join(
            ail_home, self.process.config.get(directories, "tldstrending_csv"))
        self.tldsfile_path = os.path.join(
            ail_home, self.process.config.get(directories, "tldsfile"))

        self.csv_path_domain = os.path.join(
            ail_home, self.process.config.get(directories, "domainstrending_csv"))

        self.faup = Faup()
        # Set by compute(); tells computeNone() that the graphs are stale
        self.generate_new_graph = False

    def computeNone(self):
        """
        Rebuild the protocol, TLD and domain graphs for the current month,
        but only when compute() has handled a URL since the last rebuild.
        """
        if not self.generate_new_graph:
            return

        self.generate_new_graph = False

        today = datetime.date.today()
        year = today.year
        month = today.month

        self.redis_logger.debug('Building protocol graph')
        lib_words.create_curve_with_word_file(self.r_serv_trend, self.csv_path_proto,
                                              self.protocolsfile_path, year,
                                              month)

        self.redis_logger.debug('Building tld graph')
        lib_words.create_curve_with_word_file(self.r_serv_trend, self.csv_path_tld,
                                              self.tldsfile_path, year,
                                              month)

        self.redis_logger.debug('Building domain graph')
        lib_words.create_curve_from_redis_set(self.r_serv_trend, self.csv_path_domain,
                                              "domain", year,
                                              month)
        self.redis_logger.debug('end building')

    def compute(self, message):
        """
        Update the statistics with one URL.

        :param message: three whitespace-separated fields: url, date, path
        """
        self.generate_new_graph = True

        # Do something with the message from the queue
        url, date, _path = message.split()
        self.faup.decode(url)
        url_parsed = self.faup.get()

        field_names = ('scheme', 'tld', 'domain')

        # Scheme, Tld and Domain analysis
        for field_name in field_names:
            self.analyse(field_name, date, url_parsed)

        for field_name in field_names:
            self.compute_progression(field_name, self.NUM_DAY_TO_LOOK, url_parsed)

    @staticmethod
    def _decode_field(field):
        """
        Return field as str when it can be decoded, else return it unchanged.

        Depending on the faup version, parsed fields are bytes or str.
        """
        try:
            return field.decode()
        except (AttributeError, UnicodeDecodeError):
            # already a str (no decode method) or not valid text: keep as is
            return field

    def analyse(self, field_name, date, url_parsed):
        """
        Increment the daily counter of url_parsed[field_name].

        :param field_name: key to read in url_parsed ('scheme', 'tld', 'domain')
        :param date: date string used as hash field of the counter
        :param url_parsed: mapping returned by faup for the current URL
        """
        field = url_parsed[field_name]
        if field is None:
            return

        field = self._decode_field(field)

        self.r_serv_trend.hincrby(field, date, 1)

        if field_name == "domain":  # save domain in a set for the monthly plot
            domain_set_name = "domain_set_" + date[0:6]
            self.r_serv_trend.sadd(domain_set_name, field)
            self.redis_logger.debug("added in " + domain_set_name + ": " + field)

    def get_date_range(self, num_day):
        """
        Return the num_day+1 dates ending today, most recent first, as
        produced by Date.substract_day().
        """
        date = Date(datetime.date.today().strftime("%Y%m%d"))
        return [date.substract_day(i) for i in range(0, num_day+1)]

    def compute_progression_word(self, num_day, keyword):
        """
        Compute the progression for one keyword

        :return: (keyword_increase, keyword_total_sum) where the increase is
                 the sum of the day-over-day ratios and the total is the sum
                 of the daily counters over the date range.
        """
        date_range = self.get_date_range(num_day)

        # get the value of each day in date_range (missing counter -> 0)
        value_list = []
        for date in date_range:
            curr_value = self.r_serv_trend.hget(keyword, date)
            value_list.append(int(curr_value) if curr_value is not None else 0)

        # used by the caller to check if this keyword is eligible for progression
        keyword_total_sum = sum(value_list)

        # The progression is based on the ratio: value[i] / value[i-1]
        keyword_increase = 0
        chronological = value_list[::-1]
        for previous, current in zip(chronological, chronological[1:]):
            divisor = previous if previous != 0 else 1  # Avoid zero division
            keyword_increase += current / divisor

        return (keyword_increase, keyword_total_sum)

    def compute_progression(self, field_name, num_day, url_parsed):
        """
        recompute the set top_progression zset
            - Compute the current field progression
            - re-compute the current progression for each first 2*self.MAX_SET_CARDINALITY fields in the top_progression_zset
        """
        redis_progression_name_set = "z_top_progression_"+field_name

        keyword = url_parsed[field_name]
        if keyword is None:
            return

        # same normalisation as analyse(), so both use the same key type
        keyword = self._decode_field(keyword)

        # compute the progression of the current word
        keyword_increase, keyword_total_sum = self.compute_progression_word(num_day, keyword)

        # NOTE(review): zadd is called with positional (score, member)
        # arguments, which is the redis-py 2.x StrictRedis form; redis-py >= 3
        # expects a {member: score} mapping -- confirm the pinned version.

        # re-compute the progression of 2*self.MAX_SET_CARDINALITY
        current_top = self.r_serv_trend.zrevrangebyscore(
            redis_progression_name_set, '+inf', '-inf',
            withscores=True, start=0, num=2*self.MAX_SET_CARDINALITY)
        for word, _value in current_top:
            word_inc, word_tot_sum = self.compute_progression_word(num_day, word)
            self.r_serv_trend.zrem(redis_progression_name_set, word)
            if (word_tot_sum > self.THRESHOLD_TOTAL_SUM) and (word_inc > self.THRESHOLD_INCREASE):
                self.r_serv_trend.zadd(redis_progression_name_set, float(word_inc), word)

        # filter before adding
        if (keyword_total_sum > self.THRESHOLD_TOTAL_SUM) and (keyword_increase > self.THRESHOLD_INCREASE):
            self.r_serv_trend.zadd(redis_progression_name_set, float(keyword_increase), keyword)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = WebStats()
|
||||
module.run()
|
|
@ -12,9 +12,13 @@ Search for API keys on an item content.
|
|||
"""
|
||||
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN']))
|
||||
|
||||
# project packages
|
||||
from module.abstract_module import AbstractModule
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib import regex_helper
|
||||
|
|
@ -33,14 +33,16 @@ Requirements
|
|||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import os
|
||||
import argparse
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
|
||||
|
|
@ -26,27 +26,22 @@ Redis organization:
|
|||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import time
|
||||
import os
|
||||
import sys
|
||||
import datetime
|
||||
import time
|
||||
import re
|
||||
import redis
|
||||
from datetime import datetime
|
||||
from pyfaup.faup import Faup
|
||||
from pubsublogger import publisher
|
||||
import lib.regex_helper as regex_helper
|
||||
import signal
|
||||
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from Helper import Process
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
|
||||
import Item
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
|
||||
import ConfigLoader
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from lib import ConfigLoader
|
||||
from lib import regex_helper
|
||||
|
||||
|
||||
class Credential(AbstractModule):
|
||||
|
@ -54,8 +49,6 @@ class Credential(AbstractModule):
|
|||
Credential module for AIL framework
|
||||
"""
|
||||
|
||||
max_execution_time = 30
|
||||
|
||||
# Split username with spec. char or with upper case, distinguish start with upper
|
||||
REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"
|
||||
REDIS_KEY_NUM_USERNAME = 'uniqNumForUsername'
|
||||
|
@ -79,12 +72,15 @@ class Credential(AbstractModule):
|
|||
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
|
||||
|
||||
# Database
|
||||
self.server_cred = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_TermCred")
|
||||
self.server_statistics = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Statistics")
|
||||
config_loader = ConfigLoader.ConfigLoader()
|
||||
self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
|
||||
self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")
|
||||
|
||||
# Config values
|
||||
self.minimumLengthThreshold = ConfigLoader.ConfigLoader().get_config_int("Credential", "minimumLengthThreshold")
|
||||
self.criticalNumberToAlert = ConfigLoader.ConfigLoader().get_config_int("Credential", "criticalNumberToAlert")
|
||||
self.minimumLengthThreshold = config_loader.get_config_int("Credential", "minimumLengthThreshold")
|
||||
self.criticalNumberToAlert = config_loader.get_config_int("Credential", "criticalNumberToAlert")
|
||||
|
||||
self.max_execution_time = 30
|
||||
|
||||
# Waiting time in seconds between two processed messages
|
||||
self.pending_seconds = 10
|
||||
|
@ -95,38 +91,39 @@ class Credential(AbstractModule):
|
|||
|
||||
def compute(self, message):
|
||||
|
||||
item_id, count = message.split()
|
||||
id, count = message.split()
|
||||
item = Item(id)
|
||||
|
||||
item_content = Item.get_item_content(item_id)
|
||||
item_content = item.get_content()
|
||||
|
||||
# Extract all credentials
|
||||
all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item_id, item_content, max_time=Credential.max_execution_time)
|
||||
all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time)
|
||||
|
||||
if all_credentials:
|
||||
nb_cred = len(all_credentials)
|
||||
message = f'Checked {nb_cred} credentials found.'
|
||||
|
||||
all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item_id, item_content, max_time=Credential.max_execution_time)
|
||||
all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item.get_id(), item_content, max_time=self.max_execution_time)
|
||||
if all_sites:
|
||||
discovered_sites = ', '.join(all_sites)
|
||||
message += f' Related websites: {discovered_sites}'
|
||||
|
||||
self.redis_logger.debug(message)
|
||||
print(message)
|
||||
|
||||
to_print = f'Credential;{Item.get_source(item_id)};{Item.get_item_date(item_id)};{Item.get_item_basename(item_id)};{message};{item_id}'
|
||||
to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'
|
||||
|
||||
#num of creds above tresh, publish an alert
|
||||
if nb_cred > self.criticalNumberToAlert:
|
||||
self.redis_logger.debug(f"========> Found more than 10 credentials in this file : {item_id}")
|
||||
print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
|
||||
self.redis_logger.warning(to_print)
|
||||
|
||||
# Send to duplicate
|
||||
self.process.populate_set_out(item_id, 'Duplicate')
|
||||
self.send_message_to_queue(item.get_id(), 'Duplicate')
|
||||
|
||||
msg = f'infoleak:automatic-detection="credential";{item_id}'
|
||||
self.process.populate_set_out(msg, 'Tags')
|
||||
msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item_id, item_content, max_time=Credential.max_execution_time, r_set=False)
|
||||
site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item.get_id(), item_content, max_time=self.max_execution_time, r_set=False)
|
||||
|
||||
creds_sites = {}
|
||||
|
||||
|
@ -140,7 +137,7 @@ class Credential(AbstractModule):
|
|||
for url in all_sites:
|
||||
self.faup.decode(url)
|
||||
domain = self.faup.get()['domain']
|
||||
## TODO: # FIXME: remove me
|
||||
## TODO: # FIXME: remove me, check faup version
|
||||
try:
|
||||
domain = domain.decode()
|
||||
except:
|
||||
|
@ -153,14 +150,14 @@ class Credential(AbstractModule):
|
|||
for site, num in creds_sites.items(): # Send for each different site to moduleStats
|
||||
|
||||
mssg = f'credential;{num};{site};{Item.get_item_date(item_id)}'
|
||||
self.redis_logger.debug(mssg)
|
||||
self.process.populate_set_out(mssg, 'ModuleStats')
|
||||
print(mssg)
|
||||
self.send_message_to_queue(msg, 'ModuleStats')
|
||||
|
||||
if all_sites:
|
||||
discovered_sites = ', '.join(all_sites)
|
||||
self.redis_logger.debug(f"=======> Probably on : {discovered_sites}")
|
||||
print(f"=======> Probably on : {discovered_sites}")
|
||||
|
||||
date = datetime.datetime.now().strftime("%Y%m")
|
||||
date = datetime.now().strftime("%Y%m")
|
||||
for cred in all_credentials:
|
||||
maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
|
||||
self.faup.decode(maildomains)
|
||||
|
@ -173,7 +170,7 @@ class Credential(AbstractModule):
|
|||
self.server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
|
||||
else:
|
||||
self.redis_logger.info(to_print)
|
||||
self.redis_logger.debug(f'found {nb_cred} credentials')
|
||||
print(f'found {nb_cred} credentials')
|
||||
|
||||
# For searching credential in termFreq
|
||||
for cred in all_credentials:
|
||||
|
@ -181,8 +178,8 @@ class Credential(AbstractModule):
|
|||
|
||||
# unique number attached to unique path
|
||||
uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
|
||||
self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item_id: uniq_num_path})
|
||||
self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item_id})
|
||||
self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path})
|
||||
self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()})
|
||||
|
||||
# unique number attached to unique username
|
||||
uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
|
|
@ -14,14 +14,16 @@ It apply credit card regexes on item content and warn if a valid card number is
|
|||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
from packages import lib_refine
|
||||
|
|
@ -17,21 +17,21 @@ from hashlib import sha1
|
|||
import magic
|
||||
import json
|
||||
import datetime
|
||||
from pubsublogger import publisher
|
||||
import re
|
||||
import signal
|
||||
from lib import Decoded
|
||||
|
||||
import sys
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from modules.abstract_module import AbstractModule
|
||||
from Helper import Process
|
||||
from packages import Item
|
||||
import ConfigLoader
|
||||
|
||||
from lib import ConfigLoader
|
||||
from lib import Decoded
|
||||
|
||||
# # TODO: use regex_helper
|
||||
class TimeoutException(Exception):
|
||||
pass
|
||||
|
||||
|
@ -138,6 +138,8 @@ class Decoder(AbstractModule):
|
|||
if not mimetype:
|
||||
self.redis_logger.debug(item_id)
|
||||
self.redis_logger.debug(sha1_string)
|
||||
print(item_id)
|
||||
print(sha1_string)
|
||||
raise Exception('Invalid mimetype')
|
||||
Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
|
||||
Decoded.save_item_relationship(sha1_string, item_id)
|
||||
|
@ -147,6 +149,7 @@ class Decoder(AbstractModule):
|
|||
content = content.replace(encoded, '', 1)
|
||||
|
||||
self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}')
|
||||
print(f'{item_id} : {decoder_name} - {mimetype}')
|
||||
if(find):
|
||||
self.set_out_item(decoder_name, item_id)
|
||||
|
||||
|
@ -158,11 +161,11 @@ class Decoder(AbstractModule):
|
|||
self.redis_logger.warning(f'{decoder_name} decoded')
|
||||
|
||||
# Send to duplicate
|
||||
self.process.populate_set_out(item_id, 'Duplicate')
|
||||
self.send_message_to_queue(item_id, 'Duplicate')
|
||||
|
||||
# Send to Tags
|
||||
msg = f'infoleak:automatic-detection="{decoder_name}";{item_id}'
|
||||
self.process.populate_set_out(msg, 'Tags')
|
||||
self.send_message_to_queue(msg, 'Tags')
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
@ -18,10 +18,11 @@ import sys
|
|||
import time
|
||||
import DomainClassifier.domainclassifier
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
|
||||
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
|
@ -36,10 +36,11 @@ import redis
|
|||
from hashlib import md5
|
||||
from uuid import uuid4
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
|
||||
|
|
@ -15,17 +15,18 @@ and index each file with a full-text indexer (Whoosh until now).
|
|||
import time
|
||||
import shutil
|
||||
import os
|
||||
import sys
|
||||
from os.path import join, getsize
|
||||
from whoosh.index import create_in, exists_in, open_dir
|
||||
from whoosh.fields import Schema, TEXT, ID
|
||||
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages import Paste
|
||||
from Helper import Process
|
||||
|
||||
|
||||
class Indexer(AbstractModule):
|
||||
|
@ -102,6 +103,7 @@ class Indexer(AbstractModule):
|
|||
docpath = message.split(" ", -1)[-1]
|
||||
paste = PST.get_p_content()
|
||||
self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
|
||||
print(f"Indexing - {self.indexname}: {docpath}")
|
||||
|
||||
# Avoid calculating the index's size at each message
|
||||
if(time.time() - self.last_refresh > self.TIME_WAIT):
|
|
@ -15,13 +15,16 @@ RSA private key, certificate messages
|
|||
##################################
|
||||
# Import External packages
|
||||
##################################
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from enum import Enum
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from module.abstract_module import AbstractModule
|
||||
from modules.abstract_module import AbstractModule
|
||||
from packages.Item import Item
|
||||
|
||||
|
|
@ -19,8 +19,11 @@ import os
|
|||
import sys
|
||||
import re
|
||||
|
||||
# project packages
|
||||
from module.abstract_module import AbstractModule
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib import crawlers
|
||||
from lib import regex_helper
|
|
@ -11,13 +11,13 @@ from base64 import b64encode
|
|||
sys.path.append(os.environ['AIL_BIN'])
|
||||
|
||||
# Modules Classes
|
||||
from ApiKey import ApiKey
|
||||
from Categ import Categ
|
||||
from CreditCards import CreditCards
|
||||
from DomClassifier import DomClassifier
|
||||
from Global import Global
|
||||
from Keys import Keys
|
||||
from Onion import Onion
|
||||
from modules.ApiKey import ApiKey
|
||||
from modules.Categ import Categ
|
||||
from modules.CreditCards import CreditCards
|
||||
from modules.DomClassifier import DomClassifier
|
||||
from modules.Global import Global
|
||||
from modules.Keys import Keys
|
||||
from modules.Onion import Onion
|
||||
|
||||
# project packages
|
||||
import lib.crawlers as crawlers
|
||||
|
|
Loading…
Reference in New Issue