chg: [modules] create new modules repository + small fixes

pull/569/head
Terrtia 2021-06-02 14:42:23 +02:00
parent d4829273c5
commit ed37232a2d
23 changed files with 176 additions and 465 deletions

bin/LAUNCH.sh

@@ -78,7 +78,7 @@ function helptext {
     [-k | --killAll]              Kill DB + Scripts
     [-ks | --killscript]          Scripts
     [-u | --update]               Update AIL
-    [-ut | --thirdpartyUpdate]    Update Web
+    [-ut | --thirdpartyUpdate]    Update UI/Frontend
     [-t | --test]                 Launch Tests
     [-rp | --resetPassword]       Reset Password
     [-f | --launchFeeder]         LAUNCH Pystemon feeder
@@ -180,7 +180,7 @@ function launching_scripts {
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "ApiKey" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ApiKey.py; read x"
     sleep 0.1
-    screen -S "Script_AIL" -X screen -t "Web" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Web.py; read x"
+    screen -S "Script_AIL" -X screen -t "Urls" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Urls.py; read x"
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "Credential" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Credential.py; read x"
     sleep 0.1
@@ -210,8 +210,6 @@ function launching_scripts {
     #sleep 0.1
     screen -S "Script_AIL" -X screen -t "Cve" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Cve.py; read x"
     sleep 0.1
-    screen -S "Script_AIL" -X screen -t "WebStats" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./WebStats.py; read x"
-    sleep 0.1
     screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModuleStats.py; read x"
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SQLInjectionDetection.py; read x"
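For context, each module above runs in its own window of a single GNU screen session named "Script_AIL". A minimal Python sketch of the same launch pattern (the module list here is hypothetical; the real list lives in LAUNCH.sh, in bash):

import subprocess

# Hypothetical subset of the modules started by launching_scripts above
MODULES = ["ApiKey", "Urls", "Credential"]

for name in MODULES:
    # Mirrors: screen -S "Script_AIL" -X screen -t "<name>" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./<name>.py; read x"
    cmd = ["screen", "-S", "Script_AIL", "-X", "screen", "-t", name,
           "bash", "-c", f"cd $AIL_BIN; $ENV_PY ./{name}.py; read x"]
    subprocess.run(cmd)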

bin/Urls.py  Executable file (+86 lines)

@@ -0,0 +1,86 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

"""
The Urls Module
============================

This module extracts URLs from an item and sends them to other modules.

"""

##################################
# Import External packages
##################################
import os
from pyfaup.faup import Faup

##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from packages.Item import Item
from lib import regex_helper
from Helper import Process


class Urls(AbstractModule):
    """
    Urls module for AIL framework
    """

    def __init__(self):
        """
        Init Urls
        """
        super(Urls, self).__init__()

        # FUNCTIONS #
        self.faup = Faup()

        # Redis cache key for regex_helper (same pattern as the Credential module)
        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)

        # Protocol file path
        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "protocolsfile"))
        # Get all URI schemes from the protocols file (used for Curve)
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme[:-1]+"|"
        uri_scheme = uri_scheme[:-1]

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        item_id, score = message.split()
        item = Item(item_id)

        l_urls = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item.get_content())

        if len(l_urls) > 0:
            to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
            self.redis_logger.info(f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')

        for url in l_urls:
            # # TODO: FIXME handle .foundation .dev onion? i2p?
            to_send = f"{url} {item.get_id()}"
            self.send_message_to_queue(to_send, 'Url')
            self.redis_logger.debug(f"url_parsed: {to_send}")


if __name__ == '__main__':
    module = Urls()
    module.run()
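To see what the scheme-alternation regex is doing, here is a standalone sketch with a simplified pattern (the scheme list and sample text are hypothetical; the module builds the real alternation from protocolsfile and runs it through regex_helper with a timeout):

import re

schemes = ["http", "https", "ftp"]   # stand-in for protocolsfile content
uri_scheme = "|".join(schemes)

# Simplified version of self.url_regex: scheme://host[/path]
url_regex = r"(?i)(?:" + uri_scheme + r")://[a-zA-Z0-9.-]+(?:/[\w.,?'+&%$#=~-]*)*"

sample = "mirror at https://example.com/dump.txt and ftp://files.example.org"
print(re.findall(url_regex, sample))
# ['https://example.com/dump.txt', 'ftp://files.example.org']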

bin/Web.py  Deleted file (184 lines)

@@ -1,184 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

"""
The Web Module
============================

This module tries to parse URLs and warns if defined country codes are present.

"""

##################################
# Import External packages
##################################
import redis
import pprint
import time
import os
import dns.exception
from pyfaup.faup import Faup
import re

# Country and ASN lookup
from cymru.ip2asn.dns import DNSClient as ip2asn
import socket
import pycountry
import ipaddress

##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from packages import Paste
from packages import lib_refine
from Helper import Process


class Web(AbstractModule):
    """
    Web module for AIL framework
    """

    # Used to prevent concatenation with empty fields due to url parsing
    def avoidNone(self, a_string):
        if a_string is None:
            return ""
        else:
            return a_string

    def __init__(self):
        """
        Init Web
        """
        super(Web, self).__init__(logger_channel='script:web')

        # REDIS Cache
        self.r_serv2 = redis.StrictRedis(
            host=self.process.config.get("Redis_Cache", "host"),
            port=self.process.config.getint("Redis_Cache", "port"),
            db=self.process.config.getint("Redis_Cache", "db"),
            decode_responses=True)

        # Country to log as critical
        self.cc_critical = self.process.config.get("Url", "cc_critical")

        # FUNCTIONS #
        self.faup = Faup()

        # Protocol file path
        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "protocolsfile"))
        # Get all URI schemes from the protocols file (used for Curve)
        uri_scheme = ""
        with open(protocolsfile_path, 'r') as scheme_file:
            for scheme in scheme_file:
                uri_scheme += scheme[:-1]+"|"
        uri_scheme = uri_scheme[:-1]

        self.url_regex = "((?i:"+uri_scheme + \
            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"

        self.prec_filename = None

        # Send module state to logs
        self.redis_logger.info(f"Module {self.module_name} initialized")

    def compute(self, message):
        """
        Search for Web links from given message
        """
        # Extract item
        filename, score = message.split()

        domains_list = set()
        hosts_list = set()

        if self.prec_filename is None or filename != self.prec_filename:
            domains_list.clear()
            hosts_list.clear()

            PST = Paste.Paste(filename)
            client = ip2asn()

            detected_urls = PST.get_regex(self.url_regex)
            if len(detected_urls) > 0:
                to_print = f'Web;{PST.p_source};{PST.p_date};{PST.p_name};'
                self.redis_logger.info(f'{to_print}Detected {len(detected_urls)} URL;{PST.p_rel_path}')

            for url in detected_urls:
                if url.endswith(".on"):
                    # URL is an onion link, skip it
                    # TODO send to TOR crawler?
                    # self.redis_logger.debug("Skip onion link")
                    continue

                self.redis_logger.debug(f"match regex: {url}")

                to_send = f"{url} {PST._get_p_date()} {filename}"
                self.process.populate_set_out(to_send, 'Url')
                self.redis_logger.debug(f"url_parsed: {to_send}")

                self.faup.decode(url)
                domain = self.faup.get_domain()
                subdomain = self.faup.get_subdomain()

                self.redis_logger.debug(f'{url} Published')

                domains_list.add(domain)

                hostl = f'{subdomain}.{domain}' if subdomain else domain

                if hostl not in hosts_list:
                    # test each host only once per paste
                    hosts_list.add(hostl)

                    try:
                        socket.setdefaulttimeout(1)
                        ip = socket.gethostbyname(hostl)
                        # If the resolver does not return an IPv4 address,
                        # the ASN/CC lookup is skipped.
                        l = client.lookup(ip, qType='IP')
                    except ipaddress.AddressValueError:
                        self.redis_logger.debug(
                            f'ASN/CC lookup failed for IP {ip}')
                        continue
                    except:
                        self.redis_logger.debug(
                            f'IPv4 address resolution failed for host {hostl}')
                        continue

                    cc = getattr(l, 'cc')
                    asn = ''
                    if getattr(l, 'asn') is not None:
                        asn = getattr(l, 'asn')[2:]  # remove b'

                    # EU is not an official ISO 3166 code (but is used by RIPE
                    # for IP allocation)
                    if cc is not None and cc != "EU":
                        countryname = pycountry.countries.get(alpha_2=cc).name
                        self.redis_logger.debug(f'{hostl};{asn};{cc};{countryname}')
                        if cc == self.cc_critical:
                            to_print = f'Url;{PST.p_source};{PST.p_date};{PST.p_name};Detected {hostl} {cc}'
                            self.redis_logger.info(to_print)
                    else:
                        self.redis_logger.debug(f'{hostl};{asn};{cc}')

            A_values = lib_refine.checking_A_record(self.r_serv2,
                                                    domains_list)

            if A_values[0] >= 1:
                pprint.pprint(A_values)
                # self.redis_logger.info('Url;{};{};{};Checked {} URL;{}'.format(
                #     PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))

        self.prec_filename = filename


if __name__ == '__main__':
    module = Web()
    module.run()
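The heart of the deleted module is the host check: resolve the host, look up ASN and country code, and flag the configured critical country. A standalone sketch of that flow, assuming a cymru DNSClient behaving as used above (the cc_critical default here is hypothetical; the real value comes from the Url config section):

import socket
import pycountry

def check_host_country(hostl, client, cc_critical="DE"):
    """Return (cc, country_name) for a host, or None if lookup fails."""
    try:
        socket.setdefaulttimeout(1)
        ip = socket.gethostbyname(hostl)
        record = client.lookup(ip, qType='IP')   # same call as Web.compute()
    except Exception:
        return None                              # unresolvable host or lookup error
    cc = getattr(record, 'cc')
    if cc is None or cc == "EU":                 # EU is not an ISO 3166 code
        return None
    name = pycountry.countries.get(alpha_2=cc).name
    if cc == cc_critical:
        print(f'critical country detected for {hostl}: {cc} ({name})')
    return cc, name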

bin/WebStats.py  Deleted file (207 lines)

@@ -1,207 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

"""
The WebStats Module
======================

This module computes statistics on the URLs collected by the Web module.
It considers the TLD, domain and protocol.

"""

##################################
# Import External packages
##################################
import time
import datetime
import redis
import os
from pubsublogger import publisher
from pyfaup.faup import Faup

##################################
# Import Project packages
##################################
from module.abstract_module import AbstractModule
from packages import lib_words
from packages.Date import Date
from Helper import Process


class WebStats(AbstractModule):
    """
    WebStats module for AIL framework
    """

    # Config Var
    THRESHOLD_TOTAL_SUM = 200  # Above this value, a keyword is eligible for a progression
    THRESHOLD_INCREASE = 1.0   # The percentage representing the keyword occurrence since num_day_to_look
    MAX_SET_CARDINALITY = 10   # The cardinality of the progression set
    NUM_DAY_TO_LOOK = 5        # The detection of the progression starts num_day_to_look days in the past

    def __init__(self):
        super(WebStats, self).__init__()

        # Send module state to logs
        self.redis_logger.info("Module %s initialized" % (self.module_name))
        # Send a description of the module to the logger
        self.redis_logger.info("Makes statistics about valid URL")

        self.pending_seconds = 5*60

        # REDIS #
        self.r_serv_trend = redis.StrictRedis(
            host=self.process.config.get("ARDB_Trending", "host"),
            port=self.process.config.get("ARDB_Trending", "port"),
            db=self.process.config.get("ARDB_Trending", "db"),
            decode_responses=True)

        # FILE CURVE SECTION #
        self.csv_path_proto = os.path.join(os.environ['AIL_HOME'],
                                           self.process.config.get("Directories", "protocolstrending_csv"))
        self.protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
                                               self.process.config.get("Directories", "protocolsfile"))

        self.csv_path_tld = os.path.join(os.environ['AIL_HOME'],
                                         self.process.config.get("Directories", "tldstrending_csv"))
        self.tldsfile_path = os.path.join(os.environ['AIL_HOME'],
                                          self.process.config.get("Directories", "tldsfile"))

        self.csv_path_domain = os.path.join(os.environ['AIL_HOME'],
                                            self.process.config.get("Directories", "domainstrending_csv"))

        self.faup = Faup()
        self.generate_new_graph = False

    def computeNone(self):
        if self.generate_new_graph:
            self.generate_new_graph = False

            today = datetime.date.today()
            year = today.year
            month = today.month

            self.redis_logger.debug('Building protocol graph')
            lib_words.create_curve_with_word_file(self.r_serv_trend, self.csv_path_proto,
                                                  self.protocolsfile_path, year,
                                                  month)

            self.redis_logger.debug('Building tld graph')
            lib_words.create_curve_with_word_file(self.r_serv_trend, self.csv_path_tld,
                                                  self.tldsfile_path, year,
                                                  month)

            self.redis_logger.debug('Building domain graph')
            lib_words.create_curve_from_redis_set(self.r_serv_trend, self.csv_path_domain,
                                                  "domain", year,
                                                  month)
            self.redis_logger.debug('end building')

    def compute(self, message):
        self.generate_new_graph = True

        # Do something with the message from the queue
        url, date, path = message.split()
        self.faup.decode(url)
        url_parsed = self.faup.get()

        # Scheme analysis
        self.analyse('scheme', date, url_parsed)
        # Tld analysis
        self.analyse('tld', date, url_parsed)
        # Domain analysis
        self.analyse('domain', date, url_parsed)

        self.compute_progression('scheme', self.NUM_DAY_TO_LOOK, url_parsed)
        self.compute_progression('tld', self.NUM_DAY_TO_LOOK, url_parsed)
        self.compute_progression('domain', self.NUM_DAY_TO_LOOK, url_parsed)

    def analyse(self, field_name, date, url_parsed):
        field = url_parsed[field_name]

        if field is not None:
            try:  # faup version
                field = field.decode()
            except:
                pass

            self.r_serv_trend.hincrby(field, date, 1)

            if field_name == "domain":  # save domain in a set for the monthly plot
                domain_set_name = "domain_set_" + date[0:6]
                self.r_serv_trend.sadd(domain_set_name, field)
                self.redis_logger.debug("added in " + domain_set_name + ": " + field)

    def get_date_range(self, num_day):
        curr_date = datetime.date.today()
        date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
        date_list = []

        for i in range(0, num_day+1):
            date_list.append(date.substract_day(i))

        return date_list

    def compute_progression_word(self, num_day, keyword):
        """
        Compute the progression for one keyword
        """
        date_range = self.get_date_range(num_day)
        # check if this keyword is eligible for progression
        keyword_total_sum = 0
        value_list = []
        for date in date_range:  # get value up to date_range
            curr_value = self.r_serv_trend.hget(keyword, date)
            value_list.append(int(curr_value if curr_value is not None else 0))
            keyword_total_sum += int(curr_value) if curr_value is not None else 0

        oldest_value = value_list[-1] if value_list[-1] != 0 else 1  # avoid zero division

        # The progression is based on the ratio: value[i] / value[i-1]
        keyword_increase = 0
        value_list_reversed = value_list[:]
        value_list_reversed.reverse()
        for i in range(1, len(value_list_reversed)):
            divisor = value_list_reversed[i-1] if value_list_reversed[i-1] != 0 else 1
            keyword_increase += value_list_reversed[i] / divisor

        return (keyword_increase, keyword_total_sum)

    def compute_progression(self, field_name, num_day, url_parsed):
        """
        Recompute the top_progression zset:
        - compute the current field progression
        - recompute the current progression of the first 2*self.MAX_SET_CARDINALITY fields in the top_progression zset
        """
        redis_progression_name_set = "z_top_progression_" + field_name

        keyword = url_parsed[field_name]
        if keyword is not None:

            # compute the progression of the current word
            keyword_increase, keyword_total_sum = self.compute_progression_word(num_day, keyword)

            # re-compute the progression of 2*self.MAX_SET_CARDINALITY
            current_top = self.r_serv_trend.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*self.MAX_SET_CARDINALITY)
            for word, value in current_top:
                word_inc, word_tot_sum = self.compute_progression_word(num_day, word)
                self.r_serv_trend.zrem(redis_progression_name_set, word)
                if (word_tot_sum > self.THRESHOLD_TOTAL_SUM) and (word_inc > self.THRESHOLD_INCREASE):
                    self.r_serv_trend.zadd(redis_progression_name_set, float(word_inc), word)

            # filter before adding
            if (keyword_total_sum > self.THRESHOLD_TOTAL_SUM) and (keyword_increase > self.THRESHOLD_INCREASE):
                self.r_serv_trend.zadd(redis_progression_name_set, float(keyword_increase), keyword)


if __name__ == '__main__':
    module = WebStats()
    module.run()
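The progression logic above reduces to a sum of day-over-day ratios gated by two thresholds. A self-contained sketch with hypothetical daily counts:

THRESHOLD_TOTAL_SUM = 200   # same thresholds as WebStats above
THRESHOLD_INCREASE = 1.0

def progression(daily_counts):
    """daily_counts: occurrences per day, oldest first.
    Returns (increase, total) like compute_progression_word()."""
    total = sum(daily_counts)
    increase = 0.0
    for prev, curr in zip(daily_counts, daily_counts[1:]):
        increase += curr / (prev if prev != 0 else 1)   # avoid zero division
    return increase, total

inc, tot = progression([10, 20, 40, 80, 160])   # hypothetical counts, doubling daily
print(inc, tot)                                 # 8.0 310
print(tot > THRESHOLD_TOTAL_SUM and inc > THRESHOLD_INCREASE)   # eligible: True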


@@ -12,9 +12,13 @@ Search for API keys on an item content.
 """
 
 import re
+import os
+import sys
+
+sys.path.append(os.path.join(os.environ['AIL_BIN']))
 
 # project packages
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from packages.Item import Item
 from lib import regex_helper


@@ -33,14 +33,16 @@ Requirements
 ##################################
 # Import External packages
 ##################################
+import os
 import argparse
-import os
 import re
+import sys
+
+sys.path.append(os.environ['AIL_BIN'])
 
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from packages.Item import Item


@@ -26,27 +26,22 @@ Redis organization:
 ##################################
 # Import External packages
 ##################################
-import time
 import os
 import sys
-import datetime
+import time
 import re
 import redis
+from datetime import datetime
 from pyfaup.faup import Faup
-from pubsublogger import publisher
-import lib.regex_helper as regex_helper
-import signal
+
+sys.path.append(os.environ['AIL_BIN'])
 
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
-from Helper import Process
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
-import Item
-sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/'))
-import ConfigLoader
+from modules.abstract_module import AbstractModule
+from packages.Item import Item
+from lib import ConfigLoader
+from lib import regex_helper
 
 
 class Credential(AbstractModule):
@@ -54,8 +49,6 @@ class Credential(AbstractModule):
     Credential module for AIL framework
     """
 
-    max_execution_time = 30
-
     # Split username with spec. char or with upper case, distinguish start with upper
     REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"
     REDIS_KEY_NUM_USERNAME = 'uniqNumForUsername'
@@ -79,12 +72,15 @@ class Credential(AbstractModule):
         self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
 
         # Database
-        self.server_cred = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_TermCred")
-        self.server_statistics = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Statistics")
+        config_loader = ConfigLoader.ConfigLoader()
+        self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
+        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")
 
         # Config values
-        self.minimumLengthThreshold = ConfigLoader.ConfigLoader().get_config_int("Credential", "minimumLengthThreshold")
-        self.criticalNumberToAlert = ConfigLoader.ConfigLoader().get_config_int("Credential", "criticalNumberToAlert")
+        self.minimumLengthThreshold = config_loader.get_config_int("Credential", "minimumLengthThreshold")
+        self.criticalNumberToAlert = config_loader.get_config_int("Credential", "criticalNumberToAlert")
+
+        self.max_execution_time = 30
 
         # Waiting time in secondes between to message proccessed
         self.pending_seconds = 10
@@ -95,38 +91,39 @@ class Credential(AbstractModule):
     def compute(self, message):
-        item_id, count = message.split()
+        id, count = message.split()
+        item = Item(id)
 
-        item_content = Item.get_item_content(item_id)
+        item_content = item.get_content()
 
         # Extract all credentials
-        all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item_id, item_content, max_time=Credential.max_execution_time)
+        all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time)
         if all_credentials:
             nb_cred = len(all_credentials)
            message = f'Checked {nb_cred} credentials found.'
 
-            all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item_id, item_content, max_time=Credential.max_execution_time)
+            all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item.get_id(), item_content, max_time=self.max_execution_time)
             if all_sites:
                 discovered_sites = ', '.join(all_sites)
                 message += f' Related websites: {discovered_sites}'
 
-            self.redis_logger.debug(message)
+            print(message)
 
-            to_print = f'Credential;{Item.get_source(item_id)};{Item.get_item_date(item_id)};{Item.get_item_basename(item_id)};{message};{item_id}'
+            to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'
 
             #num of creds above tresh, publish an alert
             if nb_cred > self.criticalNumberToAlert:
-                self.redis_logger.debug(f"========> Found more than 10 credentials in this file : {item_id}")
+                print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
                 self.redis_logger.warning(to_print)
 
                 # Send to duplicate
-                self.process.populate_set_out(item_id, 'Duplicate')
+                self.send_message_to_queue(item.get_id(), 'Duplicate')
 
-                msg = f'infoleak:automatic-detection="credential";{item_id}'
-                self.process.populate_set_out(msg, 'Tags')
+                msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
+                self.send_message_to_queue(msg, 'Tags')
 
-                site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item_id, item_content, max_time=Credential.max_execution_time, r_set=False)
+                site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item.get_id(), item_content, max_time=self.max_execution_time, r_set=False)
 
                 creds_sites = {}
@@ -140,7 +137,7 @@ class Credential(AbstractModule):
                 for url in all_sites:
                     self.faup.decode(url)
                     domain = self.faup.get()['domain']
-                    ## TODO: # FIXME: remove me
+                    ## TODO: # FIXME: remove me, check faup versionb
                     try:
                         domain = domain.decode()
                     except:
@@ -153,14 +150,14 @@ class Credential(AbstractModule):
                 for site, num in creds_sites.items():  # Send for each different site to moduleStats
                     mssg = f'credential;{num};{site};{Item.get_item_date(item_id)}'
-                    self.redis_logger.debug(mssg)
-                    self.process.populate_set_out(mssg, 'ModuleStats')
+                    print(mssg)
+                    self.send_message_to_queue(msg, 'ModuleStats')
 
                 if all_sites:
                     discovered_sites = ', '.join(all_sites)
-                    self.redis_logger.debug(f"=======> Probably on : {discovered_sites}")
+                    print(f"=======> Probably on : {discovered_sites}")
 
-                date = datetime.datetime.now().strftime("%Y%m")
+                date = datetime.now().strftime("%Y%m")
                 for cred in all_credentials:
                     maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
                     self.faup.decode(maildomains)
@@ -173,7 +170,7 @@ class Credential(AbstractModule):
                     self.server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
         else:
             self.redis_logger.info(to_print)
-            self.redis_logger.debug(f'found {nb_cred} credentials')
+            print(f'found {nb_cred} credentials')
 
         # For searching credential in termFreq
         for cred in all_credentials:
@@ -181,8 +178,8 @@ class Credential(AbstractModule):
             # unique number attached to unique path
            uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
-            self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item_id: uniq_num_path})
-            self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item_id})
+            self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path})
+            self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()})
 
             # unique number attached to unique username
             uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
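The Credential changes follow the same refactor applied across this commit: modules import AbstractModule from the new modules package, read content through the Item object API, and publish with send_message_to_queue instead of process.populate_set_out. A minimal sketch of the resulting module shape (the Demo name and the detector logic are hypothetical):

import os
import sys
sys.path.append(os.environ['AIL_BIN'])

from modules.abstract_module import AbstractModule
from packages.Item import Item

class Demo(AbstractModule):
    """Hypothetical minimal module using the new layout."""

    def compute(self, message):
        item = Item(message.split()[0])
        if 'password' in item.get_content():     # trivial stand-in detector
            msg = f'infoleak:automatic-detection="demo";{item.get_id()}'
            self.send_message_to_queue(msg, 'Tags')

if __name__ == '__main__':
    Demo().run()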


@@ -14,14 +14,16 @@ It apply credit card regexes on item content and warn if a valid card number is
 ##################################
 # Import External packages
 ##################################
+import os
 import re
 import sys
 import time
+
+sys.path.append(os.environ['AIL_BIN'])
 
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from packages.Item import Item
 from packages import lib_refine


@@ -17,21 +17,21 @@ from hashlib import sha1
 import magic
 import json
 import datetime
-from pubsublogger import publisher
 import re
 import signal
-from lib import Decoded
+import sys
+
+sys.path.append(os.environ['AIL_BIN'])
 
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from Helper import Process
 from packages import Item
-import ConfigLoader
+from lib import ConfigLoader
+from lib import Decoded
+
+# # TODO: use regex_helper
 
 class TimeoutException(Exception):
     pass
@@ -138,6 +138,8 @@ class Decoder(AbstractModule):
         if not mimetype:
             self.redis_logger.debug(item_id)
             self.redis_logger.debug(sha1_string)
+            print(item_id)
+            print(sha1_string)
             raise Exception('Invalid mimetype')
         Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
         Decoded.save_item_relationship(sha1_string, item_id)
@@ -147,6 +149,7 @@ class Decoder(AbstractModule):
             content = content.replace(encoded, '', 1)
 
         self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}')
+        print(f'{item_id} : {decoder_name} - {mimetype}')
 
         if(find):
             self.set_out_item(decoder_name, item_id)
@@ -158,11 +161,11 @@ class Decoder(AbstractModule):
         self.redis_logger.warning(f'{decoder_name} decoded')
 
         # Send to duplicate
-        self.process.populate_set_out(item_id, 'Duplicate')
+        self.send_message_to_queue(item_id, 'Duplicate')
 
         # Send to Tags
         msg = f'infoleak:automatic-detection="{decoder_name}";{item_id}'
-        self.process.populate_set_out(msg, 'Tags')
+        self.send_message_to_queue(msg, 'Tags')
 
 
 if __name__ == '__main__':


@@ -18,10 +18,11 @@ import sys
 import time
 import DomainClassifier.domainclassifier
 
+sys.path.append(os.environ['AIL_BIN'])
+
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from packages.Item import Item
 
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))


@@ -36,10 +36,11 @@ import redis
 from hashlib import md5
 from uuid import uuid4
 
+sys.path.append(os.environ['AIL_BIN'])
+
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from lib.ConfigLoader import ConfigLoader


@@ -15,17 +15,18 @@ and index each file with a full-text indexer (Whoosh until now).
 import time
 import shutil
 import os
+import sys
 from os.path import join, getsize
 from whoosh.index import create_in, exists_in, open_dir
 from whoosh.fields import Schema, TEXT, ID
 
+sys.path.append(os.environ['AIL_BIN'])
+
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from packages import Paste
-from Helper import Process
 
 
 class Indexer(AbstractModule):
@@ -102,6 +103,7 @@ class Indexer(AbstractModule):
         docpath = message.split(" ", -1)[-1]
         paste = PST.get_p_content()
         self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
+        print(f"Indexing - {self.indexname}: {docpath}")
 
         # Avoid calculating the index's size at each message
         if(time.time() - self.last_refresh > self.TIME_WAIT):


@@ -15,13 +15,16 @@ RSA private key, certificate messages
 ##################################
 # Import External packages
 ##################################
+import os
+import sys
 import time
 from enum import Enum
 
+sys.path.append(os.environ['AIL_BIN'])
+
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from packages.Item import Item


@@ -19,8 +19,11 @@ import os
 import sys
 import re
 
-# project packages
-from module.abstract_module import AbstractModule
+sys.path.append(os.environ['AIL_BIN'])
+
+##################################
+# Import Project packages
+##################################
+from modules.abstract_module import AbstractModule
 from lib.ConfigLoader import ConfigLoader
 from lib import crawlers
 from lib import regex_helper


@@ -11,13 +11,13 @@ from base64 import b64encode
 sys.path.append(os.environ['AIL_BIN'])
 
 # Modules Classes
-from ApiKey import ApiKey
-from Categ import Categ
-from CreditCards import CreditCards
-from DomClassifier import DomClassifier
-from Global import Global
-from Keys import Keys
-from Onion import Onion
+from modules.ApiKey import ApiKey
+from modules.Categ import Categ
+from modules.CreditCards import CreditCards
+from modules.DomClassifier import DomClassifier
+from modules.Global import Global
+from modules.Keys import Keys
+from modules.Onion import Onion
 
 # project packages
 import lib.crawlers as crawlers
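With the classes now importable from the modules package, a test can instantiate a module directly and drive compute() by hand. A hedged sketch (the item path and score below are hypothetical; the real tests prepare fixtures and queues first):

import os
import sys
sys.path.append(os.environ['AIL_BIN'])

from modules.ApiKey import ApiKey

module = ApiKey()
# Messages are "<item_id> <score>" pairs, as in the compute() methods above
module.compute('submitted/2021/06/02/example.gz 3')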