chg: [DB Migration] UI: Extract + highlight leaks and trackers match, Data Retention save object first/last date, Refactor Tools

pull/594/head
Terrtia 2022-12-19 16:38:20 +01:00
parent f9715408be
commit bf71c9ba99
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
26 changed files with 883 additions and 873 deletions
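This commit standardizes extraction results as (start, end, value) offset tuples so the UI can highlight leak and tracker matches inside an item's content. As a rough, hedged sketch of how such tuples can drive highlighting (illustrative only, not code from this commit; the highlight() helper is hypothetical):

import html

def highlight(content, matches):
    # matches: non-overlapping (start, end, value) tuples sorted by start offset
    out = []
    cursor = 0
    for start, end, value in matches:
        out.append(html.escape(content[cursor:start]))
        out.append(f'<mark>{html.escape(content[start:end])}</mark>')
        cursor = end
    out.append(html.escape(content[cursor:]))
    return ''.join(out)

# Example: one fake match over an email address
print(highlight('leak: test@example.com here', [(6, 22, 'test@example.com')]))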

View File

@@ -375,6 +375,7 @@ def items_migration():
     # item = Items.Item(item_id)
     # item.set_father(father_id)

+    # DUPLICATES
     for tag in ['infoleak:automatic-detection="credential"']:  # Creditcards, Mail, Keys ???????????????????????????????
         print(f'Duplicate migration: {tag}')
         tag_first = get_tag_first_seen(tag)
@@ -389,6 +390,10 @@ def items_migration():
                 print(algo, duplicates_dict[id_2][algo], id_2)
                 item.add_duplicate(algo, duplicates_dict[id_2][algo], id_2)
+
+    # ITEM FIRST/LAST DATE
+    Items._manual_set_items_date_first_last()
+
     # TODO: test cookies migration
     # TODO: migrate auto crawlers
@@ -840,14 +845,14 @@ if __name__ == '__main__':
     #core_migration()
     #user_migration()
     #tags_migration()
-    # items_migration()
+    items_migration()
     #crawler_migration()
     # domain_migration() # TO TEST ###########################
     #decodeds_migration()
     # screenshots_migration()
-    #subtypes_obj_migration()
+    subtypes_obj_migration()
     # ail_2_ail_migration()
-    trackers_migration()
+    # trackers_migration()
     # investigations_migration()
     # statistics_migration()

View File

@@ -1,753 +0,0 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Tools Module
============================
Search tools output
"""
from Helper import Process
from pubsublogger import publisher
import os
import re
import sys
import time
import redis
import signal
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item
class TimeoutException(Exception):
    pass

def timeout_handler(signum, frame):
    raise TimeoutException

signal.signal(signal.SIGALRM, timeout_handler)

def search_tools(item_id, item_content):
    tools_in_item = False

    for tools_name in tools_dict:
        tool_dict = tools_dict[tools_name]

        regex_match = False
        for regex_nb in list(range(tool_dict['nb_regex'])):
            regex_index = regex_nb + 1
            regex = tool_dict['regex{}'.format(regex_index)]

            signal.alarm(tool_dict['max_execution_time'])
            try:
                tools_found = re.findall(regex, item_content)
            except TimeoutException:
                tools_found = []
                p.incr_module_timeout_statistic()  # add encoder type
                print("{0} processing timeout".format(item_id))
                continue
            else:
                signal.alarm(0)

            if not tools_found:
                regex_match = False
                break
            else:
                regex_match = True
                if 'tag{}'.format(regex_index) in tool_dict:
                    print('{} found: {}'.format(item_id, tool_dict['tag{}'.format(regex_index)]))
                    msg = '{};{}'.format(tool_dict['tag{}'.format(regex_index)], item_id)
                    p.populate_set_out(msg, 'Tags')

        if regex_match:
            print('{} found: {}'.format(item_id, tool_dict['name']))
            # Tag Item
            msg = '{};{}'.format(tool_dict['tag'], item_id)
            p.populate_set_out(msg, 'Tags')

    if tools_in_item:
        # send to duplicate module
        p.populate_set_out(item_id, 'Duplicate')
default_max_execution_time = 30
tools_dict = {
'sqlmap': {
'name': 'sqlmap',
'regex1': r'Usage of sqlmap for attacking targets without|all tested parameters do not appear to be injectable|sqlmap identified the following injection point|Title:[^\n]*((error|time|boolean)-based|stacked queries|UNION query)',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="sqlmap-tool"', # tag if all regex match
},
'wig': {
'name': 'wig',
'regex1': r'(?s)wig - WebApp Information Gatherer.+?_{10,}',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="wig-tool"', # tag if all regex match
},
'dmytry': {
'name': 'dmitry',
'regex1': r'(?s)Gathered (TCP Port|Inet-whois|Netcraft|Subdomain|E-Mail) information for.+?-{10,}',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dmitry-tool"', # tag if all regex match
},
'inurlbr': {
'name': 'inurlbr',
'regex1': r'Usage of INURLBR for attacking targets without prior mutual consent is illegal',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="inurlbr-tool"', # tag if all regex match
},
'wafw00f': {
'name': 'wafw00f',
'regex1': r'(?s)WAFW00F - Web Application Firewall Detection Tool.+?Checking',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="wafw00f-tool"', # tag if all regex match
},
'sslyze': {
'name': 'sslyze',
'regex1': r'(?s)PluginSessionRenegotiation.+?SCAN RESULTS FOR',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="sslyze-tool"', # tag if all regex match
},
'nmap': {
'name': 'nmap',
'regex1': r'(?s)Nmap scan report for.+?Host is',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="nmap-tool"', # tag if all regex match
},
'dnsenum': {
'name': 'dnsenum',
'regex1': r'(?s)dnsenum(\.pl)? VERSION:.+?Trying Zone Transfer',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dnsenum-tool"', # tag if all regex match
},
'knock': {
'name': 'knock',
'regex1': r'I scannig with my internal wordlist',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="knock-tool"', # tag if all regex match
},
'nikto': {
'name': 'nikto',
'regex1': r'(?s)\+ Target IP:.+?\+ Start Time:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="nikto-tool"', # tag if all regex match
},
'dnscan': {
'name': 'dnscan',
'regex1': r'(?s)\[\*\] Processing domain.+?\[\+\] Getting nameservers.+?records found',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dnscan-tool"', # tag if all regex match
},
'dnsrecon': {
'name': 'dnsrecon',
'regex1': r'Performing General Enumeration of Domain:|Performing TLD Brute force Enumeration against',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dnsrecon-tool"', # tag if all regex match
},
'striker': {
'name': 'striker',
'regex1': r'Crawling the target for fuzzable URLs|Honeypot Probabilty:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="striker-tool"', # tag if all regex match
},
'rhawk': {
'name': 'rhawk',
'regex1': r'S U B - D O M A I N F I N D E R',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="rhawk-tool"', # tag if all regex match
},
'uniscan': {
'name': 'uniscan',
'regex1': r'\| \[\+\] E-mail Found:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="uniscan-tool"', # tag if all regex match
},
'masscan': {
'name': 'masscan',
'regex1': r'(?s)Starting masscan [\d.]+.+?Scanning|bit.ly/14GZzcT',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="masscan-tool"', # tag if all regex match
},
'msfconsole': {
'name': 'msfconsole',
'regex1': r'=\[ metasploit v[\d.]+.+?msf >',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="msfconsole-tool"', # tag if all regex match
},
'amap': {
'name': 'amap',
'regex1': r'\bamap v[\d.]+ \(www.thc.org/thc-amap\)',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="amap-tool"', # tag if all regex match
},
'automater': {
'name': 'automater',
'regex1': r'(?s)\[\*\] Checking.+?_+ Results found for:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="automater-tool"', # tag if all regex match
},
'braa': {
'name': 'braa',
'regex1': r'\bbraa public@[\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="braa-tool"', # tag if all regex match
},
'ciscotorch': {
'name': 'ciscotorch',
'regex1': r'Becase we need it',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="ciscotorch-tool"', # tag if all regex match
},
'theharvester': {
'name': 'theharvester',
'regex1': r'Starting harvesting process for domain:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="theharvester-tool"', # tag if all regex match
},
'sslstrip': {
'name': 'sslstrip',
'regex1': r'sslstrip [\d.]+ by Moxie Marlinspike running',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="sslstrip-tool"', # tag if all regex match
},
'sslcaudit': {
'name': 'sslcaudit',
'regex1': r'# filebag location:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="sslcaudit-tool"', # tag if all regex match
},
'smbmap': {
'name': 'smbmap',
'regex1': r'\[\+\] Finding open SMB ports\.\.\.',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="smbmap-tool"', # tag if all regex match
},
'reconng': {
'name': 'reconng',
'regex1': r'\[\*\] Status: unfixed|\[recon-ng\]\[default\]',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="reconng-tool"', # tag if all regex match
},
'p0f': {
'name': 'p0f',
'regex1': r'\bp0f [^ ]+ by Michal Zalewski',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="p0f-tool"', # tag if all regex match
},
'hping3': {
'name': 'hping3',
'regex1': r'\bHPING [^ ]+ \([^)]+\): [^ ]+ mode set',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="hping3-tool"', # tag if all regex match
},
'enum4linux': {
'name': 'enum4linux',
'regex1': r'Starting enum4linux v[\d.]+|\| Target Information \|',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="enum4linux-tool"', # tag if all regex match
},
'dnstracer': {
'name': 'dnstracer',
'regex1': r'(?s)Tracing to.+?DNS HEADER \(send\)',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dnstracer-tool"', # tag if all regex match
},
'dnmap': {
'name': 'dnmap',
'regex1': r'dnmap_(client|server)|Nmap output files stored in \'nmap_output\' directory',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dnmap-tool"', # tag if all regex match
},
'arpscan': {
'name': 'arpscan',
'regex1': r'Starting arp-scan [^ ]+ with \d+ hosts',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="arpscan-tool"', # tag if all regex match
},
'cdpsnarf': {
'name': 'cdpsnarf',
'regex1': r'(?s)CDPSnarf v[^ ]+.+?Waiting for a CDP packet\.\.\.',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="cdpsnarf-tool"', # tag if all regex match
},
'dnsmap': {
'name': 'dnsmap',
'regex1': r'DNS Network Mapper by pagvac',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dnsmap-tool"', # tag if all regex match
},
'dotdotpwn': {
'name': 'dotdotpwn',
'regex1': r'DotDotPwn v[^ ]+|dotdotpwn@sectester.net|\[\+\] Creating Traversal patterns',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dotdotpwn-tool"', # tag if all regex match
},
'searchsploit': {
'name': 'searchsploit',
'regex1': r'(exploits|shellcodes)/|searchsploit_rc|Exploit Title',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="searchsploit-tool"', # tag if all regex match
},
'fierce': {
'name': 'fierce',
'regex1': r'(?s)Trying zone transfer first.+Checking for wildcard DNS',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="fierce-tool"', # tag if all regex match
},
'firewalk': {
'name': 'firewalk',
'regex1': r'Firewalk state initialization completed successfully|Ramping phase source port',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="firewalk-tool"', # tag if all regex match
},
'fragroute': {
'name': 'fragroute',
'regex1': r'\bfragroute: tcp_seg -> ip_frag',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="fragroute-tool"', # tag if all regex match
},
'fragrouter': {
'name': 'fragrouter',
'regex1': r'fragrouter: frag-\d+:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="fragrouter-tool"', # tag if all regex match
},
'goofile': {
'name': 'goofile',
'regex1': r'code.google.com/p/goofile\b',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="goofile-tool"', # tag if all regex match
},
'intrace': {
'name': 'intrace',
'regex1': r'\bInTrace [\d.]+ \-\-',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="intrace-tool"', # tag if all regex match
},
'ismtp': {
'name': 'ismtp',
'regex1': r'Testing SMTP server \[user enumeration\]',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="ismtp-tool"', # tag if all regex match
},
'lbd': {
'name': 'lbd',
'regex1': r'Checking for (DNS|HTTP)-Loadbalancing',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="lbd-tool"', # tag if all regex match
},
'miranda': {
'name': 'miranda',
'regex1': r'Entering discovery mode for \'upnp:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="miranda-tool"', # tag if all regex match
},
'ncat': {
'name': 'ncat',
'regex1': r'nmap.org/ncat',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="ncat-tool"', # tag if all regex match
},
'ohrwurm': {
'name': 'ohrwurm',
'regex1': r'\bohrwurm-[\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="ohrwurm-tool"', # tag if all regex match
},
'oscanner': {
'name': 'oscanner',
'regex1': r'Loading services/sids from service file',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="oscanner-tool"', # tag if all regex match
},
'sfuzz': {
'name': 'sfuzz',
'regex1': r'AREALLYBADSTRING|sfuzz/sfuzz',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="sfuzz-tool"', # tag if all regex match
},
'sidguess': {
'name': 'sidguess',
'regex1': r'SIDGuesser v[\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="sidguess-tool"', # tag if all regex match
},
'sqlninja': {
'name': 'sqlninja',
'regex1': r'Sqlninja rel\. [\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="sqlninja-tool"', # tag if all regex match
},
'sqlsus': {
'name': 'sqlsus',
'regex1': r'sqlsus version [\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="sqlsus-tool"', # tag if all regex match
},
'dnsdict6': {
'name': 'dnsdict6',
'regex1': r'Starting DNS enumeration work on',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dnsdict6-tool"', # tag if all regex match
},
'unixprivesccheck': {
'name': 'unixprivesccheck',
'regex1': r'Recording Interface IP addresses',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="unixprivesccheck-tool"', # tag if all regex match
},
'yersinia': {
'name': 'yersinia',
'regex1': r'yersinia@yersinia.net',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="yersinia-tool"', # tag if all regex match
},
'armitage': {
'name': 'armitage',
'regex1': r'\[\*\] Starting msfrpcd for you',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="armitage-tool"', # tag if all regex match
},
'backdoorfactory': {
'name': 'backdoorfactory',
'regex1': r'\[\*\] In the backdoor module',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="backdoorfactory-tool"', # tag if all regex match
},
'beef': {
'name': 'beef',
'regex1': r'Please wait as BeEF services are started',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="beef-tool"', # tag if all regex match
},
'cat': {
'name': 'cat',
'regex1': r'Cisco Auditing Tool.+?g0ne',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="cat-tool"', # tag if all regex match
},
'cge': {
'name': 'cge',
'regex1': r'Vulnerability successful exploited with \[',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="cge-tool"', # tag if all regex match
},
'john': {
'name': 'john',
'regex1': r'John the Ripper password cracker, ver:|Loaded \d+ password hash \(',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="john-tool"', # tag if all regex match
},
'keimpx': {
'name': 'keimpx',
'regex1': r'\bkeimpx [\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="keimpx-tool"', # tag if all regex match
},
'maskprocessor': {
'name': 'maskprocessor',
'regex1': r'mp by atom, High-Performance word generator',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="maskprocessor-tool"', # tag if all regex match
},
'ncrack': {
'name': 'ncrack',
'regex1': r'Starting Ncrack[^\n]+http://ncrack.org',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="ncrack-tool"', # tag if all regex match
},
'patator': {
'name': 'patator',
'regex1': r'http://code.google.com/p/patator/|Starting Patator v',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="patator-tool"', # tag if all regex match
},
'phrasendrescher': {
'name': 'phrasendrescher',
'regex1': r'phrasen\|drescher [\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="phrasendrescher-tool"', # tag if all regex match
},
'polenum': {
'name': 'polenum',
'regex1': r'\[\+\] Password Complexity Flags:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="polenum-tool"', # tag if all regex match
},
'rainbowcrack': {
'name': 'rainbowcrack',
'regex1': r'Official Website: http://project-rainbowcrack.com/',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="rainbowcrack-tool"', # tag if all regex match
},
'rcracki_mt': {
'name': 'rcracki_mt',
'regex1': r'Found \d+ rainbowtable files\.\.\.',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="rcracki_mt-tool"', # tag if all regex match
},
'tcpdump': {
'name': 'tcpdump',
'regex1': r'tcpdump: listening on.+capture size \d+|\d+ packets received by filter',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="tcpdump-tool"', # tag if all regex match
},
'hydra': {
'name': 'hydra',
'regex1': r'Hydra \(http://www.thc.org/thc-hydra\)',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="hydra-tool"', # tag if all regex match
},
'netcat': {
'name': 'netcat',
'regex1': r'Listening on \[[\d.]+\] \(family',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="netcat-tool"', # tag if all regex match
},
'nslookup': {
'name': 'nslookup',
'regex1': r'Non-authoritative answer:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="nslookup-tool"', # tag if all regex match
},
'dig': {
'name': 'dig',
'regex1': r'; <<>> DiG [\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dig-tool"', # tag if all regex match
},
'whois': {
'name': 'whois',
'regex1': r'(?i)Registrar WHOIS Server:|Registrar URL: http://|DNSSEC: unsigned|information on Whois status codes|REGISTERED, DELEGATED|[Rr]egistrar:|%[^\n]+(WHOIS|2016/679)',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="whois-tool"', # tag if all regex match
},
'nessus': {
'name': 'nessus',
'regex1': r'nessus_(report_(get|list|exploits)|scan_(new|status))|nessuscli|nessusd|nessus-service',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="nessus-tool"', # tag if all regex match
},
'openvas': {
'name': 'openvas',
'regex1': r'/openvas/',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="openvas-tool"', # tag if all regex match
},
'golismero': {
'name': 'golismero',
'regex1': r'GoLismero[\n]+The Web Knife',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="golismero-tool"', # tag if all regex match
},
'wpscan': {
'name': 'wpscan',
'regex1': r'WordPress Security Scanner by the WPScan Team|\[\+\] Interesting header:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="wpscan-tool"', # tag if all regex match
},
'skipfish': {
'name': 'skipfish',
'regex1': r'\[\+\] Sorting and annotating crawl nodes:|skipfish version [\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="skipfish-tool"', # tag if all regex match
},
'arachni': {
'name': 'arachni',
'regex1': r'With the support of the community and the Arachni Team|\[\*\] Waiting for plugins to settle\.\.\.',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="arachni-tool"', # tag if all regex match
},
'dirb': {
'name': 'dirb',
'regex1': r'==> DIRECTORY:|\bDIRB v[\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dirb-tool"', # tag if all regex match
},
'joomscan': {
'name': 'joomscan',
'regex1': r'OWASP Joomla! Vulnerability Scanner v[\d.]+',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="joomscan-tool"', # tag if all regex match
},
'jbossautopwn': {
'name': 'jbossautopwn',
'regex1': r'\[x\] Now creating BSH script\.\.\.|\[x\] Now deploying \.war file:',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="jbossautopwn-tool"', # tag if all regex match
},
'grabber': {
'name': 'grabber',
'regex1': r'runSpiderScan @',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="grabber-tool"', # tag if all regex match
},
'fimap': {
'name': 'fimap',
'regex1': r'Automatic LFI/RFI scanner and exploiter',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="fimap-tool"', # tag if all regex match
},
'dsxs': {
'name': 'dsxs',
'regex1': r'Damn Small XSS Scanner \(DSXS\)',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dsxs-tool"', # tag if all regex match
},
'dsss': {
'name': 'dsss',
'regex1': r'Damn Small SQLi Scanner \(DSSS\)',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dsss-tool"', # tag if all regex match
},
'dsjs': {
'name': 'dsjs',
'regex1': r'Damn Small JS Scanner \(DSJS\)',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dsjs-tool"', # tag if all regex match
},
'dsfs': {
'name': 'dsfs',
'regex1': r'Damn Small FI Scanner \(DSFS\)',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="dsfs-tool"', # tag if all regex match
},
'identywaf': {
'name': 'identywaf',
'regex1': r'\[o\] initializing handlers\.\.\.',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="identywaf-tool"', # tag if all regex match
},
'whatwaf': {
'name': 'whatwaf',
'regex1': r'<sCRIPT>ALeRt.+?WhatWaf\?',
'nb_regex': 1,
'max_execution_time': default_max_execution_time,
'tag': 'infoleak:automatic-detection="whatwaf-tool"', # tag if all regex match
}
}
if __name__ == "__main__":
publisher.port = 6380
publisher.channel = "Script"
config_section = 'Tools'
# # TODO: add duplicate
# Setup the I/O queues
p = Process(config_section)
# Sent to the logging a description of the module
publisher.info("Run Tools module ")
# Endless loop getting messages from the input queue
while True:
# Get one message from the input queue
item_id = p.get_from_set()
if item_id is None:
publisher.debug("{} queue is empty, waiting".format(config_section))
time.sleep(1)
continue
# Do something with the message from the queue
item_content = Item.get_item_content(item_id)
search_tools(item_id, item_content)

View File

@@ -295,10 +295,10 @@ def get_item_all_trackers_uuid(obj_id):
     #obj_type = 'item'
     return r_serv_tracker.smembers(f'obj:trackers:item:{obj_id}')

-def is_obj_tracked(obj_type, subtype, id):
+def is_obj_tracked(obj_type, subtype, obj_id):
     return r_serv_tracker.exists(f'obj:trackers:{obj_type}:{obj_id}')

-def get_obj_all_trackers(obj_type, subtype, id):
+def get_obj_all_trackers(obj_type, subtype, obj_id):
     return r_serv_tracker.smembers(f'obj:trackers:{obj_type}:{obj_id}')

 # # TODO: ADD all Objects + Subtypes
@@ -664,6 +664,10 @@ def get_all_tracked_yara_files(filter_disabled=False):
             pass
     return yara_files

+def get_yara_rule_by_uuid(tracker_uuid):
+    yar_path = get_tracker_by_uuid(tracker_uuid)
+    return yara.compile(filepath=os.path.join(get_yara_rules_dir(), yar_path))
+
 def reload_yara_rules():
     yara_files = get_all_tracked_yara_files()
     # {uuid: filename}

View File

@@ -11,46 +11,85 @@ sys.path.append(os.environ['AIL_BIN'])
 from lib import ConfigLoader

 config_loader = ConfigLoader.ConfigLoader()
-r_serv_db = config_loader.get_db_conn("Kvrocks_Objects")
+r_obj = config_loader.get_db_conn("Kvrocks_Objects")
+r_cache = config_loader.get_redis_conn("Redis_Cache")
 config_loader = None

-def get_first_object_date(object_type, subtype, field=''):
-    first_date = r_serv_db.zscore('objs:first_date', f'{object_type}:{subtype}:{field}')
-    if not first_date:
-        first_date = 99999999
-    return int(first_date)
-
-def get_last_object_date(object_type, subtype, field=''):
-    last_date = r_serv_db.zscore('objs:last_date', f'{object_type}:{subtype}:{field}')
-    if not last_date:
-        last_date = 0
-    return int(last_date)
-
-def _set_first_object_date(object_type, subtype, date, field=''):
-    return r_serv_db.zadd('objs:first_date', {f'{object_type}:{subtype}:{field}': date})
-
-def _set_last_object_date(object_type, subtype, date, field=''):
-    return r_serv_db.zadd('objs:last_date', {f'{object_type}:{subtype}:{field}': float(date)})
-
-def update_first_object_date(object_type, subtype, date, field=''):
-    first_date = get_first_object_date(object_type, subtype, field=field)
-    if int(date) < first_date:
-        _set_first_object_date(object_type, subtype, date, field=field)
-        return date
-    else:
-        return first_date
-
-def update_last_object_date(object_type, subtype, date, field=''):
-    last_date = get_last_object_date(object_type, subtype, field=field)
-    if int(date) > last_date:
-        _set_last_object_date(object_type, subtype, date, field=field)
-        return date
-    else:
-        return last_date
-
-def update_object_date(object_type, subtype, date, field=''):
-    update_first_object_date(object_type, subtype, date, field=field)
-    update_last_object_date(object_type, subtype, date, field=field)
+# TODO HOW TO HANDLE SCREENSHOTS ????
+#      SCREENSHOT ID -> MEMBER OF ITEMS -> DATES
+#      META SCREENSHOT -> NB DOMAINS + FIRST/LAST SEEN ???
+
+# TAG /!\ DIFF TAG CREDENTIAL ITEM != DOMAIN:CREDENTIAL
+#      -> IN OBJECT TYPE ?????
+#      OR SPECIAL FIRST SEEN / LAST SEEN IN TAG LIB
+
+# DOMAIN -> subtype = domain type
+
+# TAG -> type = "TAG"
+# TAG -> subtype = "OBJ:"tag
+
+def load_obj_date_first_last():
+    # LOAD FIRST DATE
+    dates = r_obj.hgetall(f'date:first')
+    for str_row in dates:
+        obj_type, subtype = str_row.split(':', 1)
+        date = dates[str_row]
+        _set_obj_date_first(date, obj_type, subtype=subtype)
+    # LOAD LAST DATE
+    dates = r_obj.hgetall(f'date:last')
+    for str_row in dates:
+        obj_type, subtype = str_row.split(':', 1)
+        date = dates[str_row]
+        _set_obj_date_last(date, obj_type, subtype=subtype)
+
+# MAKE IT WORK WITH TAGS
+def get_obj_date_first(obj_type, subtype='', r_int=False):
+    first = r_cache.hget(f'date:first', f'{obj_type}:{subtype}')
+    if not first:
+        first = r_obj.hget(f'date:first', f'{obj_type}:{subtype}')
+    if r_int:
+        if not first:
+            return 99999999
+        else:
+            return int(first)
+    return first
+
+def get_obj_date_last(obj_type, subtype='', r_int=False):
+    last = r_cache.hget(f'date:last', f'{obj_type}:{subtype}')
+    if not last:
+        last = r_obj.hget(f'date:last', f'{obj_type}:{subtype}')
+    if r_int:
+        if not last:
+            return 0
+        else:
+            return int(last)
+    return last
+
+# FIRST
+def _set_obj_date_first(date, obj_type, subtype=''):
+    r_cache.hset(f'date:first', f'{obj_type}:{subtype}', date)
+
+def set_obj_date_first(date, obj_type, subtype=''):
+    _set_obj_date_first(date, obj_type, subtype=subtype)
+    r_obj.hset(f'date:first', f'{obj_type}:{subtype}', date)
+
+# LAST
+def _set_obj_date_last(date, obj_type, subtype=''):
+    r_cache.hset(f'date:last', f'{obj_type}:{subtype}', date)
+
+def set_obj_date_last(date, obj_type, subtype=''):
+    _set_obj_date_last(date, obj_type, subtype=subtype)
+    r_obj.hset(f'date:last', f'{obj_type}:{subtype}', date)
+
+def update_obj_date(date, obj_type, subtype=''):
+    date = int(date)
+    first = get_obj_date_first(obj_type, subtype=subtype, r_int=True)
+    last = get_obj_date_last(obj_type, subtype=subtype, r_int=True)
+    if date < first:
+        set_obj_date_first(date, obj_type, subtype=subtype)
+    if date > last:
+        set_obj_date_last(date, obj_type, subtype=subtype)
+
+###############################################################
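The new engine keeps one global first/last date per object type (and optional subtype), writing through to both the Redis cache and Kvrocks. A hedged usage sketch (the date and object type are illustrative; assumes an initialized AIL environment):

from lib import data_retention_engine

# Record that an 'item' dated 2022-12-19 was seen; update_obj_date()
# only widens the stored [first, last] range, it never narrows it.
data_retention_engine.update_obj_date(20221219, 'item')
first = data_retention_engine.get_obj_date_first('item', r_int=True)
last = data_retention_engine.get_obj_date_last('item', r_int=True)
assert first <= 20221219 <= last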

bin/lib/module_extractor.py (new executable file, 148 lines)
View File

@@ -0,0 +1,148 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*

import os
import sys
import time
import yara

sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
import lib.objects.ail_objects
from lib.objects.Items import Item

from lib import correlations_engine
from lib import regex_helper
from lib.ConfigLoader import ConfigLoader

from lib import Tracker

from modules.CreditCards import CreditCards
from modules.Iban import Iban
from modules.Mail import Mail
from modules.Onion import Onion
from modules.Tools import Tools

creditCards = CreditCards()
ibans = Iban()
mails = Mail()
onions = Onion()
tools = Tools()

config_loader = ConfigLoader()
r_cache = config_loader.get_redis_conn("Redis_Cache")
config_loader = None

r_key = regex_helper.generate_redis_cache_key('extractor')

MODULES = {
    'infoleak:automatic-detection="credit-card"': creditCards,
    'infoleak:automatic-detection="iban"': ibans,
    'infoleak:automatic-detection="mail"': mails,
    'infoleak:automatic-detection="onion"': onions,
    # APIkey ???
    # Credentials
    # Zerobins
    # CERTIFICATE + KEYS ???
    # SQL Injetction / Libinjection ???
}
for tool_name in tools.get_tools():
    MODULES[f'infoleak:automatic-detection="{tool_name}-tool"'] = tools

def get_correl_match(extract_type, obj_id, content, filter_subtypes=['']):
    correl = correlations_engine.get_correlation_by_correl_type('item', '', obj_id, extract_type)
    to_extract = []
    for c in correl:
        subtype, value = c.split(':', 1)
        # if subtype in filter_subtypes:
        to_extract.append(value)
    if to_extract:
        return regex_helper.regex_finditer(r_key, '|'.join(to_extract), obj_id, content)
    else:
        return []

def _get_yara_match(data):
    for row in data.get('strings'):
        start, i, value = row
        value = value.decode()
        end = start + len(value)
        r_cache.sadd(f'extractor:yara:match:{r_key}', f'{start}:{end}:{value}')
        r_cache.expire(f'extractor:yara:match:{r_key}', 300)
    return yara.CALLBACK_CONTINUE

# TODO RETRO HUNTS
def get_tracker_match(obj_id, content):
    trackers = Tracker.get_obj_all_trackers('item', '', obj_id)
    for tracker_uuid in trackers:
        tracker_type = Tracker.get_tracker_type(tracker_uuid)
        tracker = Tracker.get_tracker_by_uuid(tracker_uuid)
        if tracker_type == 'regex':
            return regex_helper.regex_finditer(r_key, tracker, obj_id, content)
        elif tracker_type == 'yara':
            rule = Tracker.get_yara_rule_by_uuid(tracker_uuid)
            rule.match(data=content, callback=_get_yara_match,
                       which_callbacks=yara.CALLBACK_MATCHES, timeout=30)
            yara_match = r_cache.smembers(f'extractor:yara:match:{r_key}')
            r_cache.delete(f'extractor:yara:match:{r_key}')
            extracted = []
            for match in yara_match:
                start, end, value = match.split(':', 2)
                extracted.append((int(start), int(end), value))
            return extracted
        # elif tracker_type == 'term':  # TODO
        #
        # elif tracker_type == '':
    return []

def extract(obj_id, content=None):
    item = Item(obj_id)
    if not content:
        content = item.get_content()

    extracted = []
    extracted = extracted + get_tracker_match(obj_id, content)
    # print(item.get_tags())
    for tag in item.get_tags():
        if MODULES.get(tag):
            # print(tag)
            module = MODULES.get(tag)
            matches = module.extract(obj_id, content, tag)
            if matches:
                extracted = extracted + matches
    for obj_t in ['cve', 'cryptocurrency', 'username']:  # Decoded, PGP->extract bloc
        matches = get_correl_match(obj_t, obj_id, content)
        if matches:
            extracted = extracted + matches
    from operator import itemgetter
    extracted = sorted(extracted, key=itemgetter(0))
    print(extracted)
    return extracted

if __name__ == '__main__':
    t0 = time.time()
    obj_id = 'crawled/2022/09/15/circl.lu179c7903-5b21-452e-9f25-4b61d9934e2b'
    obj_id = 'crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd'
    obj_id = 'submitted/2022/09/13/submitted_ba3ee771-c91c-4f50-9d6a-8558cdac7aeb.gz'
    # obj_id = 'tests/2021/01/01/credit_cards.gz'
    # obj_id = 'crawled/2020/07/20/circl.luc9301321-f1b1-4d91-9082-5eb452b946c5'
    obj_id = 'submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'
    obj_id = 'submitted/2019/09/20/4fb7f02d-1241-4ef4-b17e-80ae76038835.gz'
    extract(obj_id)

    # get_obj_correl('cve', obj_id, content)
    # r = get_tracker_match(obj_id, content)
    # print(r)

    print(time.time() - t0)

View File

@@ -17,7 +17,7 @@ from lib.ConfigLoader import ConfigLoader
 from lib.objects.abstract_subtype_object import AbstractSubtypeObject, get_all_id

 config_loader = ConfigLoader()
-baseurl = config_loader.get_config_str("Notifications", "ail_domain")
 config_loader = None

 digits58 = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'

View File

@@ -21,7 +21,7 @@ from lib.objects.abstract_object import AbstractObject
 from lib.ail_core import paginate_iterator
 from lib.item_basic import get_item_children, get_item_date, get_item_url, get_item_domain, get_item_har
-from lib import data_retention_engine
+from lib.data_retention_engine import update_obj_date

 from packages import Date

@@ -426,7 +426,7 @@ class Domain(AbstractObject):
         except (ValueError, TypeError):
             status = True

-        data_retention_engine.update_object_date('domain', self.domain_type, date)
+        update_obj_date(date, 'domain', self.domain_type)

         # UP
         if status:
             r_crawler.srem(f'full_{self.domain_type}_down', self.id)

View File

@@ -22,6 +22,7 @@ from lib.ail_core import get_ail_uuid
 from lib.objects.abstract_object import AbstractObject
 from lib.ConfigLoader import ConfigLoader
 from lib import item_basic
+from lib.data_retention_engine import update_obj_date

 from flask import url_for

@@ -245,7 +246,7 @@ class Item(AbstractObject):
         return None

     def get_url(self):
-        return r_object.hset(f'meta:item::{self.id}', 'url')
+        return r_object.hget(f'meta:item::{self.id}', 'url')

     def set_crawled(self, url, parent_id):
         r_object.hset(f'meta:item::{self.id}', 'url', url)

@@ -375,6 +376,24 @@ def get_items_by_source(source):
             l_items.append(item_id)
     return l_items

+def _manual_set_items_date_first_last():
+    first = 9999
+    last = 0
+    sources = get_items_sources()
+    for source in sources:
+        dir_source = os.path.join(os.environ['AIL_HOME'], ITEMS_FOLDER, source)
+        for dir_name in os.listdir(dir_source):
+            if os.path.isdir(os.path.join(dir_source, dir_name)):
+                date = int(dir_name)
+                if date < first:
+                    first = date
+                if date > last:
+                    last = date
+    if first != 9999:
+        update_obj_date(first, 'item')
+    if last != 0:
+        update_obj_date(last, 'item')

 ################################################################################
 ################################################################################
 ################################################################################

View File

@@ -15,6 +15,7 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 from lib.ConfigLoader import ConfigLoader
 from lib.objects.abstract_object import AbstractObject
+# from lib import data_retention_engine

 config_loader = ConfigLoader()
 r_serv_metadata = config_loader.get_db_conn("Kvrocks_Objects")

View File

@@ -19,6 +19,7 @@ sys.path.append(os.environ['AIL_BIN'])
 from lib.objects.abstract_object import AbstractObject
 from lib.ConfigLoader import ConfigLoader
 from lib.item_basic import is_crawled, get_item_domain
+from lib.data_retention_engine import update_obj_date

 from packages import Date

@@ -41,10 +42,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
         super().__init__(obj_type, id)

     def exists(self):
-        return r_object.exists(f'{self.type}:meta:{self.id}')
+        return r_object.exists(f'meta:{self.type}:{self.id}')

     def get_first_seen(self, r_int=False):
-        first_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'first_seen')
+        first_seen = r_object.hget(f'meta:{self.type}:{self.id}', 'first_seen')
         if r_int:
             if first_seen:
                 return int(first_seen)
@@ -54,7 +55,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
         return first_seen

     def get_last_seen(self, r_int=False):
-        last_seen = r_object.hget(f'{self.type}:meta:{self.id}', 'last_seen')
+        last_seen = r_object.hget(f'meta:{self.type}:{self.id}', 'last_seen')
         if r_int:
             if last_seen:
                 return int(last_seen)
@@ -64,7 +65,7 @@ class AbstractDaterangeObject(AbstractObject, ABC):
         return last_seen

     def get_nb_seen(self):
-        return r_object.hget(f'{self.type}:meta:{self.id}', 'nb')
+        return r_object.hget(f'meta:{self.type}:{self.id}', 'nb')

     def get_nb_seen_by_date(self, date):
         nb = r_object.hget(f'{self.type}:date:{date}', self.id)

@@ -82,10 +83,10 @@ class AbstractDaterangeObject(AbstractObject, ABC):
         return meta_dict

     def set_first_seen(self, first_seen):
-        r_object.hset(f'{self.type}:meta:{self.id}', 'first_seen', first_seen)
+        r_object.hset(f'meta:{self.type}:{self.id}', 'first_seen', first_seen)

     def set_last_seen(self, last_seen):
-        r_object.hset(f'{self.type}:meta:{self.id}', 'last_seen', last_seen)
+        r_object.hset(f'meta:{self.type}:{self.id}', 'last_seen', last_seen)

     def update_daterange(self, date):
         date = int(date)

@@ -114,12 +115,13 @@ class AbstractDaterangeObject(AbstractObject, ABC):
             r_object.sadd(f'{self.type}:all', self.id)
         else:
             self.update_daterange(date)
+        update_obj_date(date, self.type)

         # NB Object seen by day
         r_object.hincrby(f'{self.type}:date:{date}', self.id, 1)
         r_object.zincrby(f'{self.type}:date:{date}', 1, self.id) # # # # # # # # # #

         # NB Object seen
-        r_object.hincrby(f'{self.type}:meta:{self.id}', 'nb', 1)
+        r_object.hincrby(f'meta:{self.type}:{self.id}', 'nb', 1)

         # Correlations
         self.add_correlation('item', '', item_id)

View File

@@ -19,12 +19,14 @@ sys.path.append(os.environ['AIL_BIN'])
 from lib.objects.abstract_object import AbstractObject
 from lib.ConfigLoader import ConfigLoader
 from lib.item_basic import is_crawled, get_item_domain
+from lib.data_retention_engine import update_obj_date

 from packages import Date

 # LOAD CONFIG
 config_loader = ConfigLoader()
 r_metadata = config_loader.get_redis_conn("ARDB_Metadata")
+r_object = config_loader.get_db_conn("Kvrocks_Objects")
 config_loader = None

 # # TODO: ADD CORRELATION ENGINE

@@ -47,7 +49,7 @@ class AbstractSubtypeObject(AbstractObject):
         self.subtype = subtype

     def exists(self):
-        return r_metadata.exists(f'{self.type}_metadata_{self.subtype}:{self.id}')
+        return r_object.exists(f'meta:{self.type}:{self.subtype}:{self.id}')

     # def exists(self):
     #     res = r_metadata.zscore(f'{self.type}_all:{self.subtype}', self.id)
@@ -57,7 +59,7 @@ class AbstractSubtypeObject(AbstractObject):
     #         return False

     def get_first_seen(self, r_int=False):
-        first_seen = r_metadata.hget(f'{self.type}_metadata_{self.subtype}:{self.id}', 'first_seen')
+        first_seen = r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'first_seen')
         if r_int:
             if first_seen:
                 return int(first_seen)
@@ -67,7 +69,7 @@ class AbstractSubtypeObject(AbstractObject):
         return first_seen

     def get_last_seen(self, r_int=False):
-        last_seen = r_metadata.hget(f'{self.type}_metadata_{self.subtype}:{self.id}', 'last_seen')
+        last_seen = r_object.hget(f'meta:{self.type}:{self.subtype}:{self.id}', 'last_seen')
         if r_int:
             if last_seen:
                 return int(last_seen)
@@ -94,10 +96,10 @@ class AbstractSubtypeObject(AbstractObject):
         return meta_dict

     def set_first_seen(self, first_seen):
-        r_metadata.hset(f'{self.type}_metadata_{self.subtype}:{self.id}', 'first_seen', first_seen)
+        r_object.hset(f'meta:{self.type}:{self.subtype}:{self.id}', 'first_seen', first_seen)

     def set_last_seen(self, last_seen):
-        r_metadata.hset(f'{self.type}_metadata_{self.subtype}:{self.id}', 'last_seen', last_seen)
+        r_object.hset(f'meta:{self.type}:{self.subtype}:{self.id}', 'last_seen', last_seen)

     def update_daterange(self, date):
         date = int(date)

@@ -124,12 +126,13 @@ class AbstractSubtypeObject(AbstractObject):
     # NEW field => first record(last record)
     #                                       by subtype ??????
-    #        => data Retention + efficicent search
+    #        => data Retention + efficient search
     #
     #
     def add(self, date, item_id):
         self.update_daterange(date)
+        update_obj_date(date, self.type, self.subtype)
         # daily
         r_metadata.hincrby(f'{self.type}:{self.subtype}:{date}', self.id, 1)
         # all subtypes

View File

@@ -71,7 +71,7 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
     if r_set:
         all_items = r_serv_cache.smembers(redis_key)
     else:
-        all_items = r_serv_cache.lrange(redis_key, 0 ,-1)
+        all_items = r_serv_cache.lrange(redis_key, 0, -1)
     r_serv_cache.delete(redis_key)
     proc.terminate()
     return all_items
@@ -80,29 +80,66 @@ def regex_findall(module_name, redis_key, regex, item_id, item_content, max_time
         proc.terminate()
         sys.exit(0)

-def _regex_search(redis_key, regex, item_content):
-    first_occ = regex.search(item_content)
-    if first_occ:
-        r_serv_cache.set(redis_key, first_occ)
+def _regex_finditer(r_key, regex, content):
+    iterator = re.finditer(regex, content)
+    for match in iterator:
+        value = match.group()
+        start = match.start()
+        end = match.end()
+        r_serv_cache.rpush(r_key, f'{start}:{end}:{value}')
+        r_serv_cache.expire(r_key, 360)

-def regex_search(module_name, redis_key, regex, item_id, item_content, max_time=30):
-    proc = Proc(target=_regex_search, args=(redis_key, regex, item_content, ))
+def regex_finditer(r_key, regex, item_id, content, max_time=30):
+    proc = Proc(target=_regex_finditer, args=(r_key, regex, content))
     try:
         proc.start()
         proc.join(max_time)
         if proc.is_alive():
             proc.terminate()
-            Statistics.incr_module_timeout_statistic(module_name)
-            err_mess = f"{module_name}: processing timeout: {item_id}"
+            Statistics.incr_module_timeout_statistic(r_key)
+            err_mess = f"{r_key}: processing timeout: {item_id}"
             print(err_mess)
             publisher.info(err_mess)
-            return None
+            return []
         else:
-            first_occ = r_serv_cache.get(redis_key)
-            r_serv_cache.delete(redis_key)
+            res = r_serv_cache.lrange(r_key, 0, -1)
+            r_serv_cache.delete(r_key)
             proc.terminate()
-            return first_occ
+            all_match = []
+            for match in res:
+                start, end, value = match.split(':', 2)
+                all_match.append((int(start), int(end), value))
+            return all_match
     except KeyboardInterrupt:
-        print("Caught KeyboardInterrupt, terminating workers")
+        print("Caught KeyboardInterrupt, terminating regex worker")
+        proc.terminate()
+        sys.exit(0)
+
+def _regex_search(r_key, regex, content):
+    if re.search(regex, content):
+        r_serv_cache.set(r_key, 1)
+        r_serv_cache.expire(r_key, 360)
+
+def regex_search(r_key, regex, item_id, content, max_time=30):
+    proc = Proc(target=_regex_search, args=(r_key, regex, content))
+    try:
+        proc.start()
+        proc.join(max_time)
+        if proc.is_alive():
+            proc.terminate()
+            Statistics.incr_module_timeout_statistic(r_key)
+            err_mess = f"{r_key}: processing timeout: {item_id}"
+            print(err_mess)
+            publisher.info(err_mess)
+            return False
+        else:
+            if r_serv_cache.exists(r_key):
+                r_serv_cache.delete(r_key)
+                return True
+            else:
+                r_serv_cache.delete(r_key)
+                return False
+    except KeyboardInterrupt:
+        print("Caught KeyboardInterrupt, terminating regex worker")
         proc.terminate()
         sys.exit(0)
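regex_finditer() now runs the pattern in a child process guarded by a timeout and returns every match as a (start, end, value) tuple, while regex_search() is reduced to a plain boolean check. A hedged usage sketch (cache key, item id, and pattern are illustrative; assumes an initialized AIL environment):

from lib import regex_helper

r_key = regex_helper.generate_redis_cache_key('demo')  # per-caller cache key
content = 'see CVE-2022-1234 and CVE-2021-44228'
matches = regex_helper.regex_finditer(r_key, r'CVE-[1-2]\d{1,4}-\d{1,5}',
                                      'item/id/placeholder', content)
for start, end, value in matches:
    print(start, end, value)  # offsets point back into `content`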

View File

@@ -45,41 +45,57 @@ class CreditCards(AbstractModule):
         ]

         self.regex = re.compile('|'.join(cards))
+        self.re_clean_card = r'[^0-9]'

-        # Waiting time in secondes between to message proccessed
+        # Waiting time in seconds between two messages processed
         self.pending_seconds = 10

         # Send module state to logs
         self.redis_logger.info(f"Module {self.module_name} initialized")

+    def get_valid_card(self, card):
+        clean_card = re.sub(self.re_clean_card, '', card)
+        if lib_refine.is_luhn_valid(clean_card):
+            return clean_card
+
+    def extract(self, obj_id, content, tag):
+        extracted = []
+        cards = self.regex_finditer(self.regex, obj_id, content)
+        for card in cards:
+            start, end, value = card
+            if self.get_valid_card(value):
+                extracted.append(card)
+        return extracted
+
     def compute(self, message, r_result=False):
         item_id, score = message.split()
         item = Item(item_id)
         content = item.get_content()
-        all_cards = re.findall(self.regex, content)
+        all_cards = self.regex_findall(self.regex, item.id, content)

         if len(all_cards) > 0:
             # self.redis_logger.debug(f'All matching {all_cards}')
-            creditcard_set = set([])
+            creditcard_set = set()

             for card in all_cards:
-                clean_card = re.sub('[^0-9]', '', card)
-                if lib_refine.is_luhn_valid(clean_card):
-                    self.redis_logger.debug(f'{clean_card} is valid')
-                    creditcard_set.add(clean_card)
+                print(card)
+                valid_card = self.get_valid_card(card)
+                if valid_card:
+                    creditcard_set.add(valid_card)

-            # pprint.pprint(creditcard_set)
+            # print(creditcard_set)
             to_print = f'CreditCard;{item.get_source()};{item.get_date()};{item.get_basename()};'

-            if len(creditcard_set) > 0:
-                self.redis_logger.warning(f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.get_id()}')
+            if creditcard_set:
+                mess = f'{to_print}Checked {len(creditcard_set)} valid number(s);{item.id}'
+                print(mess)
+                self.redis_logger.warning(mess)

-                msg = f'infoleak:automatic-detection="credit-card";{item.get_id()}'
+                msg = f'infoleak:automatic-detection="credit-card";{item.id}'
                 self.send_message_to_queue(msg, 'Tags')

                 if r_result:
                     return creditcard_set
             else:
-                self.redis_logger.info(f'{to_print}CreditCard related;{item.get_id()}')
+                self.redis_logger.info(f'{to_print}CreditCard related;{item.id}')

 if __name__ == '__main__':
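get_valid_card() strips non-digits and keeps only numbers that pass lib_refine.is_luhn_valid(). For reference, a standalone sketch of the standard Luhn checksum such a validator implements (not this library's code):

def luhn_ok(number: str) -> bool:
    # Double every second digit from the right; valid numbers sum to 0 mod 10.
    total = 0
    for i, d in enumerate(int(c) for c in reversed(number)):
        if i % 2 == 1:
            d *= 2
            if d > 9:
                d -= 9
        total += d
    return total % 10 == 0

print(luhn_ok('4532015112830366'))  # True: a well-known Luhn-valid test number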

View File

@@ -26,13 +26,13 @@ from lib.objects import Cves
 from lib.objects.Items import Item

-class Cve(AbstractModule):
+class CveModule(AbstractModule):
     """
-    Cve module for AIL framework
+    CveModule for AIL framework
     """

     def __init__(self):
-        super(Cve, self).__init__()
+        super(CveModule, self).__init__()

         # regex to find CVE
         self.reg_cve = re.compile(r'CVE-[1-2]\d{1,4}-\d{1,5}')
@@ -68,6 +68,6 @@ class Cve(AbstractModule):

 if __name__ == '__main__':

-    module = Cve()
+    module = CveModule()
     # module.run()
     module.compute('crawled/2022/09/15/circl.lu1e4f9721-06dc-404f-aabf-3c3bd0b533bd 9')

View File

@@ -41,7 +41,8 @@ sys.path.append(os.environ['AIL_BIN'])
 ##################################
 from modules.abstract_module import AbstractModule
 from lib.ConfigLoader import ConfigLoader
+from lib.data_retention_engine import update_obj_date
+from lib import item_basic

 class Global(AbstractModule):
     """
@@ -85,7 +86,7 @@ class Global(AbstractModule):
         if len(splitted) == 2:
             item, gzip64encoded = splitted

-            # Remove PASTES_FOLDER from item path (crawled item + submited)
+            # Remove PASTES_FOLDER from item path (crawled item + submitted)
             if self.PASTES_FOLDERS in item:
                 item = item.replace(self.PASTES_FOLDERS, '', 1)
@@ -125,6 +126,8 @@ class Global(AbstractModule):
             if self.PASTES_FOLDERS in item_id:
                 item_id = item_id.replace(self.PASTES_FOLDERS, '', 1)

+            update_obj_date(item_basic.get_item_date(item_id), 'item')
+
             self.send_message_to_queue(item_id)
             self.processed_item += 1
             print(item_id)
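Global now stamps each incoming item's date into the retention engine via item_basic.get_item_date(). Item dates are encoded in the item path; a hedged sketch of that parsing (a hypothetical re-implementation, not the item_basic code):

# Item ids look like 'submitted/2019/09/22/<name>.gz'; the date is the
# YYYY/MM/DD portion of the path, returned as an int such as 20190922.
def item_date_from_id(item_id: str) -> int:
    parts = item_id.split('/')
    year, month, day = parts[-4], parts[-3], parts[-2]
    return int(f'{year}{month}{day}')

print(item_date_from_id('submitted/2019/09/22/97172282-e4c2-4a1e-b82c-c4fb9490a56e.gz'))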

View File

@@ -62,6 +62,17 @@ class Iban(AbstractModule):
                 return True
         return False

+    def extract(self, obj_id, content, tag):
+        extracted = []
+        ibans = self.regex_finditer(self.iban_regex, obj_id, content)
+        for iban in ibans:
+            start, end, value = iban
+            value = ''.join(e for e in value if e.isalnum())
+            if self.is_valid_iban(value):
+                print(value)
+                extracted.append(iban)
+        return extracted
+
     def compute(self, message):
         item = Item(message)
         item_id = item.get_id()

View File

@@ -57,7 +57,7 @@ class Mail(AbstractModule):
         return self.r_cache.exists(f'mxdomain:{mxdomain}')

     def save_mxdomain_in_cache(self, mxdomain):
-        self.r_cache.setex(f'mxdomain:{mxdomain}', 1, datetime.timedelta(days=1))
+        self.r_cache.setex(f'mxdomain:{mxdomain}', datetime.timedelta(days=1), 1)

     def check_mx_record(self, set_mxdomains):
         """Check if emails MX domains are responding.
@@ -118,6 +118,21 @@ class Mail(AbstractModule):
                 print(e)
         return valid_mxdomain

+    def extract(self, obj_id, content, tag):
+        extracted = []
+        mxdomains = {}
+        mails = self.regex_finditer(self.email_regex, obj_id, content)
+        for mail in mails:
+            start, end, value = mail
+            mxdomain = value.rsplit('@', 1)[1].lower()
+            if mxdomain not in mxdomains:
+                mxdomains[mxdomain] = []
+            mxdomains[mxdomain].append(mail)
+        for mx in self.check_mx_record(mxdomains.keys()):
+            for row in mxdomains[mx]:
+                extracted.append(row)
+        return extracted
+
     # # TODO: sanitize mails
     def compute(self, message):
         item_id, score = message.split()

View File

@ -55,6 +55,17 @@ class Onion(AbstractModule):
# TEMP var: SAVE I2P Domain (future I2P crawler) # TEMP var: SAVE I2P Domain (future I2P crawler)
# self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p") # self.save_i2p = config_loader.get_config_boolean("Onion", "save_i2p")
def extract(self, obj_id, content, tag):
extracted = []
onions = self.regex_finditer(self.onion_regex, obj_id, content)
for onion in onions:
start, end, value = onion
url_unpack = crawlers.unpack_url(value)
domain = url_unpack['domain']
if crawlers.is_valid_onion_domain(domain):
extracted.append(onion)
return extracted
def compute(self, message): def compute(self, message):
onion_urls = [] onion_urls = []
domains = [] domains = []
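
Onion.extract() follows the same pattern: re-find the URLs, unpack each one, and keep only matches whose domain validates. is_valid_onion_domain() lives in lib/crawlers, outside this diff; assuming it checks the base32 alphabet and the v2/v3 address lengths, a sketch:

import re

def is_valid_onion_domain_sketch(domain):
    # v2 addresses: 16 base32 chars; v3: 56 base32 chars; both end in .onion
    return bool(re.fullmatch(r'([a-z2-7]{16}|[a-z2-7]{56})\.onion', domain))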

434 bin/modules/Tools.py Executable file
View File

@ -0,0 +1,434 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
Tools Module
============================
Search tools output
"""
import os
import sys
import time
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib.objects.Items import Item
TOOLS = {
'sqlmap': {
'regex': r'Usage of sqlmap for attacking targets without|all tested parameters do not appear to be injectable|sqlmap identified the following injection point|Title:[^\n]*((error|time|boolean)-based|stacked queries|UNION query)',
'tag': 'infoleak:automatic-detection="sqlmap-tool"',
},
'wig': {
'regex': r'(?s)wig - WebApp Information Gatherer.+?_{10,}',
'tag': 'infoleak:automatic-detection="wig-tool"',
},
'dmitry': {
'regex': r'(?s)Gathered (TCP Port|Inet-whois|Netcraft|Subdomain|E-Mail) information for.+?-{10,}',
'tag': 'infoleak:automatic-detection="dmitry-tool"',
},
'inurlbr': {
'regex': r'Usage of INURLBR for attacking targets without prior mutual consent is illegal',
'tag': 'infoleak:automatic-detection="inurlbr-tool"',
},
'wafw00f': {
'regex': r'(?s)WAFW00F - Web Application Firewall Detection Tool.+?Checking',
'tag': 'infoleak:automatic-detection="wafw00f-tool"',
},
'sslyze': {
'regex': r'(?s)PluginSessionRenegotiation.+?SCAN RESULTS FOR',
'tag': 'infoleak:automatic-detection="sslyze-tool"',
},
'nmap': {
'regex': r'(?s)Nmap scan report for.+?Host is',
'tag': 'infoleak:automatic-detection="nmap-tool"',
},
'dnsenum': {
'regex': r'(?s)dnsenum(\.pl)? VERSION:.+?Trying Zone Transfer',
'tag': 'infoleak:automatic-detection="dnsenum-tool"',
},
'knock': {
'regex': r'I scannig with my internal wordlist',
'tag': 'infoleak:automatic-detection="knock-tool"',
},
'nikto': {
'regex': r'(?s)\+ Target IP:.+?\+ Start Time:',
'tag': 'infoleak:automatic-detection="nikto-tool"',
},
'dnscan': {
'regex': r'(?s)\[\*\] Processing domain.+?\[\+\] Getting nameservers.+?records found',
'tag': 'infoleak:automatic-detection="dnscan-tool"',
},
'dnsrecon': {
'regex': r'Performing General Enumeration of Domain:|Performing TLD Brute force Enumeration against',
'tag': 'infoleak:automatic-detection="dnsrecon-tool"',
},
'striker': {
'regex': r'Crawling the target for fuzzable URLs|Honeypot Probabilty:',
'tag': 'infoleak:automatic-detection="striker-tool"',
},
'rhawk': {
'regex': r'S U B - D O M A I N F I N D E R',
'tag': 'infoleak:automatic-detection="rhawk-tool"',
},
'uniscan': {
'regex': r'\| \[\+\] E-mail Found:',
'tag': 'infoleak:automatic-detection="uniscan-tool"',
},
'masscan': {
'regex': r'(?s)Starting masscan [\d.]+.+?Scanning|bit.ly/14GZzcT',
'tag': 'infoleak:automatic-detection="masscan-tool"',
},
'msfconsole': {
'regex': r'=\[ metasploit v[\d.]+.+?msf >',
'tag': 'infoleak:automatic-detection="msfconsole-tool"',
},
'amap': {
'regex': r'\bamap v[\d.]+ \(www.thc.org/thc-amap\)',
'tag': 'infoleak:automatic-detection="amap-tool"',
},
'automater': {
'regex': r'(?s)\[\*\] Checking.+?_+ Results found for:',
'tag': 'infoleak:automatic-detection="automater-tool"',
},
'braa': {
'regex': r'\bbraa public@[\d.]+',
'tag': 'infoleak:automatic-detection="braa-tool"',
},
'ciscotorch': {
'regex': r'Becase we need it',
'tag': 'infoleak:automatic-detection="ciscotorch-tool"',
},
'theharvester': {
'regex': r'Starting harvesting process for domain:',
'tag': 'infoleak:automatic-detection="theharvester-tool"',
},
'sslstrip': {
'regex': r'sslstrip [\d.]+ by Moxie Marlinspike running',
'tag': 'infoleak:automatic-detection="sslstrip-tool"',
},
'sslcaudit': {
'regex': r'# filebag location:',
'tag': 'infoleak:automatic-detection="sslcaudit-tool"',
},
'smbmap': {
'regex': r'\[\+\] Finding open SMB ports\.\.\.',
'tag': 'infoleak:automatic-detection="smbmap-tool"',
},
'reconng': {
'regex': r'\[\*\] Status: unfixed|\[recon-ng\]\[default\]',
'tag': 'infoleak:automatic-detection="reconng-tool"',
},
'p0f': {
'regex': r'\bp0f [^ ]+ by Michal Zalewski',
'tag': 'infoleak:automatic-detection="p0f-tool"',
},
'hping3': {
'regex': r'\bHPING [^ ]+ \([^)]+\): [^ ]+ mode set',
'tag': 'infoleak:automatic-detection="hping3-tool"',
},
'enum4linux': {
'regex': r'Starting enum4linux v[\d.]+|\| Target Information \|',
'tag': 'infoleak:automatic-detection="enum4linux-tool"',
},
'dnstracer': {
'regex': r'(?s)Tracing to.+?DNS HEADER \(send\)',
'tag': 'infoleak:automatic-detection="dnstracer-tool"',
},
'dnmap': {
'regex': r'dnmap_(client|server)|Nmap output files stored in \'nmap_output\' directory',
'tag': 'infoleak:automatic-detection="dnmap-tool"',
},
'arpscan': {
'regex': r'Starting arp-scan [^ ]+ with \d+ hosts',
'tag': 'infoleak:automatic-detection="arpscan-tool"',
},
'cdpsnarf': {
'regex': r'(?s)CDPSnarf v[^ ]+.+?Waiting for a CDP packet\.\.\.',
'tag': 'infoleak:automatic-detection="cdpsnarf-tool"',
},
'dnsmap': {
'regex': r'DNS Network Mapper by pagvac',
'tag': 'infoleak:automatic-detection="dnsmap-tool"',
},
'dotdotpwn': {
'regex': r'DotDotPwn v[^ ]+|dotdotpwn@sectester.net|\[\+\] Creating Traversal patterns',
'tag': 'infoleak:automatic-detection="dotdotpwn-tool"',
},
'searchsploit': {
'regex': r'(exploits|shellcodes)/|searchsploit_rc|Exploit Title',
'tag': 'infoleak:automatic-detection="searchsploit-tool"',
},
'fierce': {
'regex': r'(?s)Trying zone transfer first.+Checking for wildcard DNS',
'tag': 'infoleak:automatic-detection="fierce-tool"',
},
'firewalk': {
'regex': r'Firewalk state initialization completed successfully|Ramping phase source port',
'tag': 'infoleak:automatic-detection="firewalk-tool"',
},
'fragroute': {
'regex': r'\bfragroute: tcp_seg -> ip_frag',
'tag': 'infoleak:automatic-detection="fragroute-tool"',
},
'fragrouter': {
'regex': r'fragrouter: frag-\d+:',
'tag': 'infoleak:automatic-detection="fragrouter-tool"',
},
'goofile': {
'regex': r'code.google.com/p/goofile\b',
'tag': 'infoleak:automatic-detection="goofile-tool"',
},
'intrace': {
'regex': r'\bInTrace [\d.]+ \-\-',
'tag': 'infoleak:automatic-detection="intrace-tool"',
},
'ismtp': {
'regex': r'Testing SMTP server \[user enumeration\]',
'tag': 'infoleak:automatic-detection="ismtp-tool"',
},
'lbd': {
'regex': r'Checking for (DNS|HTTP)-Loadbalancing',
'tag': 'infoleak:automatic-detection="lbd-tool"',
},
'miranda': {
'regex': r'Entering discovery mode for \'upnp:',
'tag': 'infoleak:automatic-detection="miranda-tool"',
},
'ncat': {
'regex': r'nmap.org/ncat',
'tag': 'infoleak:automatic-detection="ncat-tool"',
},
'ohrwurm': {
'regex': r'\bohrwurm-[\d.]+',
'tag': 'infoleak:automatic-detection="ohrwurm-tool"',
},
'oscanner': {
'regex': r'Loading services/sids from service file',
'tag': 'infoleak:automatic-detection="oscanner-tool"',
},
'sfuzz': {
'regex': r'AREALLYBADSTRING|sfuzz/sfuzz',
'tag': 'infoleak:automatic-detection="sfuzz-tool"',
},
'sidguess': {
'regex': r'SIDGuesser v[\d.]+',
'tag': 'infoleak:automatic-detection="sidguess-tool"',
},
'sqlninja': {
'regex': r'Sqlninja rel\. [\d.]+',
'tag': 'infoleak:automatic-detection="sqlninja-tool"',
},
'sqlsus': {
'regex': r'sqlsus version [\d.]+',
'tag': 'infoleak:automatic-detection="sqlsus-tool"',
},
'dnsdict6': {
'regex': r'Starting DNS enumeration work on',
'tag': 'infoleak:automatic-detection="dnsdict6-tool"',
},
'unixprivesccheck': {
'regex': r'Recording Interface IP addresses',
'tag': 'infoleak:automatic-detection="unixprivesccheck-tool"',
},
'yersinia': {
'regex': r'yersinia@yersinia.net',
'tag': 'infoleak:automatic-detection="yersinia-tool"',
},
'armitage': {
'regex': r'\[\*\] Starting msfrpcd for you',
'tag': 'infoleak:automatic-detection="armitage-tool"',
},
'backdoorfactory': {
'regex': r'\[\*\] In the backdoor module',
'tag': 'infoleak:automatic-detection="backdoorfactory-tool"',
},
'beef': {
'regex': r'Please wait as BeEF services are started',
'tag': 'infoleak:automatic-detection="beef-tool"',
},
'cat': {
'regex': r'Cisco Auditing Tool.+?g0ne',
'tag': 'infoleak:automatic-detection="cat-tool"',
},
'cge': {
'regex': r'Vulnerability successful exploited with \[',
'tag': 'infoleak:automatic-detection="cge-tool"',
},
'john': {
'regex': r'John the Ripper password cracker, ver:|Loaded \d+ password hash \(',
'tag': 'infoleak:automatic-detection="john-tool"',
},
'keimpx': {
'regex': r'\bkeimpx [\d.]+',
'tag': 'infoleak:automatic-detection="keimpx-tool"',
},
'maskprocessor': {
'regex': r'mp by atom, High-Performance word generator',
'tag': 'infoleak:automatic-detection="maskprocessor-tool"',
},
'ncrack': {
'regex': r'Starting Ncrack[^\n]+http://ncrack.org',
'tag': 'infoleak:automatic-detection="ncrack-tool"',
},
'patator': {
'regex': r'http://code.google.com/p/patator/|Starting Patator v',
'tag': 'infoleak:automatic-detection="patator-tool"',
},
'phrasendrescher': {
'regex': r'phrasen\|drescher [\d.]+',
'tag': 'infoleak:automatic-detection="phrasendrescher-tool"',
},
'polenum': {
'regex': r'\[\+\] Password Complexity Flags:',
'tag': 'infoleak:automatic-detection="polenum-tool"',
},
'rainbowcrack': {
'regex': r'Official Website: http://project-rainbowcrack.com/',
'tag': 'infoleak:automatic-detection="rainbowcrack-tool"',
},
'rcracki_mt': {
'regex': r'Found \d+ rainbowtable files\.\.\.',
'tag': 'infoleak:automatic-detection="rcracki_mt-tool"',
},
'tcpdump': {
'regex': r'tcpdump: listening on.+capture size \d+|\d+ packets received by filter',
'tag': 'infoleak:automatic-detection="tcpdump-tool"',
},
'hydra': {
'regex': r'Hydra \(http://www.thc.org/thc-hydra\)',
'tag': 'infoleak:automatic-detection="hydra-tool"',
},
'netcat': {
'regex': r'Listening on \[[\d.]+\] \(family',
'tag': 'infoleak:automatic-detection="netcat-tool"',
},
'nslookup': {
'regex': r'Non-authoritative answer:',
'tag': 'infoleak:automatic-detection="nslookup-tool"',
},
'dig': {
'regex': r'; <<>> DiG [\d.]+',
'tag': 'infoleak:automatic-detection="dig-tool"',
},
'whois': {
'regex': r'(?i)Registrar WHOIS Server:|Registrar URL: http://|DNSSEC: unsigned|information on Whois status codes|REGISTERED, DELEGATED|[Rr]egistrar:|%[^\n]+(WHOIS|2016/679)',
'tag': 'infoleak:automatic-detection="whois-tool"',
},
'nessus': {
'regex': r'nessus_(report_(get|list|exploits)|scan_(new|status))|nessuscli|nessusd|nessus-service',
'tag': 'infoleak:automatic-detection="nessus-tool"',
},
'openvas': {
'regex': r'/openvas/',
'tag': 'infoleak:automatic-detection="openvas-tool"',
},
'golismero': {
'regex': r'GoLismero[\n]+The Web Knife',
'tag': 'infoleak:automatic-detection="golismero-tool"',
},
'wpscan': {
'regex': r'WordPress Security Scanner by the WPScan Team|\[\+\] Interesting header:',
'tag': 'infoleak:automatic-detection="wpscan-tool"',
},
'skipfish': {
'regex': r'\[\+\] Sorting and annotating crawl nodes:|skipfish version [\d.]+',
'tag': 'infoleak:automatic-detection="skipfish-tool"',
},
'arachni': {
'regex': r'With the support of the community and the Arachni Team|\[\*\] Waiting for plugins to settle\.\.\.',
'tag': 'infoleak:automatic-detection="arachni-tool"',
},
'dirb': {
'regex': r'==> DIRECTORY:|\bDIRB v[\d.]+',
'tag': 'infoleak:automatic-detection="dirb-tool"',
},
'joomscan': {
'regex': r'OWASP Joomla! Vulnerability Scanner v[\d.]+',
'tag': 'infoleak:automatic-detection="joomscan-tool"',
},
'jbossautopwn': {
'regex': r'\[x\] Now creating BSH script\.\.\.|\[x\] Now deploying \.war file:',
'tag': 'infoleak:automatic-detection="jbossautopwn-tool"',
},
'grabber': {
'regex': r'runSpiderScan @',
'tag': 'infoleak:automatic-detection="grabber-tool"',
},
'fimap': {
'regex': r'Automatic LFI/RFI scanner and exploiter',
'tag': 'infoleak:automatic-detection="fimap-tool"',
},
'dsxs': {
'regex': r'Damn Small XSS Scanner \(DSXS\)',
'tag': 'infoleak:automatic-detection="dsxs-tool"',
},
'dsss': {
'regex': r'Damn Small SQLi Scanner \(DSSS\)',
'tag': 'infoleak:automatic-detection="dsss-tool"',
},
'dsjs': {
'regex': r'Damn Small JS Scanner \(DSJS\)',
'tag': 'infoleak:automatic-detection="dsjs-tool"',
},
'dsfs': {
'regex': r'Damn Small FI Scanner \(DSFS\)',
'tag': 'infoleak:automatic-detection="dsfs-tool"',
},
'identywaf': {
'regex': r'\[o\] initializing handlers\.\.\.',
'tag': 'infoleak:automatic-detection="identywaf-tool"',
},
'whatwaf': {
'regex': r'<sCRIPT>ALeRt.+?WhatWaf\?',
'tag': 'infoleak:automatic-detection="whatwaf-tool"',
}
}
class Tools(AbstractModule):
"""
Tools module for AIL framework
"""
def __init__(self):
super(Tools, self).__init__()
self.max_execution_time = 30
# Waiting time in seconds between two processed messages
self.pending_seconds = 10
# Send module state to logs
self.redis_logger.info(f"Module {self.module_name} initialized")
def get_tools(self):
return TOOLS.keys()
def extract(self, obj_id, content, tag):
tool_name = tag.rsplit('"', 2)[1][:-5]
return self.regex_finditer(TOOLS[tool_name]['regex'], obj_id, content)
def compute(self, message):
item = Item(message)
content = item.get_content()
for tool_name in TOOLS:
tool = TOOLS[tool_name]
match = self.regex_search(tool['regex'], item.id, content)
if match:
print(f'{item.id} found: {tool_name}')
# Tag Item
msg = f"{tool['tag']};{item.id}"
self.send_message_to_queue(msg, 'Tags')
# TODO ADD LOGS
if __name__ == '__main__':
module = Tools()
module.run()
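
Tools.extract() recovers the tool name from the tag instead of re-scanning for every tool: the tags always have the shape infoleak:automatic-detection="<name>-tool", so splitting on the quotes and stripping the 5-char '-tool' suffix yields the TOOLS key. Worked example:

tag = 'infoleak:automatic-detection="sqlmap-tool"'
tag.rsplit('"', 2)          # ['infoleak:automatic-detection=', 'sqlmap-tool', '']
tag.rsplit('"', 2)[1][:-5]  # 'sqlmap' -> index into TOOLS for its regex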

View File

@ -47,7 +47,7 @@ class AbstractModule(ABC):
self.redis_logger.channel = logger_channel self.redis_logger.channel = logger_channel
#Cache key #Cache key
self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name) self.r_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
self.max_execution_time = 30 self.max_execution_time = 30
# Run module endlessly # Run module endlessly
@ -81,6 +81,12 @@ class AbstractModule(ABC):
self.process.populate_set_out(message, queue_name) self.process.populate_set_out(message, queue_name)
# add to new set_module # add to new set_module
def regex_search(self, regex, obj_id, content):
return regex_helper.regex_search(self.r_cache_key, regex, obj_id, content, max_time=self.max_execution_time)
def regex_finditer(self, regex, obj_id, content):
return regex_helper.regex_finditer(self.r_cache_key, regex, obj_id, content, max_time=self.max_execution_time)
def regex_findall(self, regex, id, content): def regex_findall(self, regex, id, content):
""" """
regex findall helper (force timeout) regex findall helper (force timeout)
@ -90,7 +96,7 @@ class AbstractModule(ABC):
ex: send_to_queue(item_id, 'Global') ex: send_to_queue(item_id, 'Global')
""" """
return regex_helper.regex_findall(self.module_name, self.redis_cache_key, regex, id, content, max_time=self.max_execution_time) return regex_helper.regex_findall(self.module_name, self.r_cache_key, regex, id, content, max_time=self.max_execution_time)
def run(self): def run(self):
""" """

View File

@ -126,7 +126,7 @@ subscribe = Redis_Global
subscribe = Redis_Credential subscribe = Redis_Credential
publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags publish = Redis_Duplicate,Redis_ModuleStats,Redis_Tags
[Cve] [CveModule]
subscribe = Redis_Cve subscribe = Redis_Cve
publish = Redis_Tags publish = Redis_Tags

@ -1 +1 @@
Subproject commit aba1321b34e18122ec1825b54e2fc8176a4bd25c Subproject commit de12f46ba6305d457b1e248cfeeec89827ec93c9

View File

@ -248,7 +248,7 @@ services:
depends_on: depends_on:
- redis-log - redis-log
entrypoint: entrypoint:
- /opt/AIL/bin/Cve.py - /opt/AIL/bin/CveModule.py
network_mode: service:flask network_mode: service:flask
image: ail-framework image: ail-framework
volumes: volumes:

View File

@ -26,6 +26,8 @@ from lib.objects.Screenshots import Screenshot
from lib import Tag from lib import Tag
from export import Export from export import Export
from lib import module_extractor
# ============ BLUEPRINT ============ # ============ BLUEPRINT ============
objects_item = Blueprint('objects_item', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/item')) objects_item = Blueprint('objects_item', __name__, template_folder=os.path.join(os.environ['AIL_FLASK'], 'templates/objects/item'))
@ -73,10 +75,12 @@ def showItem(): # # TODO: support post
# # TODO: ADD in Export SECTION # # TODO: ADD in Export SECTION
meta['hive_case'] = Export.get_item_hive_cases(item_id) meta['hive_case'] = Export.get_item_hive_cases(item_id)
extracted = module_extractor.extract(item.id, content=meta['content'])
return render_template("show_item.html", bootstrap_label=bootstrap_label, return render_template("show_item.html", bootstrap_label=bootstrap_label,
modal_add_tags=Tag.get_modal_add_tags(meta['id'], object_type='item'), modal_add_tags=Tag.get_modal_add_tags(meta['id'], object_type='item'),
is_hive_connected=Export.get_item_hive_cases(item_id), is_hive_connected=Export.get_item_hive_cases(item_id),
meta=meta) meta=meta, extracted=extracted)
# kvrocks data # kvrocks data
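
module_extractor is new in this changeset but its body is not shown here. Presumably it maps an item's existing detection tags back to the module that produced them, calls each module's extract(), and returns rows sorted by offset for the template below. A hypothetical sketch:

def extract_sketch(obj_id, content, tags, tag_to_module):
    # tag_to_module: hypothetical mapping tag -> module exposing extract()
    matches = []
    for tag in tags:
        module = tag_to_module.get(tag)
        if module:
            matches.extend(module.extract(obj_id, content, tag))
    # sort by start offset so the template can splice highlights in order
    return sorted(matches, key=lambda row: row[0])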

View File

@ -16,6 +16,8 @@ sys.path.append(os.environ['AIL_BIN'])
################################## ##################################
# Import Project packages # Import Project packages
################################## ##################################
from lib.objects import ail_objects
from packages.Date import Date from packages.Date import Date
# ============ VARIABLES ============ # ============ VARIABLES ============
@ -167,22 +169,9 @@ def get_all_types_id(correlation_type):
else: else:
return [] return []
def is_valid_type_id(correlation_type, type_id): def get_key_id_metadata(obj_type, subtype, obj_id):
all_type_id = get_all_types_id(correlation_type) obj = ail_objects.get_object_meta(obj_type, subtype, obj_id)
if type_id in all_type_id: return obj._get_meta()
return True
else:
return False
def get_key_id_metadata(correlation_type, type_id, key_id):
key_id_metadata = {}
if r_serv_metadata.exists('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id)):
key_id_metadata['first_seen'] = r_serv_metadata.hget('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id), 'first_seen')
key_id_metadata['first_seen'] = '{}/{}/{}'.format(key_id_metadata['first_seen'][0:4], key_id_metadata['first_seen'][4:6], key_id_metadata['first_seen'][6:8])
key_id_metadata['last_seen'] = r_serv_metadata.hget('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id), 'last_seen')
key_id_metadata['last_seen'] = '{}/{}/{}'.format(key_id_metadata['last_seen'][0:4], key_id_metadata['last_seen'][4:6], key_id_metadata['last_seen'][6:8])
key_id_metadata['nb_seen'] = r_serv_metadata.scard('set_{}_{}:{}'.format(correlation_type, type_id, key_id))
return key_id_metadata
def list_sparkline_type_id_values(date_range_sparkline, correlation_type, type_id, key_id): def list_sparkline_type_id_values(date_range_sparkline, correlation_type, type_id, key_id):
sparklines_value = [] sparklines_value = []
@ -250,7 +239,7 @@ def main_correlation_page(correlation_type, type_id, date_from, date_to, show_de
if type_id is not None: if type_id is not None:
#retrieve char #retrieve char
type_id = type_id.replace(' ', '') type_id = type_id.replace(' ', '')
if not is_valid_type_id(correlation_type, type_id): if not ail_objects.is_valid_object_subtype(correlation_type, type_id):
type_id = None type_id = None
date_range = [] date_range = []
@ -897,7 +886,7 @@ def pgpdump_graph_line_json():
def correlation_graph_line_json(correlation_type, type_id, key_id, date_from, date_to): def correlation_graph_line_json(correlation_type, type_id, key_id, date_from, date_to):
# verify input # verify input
if key_id is not None and is_valid_type_id(correlation_type, type_id) and r_serv_metadata.exists('{}_metadata_{}:{}'.format(correlation_type, type_id, key_id)): if key_id is not None and ail_objects.is_valid_object_subtype(correlation_type, type_id) and ail_objects.exists_obj(correlation_type, type_id, key_id):
if date_from is None or date_to is None: if date_from is None or date_to is None:
nb_days_seen_in_pastes = 30 nb_days_seen_in_pastes = 30

View File

@ -10,6 +10,7 @@
<link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet"> <link href="{{ url_for('static', filename='css/font-awesome.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet"> <link href="{{ url_for('static', filename='css/dataTables.bootstrap.min.css') }}" rel="stylesheet">
<link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" /> <link href="{{ url_for('static', filename='css/tags.css') }}" rel="stylesheet" type="text/css" />
<link href="{{ url_for('static', filename='css/ail-project.css') }}" rel="stylesheet">
<script src="{{ url_for('static', filename='js/jquery.js')}}"></script> <script src="{{ url_for('static', filename='js/jquery.js')}}"></script>
<script src="{{ url_for('static', filename='js/popper.min.js')}}"></script> <script src="{{ url_for('static', filename='js/popper.min.js')}}"></script>
@ -256,6 +257,8 @@
</div> </div>
</div> </div>
</div> </div>
</div> </div>
{% endif %} {% endif %}
@ -346,6 +349,13 @@
{% endif %} {% endif %}
{% if extracted %}
{% for row in extracted %}
<div><a href="#{{ row[0] }}:{{ row[1] }}">{{ row[2] }}</a></div>
{% endfor %}
{% endif %}
<!-- nav-pills nav-justified nav-tabs--> <!-- nav-pills nav-justified nav-tabs-->
<div class="card"> <div class="card">
@ -367,15 +377,21 @@
</li> </li>
</ul> </ul>
<div class="tab-content" id="pills-tabContent"> <div class="tab-content" id="pills-tabContent">
<div class="tab-pane fade show active" id="pills-content" role="tabpanel" aria-labelledby="pills-content-tab"> <div class="tab-pane fade show active" id="pills-content" role="tabpanel" aria-labelledby="pills-content-tab">
<p class="my-0"> <pre class="border">{{ meta['content'] }}</pre></p> {% if not extracted %}
<p class="my-0"> <pre class="border">{{ meta['content'] }}</pre></p>
{% else %}
<p class="my-0"> <pre class="border">{{ meta['content'][:extracted[0][0]] }}{% for row in extracted %}<span class="hg-text" id="{{ row[0] }}:{{ row[1] }}">{{ meta['content'][row[0]:row[1]] }}</span>{% if loop.index + 1 > extracted|length %}{{ meta['content'][extracted[-1][1]:] }}{% else %}{{ meta['content'][row[1]:extracted[loop.index][0]] }}{% endif %}{% endfor %}</pre></p>
{% endif %}
</div> </div>
<div class="tab-pane fade" id="pills-html2text" role="tabpanel" aria-labelledby="pills-html2text-tab"> <div class="tab-pane fade" id="pills-html2text" role="tabpanel" aria-labelledby="pills-html2text-tab">
<p class="my-0"> <pre id="html2text-container" class="border"></pre></p> <p class="my-0"> <pre id="html2text-container" class="border"></pre></p>
</div> </div>
</div> </div>
</div> </div>
</div> </div>
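
The Jinja one-liner above splices the highlights into the raw content: the text before the first match, then for each (start, end, value) row a span with id "start:end" (the anchor target of the links higher up), followed by either the gap to the next match or, on the last row, the tail of the content. The same logic in plain Python, for readability:

def render_highlighted_content(content, extracted):
    # extracted: (start, end, value) rows, sorted by start, non-overlapping
    parts = [content[:extracted[0][0]]]
    for i, (start, end, value) in enumerate(extracted):
        parts.append(f'<span class="hg-text" id="{start}:{end}">{content[start:end]}</span>')
        if i == len(extracted) - 1:
            parts.append(content[end:])  # tail after the last match
        else:
            parts.append(content[end:extracted[i + 1][0]])  # gap to the next match
    return ''.join(parts)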