mirror of https://github.com/CIRCL/AIL-framework
chg: [Hosts] improve perf + regex timeout + cache DNS results
parent
a10119fb6a
commit
2db8587d03
|
@ -41,7 +41,13 @@ class DomClassifier(AbstractModule):
|
|||
|
||||
addr_dns = config_loader.get_config_str("DomClassifier", "dns")
|
||||
|
||||
self.c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
|
||||
redis_host = config_loader.get_config_str('Redis_Cache', 'host')
|
||||
redis_port = config_loader.get_config_int('Redis_Cache', 'port')
|
||||
redis_db = config_loader.get_config_int('Redis_Cache', 'db')
|
||||
self.dom_classifier = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns],
|
||||
redis_host=redis_host,
|
||||
redis_port=redis_port, redis_db=redis_db,
|
||||
re_timeout=30)
|
||||
|
||||
self.cc = config_loader.get_config_str("DomClassifier", "cc")
|
||||
self.cc_tld = config_loader.get_config_str("DomClassifier", "cc_tld")
|
||||
|
@ -58,34 +64,34 @@ class DomClassifier(AbstractModule):
|
|||
item_source = item.get_source()
|
||||
try:
|
||||
|
||||
self.c.text(rawtext=host)
|
||||
if not self.c.domain:
|
||||
self.dom_classifier.text(rawtext=host)
|
||||
if not self.dom_classifier.domain:
|
||||
return
|
||||
print(self.c.domain)
|
||||
self.c.validdomain(passive_dns=True, extended=False)
|
||||
# self.logger.debug(self.c.vdomain)
|
||||
print(self.dom_classifier.domain)
|
||||
self.dom_classifier.validdomain(passive_dns=True, extended=False)
|
||||
# self.logger.debug(self.dom_classifier.vdomain)
|
||||
|
||||
print(self.c.vdomain)
|
||||
print(self.dom_classifier.vdomain)
|
||||
print()
|
||||
|
||||
if self.c.vdomain and d4.is_passive_dns_enabled():
|
||||
for dns_record in self.c.vdomain:
|
||||
if self.dom_classifier.vdomain and d4.is_passive_dns_enabled():
|
||||
for dns_record in self.dom_classifier.vdomain:
|
||||
self.add_message_to_queue(obj=None, message=dns_record)
|
||||
|
||||
if self.cc_tld:
|
||||
localizeddomains = self.c.include(expression=self.cc_tld)
|
||||
localizeddomains = self.dom_classifier.include(expression=self.cc_tld)
|
||||
if localizeddomains:
|
||||
print(localizeddomains)
|
||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{item.get_id()}")
|
||||
|
||||
if self.cc:
|
||||
localizeddomains = self.c.localizedomain(cc=self.cc)
|
||||
localizeddomains = self.dom_classifier.localizedomain(cc=self.cc)
|
||||
if localizeddomains:
|
||||
print(localizeddomains)
|
||||
self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{item.get_id()}")
|
||||
|
||||
if r_result:
|
||||
return self.c.vdomain
|
||||
return self.dom_classifier.vdomain
|
||||
|
||||
except IOError as err:
|
||||
self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
|
||||
|
|
|
@ -18,13 +18,14 @@ import os
|
|||
import re
|
||||
import sys
|
||||
|
||||
import DomainClassifier.domainclassifier
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from modules.abstract_module import AbstractModule
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.objects.Items import Item
|
||||
|
||||
class Hosts(AbstractModule):
|
||||
"""
|
||||
|
@ -43,28 +44,29 @@ class Hosts(AbstractModule):
|
|||
# Waiting time in seconds between two processed messages
|
||||
self.pending_seconds = 1
|
||||
|
||||
self.host_regex = r'\b([a-zA-Z\d-]{,63}(?:\.[a-zA-Z\d-]{,63})+)\b'
|
||||
re.compile(self.host_regex)
|
||||
|
||||
redis_host = config_loader.get_config_str('Redis_Cache', 'host')
|
||||
redis_port = config_loader.get_config_int('Redis_Cache', 'port')
|
||||
redis_db = config_loader.get_config_int('Redis_Cache', 'db')
|
||||
self.dom_classifier = DomainClassifier.domainclassifier.Extract(rawtext="",
|
||||
redis_host=redis_host,
|
||||
redis_port=redis_port,
|
||||
redis_db=redis_db,
|
||||
re_timeout=30)
|
||||
self.logger.info(f"Module: {self.module_name} Launched")
|
||||
|
||||
def compute(self, message):
|
||||
item = self.get_obj()
|
||||
obj = self.get_obj()
|
||||
|
||||
# mimetype = item_basic.get_item_mimetype(item.get_id())
|
||||
# if mimetype.split('/')[0] == "text":
|
||||
|
||||
content = item.get_content()
|
||||
hosts = self.regex_findall(self.host_regex, item.get_id(), content, r_set=True)
|
||||
if hosts:
|
||||
print(f'{len(hosts)} host {item.get_id()}')
|
||||
for host in hosts:
|
||||
# print(host)
|
||||
if not host.endswith('.onion'):
|
||||
self.add_message_to_queue(message=str(host), queue='Host')
|
||||
content = obj.get_content()
|
||||
self.dom_classifier.text(content)
|
||||
if self.dom_classifier.domain:
|
||||
print(f'{len(self.dom_classifier.domain)} host {obj.get_id()}')
|
||||
# print(self.dom_classifier.domain)
|
||||
for domain in self.dom_classifier.domain:
|
||||
if domain:
|
||||
self.add_message_to_queue(message=domain, queue='Host')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
module = Hosts()
|
||||
module.run()
|
||||
|
|
Loading…
Reference in New Issue