AIL-framework/bin/modules/DomClassifier.py

104 lines
3.8 KiB
Python
Raw Permalink Normal View History

2018-05-04 13:53:29 +02:00
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
"""
The DomClassifier Module
============================
2018-04-16 14:50:04 +02:00
The DomClassifier module extracts and classifies Internet domains/hostnames/IP addresses from
the output of the Global module.
"""
2021-04-28 15:24:33 +02:00
##################################
# Import External packages
##################################
import os
import sys
import DomainClassifier.domainclassifier
2021-04-28 15:24:33 +02:00
sys.path.append(os.environ['AIL_BIN'])
2021-04-28 15:24:33 +02:00
##################################
# Import Project packages
##################################
from modules.abstract_module import AbstractModule
from lib.ConfigLoader import ConfigLoader
from lib import d4
2021-04-28 15:24:33 +02:00
class DomClassifier(AbstractModule):
    """
    DomClassifier module for AIL framework.

    For each incoming message, the module feeds the text to a
    ``DomainClassifier.domainclassifier.Extract`` instance, validates the
    extracted domains, optionally forwards the validated entries to the
    module's output queue, and logs a warning for domains matching the
    configured ``cc_tld`` expression or ``cc`` country code.
    """

    def __init__(self):
        """Read the DNS, Redis cache and country settings and build the extractor."""
        super(DomClassifier, self).__init__()

        config_loader = ConfigLoader()

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 1

        addr_dns = config_loader.get_config_str("DomClassifier", "dns")

        redis_host = config_loader.get_config_str('Redis_Cache', 'host')
        redis_port = config_loader.get_config_int('Redis_Cache', 'port')
        redis_db = config_loader.get_config_int('Redis_Cache', 'db')
        self.dom_classifier = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns],
                                                                        redis_host=redis_host,
                                                                        redis_port=redis_port, redis_db=redis_db,
                                                                        re_timeout=30)

        # Filters used by compute(); an empty value disables the matching check.
        self.cc = config_loader.get_config_str("DomClassifier", "cc")
        self.cc_tld = config_loader.get_config_str("DomClassifier", "cc_tld")

        # Send module state to logs
        self.logger.info(f"Module: {self.module_name} Launched")

    def compute(self, message, r_result=False):
        """
        Extract, validate and classify the domains contained in ``message``.

        :param message: text handed to the domain extractor.
        :param r_result: when true, return the validated domains instead of None.
        :return: ``self.dom_classifier.vdomain`` when ``r_result`` is true and at
            least one domain was extracted; otherwise None.
        :raises Exception: when an IOError occurs during processing; the
            original IOError is attached as the cause.
        """
        host = message

        item = self.get_obj()
        item_basename = item.get_basename()
        item_date = item.get_date()
        item_source = item.get_source()

        try:
            self.dom_classifier.text(rawtext=host)
            # Nothing extracted: no validation or classification to perform.
            if not self.dom_classifier.domain:
                return
            print(self.dom_classifier.domain)
            self.dom_classifier.validdomain(passive_dns=True, extended=False)
            # self.logger.debug(self.dom_classifier.vdomain)

            print(self.dom_classifier.vdomain)
            print()

            # Forward every validated entry only when passive DNS export is enabled.
            if self.dom_classifier.vdomain and d4.is_passive_dns_enabled():
                for dns_record in self.dom_classifier.vdomain:
                    self.add_message_to_queue(obj=None, message=dns_record)

            # Domains matching the configured TLD expression.
            if self.cc_tld:
                localizeddomains = self.dom_classifier.include(expression=self.cc_tld)
                if localizeddomains:
                    print(localizeddomains)
                    self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc_tld};{self.obj.get_global_id()}")

            # Domains localized in the configured country code.
            if self.cc:
                localizeddomains = self.dom_classifier.localizedomain(cc=self.cc)
                if localizeddomains:
                    print(localizeddomains)
                    self.redis_logger.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {self.cc};{self.obj.get_global_id()}")

            if r_result:
                return self.dom_classifier.vdomain

        except IOError as err:
            # NOTE(review): the "Duplicate" prefix differs from the "DomainC" prefix
            # used by the other log lines in this method - confirm it is intended.
            self.redis_logger.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
            # Chain the original IOError so the root cause stays in the traceback.
            raise Exception(f"CRC Checksum Failed on: {self.obj.get_global_id()}") from err
if __name__ == "__main__":
    # Script entry point: instantiate the module and call its run() method.
    DomClassifier().run()