2018-05-04 13:53:29 +02:00
#!/usr/bin/env python3
2014-09-05 10:41:00 +02:00
# -*-coding:UTF-8 -*
"""
The DomClassifier Module
============================
2018-04-16 14:50:04 +02:00
The DomClassifier module extracts and classifies Internet domains, hostnames and IP addresses from
2017-05-09 11:13:16 +02:00
the output of the Global module.
2014-09-05 10:41:00 +02:00
"""
2021-04-28 15:24:33 +02:00
##################################
# Import External packages
##################################
2021-03-31 11:25:09 +02:00
import os
import sys
2014-09-05 10:41:00 +02:00
import DomainClassifier . domainclassifier
2021-04-28 15:24:33 +02:00
2021-06-02 14:42:23 +02:00
sys . path . append ( os . environ [ ' AIL_BIN ' ] )
2021-04-28 15:24:33 +02:00
##################################
# Import Project packages
##################################
2021-06-02 14:42:23 +02:00
from modules . abstract_module import AbstractModule
2023-04-13 14:25:02 +02:00
from lib . ConfigLoader import ConfigLoader
2022-10-25 16:25:19 +02:00
from lib import d4
2021-03-31 11:25:09 +02:00
2014-09-05 10:41:00 +02:00
2021-04-28 15:24:33 +02:00
class DomClassifier(AbstractModule):
    """
    DomClassifier module for AIL framework.

    Hands each incoming message to a ``DomainClassifier`` ``Extract`` instance,
    forwards the resulting ``vdomain`` entries to the module queue when
    ``d4.is_passive_dns_enabled()`` is true, and logs a warning for domains
    matched against the configured ``cc_tld`` / ``cc`` values.
    """

    def __init__(self):
        """Read the DNS, Redis cache and ``cc`` / ``cc_tld`` settings and build the extractor."""
        super().__init__()

        config_loader = ConfigLoader()

        # Waiting time in seconds between two processed messages
        self.pending_seconds = 1

        addr_dns = config_loader.get_config_str("DomClassifier", "dns")

        redis_host = config_loader.get_config_str('Redis_Cache', 'host')
        redis_port = config_loader.get_config_int('Redis_Cache', 'port')
        redis_db = config_loader.get_config_int('Redis_Cache', 'db')
        # A single extractor is created here and reused for every message;
        # compute() feeds it new text through its text() method.
        self.dom_classifier = DomainClassifier.domainclassifier.Extract(
            rawtext="",
            nameservers=[addr_dns],
            redis_host=redis_host,
            redis_port=redis_port,
            redis_db=redis_db,
            re_timeout=30,
        )

        # A falsy value for either setting skips the matching check in compute()
        self.cc = config_loader.get_config_str("DomClassifier", "cc")
        self.cc_tld = config_loader.get_config_str("DomClassifier", "cc_tld")

        # Send module state to logs
        self.logger.info(f"Module: {self.module_name} Launched")

    def _report_localized(self, localized, location, item_meta):
        """
        Print and log a warning for domains matched against a configured filter.

        :param localized: result returned by the extractor; nothing happens when it is falsy
        :param location: the configured value (``cc_tld`` or ``cc``) the domains were matched against
        :param item_meta: pre-formatted ``source;date;basename`` part of the log line
        """
        if not localized:
            return
        print(localized)
        self.redis_logger.warning(
            f"DomainC;{item_meta};Checked {localized} located in {location};"
            f"{self.obj.get_global_id()}"
        )

    def compute(self, message, r_result=False):
        """
        Extract, validate and report the domains found in ``message``.

        :param message: raw text handed to the extractor
        :param r_result: when true, return the extractor's ``vdomain`` value
        :return: ``self.dom_classifier.vdomain`` if ``r_result`` is true and the
                 extractor reported at least one domain, otherwise ``None``
        :raises Exception: if an ``IOError`` occurs while processing; the
                 original error is attached as the cause
        """
        item = self.get_obj()
        # Fetched before the try block, so failures here are not reported as CRC errors
        item_basename = item.get_basename()
        item_date = item.get_date()
        item_source = item.get_source()
        # Metadata shared by every log line emitted for this object
        item_meta = f"{item_source};{item_date};{item_basename}"

        try:
            self.dom_classifier.text(rawtext=message)
            if not self.dom_classifier.domain:
                # Nothing extracted: skip validation and reporting entirely
                return None
            print(self.dom_classifier.domain)

            # vdomain is read right after this call -- presumably populated by it
            self.dom_classifier.validdomain(passive_dns=True, extended=False)
            print(self.dom_classifier.vdomain)
            print()

            if self.dom_classifier.vdomain and d4.is_passive_dns_enabled():
                for dns_record in self.dom_classifier.vdomain:
                    self.add_message_to_queue(obj=None, message=dns_record)

            if self.cc_tld:
                self._report_localized(
                    self.dom_classifier.include(expression=self.cc_tld),
                    self.cc_tld,
                    item_meta,
                )

            if self.cc:
                self._report_localized(
                    self.dom_classifier.localizedomain(cc=self.cc),
                    self.cc,
                    item_meta,
                )

            if r_result:
                return self.dom_classifier.vdomain
            return None
        except IOError as err:
            # NOTE(review): the "Duplicate" prefix differs from the "DomainC" prefix
            # used above -- possibly copied from another module; left unchanged
            # because log consumers may rely on it.
            self.redis_logger.error(f"Duplicate;{item_meta};CRC Checksum Failed")
            # Chain the original error so the root cause stays in the traceback
            raise Exception(f"CRC Checksum Failed on: {self.obj.get_global_id()}") from err
2021-03-31 11:25:09 +02:00
2014-09-05 10:41:00 +02:00
if __name__ == "__main__":
    # Script entry point: instantiate the module and call its run() method
    # (run() is not defined in this file; presumably inherited from AbstractModule).
    DomClassifier().run()