2018-05-04 13:53:29 +02:00
|
|
|
#!/usr/bin/env python3
|
2014-09-05 10:41:00 +02:00
|
|
|
# -*-coding:UTF-8 -*
|
|
|
|
|
|
|
|
"""
|
|
|
|
The DomClassifier Module
|
|
|
|
============================
|
|
|
|
|
2018-04-16 14:50:04 +02:00
|
|
|
The DomClassifier module extracts and classifies Internet domains/hostnames/IP addresses from
|
2017-05-09 11:13:16 +02:00
|
|
|
the output of the Global module.
|
2014-09-05 10:41:00 +02:00
|
|
|
|
|
|
|
"""
|
2021-03-31 11:25:09 +02:00
|
|
|
import os
|
|
|
|
import sys
|
2014-09-05 10:41:00 +02:00
|
|
|
import time
|
|
|
|
from pubsublogger import publisher
|
|
|
|
|
|
|
|
import DomainClassifier.domainclassifier
|
|
|
|
from Helper import Process
|
|
|
|
|
2021-03-31 11:25:09 +02:00
|
|
|
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
|
|
|
|
import d4
|
|
|
|
import item_basic
|
|
|
|
|
2014-09-05 10:41:00 +02:00
|
|
|
|
|
|
|
def main():
    """Run the DomClassifier worker loop.

    Sets the publisher port and channel, creates the ``Process`` helper for
    the ``DomClassifier`` config section and reads the ``dns``, ``cc`` and
    ``cc_tld`` options from it. It then loops forever: each item id pulled
    from the input set is loaded, and when its mimetype is ``text/*`` the
    content is handed to the domain extractor. Validated domains are
    forwarded to the output set when passive DNS is enabled, and domains
    matching ``cc_tld`` or localized in ``cc`` are reported through
    ``publisher.warning``.

    The function never returns; an ``IOError`` raised while handling an item
    is logged and the loop moves on to the next item.
    """
    publisher.port = 6380
    publisher.channel = "Script"

    config_section = 'DomClassifier'

    p = Process(config_section)
    addr_dns = p.config.get("DomClassifier", "dns")

    publisher.info("""ZMQ DomainClassifier is Running""")

    # A single extractor is reused for every item; its text is replaced
    # through c.text() before each extraction.
    c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])

    cc = p.config.get("DomClassifier", "cc")
    cc_tld = p.config.get("DomClassifier", "cc_tld")

    while True:
        # Reset the per-item state on every iteration so the error handler
        # below can neither hit an unbound local nor report metadata left
        # over from a previously processed item.
        item_id = None
        item_basename = None
        item_source = None
        item_date = None

        try:
            item_id = p.get_from_set()

            if item_id is None:
                publisher.debug("Script DomClassifier is idling 1s")
                time.sleep(1)
                continue

            # Collect the metadata before reading the content, so a failed
            # read is still logged with this item's own metadata.
            item_basename = item_basic.get_basename(item_id)
            item_source = item_basic.get_source(item_id)
            item_date = item_basic.get_item_date(item_id)

            item_content = item_basic.get_item_content(item_id)
            mimetype = item_basic.get_item_mimetype(item_id)

            # Only textual items are analysed.
            if mimetype.split('/')[0] != "text":
                continue

            c.text(rawtext=item_content)
            c.potentialdomain()
            c.validdomain(passive_dns=True, extended=False)
            print(c.vdomain)

            # Forward every validated record when passive DNS is enabled.
            if c.vdomain and d4.is_passive_dns_enabled():
                for dns_record in c.vdomain:
                    p.populate_set_out(dns_record)

            # Domains matching the configured TLD expression.
            localizeddomains = c.include(expression=cc_tld)
            if localizeddomains:
                print(localizeddomains)
                publisher.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {cc_tld};{item_id}")

            # Domains localized in the configured country code.
            localizeddomains = c.localizedomain(cc=cc)
            if localizeddomains:
                print(localizeddomains)
                publisher.warning(f"DomainC;{item_source};{item_date};{item_basename};Checked {localizeddomains} located in {cc};{item_id}")

        except IOError:
            print("CRC Checksum Failed on :", item_id)
            publisher.error(f"Duplicate;{item_source};{item_date};{item_basename};CRC Checksum Failed")
|
2014-09-05 10:41:00 +02:00
|
|
|
|
|
|
|
# Start the worker loop only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|