From 8df581c9574244a52e34ee14aa8f2a49671cbecc Mon Sep 17 00:00:00 2001
From: AntoniaBK <antonia@cookfirst.de>
Date: Thu, 4 Apr 2024 14:22:19 +0200
Subject: [PATCH 1/3] Lookup of abuse-c

---
 lookyloo/modules/uwhois.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/lookyloo/modules/uwhois.py b/lookyloo/modules/uwhois.py
index ced97e6d..bb06cf10 100644
--- a/lookyloo/modules/uwhois.py
+++ b/lookyloo/modules/uwhois.py
@@ -76,6 +76,9 @@ class UniversalWhois(AbstractModule):
         ...
 
     def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]:
+        
+        EMAIL_REGEX = rb'(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)'
+
         if not self.available:
             return ''
         bytes_whois = b''
@@ -87,7 +90,13 @@ class UniversalWhois(AbstractModule):
                 if not data:
                     break
                 bytes_whois += data
+        #if an abuse-c-Object is found in the whois entry, the result of its lookup will be returned
+        abuse_c = re.search(rb'abuse-c:\s+(.*)\s', bytes_whois)
+        if abuse_c is not None:
+            return self.whois(abuse_c.group(1).decode(), contact_email_only)
+        
         if not contact_email_only:
             return bytes_whois.decode()
-        emails = list(set(re.findall(rb'[\w\.-]+@[\w\.-]+', bytes_whois)))
+        emails = list(set(re.findall(EMAIL_REGEX, bytes_whois)))
         return [e.decode() for e in sorted(emails)]
+    
\ No newline at end of file

From e8492cc96ffc19204f8f41abebd3ceaaecc7f70c Mon Sep 17 00:00:00 2001
From: AntoniaBK <antonia@cookfirst.de>
Date: Mon, 8 Apr 2024 10:59:10 +0200
Subject: [PATCH 2/3] Concatenate the whois-lookups

When contact_email_only is false, the abuse-c lookup is appended to the
initial lookup.

---
 lookyloo/modules/uwhois.py | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/lookyloo/modules/uwhois.py b/lookyloo/modules/uwhois.py
index bb06cf10..86f5afaa 100644
--- a/lookyloo/modules/uwhois.py
+++ b/lookyloo/modules/uwhois.py
@@ -90,11 +90,16 @@ class UniversalWhois(AbstractModule):
                 if not data:
                     break
                 bytes_whois += data
-        #if an abuse-c-Object is found in the whois entry, the result of its lookup will be returned
+
+        #if an abuse-c-Object is found in the whois entry, it will take precedence
         abuse_c = re.search(rb'abuse-c:\s+(.*)\s', bytes_whois)
-        if abuse_c is not None:
-            return self.whois(abuse_c.group(1).decode(), contact_email_only)
-        
+        if abuse_c:
+            abuse_c_query = self.whois(abuse_c.group(1).decode(), contact_email_only)
+            if contact_email_only:
+                return abuse_c_query
+            else:
+                return bytes_whois.decode() + abuse_c_query
+
         if not contact_email_only:
             return bytes_whois.decode()
         emails = list(set(re.findall(EMAIL_REGEX, bytes_whois)))

From f680c0a11b60829dfd84f43d5c016b62bcb6a984 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= <raphael@vinot.info>
Date: Mon, 8 Apr 2024 20:41:23 +0200
Subject: [PATCH 3/3] chg: Make mypy happy

---
 lookyloo/modules/uwhois.py | 31 ++++++++++++++++++-------------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/lookyloo/modules/uwhois.py b/lookyloo/modules/uwhois.py
index 86f5afaa..c4486b26 100644
--- a/lookyloo/modules/uwhois.py
+++ b/lookyloo/modules/uwhois.py
@@ -40,12 +40,12 @@ class UniversalWhois(AbstractModule):
                 # old format
                 _all_ips = hostnode.resolved_ips
             for ip in _all_ips:
-                self.whois(ip)
+                self.whois(ip, contact_email_only=False)
         if hasattr(hostnode, 'cnames'):
             cname: str
             for cname in hostnode.cnames:
-                self.whois(cname)
-        self.whois(hostnode.name)
+                self.whois(cname, contact_email_only=False)
+        self.whois(hostnode.name, contact_email_only=False)
 
     def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, force: bool=False, auto_trigger: bool=False) -> None:
         '''Run the module on all the nodes up to the final redirect'''
@@ -72,11 +72,11 @@ class UniversalWhois(AbstractModule):
         ...
 
     @overload
-    def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]:
+    def whois(self, query: str, contact_email_only: bool) -> str | list[str]:
         ...
 
     def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]:
-        
+
         EMAIL_REGEX = rb'(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)'
 
         if not self.available:
@@ -91,17 +91,22 @@ class UniversalWhois(AbstractModule):
                     break
                 bytes_whois += data
 
-        #if an abuse-c-Object is found in the whois entry, it will take precedence
+        # if an abuse-c-Object is found in the whois entry, it will take precedence
         abuse_c = re.search(rb'abuse-c:\s+(.*)\s', bytes_whois)
-        if abuse_c:
-            abuse_c_query = self.whois(abuse_c.group(1).decode(), contact_email_only)
-            if contact_email_only:
+        if abuse_c and abuse_c.lastindex and abuse_c.lastindex > 0:  # make sure we have a match and avoid exception on None or missing group 1
+            # The whois entry has an abuse-c object
+            _obj_name: str = abuse_c.group(1).decode()
+            abuse_c_query = self.whois(_obj_name, contact_email_only)
+            # The object exists
+            if abuse_c_query and contact_email_only:
+                # The object exists and we only want the email(s), the response is a list of emails
                 return abuse_c_query
-            else:
-                return bytes_whois.decode() + abuse_c_query
-
+            elif abuse_c_query:
+                # The object exists and we want the full whois entry, concatenate with a new line.
+                # contact_email_only is False, so the response is a string, ignore the typing warning accordingly
+                return '\n'.join([bytes_whois.decode(), abuse_c_query])  # type: ignore[list-item]
+        # We either don't have an abuse-c object or it does not exist
         if not contact_email_only:
             return bytes_whois.decode()
         emails = list(set(re.findall(EMAIL_REGEX, bytes_whois)))
         return [e.decode() for e in sorted(emails)]
-    
\ No newline at end of file