From 8df581c9574244a52e34ee14aa8f2a49671cbecc Mon Sep 17 00:00:00 2001 From: AntoniaBK Date: Thu, 4 Apr 2024 14:22:19 +0200 Subject: [PATCH 1/3] Lookup of abuse-c --- lookyloo/modules/uwhois.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lookyloo/modules/uwhois.py b/lookyloo/modules/uwhois.py index ced97e6d..bb06cf10 100644 --- a/lookyloo/modules/uwhois.py +++ b/lookyloo/modules/uwhois.py @@ -76,6 +76,9 @@ class UniversalWhois(AbstractModule): ... def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]: + + EMAIL_REGEX = rb'(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)' + if not self.available: return '' bytes_whois = b'' @@ -87,7 +90,13 @@ class UniversalWhois(AbstractModule): if not data: break bytes_whois += data + #if an abuse-c-Object is found in the whois entry, the result of its lookup will be returned + abuse_c = re.search(rb'abuse-c:\s+(.*)\s', bytes_whois) + if abuse_c is not None: + return self.whois(abuse_c.group(1).decode(), contact_email_only) + if not contact_email_only: return bytes_whois.decode() - emails = list(set(re.findall(rb'[\w\.-]+@[\w\.-]+', bytes_whois))) + emails = list(set(re.findall(EMAIL_REGEX, bytes_whois))) return [e.decode() for e in sorted(emails)] + \ No newline at end of file From e8492cc96ffc19204f8f41abebd3ceaaecc7f70c Mon Sep 17 00:00:00 2001 From: AntoniaBK Date: Mon, 8 Apr 2024 10:59:10 +0200 Subject: [PATCH 2/3] Concatenate the whois-lookups\n when contact_email_only is false the abuse-c-lookup is added to the initial lookup --- lookyloo/modules/uwhois.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/lookyloo/modules/uwhois.py b/lookyloo/modules/uwhois.py index bb06cf10..86f5afaa 100644 --- a/lookyloo/modules/uwhois.py +++ b/lookyloo/modules/uwhois.py @@ -90,11 +90,16 @@ class UniversalWhois(AbstractModule): if not data: break bytes_whois += data - #if an abuse-c-Object is found in the whois entry, the result of its lookup will be returned + + #if an abuse-c-Object is found in the whois entry, it will take precedence abuse_c = re.search(rb'abuse-c:\s+(.*)\s', bytes_whois) - if abuse_c is not None: - return self.whois(abuse_c.group(1).decode(), contact_email_only) - + if abuse_c: + abuse_c_query = self.whois(abuse_c.group(1).decode(), contact_email_only) + if contact_email_only: + return abuse_c_query + else: + return bytes_whois.decode() + abuse_c_query + if not contact_email_only: return bytes_whois.decode() emails = list(set(re.findall(EMAIL_REGEX, bytes_whois))) From f680c0a11b60829dfd84f43d5c016b62bcb6a984 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Mon, 8 Apr 2024 20:41:23 +0200 Subject: [PATCH 3/3] chg: Make mypy happy --- lookyloo/modules/uwhois.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/lookyloo/modules/uwhois.py b/lookyloo/modules/uwhois.py index 86f5afaa..c4486b26 100644 --- a/lookyloo/modules/uwhois.py +++ b/lookyloo/modules/uwhois.py @@ -40,12 +40,12 @@ class UniversalWhois(AbstractModule): # old format _all_ips = hostnode.resolved_ips for ip in _all_ips: - self.whois(ip) + self.whois(ip, contact_email_only=False) if hasattr(hostnode, 'cnames'): cname: str for cname in hostnode.cnames: - self.whois(cname) - self.whois(hostnode.name) + self.whois(cname, contact_email_only=False) + self.whois(hostnode.name, contact_email_only=False) def capture_default_trigger(self, crawled_tree: CrawledTree, /, *, force: bool=False, auto_trigger: bool=False) -> None: '''Run the module on all the nodes up to the final redirect''' @@ -72,11 +72,11 @@ class UniversalWhois(AbstractModule): ... @overload - def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]: + def whois(self, query: str, contact_email_only: bool) -> str | list[str]: ... def whois(self, query: str, contact_email_only: bool=False) -> str | list[str]: - + EMAIL_REGEX = rb'(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)' if not self.available: @@ -91,17 +91,22 @@ class UniversalWhois(AbstractModule): break bytes_whois += data - #if an abuse-c-Object is found in the whois entry, it will take precedence + # if an abuse-c-Object is found in the whois entry, it will take precedence abuse_c = re.search(rb'abuse-c:\s+(.*)\s', bytes_whois) - if abuse_c: - abuse_c_query = self.whois(abuse_c.group(1).decode(), contact_email_only) - if contact_email_only: + if abuse_c and abuse_c.lastindex and abuse_c.lastindex > 0: # make sure we have a match and avoid exception on None or missing group 1 + # The whois entry has an abuse-c object + _obj_name: str = abuse_c.group(1).decode() + abuse_c_query = self.whois(_obj_name, contact_email_only) + # The object exists + if abuse_c_query and contact_email_only: + # The object exists and we only want the email(s), the response is a list of emails return abuse_c_query - else: - return bytes_whois.decode() + abuse_c_query - + elif abuse_c_query: + # The object exists and we want the full whois entry, contatenate with a new line. + # contact_email_only is False, so the response is a string, ignore the typing warning accordingy + return '\n'.join([bytes_whois.decode(), abuse_c_query]) # type: ignore[list-item] + # We either dont have an abuse-c object or it does not exist if not contact_email_only: return bytes_whois.decode() emails = list(set(re.findall(EMAIL_REGEX, bytes_whois))) return [e.decode() for e in sorted(emails)] - \ No newline at end of file