From 479fcb39e50786a7efca8c194261383296a0c9e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Fri, 23 Dec 2022 16:36:11 +0100 Subject: [PATCH] new: Basic support for IPASN lookup --- lookyloo/capturecache.py | 51 +++++++++++++++++++++++ poetry.lock | 28 ++++++++++--- pyproject.toml | 1 + website/web/templates/hostname_popup.html | 5 ++- 4 files changed, 79 insertions(+), 6 deletions(-) diff --git a/lookyloo/capturecache.py b/lookyloo/capturecache.py index 69ffcb37..757f7163 100644 --- a/lookyloo/capturecache.py +++ b/lookyloo/capturecache.py @@ -18,6 +18,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union, Set import dns.rdatatype import dns.resolver from har2tree import CrawledTree, Har2TreeError, HarFile +from pyipasnhistory import IPASNHistory from redis import Redis from .context import Context @@ -111,6 +112,14 @@ class CapturesIndex(Mapping): self.__cache: Dict[str, CaptureCache] = {} self._quick_init() self.timeout = get_config('generic', 'max_tree_create_time') + try: + self.ipasnhistory: Optional[IPASNHistory] = IPASNHistory() + if not self.ipasnhistory.is_up: + self.ipasnhistory = None + except Exception as e: + # Unable to setup IPASN History + print(e) + self.ipasnhistory = None @property def cached_captures(self) -> Set[str]: @@ -374,6 +383,8 @@ class CapturesIndex(Mapping): cnames_path = ct.root_hartree.har.path.parent / 'cnames.json' ips_path = ct.root_hartree.har.path.parent / 'ips.json' + ipasn_path = ct.root_hartree.har.path.parent / 'ipasn.json' + host_cnames: Dict[str, str] = {} if cnames_path.exists(): try: @@ -392,6 +403,15 @@ class CapturesIndex(Mapping): # The json is broken, delete and re-trigger the requests host_ips = {} + ipasn: Dict[str, Dict[str, str]] = {} + if ipasn_path.exists(): + try: + with ipasn_path.open() as f: + ipasn = json.load(f) + except json.decoder.JSONDecodeError: + # The json is broken, delete and re-trigger the requests + ipasn = {} + for node in ct.root_hartree.hostname_tree.traverse(): if node.name not in host_cnames or node.name not in host_ips: # Resolve and cache @@ -415,8 +435,39 @@ class CapturesIndex(Mapping): elif node.name in host_ips: node.add_feature('resolved_ips', host_ips[node.name]) + if self.ipasnhistory: + # Throw all the IPs to IPASN History for query later. + if ips := [{'ip': ip} for ips_sublist in host_ips.values() for ip in ips_sublist if ip and ip not in ipasn]: + try: + self.ipasnhistory.mass_cache(ips) + except Exception as e: + self.logger.warning(f'Unable to submit IPs to IPASNHistory: {e}') + else: + time.sleep(2) + ipasn_responses = self.ipasnhistory.mass_query(ips) + if 'responses' in ipasn_responses: + for response in ipasn_responses['responses']: + ip = response['meta']['ip'] + r = list(response['response'].values())[0] + if ip not in ipasn and r: + ipasn[ip] = r + if ipasn: + # retraverse tree to populate it with the features + for node in ct.root_hartree.hostname_tree.traverse(): + if not hasattr(node, 'resolved_ips'): + continue + ipasn_entries = {} + for ip in node.resolved_ips: + if ip not in ipasn: + continue + ipasn_entries[ip] = ipasn[ip] + if ipasn_entries: + node.add_feature('ipasn', ipasn_entries) + with cnames_path.open('w') as f: json.dump(host_cnames, f) with ips_path.open('w') as f: json.dump(host_ips, f) + with ipasn_path.open('w') as f: + json.dump(ipasn, f) return ct diff --git a/poetry.lock b/poetry.lock index d01ef16f..7a1d2bce 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1666,21 +1666,21 @@ typing-extensions = {version = "*", markers = "python_version <= \"3.8\""} [[package]] name = "playwrightcapture" -version = "1.16.9" +version = "1.16.10" description = "A simple library to capture websites using playwright" category = "main" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "playwrightcapture-1.16.9-py3-none-any.whl", hash = "sha256:0b8024783b8cacbfe8f7b268105753b0ec817fa88ccdbd3e34ec0b0d8ff190d8"}, - {file = "playwrightcapture-1.16.9.tar.gz", hash = "sha256:0dfb9f818cc9a69156ae458366dd9895f2de02f84c6647d1074fd36a1e7c711e"}, + {file = "playwrightcapture-1.16.10-py3-none-any.whl", hash = "sha256:7fc67f9ba93c8fb7a4947257cf9f797e2279a0f2839e57ffb2e505f38ec90f28"}, + {file = "playwrightcapture-1.16.10.tar.gz", hash = "sha256:ab537b257056a30733a6ff0ce12d57244edb6d3c1d488ff43d5c1ba312677978"}, ] [package.dependencies] beautifulsoup4 = ">=4.11.1,<5.0.0" dateparser = ">=1.1.4,<2.0.0" lxml = ">=4.9.2,<5.0.0" -playwright = ">=1.28.0,<2.0.0" +playwright = ">=1.29.0,<2.0.0" pydub = {version = ">=0.25.1,<0.26.0", optional = true, markers = "extra == \"recaptcha\""} requests = {version = ">=2.28.1,<3.0.0", optional = true, markers = "extra == \"recaptcha\""} SpeechRecognition = {version = ">=3.9.0,<4.0.0", optional = true, markers = "extra == \"recaptcha\""} @@ -1868,6 +1868,24 @@ requests = ">=2.28.1,<3.0.0" [package.extras] docs = ["Sphinx (>=5.1.1,<6.0.0)"] +[[package]] +name = "pyipasnhistory" +version = "2.1.2" +description = "Python client for IP ASN History" +category = "main" +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "pyipasnhistory-2.1.2-py3-none-any.whl", hash = "sha256:7743de1bb7e735f9b907a3cff8ab189a1d8b5517b56b64f151fc4793b2863e35"}, + {file = "pyipasnhistory-2.1.2.tar.gz", hash = "sha256:10aed86bfbaedc8a119cdd5f59eca646938eb266c717f10394ba9fc2199f0281"}, +] + +[package.dependencies] +requests = ">=2.28.1,<3.0.0" + +[package.extras] +docs = ["Sphinx (>=5.3.0,<6.0.0)"] + [[package]] name = "pylacus" version = "1.1.1" @@ -2822,4 +2840,4 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools" [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.12" -content-hash = "306d1dd17105d19461515493ba490a2b08522208ebee5c92e3850a7149687ac5" +content-hash = "6da977b7d29f6753c86bc1be143c03d34b2774ed3e1bdff0c0f2c2f533a3d7c8" diff --git a/pyproject.toml b/pyproject.toml index e31e1e1f..9e17264f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,6 +67,7 @@ filetype = "^1.2.0" pypandora = "^1.2.0" lacuscore = "^1.1.14" pylacus = "^1.1.1" +pyipasnhistory = "^2.1.2" [tool.poetry.group.dev.dependencies] mypy = "^0.991" diff --git a/website/web/templates/hostname_popup.html b/website/web/templates/hostname_popup.html index 294d6f23..03acaa3a 100644 --- a/website/web/templates/hostname_popup.html +++ b/website/web/templates/hostname_popup.html @@ -120,7 +120,10 @@
Domain IPs from a standalone DNS lookup:
{% endif %}