new: Basic support for IPASN lookup

pull/562/head
Raphaël Vinot 2022-12-23 16:36:11 +01:00
parent 19259ca1dd
commit 479fcb39e5
4 changed files with 79 additions and 6 deletions

View File

@ -18,6 +18,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union, Set
import dns.rdatatype
import dns.resolver
from har2tree import CrawledTree, Har2TreeError, HarFile
from pyipasnhistory import IPASNHistory
from redis import Redis
from .context import Context
@ -111,6 +112,14 @@ class CapturesIndex(Mapping):
self.__cache: Dict[str, CaptureCache] = {}
self._quick_init()
self.timeout = get_config('generic', 'max_tree_create_time')
try:
self.ipasnhistory: Optional[IPASNHistory] = IPASNHistory()
if not self.ipasnhistory.is_up:
self.ipasnhistory = None
except Exception as e:
# Unable to setup IPASN History
print(e)
self.ipasnhistory = None
@property
def cached_captures(self) -> Set[str]:
@ -374,6 +383,8 @@ class CapturesIndex(Mapping):
cnames_path = ct.root_hartree.har.path.parent / 'cnames.json'
ips_path = ct.root_hartree.har.path.parent / 'ips.json'
ipasn_path = ct.root_hartree.har.path.parent / 'ipasn.json'
host_cnames: Dict[str, str] = {}
if cnames_path.exists():
try:
@ -392,6 +403,15 @@ class CapturesIndex(Mapping):
# The json is broken, delete and re-trigger the requests
host_ips = {}
ipasn: Dict[str, Dict[str, str]] = {}
if ipasn_path.exists():
try:
with ipasn_path.open() as f:
ipasn = json.load(f)
except json.decoder.JSONDecodeError:
# The json is broken, discard its content and re-trigger the requests
ipasn = {}
for node in ct.root_hartree.hostname_tree.traverse():
if node.name not in host_cnames or node.name not in host_ips:
# Resolve and cache
@ -415,8 +435,39 @@ class CapturesIndex(Mapping):
elif node.name in host_ips:
node.add_feature('resolved_ips', host_ips[node.name])
if self.ipasnhistory:
# Submit all the IPs we have no ASN info for yet to IPASN History, so they can be queried afterwards.
if ips := [{'ip': ip} for ips_sublist in host_ips.values() for ip in ips_sublist if ip and ip not in ipasn]:
try:
self.ipasnhistory.mass_cache(ips)
except Exception as e:
self.logger.warning(f'Unable to submit IPs to IPASNHistory: {e}')
else:
time.sleep(2)
ipasn_responses = self.ipasnhistory.mass_query(ips)
if 'responses' in ipasn_responses:
for response in ipasn_responses['responses']:
ip = response['meta']['ip']
r = list(response['response'].values())[0]
if ip not in ipasn and r:
ipasn[ip] = r
if ipasn:
# retraverse tree to populate it with the features
for node in ct.root_hartree.hostname_tree.traverse():
if not hasattr(node, 'resolved_ips'):
continue
ipasn_entries = {}
for ip in node.resolved_ips:
if ip not in ipasn:
continue
ipasn_entries[ip] = ipasn[ip]
if ipasn_entries:
node.add_feature('ipasn', ipasn_entries)
with cnames_path.open('w') as f:
json.dump(host_cnames, f)
with ips_path.open('w') as f:
json.dump(host_ips, f)
with ipasn_path.open('w') as f:
json.dump(ipasn, f)
return ct

28
poetry.lock generated
View File

@ -1666,21 +1666,21 @@ typing-extensions = {version = "*", markers = "python_version <= \"3.8\""}
[[package]]
name = "playwrightcapture"
version = "1.16.9"
version = "1.16.10"
description = "A simple library to capture websites using playwright"
category = "main"
optional = false
python-versions = ">=3.8,<4.0"
files = [
{file = "playwrightcapture-1.16.9-py3-none-any.whl", hash = "sha256:0b8024783b8cacbfe8f7b268105753b0ec817fa88ccdbd3e34ec0b0d8ff190d8"},
{file = "playwrightcapture-1.16.9.tar.gz", hash = "sha256:0dfb9f818cc9a69156ae458366dd9895f2de02f84c6647d1074fd36a1e7c711e"},
{file = "playwrightcapture-1.16.10-py3-none-any.whl", hash = "sha256:7fc67f9ba93c8fb7a4947257cf9f797e2279a0f2839e57ffb2e505f38ec90f28"},
{file = "playwrightcapture-1.16.10.tar.gz", hash = "sha256:ab537b257056a30733a6ff0ce12d57244edb6d3c1d488ff43d5c1ba312677978"},
]
[package.dependencies]
beautifulsoup4 = ">=4.11.1,<5.0.0"
dateparser = ">=1.1.4,<2.0.0"
lxml = ">=4.9.2,<5.0.0"
playwright = ">=1.28.0,<2.0.0"
playwright = ">=1.29.0,<2.0.0"
pydub = {version = ">=0.25.1,<0.26.0", optional = true, markers = "extra == \"recaptcha\""}
requests = {version = ">=2.28.1,<3.0.0", optional = true, markers = "extra == \"recaptcha\""}
SpeechRecognition = {version = ">=3.9.0,<4.0.0", optional = true, markers = "extra == \"recaptcha\""}
@ -1868,6 +1868,24 @@ requests = ">=2.28.1,<3.0.0"
[package.extras]
docs = ["Sphinx (>=5.1.1,<6.0.0)"]
[[package]]
name = "pyipasnhistory"
version = "2.1.2"
description = "Python client for IP ASN History"
category = "main"
optional = false
python-versions = ">=3.8,<4.0"
files = [
{file = "pyipasnhistory-2.1.2-py3-none-any.whl", hash = "sha256:7743de1bb7e735f9b907a3cff8ab189a1d8b5517b56b64f151fc4793b2863e35"},
{file = "pyipasnhistory-2.1.2.tar.gz", hash = "sha256:10aed86bfbaedc8a119cdd5f59eca646938eb266c717f10394ba9fc2199f0281"},
]
[package.dependencies]
requests = ">=2.28.1,<3.0.0"
[package.extras]
docs = ["Sphinx (>=5.3.0,<6.0.0)"]
[[package]]
name = "pylacus"
version = "1.1.1"
@ -2822,4 +2840,4 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
[metadata]
lock-version = "2.0"
python-versions = ">=3.8,<3.12"
content-hash = "306d1dd17105d19461515493ba490a2b08522208ebee5c92e3850a7149687ac5"
content-hash = "6da977b7d29f6753c86bc1be143c03d34b2774ed3e1bdff0c0f2c2f533a3d7c8"

View File

@ -67,6 +67,7 @@ filetype = "^1.2.0"
pypandora = "^1.2.0"
lacuscore = "^1.1.14"
pylacus = "^1.1.1"
pyipasnhistory = "^2.1.2"
[tool.poetry.group.dev.dependencies]
mypy = "^0.991"

View File

@ -120,7 +120,10 @@
<h5>Domain IPs from a standalone DNS lookup:</h5>
<ul>
{% for ip in hostnode.resolved_ips %}
<li>{{ ip }}{% if uwhois_available %} (<a href="{{ url_for('whois', query=ip)}}">whois</a>){% endif %}</li>
<li>
{{ ip }}{% if uwhois_available %} (<a href="{{ url_for('whois', query=ip)}}">whois</a>){% endif %}
{% if 'ipasn' in hostnode.features and hostnode.ipasn.get(ip) %}- AS{{ hostnode.ipasn[ip]['asn'] }} {% if uwhois_available %} (<a href="{{ url_for('whois', query='AS'+hostnode.ipasn[ip]['asn'])}}">whois</a>){% endif %}{% endif %}
</li>
{% endfor %}
</ul>
{% endif %}