From 08a0a32813822a75b31b3c7e39825f4edb5409d0 Mon Sep 17 00:00:00 2001 From: Kevin Holvoet Date: Wed, 2 Jun 2021 14:51:30 +0200 Subject: [PATCH] generate-stackpath.py: Added scraper on website to get new link for ipblocks.txt file --- lists/stackpath/list.json | 224 +++++++++++++++++++++++++++++++++++- tools/generate-stackpath.py | 81 +++++++++++-- 2 files changed, 294 insertions(+), 11 deletions(-) diff --git a/lists/stackpath/list.json b/lists/stackpath/list.json index 1f096b4..6a967f4 100644 --- a/lists/stackpath/list.json +++ b/lists/stackpath/list.json @@ -1,6 +1,226 @@ { "description": "List of known Stackpath (Highwinds) CDN IP ranges (https://support.stackpath.com/hc/en-us/articles/360001091666-Whitelist-CDN-WAF-IP-Blocks)", - "list": [], + "list": [ + "102.133.165.127/32", + "102.133.168.247/32", + "103.209.192.93/32", + "103.228.104.0/24", + "103.66.28.0/22", + "104.156.232.232/32", + "104.214.147.166/32", + "104.214.150.207/32", + "104.238.157.42/32", + "108.61.185.90/32", + "120.26.119.191/32", + "146.88.130.128/25", + "149.154.157.239/32", + "149.154.159.21/32", + "149.28.235.77/32", + "149.28.254.195/32", + "151.139.0.0/17", + "151.139.42.0/24", + "151.236.14.231/32", + "151.236.14.238/32", + "151.236.15.26/32", + "151.236.18.167/32", + "151.236.20.95/32", + "151.236.21.35/32", + "151.236.21.87/32", + "151.236.23.142/32", + "151.236.23.78/32", + "151.236.24.35/32", + "151.236.24.50/32", + "158.255.208.86/32", + "173.245.194.0/24", + "173.245.208.64/26", + "173.245.210.64/26", + "173.245.216.64/26", + "173.245.218.64/26", + "184.179.88.128/25", + "184.179.90.128/25", + "185.157.232.52/32", + "185.157.233.153/32", + "185.69.89.0/24", + "185.69.91.0/24", + "192.166.245.71/32", + "192.166.245.98/32", + "2001:1938:7001:1::/64", + "2001:1938:7002:1::/64", + "2001:1938:7003:1::/64", + "2001:1938:7004:1::/64", + "2001:1938:7005:1::/64", + "2001:1938:7006:1::/64", + "2001:1938:7007:1::/64", + "2001:1938:7008:1::/64", + "2001:19f0:5800:8bfc:5400:ff:fe1c:5b87", + "2001:19f0:5800:8d34:5400:ff:fe1c:5b8c", + "2001:19f0:6000:9301:5400:ff:fe1c:85", + "2001:19f0:6000:95c5:5400:ff:fe1c:88", + "2001:19f0:7000:9aa1:5400:ff:fe1c:1090", + "2001:19f0:7000:9c35:5400:ff:fe1c:4562", + "2001:19f0:7401:834f:5400:ff:fe1c:c96", + "2001:19f0:7401:844e:5400:ff:fe1c:c99", + "2001:19f0:8000:8652:5400:ff:fe1c:45c2", + "2001:19f0:8000:8706:5400:ff:fe1c:45c4", + "2001:4801:7824:101:be76:4eff:fe10:24dc", + "2001:4801:7824:101:be76:4eff:fe10:55c6", + "2001:4DE0:110::/64", + "2001:4DE0:2010::/64", + "2001:4DE0:210::/64", + "2001:4DE0:2110::/64", + "2001:4DE0:2210:1::/64", + "2001:4DE0:2210::/64", + "2001:4DE0:2310::/64", + "2001:4DE0:3010::/64", + "2001:4DE0:3110::/64", + "2001:4DE0:4010::/64", + "2001:4DE0:410::/64", + "2001:4DE0:4110::/64", + "2001:4DE0:4310::/64", + "2001:4DE0:5010::/64", + "2001:4DE0:510::/64", + "2001:4DE0:610::/64", + "2001:4DE0:7001:1::/64", + "2001:4DE0:7002:1::/64", + "2001:4DE0:7003:1::/64", + "2001:57A:300:1100::/64", + "2001:57A:400:1100::/64", + "2001:b60:1000:149:154:157:239:1", + "2001:b60:1000:151:236:18:167:1", + "205.185.216.0/22", + "207.148.1.50/32", + "209.197.10.0/24", + "209.197.21.0/24", + "209.197.24.0/21", + "209.197.7.0/24", + "209.197.8.0/21", + "209.234.242.0/25", + "213.183.56.187/32", + "213.183.56.71/32", + "23.253.20.207/32", + "23.253.22.201/32", + "2407:1580:1100::/40", + "2407:1580:1200::/40", + "2407:1580:1300::/40", + "2407:1580:1400::/40", + "2407:1580:1500::/40", + "2407:1580:1600::/40", + "2407:1580:1700::/40", + "2604:6840:1100::/40", + "2604:6840:1200::/40", + "2604:6840:1300::/40", + "2604:6840:1400::/40", + "2604:6840:1500::/40", + "2604:6840:1600::/40", + "2604:6840:1700::/40", + "2604:6840:1800::/40", + "2604:6840:1900::/40", + "2604:6840:1C00:1100::/64", + "2604:6840:1E00::/40", + "2604:6840:f800::/40", + "2604:6840:f900::/40", + "2604:6840:fa00::/40", + "2604:6840:fb00::/40", + "2604:6840:fc00::/40", + "2606:CE80:6100:1::/64", + "2606:CE80:6200:1::/64", + "2606:CE80:6300:1::/64", + "2606:CE80:6400:1::/64", + "2606:CE80:6500:1::/64", + "2606:CE80:6600:1::/64", + "2606:CE80:6700:2::/64", + "2606:CE80:6900:1::/64", + "2A0A:E200:1100::/40", + "2A0A:E200:1300::/40", + "2A0A:E200:1400::/40", + "2A0A:E200:1600::/40", + "2A0A:E200:1700::/40", + "2A0A:E200:1900:1100::/64", + "2A0A:E200:1A00::/40", + "2A0A:E200:1C00::/40", + "2a00:1768:1003:151:236:14:231:1", + "2a00:1768:1003:151:236:14:238:1", + "2a00:1a28:1251:46:246:126:136:1", + "2a00:1a28:1251:46:246:93:179:1", + "2a00:1a48:7805:113:be76:4eff:fe08:25fa", + "2a00:1a48:7805:113:be76:4eff:fe09:1f07", + "2a00:1d70:ed15:151:236:23:142:1", + "2a00:1d70:ed15:151:236:23:78:1", + "2a01:348:99:151:236:21:35:1", + "2a01:348:99:151:236:21:87:1", + "2a03:f80:354:151:236:24:35:1", + "2a03:f80:354:151:236:24:50:1", + "2a03:f80:49:149:154:159:21:1", + "2a03:f80:49:151:236:15:26:1", + "2a03:f80:56:37:235:52:196:1", + "2a03:f80:56:37:235:52:70:1", + "2a03:f80:7:213:183:56:187:1", + "2a03:f80:7:213:183:56:71:1", + "2a03:f80:852:151:236:20:95:1", + "2a03:f80:852:158:255:208:86:1", + "2a07:4580:b0d:82::793a", + "2a07:4580:b0d:f::6324", + "2a0a:e200:1100:1600::/56", + "2a0a:e200:1900:1100::", + "2a0a:e200:1a00:1100::/56", + "2a0a:e200:1b00:1100::/56", + "34.201.233.220/32", + "34.203.52.30/32", + "34.249.164.113/32", + "34.85.22.40/32", + "34.87.56.240/32", + "34.90.24.209/32", + "35.186.155.99/32", + "35.198.12.22/32", + "35.198.52.85/32", + "35.200.117.161/32", + "35.201.16.129/32", + "35.204.22.69/32", + "35.242.210.32/32", + "35.242.213.204/32", + "35.244.107.67/32", + "35.245.0.188/32", + "35.245.111.92/32", + "37.235.52.196/32", + "37.235.52.70/32", + "45.252.191.10/32", + "45.32.129.60/32", + "45.32.179.191/32", + "45.32.183.237/32", + "45.32.225.132/32", + "45.32.45.117/32", + "45.32.52.15/32", + "45.32.69.31/32", + "45.32.94.5/32", + "45.63.90.144/32", + "46.246.126.136/32", + "46.246.93.179/32", + "47.97.251.164/32", + "52.52.23.91/32", + "52.53.106.71/32", + "54.76.234.169/32", + "63.209.33.45/32", + "69.16.133.0/24", + "69.16.176.0/20", + "69.197.10.0/24", + "69.197.11.0/24", + "69.197.12.0/24", + "69.197.8.0/24", + "69.197.9.0/24", + "74.209.134.0/24", + "78.142.19.197/32", + "81.171.105.0/24", + "81.171.106.64/26", + "81.171.112.0/24", + "81.171.60.0/24", + "81.171.61.0/24", + "81.171.68.64/26", + "84.54.49.15/32", + "94.46.144.0/20", + "95.138.170.88/32", + "95.138.175.4/32", + "A0A:E200:1200::/40" + ], "matching_attributes": [ "ip-dst", "ip-src", @@ -8,5 +228,5 @@ ], "name": "List of known Stackpath CDN IP ranges", "type": "cidr", - "version": 20210526 + "version": 20210602 } diff --git a/tools/generate-stackpath.py b/tools/generate-stackpath.py index b791166..2403ad9 100755 --- a/tools/generate-stackpath.py +++ b/tools/generate-stackpath.py @@ -1,8 +1,71 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import ipaddress +from urllib.parse import urlparse, urljoin + +import requests +from bs4 import BeautifulSoup from generator import download_to_file, get_version, write_to_file, get_abspath_source_file -import ipaddress + + +def is_valid(url): + """ + Checks whether `url` is a valid URL. + """ + parsed = urlparse(url) + return bool(parsed.netloc) and bool(parsed.scheme) + + +def get_all_website_links(url): + internal_urls = set() + external_urls = set() + + """ + Returns all URLs that is found on `url` in which it belongs to the same website + """ + # all URLs of `url` + urls = set() + # domain name of the URL without the protocol + domain_name = urlparse(url).netloc + soup = BeautifulSoup(requests.get(url).content, "html.parser") + + for a_tag in soup.findAll("a"): + href = a_tag.attrs.get("href") + if href == "" or href is None: + # href empty tag + continue + + # join the URL if it's relative (not absolute link) + href = urljoin(url, href) + + parsed_href = urlparse(href) + # remove URL GET parameters, URL fragments, etc. + href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path + + if not is_valid(href): + # not a valid URL + continue + if href in internal_urls: + # already in the set + continue + if domain_name not in href: + # external link + if href not in external_urls: + external_urls.add(href) + continue + urls.add(href) + internal_urls.add(href) + + return urls, internal_urls, external_urls + + +def get_file_link(base_url, filename): + urls, internal_urls, external_urls = get_all_website_links(base_url) + for url in internal_urls: + if filename in url: + return url + def process(files, dst): warninglist = { @@ -21,23 +84,23 @@ def process(files, dst): iptoadd = ip.strip() try: ipaddress.ip_network(ip.strip()) - except ValueError as err:# if it's host given strip to the subnet + except ValueError as err: # if it's host given strip to the subnet iptoadd = str(ipaddress.IPv6Interface(ip.strip()).ip) warninglist['list'].append(iptoadd) write_to_file(warninglist, dst) + if __name__ == '__main__': - sp_base_url = "https://support.stackpath.com/hc/en-us/article_attachments/360083735711/" - uri_list = ['ipblocks.txt'] + sp_base_url = "https://support.stackpath.com/hc/en-us/articles/360001091666-Whitelist-CDN-WAF-IP-Blocks" + filename = 'ipblocks.txt' sp_dst = 'stackpath' to_process = list() - for uri in uri_list: - url = sp_base_url+uri - file = 'stackpath_{}'.format(uri) - download_to_file(url, file) - to_process.append(file) + url = get_file_link(sp_base_url, filename) + file = 'stackpath_{}'.format(filename) + download_to_file(url, file) + to_process.append(file) process(to_process, sp_dst)