generate-stackpath.py: Added scraper on website to get new link for ipblocks.txt file

pull/181/head
Kevin Holvoet 2021-06-02 14:51:30 +02:00
parent 29192e0b20
commit 08a0a32813
2 changed files with 294 additions and 11 deletions

View File

@ -1,6 +1,226 @@
{
"description": "List of known Stackpath (Highwinds) CDN IP ranges (https://support.stackpath.com/hc/en-us/articles/360001091666-Whitelist-CDN-WAF-IP-Blocks)",
"list": [],
"list": [
"102.133.165.127/32",
"102.133.168.247/32",
"103.209.192.93/32",
"103.228.104.0/24",
"103.66.28.0/22",
"104.156.232.232/32",
"104.214.147.166/32",
"104.214.150.207/32",
"104.238.157.42/32",
"108.61.185.90/32",
"120.26.119.191/32",
"146.88.130.128/25",
"149.154.157.239/32",
"149.154.159.21/32",
"149.28.235.77/32",
"149.28.254.195/32",
"151.139.0.0/17",
"151.139.42.0/24",
"151.236.14.231/32",
"151.236.14.238/32",
"151.236.15.26/32",
"151.236.18.167/32",
"151.236.20.95/32",
"151.236.21.35/32",
"151.236.21.87/32",
"151.236.23.142/32",
"151.236.23.78/32",
"151.236.24.35/32",
"151.236.24.50/32",
"158.255.208.86/32",
"173.245.194.0/24",
"173.245.208.64/26",
"173.245.210.64/26",
"173.245.216.64/26",
"173.245.218.64/26",
"184.179.88.128/25",
"184.179.90.128/25",
"185.157.232.52/32",
"185.157.233.153/32",
"185.69.89.0/24",
"185.69.91.0/24",
"192.166.245.71/32",
"192.166.245.98/32",
"2001:1938:7001:1::/64",
"2001:1938:7002:1::/64",
"2001:1938:7003:1::/64",
"2001:1938:7004:1::/64",
"2001:1938:7005:1::/64",
"2001:1938:7006:1::/64",
"2001:1938:7007:1::/64",
"2001:1938:7008:1::/64",
"2001:19f0:5800:8bfc:5400:ff:fe1c:5b87",
"2001:19f0:5800:8d34:5400:ff:fe1c:5b8c",
"2001:19f0:6000:9301:5400:ff:fe1c:85",
"2001:19f0:6000:95c5:5400:ff:fe1c:88",
"2001:19f0:7000:9aa1:5400:ff:fe1c:1090",
"2001:19f0:7000:9c35:5400:ff:fe1c:4562",
"2001:19f0:7401:834f:5400:ff:fe1c:c96",
"2001:19f0:7401:844e:5400:ff:fe1c:c99",
"2001:19f0:8000:8652:5400:ff:fe1c:45c2",
"2001:19f0:8000:8706:5400:ff:fe1c:45c4",
"2001:4801:7824:101:be76:4eff:fe10:24dc",
"2001:4801:7824:101:be76:4eff:fe10:55c6",
"2001:4DE0:110::/64",
"2001:4DE0:2010::/64",
"2001:4DE0:210::/64",
"2001:4DE0:2110::/64",
"2001:4DE0:2210:1::/64",
"2001:4DE0:2210::/64",
"2001:4DE0:2310::/64",
"2001:4DE0:3010::/64",
"2001:4DE0:3110::/64",
"2001:4DE0:4010::/64",
"2001:4DE0:410::/64",
"2001:4DE0:4110::/64",
"2001:4DE0:4310::/64",
"2001:4DE0:5010::/64",
"2001:4DE0:510::/64",
"2001:4DE0:610::/64",
"2001:4DE0:7001:1::/64",
"2001:4DE0:7002:1::/64",
"2001:4DE0:7003:1::/64",
"2001:57A:300:1100::/64",
"2001:57A:400:1100::/64",
"2001:b60:1000:149:154:157:239:1",
"2001:b60:1000:151:236:18:167:1",
"205.185.216.0/22",
"207.148.1.50/32",
"209.197.10.0/24",
"209.197.21.0/24",
"209.197.24.0/21",
"209.197.7.0/24",
"209.197.8.0/21",
"209.234.242.0/25",
"213.183.56.187/32",
"213.183.56.71/32",
"23.253.20.207/32",
"23.253.22.201/32",
"2407:1580:1100::/40",
"2407:1580:1200::/40",
"2407:1580:1300::/40",
"2407:1580:1400::/40",
"2407:1580:1500::/40",
"2407:1580:1600::/40",
"2407:1580:1700::/40",
"2604:6840:1100::/40",
"2604:6840:1200::/40",
"2604:6840:1300::/40",
"2604:6840:1400::/40",
"2604:6840:1500::/40",
"2604:6840:1600::/40",
"2604:6840:1700::/40",
"2604:6840:1800::/40",
"2604:6840:1900::/40",
"2604:6840:1C00:1100::/64",
"2604:6840:1E00::/40",
"2604:6840:f800::/40",
"2604:6840:f900::/40",
"2604:6840:fa00::/40",
"2604:6840:fb00::/40",
"2604:6840:fc00::/40",
"2606:CE80:6100:1::/64",
"2606:CE80:6200:1::/64",
"2606:CE80:6300:1::/64",
"2606:CE80:6400:1::/64",
"2606:CE80:6500:1::/64",
"2606:CE80:6600:1::/64",
"2606:CE80:6700:2::/64",
"2606:CE80:6900:1::/64",
"2A0A:E200:1100::/40",
"2A0A:E200:1300::/40",
"2A0A:E200:1400::/40",
"2A0A:E200:1600::/40",
"2A0A:E200:1700::/40",
"2A0A:E200:1900:1100::/64",
"2A0A:E200:1A00::/40",
"2A0A:E200:1C00::/40",
"2a00:1768:1003:151:236:14:231:1",
"2a00:1768:1003:151:236:14:238:1",
"2a00:1a28:1251:46:246:126:136:1",
"2a00:1a28:1251:46:246:93:179:1",
"2a00:1a48:7805:113:be76:4eff:fe08:25fa",
"2a00:1a48:7805:113:be76:4eff:fe09:1f07",
"2a00:1d70:ed15:151:236:23:142:1",
"2a00:1d70:ed15:151:236:23:78:1",
"2a01:348:99:151:236:21:35:1",
"2a01:348:99:151:236:21:87:1",
"2a03:f80:354:151:236:24:35:1",
"2a03:f80:354:151:236:24:50:1",
"2a03:f80:49:149:154:159:21:1",
"2a03:f80:49:151:236:15:26:1",
"2a03:f80:56:37:235:52:196:1",
"2a03:f80:56:37:235:52:70:1",
"2a03:f80:7:213:183:56:187:1",
"2a03:f80:7:213:183:56:71:1",
"2a03:f80:852:151:236:20:95:1",
"2a03:f80:852:158:255:208:86:1",
"2a07:4580:b0d:82::793a",
"2a07:4580:b0d:f::6324",
"2a0a:e200:1100:1600::/56",
"2a0a:e200:1900:1100::",
"2a0a:e200:1a00:1100::/56",
"2a0a:e200:1b00:1100::/56",
"34.201.233.220/32",
"34.203.52.30/32",
"34.249.164.113/32",
"34.85.22.40/32",
"34.87.56.240/32",
"34.90.24.209/32",
"35.186.155.99/32",
"35.198.12.22/32",
"35.198.52.85/32",
"35.200.117.161/32",
"35.201.16.129/32",
"35.204.22.69/32",
"35.242.210.32/32",
"35.242.213.204/32",
"35.244.107.67/32",
"35.245.0.188/32",
"35.245.111.92/32",
"37.235.52.196/32",
"37.235.52.70/32",
"45.252.191.10/32",
"45.32.129.60/32",
"45.32.179.191/32",
"45.32.183.237/32",
"45.32.225.132/32",
"45.32.45.117/32",
"45.32.52.15/32",
"45.32.69.31/32",
"45.32.94.5/32",
"45.63.90.144/32",
"46.246.126.136/32",
"46.246.93.179/32",
"47.97.251.164/32",
"52.52.23.91/32",
"52.53.106.71/32",
"54.76.234.169/32",
"63.209.33.45/32",
"69.16.133.0/24",
"69.16.176.0/20",
"69.197.10.0/24",
"69.197.11.0/24",
"69.197.12.0/24",
"69.197.8.0/24",
"69.197.9.0/24",
"74.209.134.0/24",
"78.142.19.197/32",
"81.171.105.0/24",
"81.171.106.64/26",
"81.171.112.0/24",
"81.171.60.0/24",
"81.171.61.0/24",
"81.171.68.64/26",
"84.54.49.15/32",
"94.46.144.0/20",
"95.138.170.88/32",
"95.138.175.4/32",
"A0A:E200:1200::/40"
],
"matching_attributes": [
"ip-dst",
"ip-src",
@ -8,5 +228,5 @@
],
"name": "List of known Stackpath CDN IP ranges",
"type": "cidr",
"version": 20210526
"version": 20210602
}

View File

@ -1,8 +1,71 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import ipaddress
from urllib.parse import urlparse, urljoin
import requests
from bs4 import BeautifulSoup
from generator import download_to_file, get_version, write_to_file, get_abspath_source_file
import ipaddress
def is_valid(url):
    """
    Check whether `url` is an absolute URL.

    A URL counts as valid when it carries both a scheme (e.g. ``https``)
    and a network location (host). Strings that ``urlparse`` rejects
    outright are reported as invalid instead of raising.

    :param url: URL string to check.
    :return: True if `url` has both a scheme and a netloc, False otherwise.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises on malformed netlocs, e.g. an unbalanced IPv6
        # bracket such as "http://[::1"; that is simply "not valid" here.
        return False
    return bool(parsed.netloc) and bool(parsed.scheme)
def get_all_website_links(url):
    """
    Collect the targets of all ``<a>`` tags found on the page at `url`.

    Relative links are resolved against `url`; query strings and fragments
    are stripped so only ``scheme://netloc/path`` is kept. Links whose host
    is the host of `url` (or a subdomain of it) are treated as internal,
    everything else as external.

    :param url: address of the page to scrape.
    :return: tuple ``(urls, internal_urls, external_urls)`` of sets of
        strings; ``urls`` holds the same links as ``internal_urls``.
    :raises requests.RequestException: if the page cannot be fetched or the
        server answers with an HTTP error status.
    """
    internal_urls = set()
    external_urls = set()
    # host of the page being scraped, without the protocol
    domain_name = urlparse(url).netloc
    # A timeout keeps the generator from hanging on an unresponsive server.
    response = requests.get(url, timeout=30)
    # Fail loudly rather than scraping the links of an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")
    for a_tag in soup.find_all("a"):
        href = a_tag.attrs.get("href")
        if not href:
            # missing or empty href attribute
            continue
        try:
            # join the URL if it's relative (not an absolute link)
            parsed_href = urlparse(urljoin(url, href))
        except ValueError:
            # a single malformed link must not abort the whole scrape
            continue
        # remove URL GET parameters, URL fragments, etc.
        href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path
        if not is_valid(href):
            continue
        # Compare hosts, not substrings of the full URL: a foreign URL that
        # merely mentions our domain in its path is still external.
        host = parsed_href.netloc
        if host == domain_name or host.endswith("." + domain_name):
            internal_urls.add(href)
        else:
            external_urls.add(href)
    # `urls` is returned as its own set object, mirroring `internal_urls`.
    return set(internal_urls), internal_urls, external_urls
def get_file_link(base_url, filename):
    """
    Find the link to `filename` on the page at `base_url`.

    Only links classified as internal by ``get_all_website_links`` are
    considered; a link matches when `filename` occurs anywhere in it.

    :param base_url: address of the page whose links are searched.
    :param filename: name of the file to look for, e.g. ``ipblocks.txt``.
    :return: the matching URL, or None when no internal link contains
        `filename`.
    """
    _, internal_urls, _ = get_all_website_links(base_url)
    # Sets have no stable iteration order; sorting makes the choice
    # reproducible when more than one link contains `filename`.
    for url in sorted(internal_urls):
        if filename in url:
            return url
    return None
def process(files, dst):
warninglist = {
@ -27,16 +90,16 @@ def process(files, dst):
write_to_file(warninglist, dst)
if __name__ == '__main__':
sp_base_url = "https://support.stackpath.com/hc/en-us/article_attachments/360083735711/"
uri_list = ['ipblocks.txt']
sp_base_url = "https://support.stackpath.com/hc/en-us/articles/360001091666-Whitelist-CDN-WAF-IP-Blocks"
filename = 'ipblocks.txt'
sp_dst = 'stackpath'
to_process = list()
for uri in uri_list:
url = sp_base_url+uri
file = 'stackpath_{}'.format(uri)
url = get_file_link(sp_base_url, filename)
file = 'stackpath_{}'.format(filename)
download_to_file(url, file)
to_process.append(file)