108 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
			
		
		
	
	
			108 lines
		
	
	
		
			3.2 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable File
		
	
| #!/usr/bin/env python3
 | |
| # -*- coding: utf-8 -*-
 | |
| import ipaddress
 | |
| from urllib.parse import urlparse, urljoin
 | |
| 
 | |
| import requests
 | |
| from bs4 import BeautifulSoup
 | |
| 
 | |
| from generator import download_to_file, get_version, write_to_file, get_abspath_source_file
 | |
| 
 | |
| 
 | |
| def is_valid(url):
 | |
|     """
 | |
|     Checks whether `url` is a valid URL.
 | |
|     """
 | |
|     parsed = urlparse(url)
 | |
|     return bool(parsed.netloc) and bool(parsed.scheme)
 | |
| 
 | |
| 
 | |
| def get_all_website_links(url):
 | |
|     internal_urls = set()
 | |
|     external_urls = set()
 | |
| 
 | |
|     """
 | |
|     Returns all URLs that is found on `url` in which it belongs to the same website
 | |
|     """
 | |
|     # all URLs of `url`
 | |
|     urls = set()
 | |
|     # domain name of the URL without the protocol
 | |
|     domain_name = urlparse(url).netloc
 | |
|     soup = BeautifulSoup(requests.get(url).content, "html.parser")
 | |
| 
 | |
|     for a_tag in soup.findAll("a"):
 | |
|         href = a_tag.attrs.get("href")
 | |
|         if href == "" or href is None:
 | |
|             # href empty tag
 | |
|             continue
 | |
| 
 | |
|         # join the URL if it's relative (not absolute link)
 | |
|         href = urljoin(url, href)
 | |
| 
 | |
|         parsed_href = urlparse(href)
 | |
|         # remove URL GET parameters, URL fragments, etc.
 | |
|         href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path
 | |
| 
 | |
|         if not is_valid(href):
 | |
|             # not a valid URL
 | |
|             continue
 | |
|         if href in internal_urls:
 | |
|             # already in the set
 | |
|             continue
 | |
|         if domain_name not in href:
 | |
|             # external link
 | |
|             if href not in external_urls:
 | |
|                 external_urls.add(href)
 | |
|             continue
 | |
|         urls.add(href)
 | |
|         internal_urls.add(href)
 | |
| 
 | |
|     return urls, internal_urls, external_urls
 | |
| 
 | |
| 
 | |
| def get_file_link(base_url, filename):
 | |
|     urls, internal_urls, external_urls = get_all_website_links(base_url)
 | |
|     for url in internal_urls:
 | |
|         if filename in url:
 | |
|             return url
 | |
| 
 | |
| 
 | |
| def process(files, dst):
 | |
|     warninglist = {
 | |
|         'name': "List of known Stackpath CDN IP ranges",
 | |
|         'version': get_version(),
 | |
|         'description': "List of known Stackpath (Highwinds) CDN IP ranges (https://support.stackpath.com/hc/en-us/articles/360001091666-Whitelist-CDN-WAF-IP-Blocks)",
 | |
|         'type': "cidr",
 | |
|         'list': [],
 | |
|         'matching_attributes': ["ip-dst", "ip-src", "domain|ip"]
 | |
|     }
 | |
| 
 | |
|     for file in files:
 | |
|         with open(get_abspath_source_file(file), 'r') as f:
 | |
|             ips = f.readlines()
 | |
|         for ip in ips:
 | |
|             iptoadd = ip.strip()
 | |
|             try:
 | |
|                 ipaddress.ip_network(ip.strip())
 | |
|             except ValueError as err:  # if it's host given strip to the subnet
 | |
|                 iptoadd = str(ipaddress.IPv6Interface(ip.strip()).ip)
 | |
|             warninglist['list'].append(iptoadd)
 | |
| 
 | |
|     write_to_file(warninglist, dst)
 | |
| 
 | |
| 
 | |
| if __name__ == '__main__':
 | |
|     # Base url where a text file is attached https://support.stackpath.com/hc/en-us/articles/360001091666-Whitelist-CDN-WAF-IP-Blocks"
 | |
|     sp_base_url = "https://support.stackpath.com/hc/en-us/article_attachments/360096407372/ipblocks.txt"
 | |
|     filename = 'ipblocks.txt'
 | |
|     sp_dst = 'stackpath'
 | |
| 
 | |
|     to_process = list()
 | |
| 
 | |
|     #url = get_file_link(sp_base_url, filename)
 | |
|     file = 'stackpath_{}'.format(filename)
 | |
|     download_to_file(sp_base_url, file)
 | |
|     to_process.append(file)
 | |
| 
 | |
|     process(to_process, sp_dst)
 |