#!/usr/bin/env python3
# -*- coding: utf-8 -*-
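"""
Generate the MISP warninglist of Stackpath (Highwinds) CDN IP ranges.

Scrapes the Stackpath support article for the `ipblocks.txt` download link,
fetches that file, and writes its entries out as a `cidr` type warninglist
named `stackpath`.
"""
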
import ipaddress
from urllib.parse import urlparse, urljoin

import requests
from bs4 import BeautifulSoup
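# Shared helpers used by the warninglist generators (assumed to live in this
# repository's local generator module, as the unprefixed import suggests).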
from generator import download_to_file, get_version, write_to_file, get_abspath_source_file


def is_valid(url):
    """
    Checks whether `url` is a valid URL.
    """
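    # e.g. "https://example.com/page" is valid; a relative path such as
    # "/downloads/ipblocks.txt" has no scheme or netloc and is not.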
    parsed = urlparse(url)
    return bool(parsed.netloc) and bool(parsed.scheme)


def get_all_website_links(url):
    """
    Crawls the page at `url` and returns three sets: the URLs found on the
    page that belong to the same website, all internal URLs seen, and all
    external URLs seen.
    """
    internal_urls = set()
    external_urls = set()
    # all URLs found on the page at `url`
    urls = set()
    # domain name of the URL without the protocol
    domain_name = urlparse(url).netloc
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    for a_tag in soup.find_all("a"):
        href = a_tag.attrs.get("href")
        if href == "" or href is None:
            # empty href attribute
            continue

        # join the URL if it's relative (not an absolute link)
        href = urljoin(url, href)

        parsed_href = urlparse(href)
        # remove URL GET parameters, URL fragments, etc.
        href = parsed_href.scheme + "://" + parsed_href.netloc + parsed_href.path

        if not is_valid(href):
            # not a valid URL
            continue
        if href in internal_urls:
            # already in the set
            continue
        if domain_name not in href:
            # external link
            if href not in external_urls:
                external_urls.add(href)
            continue
        urls.add(href)
        internal_urls.add(href)

    return urls, internal_urls, external_urls


def get_file_link(base_url, filename):
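    """
    Returns the first internal link on `base_url` whose URL contains
    `filename`, or None when no matching link is found.
    """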
    urls, internal_urls, external_urls = get_all_website_links(base_url)
    for url in internal_urls:
        if filename in url:
            return url


def process(files, dst):
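    """
    Reads the downloaded IP list files, normalises each entry to a network
    or bare address, and writes the resulting warninglist to `dst`.
    """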
    warninglist = {
        'name': "List of known Stackpath CDN IP ranges",
        'version': get_version(),
        'description': "List of known Stackpath (Highwinds) CDN IP ranges (https://support.stackpath.com/hc/en-us/articles/360001091666-Whitelist-CDN-WAF-IP-Blocks)",
        'type': "cidr",
        'list': [],
        'matching_attributes': ["ip-dst", "ip-src", "domain|ip"]
    }

    for file in files:
        with open(get_abspath_source_file(file), 'r') as f:
            ips = f.readlines()
        for ip in ips:
            iptoadd = ip.strip()
            try:
                ipaddress.ip_network(iptoadd)
            except ValueError:
                # the entry is a host address with a prefix (host bits set);
                # strip the prefix and keep the bare address. ip_interface
                # accepts both IPv4 and IPv6 entries.
                iptoadd = str(ipaddress.ip_interface(iptoadd).ip)
            warninglist['list'].append(iptoadd)

    write_to_file(warninglist, dst)


if __name__ == '__main__':
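    # Locate the ipblocks.txt download link on the Stackpath support article,
    # fetch the file, and generate the warninglist from its contents.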
    sp_base_url = "https://support.stackpath.com/hc/en-us/articles/360001091666-Whitelist-CDN-WAF-IP-Blocks"
    filename = 'ipblocks.txt'
    sp_dst = 'stackpath'

    to_process = list()

    url = get_file_link(sp_base_url, filename)
    if url is None:
        # get_file_link returns None when no matching link is found;
        # fail early with a clear error rather than crashing in the download
        raise RuntimeError('Could not find a link to {} on {}'.format(filename, sp_base_url))
    file = 'stackpath_{}'.format(filename)
    download_to_file(url, file)
    to_process.append(file)

    process(to_process, sp_dst)