54 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
			
		
		
	
	
			54 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
#!/usr/bin/env python3
 | 
						|
 | 
						|
from lxml import etree
 | 
						|
from bs4 import BeautifulSoup
 | 
						|
import datetime
 | 
						|
import urllib.request, urllib.parse, urllib.error
 | 
						|
import json
 | 
						|
 | 
						|
# Scrape Wikipedia's "List of Google domains" page and collect the domain
# names found in its sortable wikitables into ``gdomains``.
#
# The printable rendering of the page was used at some point and is kept
# here for reference:
#   https://en.wikipedia.org/w/index.php?title=List_of_Google_domains&printable=yes
with urllib.request.urlopen("https://en.wikipedia.org/w/index.php?title=List_of_Google_domains") as webpage:
    # Parse while the response is still open; the HTTP connection is
    # released as soon as the ``with`` block exits.
    soup = BeautifulSoup(webpage, 'html.parser')

tables = soup.find_all("table", {"class": "wikitable sortable"})

gdomains = []
for tabl in tables:
    for row in tabl.find_all("tr"):
        cells = row.find_all('td')
        # Only rows with exactly four <td> cells are used; anything else
        # is skipped.
        if len(cells) != 4:
            continue

        # All text nodes of the third cell. The domain may be split over
        # several nodes when the cell contains nested markup.
        fragments = cells[2].find_all(text=True)
        if not fragments:
            # A cell without any text would otherwise raise IndexError
            # below and abort the whole run.
            continue

        # Lengths are compared with ``==``: ``is`` on int literals tests
        # object identity and emits a SyntaxWarning on Python 3.8+.
        if len(fragments) == 1:
            domain = "{}".format(fragments[0])
        elif len(fragments) == 3 and "[" not in fragments[2]:
            # Three nodes and the last one is not a "[...]" marker:
            # join the first and the last node, dropping the middle one.
            domain = "{}{}".format(fragments[0], fragments[2])
        else:
            # Two nodes, or more with a "[" in the third: join the first
            # two nodes.
            domain = "{}{}".format(fragments[0], fragments[1])

        print(domain)

        gdomains.append(domain)

# De-duplicate and order the result.
gdomains = sorted(set(gdomains))
 | 
						|
 | 
						|
# Assemble the warninglist document from the scraped domains.
# The version is today's date written as an integer in YYYYMMDD form.
version = int(datetime.date.today().strftime('%Y%m%d'))
# NOTE(review): ``d`` is not read anywhere in the visible part of the script.
d = datetime.datetime.now()

google_warninglist = {
    'description': "Event contains one or more entries from the google owned domains.",
    'version': version,
    'name': "Known Google domains",
    # Unique domains in sorted order.
    'list': sorted(set(gdomains)),
    'matching_attributes': ['hostname', 'domain'],
}
# Debug aid: print(json.dumps(google_warninglist))
 |