54 lines
1.8 KiB
Python
54 lines
1.8 KiB
Python
#!/usr/bin/env python3
|
|
|
|
from lxml import etree
|
|
from bs4 import BeautifulSoup
|
|
import datetime
|
|
import urllib.request, urllib.parse, urllib.error
|
|
import json
|
|
|
|
# Download the Wikipedia "List of Google domains" article and parse it.
# Alternative (printable) rendering of the same page, kept for reference:
#   https://en.wikipedia.org/w/index.php?title=List_of_Google_domains&printable=yes
#
# The response is opened in a ``with`` block so the underlying HTTP
# connection is released as soon as the document has been parsed, even if
# parsing raises.
with urllib.request.urlopen("https://en.wikipedia.org/w/index.php?title=List_of_Google_domains") as webpage:
    soup = BeautifulSoup(webpage, 'html.parser')

# Select the tables whose class attribute is "wikitable sortable"; these are
# the ones scanned for domain rows further down.
tables = soup.find_all("table", {"class": "wikitable sortable"})
|
|
|
|
# Walk every row of every selected table and pull the domain out of the
# third cell. The result, ``gdomains``, is a sorted list of unique strings.
gdomains = []
for tabl in tables:
    for row in tabl.find_all("tr"):
        cells = row.find_all('td')
        # Only rows with exactly four <td> cells are treated as domain rows;
        # anything else (e.g. rows without <td> cells) is ignored.
        if len(cells) != 4:
            continue

        # All text nodes found inside the third cell, in document order.
        fragments = cells[2].find_all(string=True)
        if not fragments:
            # A cell without any text has no domain to record. Indexing
            # fragments[0] here would raise IndexError, so skip the row.
            continue

        # Lengths are compared with ``==``: ``is`` tests object identity and
        # only works for small ints by accident of the interpreter.
        if len(fragments) == 1:
            domain = "{}".format(fragments[0])
        elif len(fragments) == 3 and "[" not in fragments[2]:
            # Three fragments and the last one has no "[": join the first
            # and the last. NOTE(review): a "[" in the third fragment is
            # presumably a footnote marker such as "[1]" -- confirm.
            domain = "{}{}".format(fragments[0], fragments[2])
        else:
            # Two fragments, three fragments ending in a "[...]" piece, or
            # more than three: join the first two.
            domain = "{}{}".format(fragments[0], fragments[1])

        # NOTE(review): fragments are joined verbatim, so any whitespace
        # present in the cell text is kept -- confirm this is intended.
        print(domain)
        gdomains.append(domain)

# Drop duplicates and order the result.
gdomains = sorted(set(gdomains))
|
|
|
|
# Build the warninglist dictionary from the collected domains.
#
# ``version`` is today's date encoded as the integer YYYYMMDD.
version = int(datetime.date.today().strftime('%Y%m%d'))

# Keys are inserted in the same order as before so that a JSON dump of the
# dictionary keeps the same layout. ``list`` holds the unique domains in
# sorted order.
google_warninglist = {
    'description': "Event contains one or more entries from the google owned domains.",
    'version': version,
    'name': "Known Google domains",
    'list': sorted(set(gdomains)),
    'matching_attributes': ['hostname', 'domain'],
}

# The dictionary is only built here; emitting it is left disabled:
#   print(json.dumps(google_warninglist))
|