misp-warninglists/tools/generate-google.py

54 lines
1.8 KiB
Python
Executable File

#!/usr/bin/env python3
from lxml import etree
from bs4 import BeautifulSoup
import datetime
import urllib.request, urllib.parse, urllib.error
import json
#webpage = urllib.request.urlopen("https://en.wikipedia.org/w/index.php?title=List_of_Google_domains&printable=yes")
webpage = urllib.request.urlopen("https://en.wikipedia.org/w/index.php?title=List_of_Google_domains")
soup = BeautifulSoup(webpage,'html.parser')
tables = soup.find_all("table", { "class" : "wikitable sortable" })
#print(tables)
gdomains = []
for tabl in tables :
for row in tabl.findAll("tr"):
cells = row.findAll('td')
if len(cells) == 4:
domain = cells[2].find_all(text=True)
if len(domain) is 1 :
domain = "{}".format(domain[0])
elif len(domain) is 2 :
domain = "{}{}".format(domain[0], domain[1])
elif len(domain) is 3 and not "[" in domain[2]:
domain = "{}{}".format(domain[0], domain[2])
else:
domain = "{}{}".format(domain[0], domain[1])
print(domain)
gdomains.append(domain)
#print(gdomains)
gdomains = sorted(set(gdomains))
google_warninglist = {}
version = int(datetime.date.today().strftime('%Y%m%d'))
google_warninglist['description'] = "Event contains one or more entries from the google owned domains."
d = datetime.datetime.now()
google_warninglist['version'] = version
google_warninglist['name'] = "Known Google domains"
google_warninglist['list'] = []
google_warninglist['matching_attributes'] = ['hostname', 'domain']
for site in gdomains:
#v = str(site).split(',')[1]
google_warninglist['list'].append(site)
google_warninglist['list'] = sorted(set(google_warninglist['list']))
#print(json.dumps(google_warninglist))