70 lines
2.3 KiB
Python
Executable File
70 lines
2.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
import csv
import datetime
import json
import os
import sys

import requests
|
|
|
|
# TODO: Include MozRank
|
|
|
|
# CSV exports of the Moz "top 500" rankings.
moz_url_domains = "https://moz.com/top500/domains/csv"
moz_url_pages = "https://moz.com/top500/pages/csv"

# Scratch files the downloads are written to; removed again before exit.
moz_file_domains = "/tmp/top500.domains.csv"
moz_file_pages = "/tmp/top500.pages.csv"

# Browser-like User-Agent header sent with both requests.
user_agent = {"User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}

# Seconds to wait on the server before giving up instead of hanging forever.
request_timeout = 60


def download(url, path):
    """Fetch *url* and store the raw response body at *path*.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-success HTTP status, so an error page is never parsed as CSV.
        OSError: if *path* cannot be written.
    """
    response = requests.get(url, headers=user_agent, timeout=request_timeout)
    response.raise_for_status()
    with open(path, 'wb') as out_file:
        out_file.write(response.content)


def read_entries(path):
    """Return the cleaned second-column values of the CSV file at *path*.

    The first row is treated as a header and skipped. Each value has trailing
    whitespace and trailing slashes stripped. Rows with fewer than two columns
    and values that end up empty are ignored.
    """
    entries = []
    # newline='' is what the csv module expects for files handed to a reader.
    with open(path, newline='', encoding='utf-8') as csv_file:
        reader = csv.reader(csv_file, delimiter=',')
        next(reader, None)  # header row
        for row in reader:
            if len(row) < 2:
                continue
            value = row[1].rstrip().rstrip('/')
            if value:
                entries.append(value)
    return entries


def build_warninglist(entries, today=None):
    """Assemble the warninglist dictionary from *entries*.

    Args:
        entries: iterable of strings; duplicates are dropped and the result
            is sorted.
        today: date used for the ``version`` field (formatted as YYYYMMDD);
            defaults to the current date.

    Returns:
        dict whose keys are inserted in the same order the script has always
        emitted them.
    """
    if today is None:
        today = datetime.date.today()
    # NOTE(review): the data comes from moz.com, so "(Mozilla)" below may be a
    # misnomer - confirm with the list consumers before changing the text.
    return {
        'description': "Event contains one or more entries from the top 500 of the most used domains (Mozilla).",
        'version': int(today.strftime('%Y%m%d')),
        'name': "Top 500 domains and pages from https://moz.com/top500",
        'type': 'hostname',
        'list': sorted(set(entries)),
        'matching_attributes': ['hostname', 'domain', 'uri', 'url'],
    }


def remove_quietly(path):
    """Delete *path*, reporting (not raising) when that is not possible."""
    try:
        os.remove(path)
    except OSError:
        # stderr keeps the JSON document on stdout free of stray text.
        print(f'Perhaps {path} does not exist.', file=sys.stderr)


def main():
    """Download both Moz lists, print the warninglist as JSON, then clean up."""
    sources = (
        (moz_url_domains, moz_file_domains),
        (moz_url_pages, moz_file_pages),
    )
    try:
        for url, path in sources:
            download(url, path)
        entries = []
        for _url, path in sources:
            entries.extend(read_entries(path))
        print(json.dumps(build_warninglist(entries)))
    finally:
        # Each file is removed independently so one failure cannot leave the
        # other scratch file behind.
        for _url, path in sources:
            remove_quietly(path)


if __name__ == "__main__":
    main()