diff --git a/tools/generate-amazon-aws.py b/tools/generate-amazon-aws.py index fa2b835..026e122 100755 --- a/tools/generate-amazon-aws.py +++ b/tools/generate-amazon-aws.py @@ -3,7 +3,7 @@ import json -from generator import download, download_to_file, get_abspath_list_file, get_version +from generator import download_to_file, get_abspath_list_file, get_version def process(file, dst): diff --git a/tools/generate-cisco.py b/tools/generate-cisco.py index c2fcd53..9d74690 100755 --- a/tools/generate-cisco.py +++ b/tools/generate-cisco.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import zipfile import json +import zipfile -from generator import download, download_to_file, get_abspath_list_file, get_version +from generator import download_to_file, get_abspath_list_file, get_version def process(file, warninglist, dst, limit='1k'): @@ -23,17 +23,18 @@ def process(file, warninglist, dst, limit='1k'): top = cisco_list.readlines()[:20000] else: continue - + warninglist['version'] = get_version() warninglist['type'] = 'hostname' - warninglist['matching_attributes'] = ['hostname', 'domain', 'url', 'domain|ip'] + warninglist['matching_attributes'] = [ + 'hostname', 'domain', 'url', 'domain|ip'] warninglist['list'] = [] for site in top: v = site.decode('UTF-8').split(',')[1] - warninglist['list'].append(v.strip().replace('\\r\\n','')) + warninglist['list'].append(v.strip().replace('\\r\\n', '')) warninglist['list'] = sorted(set(warninglist['list'])) - + with open(get_abspath_list_file(dst), 'w') as data_file: json.dump(warninglist, data_file, indent=2, sort_keys=True) data_file.write("\n") @@ -42,7 +43,7 @@ def process(file, warninglist, dst, limit='1k'): if __name__ == '__main__': cisco_url = "http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip" cisco_file = "cisco_top-1m.csv.zip" - + download_to_file(cisco_url, cisco_file) cisco_dst_1k = 'cisco_top1000' @@ -58,7 +59,7 @@ if __name__ == '__main__': 'description': 'Event contains one or more entries from the top 5000 of the most used websites (Cisco Umbrella).' } process(cisco_file, cisco_5k_warninglist, cisco_dst_5k, limit='5k') - + cisco_dst_10k = 'cisco_top10k' cisco_10k_warninglist = { 'name': 'Top 10 000 websites from Cisco Umbrella', diff --git a/tools/generate-cloudflare.py b/tools/generate-cloudflare.py index 3f5a685..15f67cf 100755 --- a/tools/generate-cloudflare.py +++ b/tools/generate-cloudflare.py @@ -2,7 +2,7 @@ import json -from generator import download, download_to_file, get_abspath_list_file, get_version +from generator import download_to_file, get_abspath_list_file, get_version def process(files, dst): @@ -12,15 +12,15 @@ def process(files, dst): warninglist['description'] = "List of known Cloudflare IP ranges (https://www.cloudflare.com/ips/)" warninglist['type'] = "cidr" warninglist['list'] = [] - warninglist['matching_attributes'] = ["ip-dst","ip-src","domain|ip"] - + warninglist['matching_attributes'] = ["ip-dst", "ip-src", "domain|ip"] + for file in files: with open(file, 'r') as f: ips = f.readlines() for ip in ips: warninglist['list'].append(ip.strip()) warninglist['list'] = sorted(set(warninglist['list'])) - + with open(get_abspath_list_file(dst), 'w') as data_file: json.dump(warninglist, data_file, indent=2, sort_keys=True) data_file.write("\n") @@ -28,7 +28,7 @@ def process(files, dst): if __name__ == '__main__': cf_base_url = "https://www.cloudflare.com/" - uri_list = ['ips-v4','ips-v6'] + uri_list = ['ips-v4', 'ips-v6'] cf_dst = 'cloudflare' to_process = list() @@ -38,5 +38,5 @@ if __name__ == '__main__': file = 'cloudflare_{}.txt'.format(uri) download_to_file(url, file) to_process.append(file) - + process(to_process, cf_dst) diff --git a/tools/generate-covid.py b/tools/generate-covid.py index 9f6f9b0..f0e0994 100755 --- a/tools/generate-covid.py +++ b/tools/generate-covid.py @@ -3,7 +3,7 @@ import json -from generator import download, download_to_file, get_abspath_list_file, get_version +from generator import download, get_abspath_list_file, get_version def process(url, warninglist, dst): @@ -33,6 +33,7 @@ if __name__ == '__main__': 'name': 'Covid-19 Cyber Threat Coalition\'s Whitelist', 'description': 'The Cyber Threat Coalition\'s whitelist of COVID-19 related websites.' } - + process(covid_krassi_url, covid_krassi_warninglist, covid_krassi_dst) - process(covid_cyber_threat_coalition_url, covid_cyber_threat_coalition_warninglist, covid_cyber_threat_coalition_dst) + process(covid_cyber_threat_coalition_url, + covid_cyber_threat_coalition_warninglist, covid_cyber_threat_coalition_dst) diff --git a/tools/generate-microsoft-azure.py b/tools/generate-microsoft-azure.py index bfed4f1..cfaf582 100755 --- a/tools/generate-microsoft-azure.py +++ b/tools/generate-microsoft-azure.py @@ -2,10 +2,11 @@ # -*- coding: utf-8 -*- import json -import requests + from bs4 import BeautifulSoup -from generator import download, download_to_file, get_abspath_list_file, get_version +from generator import (download, download_to_file, get_abspath_list_file, + get_version) def get_json_url(page): @@ -25,10 +26,10 @@ def process(file, dst): 'matching_attributes': ["ip-src", "ip-dst", "domain|ip"], 'type': 'cidr' } - + with open(file, 'r') as json_file: ms_azure_ip_list = json.load(json_file) - + for value in ms_azure_ip_list['values']: warninglist['list'] += value['properties']['addressPrefixes'] @@ -46,4 +47,4 @@ if __name__ == '__main__': ms_azure_json_url = get_json_url(download(ms_azure_url)) download_to_file(ms_azure_json_url, ms_azure_file) - process(ms_azure_file, ms_azure_dst) \ No newline at end of file + process(ms_azure_file, ms_azure_dst) diff --git a/tools/generate_alexa.py b/tools/generate_alexa.py index 3269e3f..e6a7ff4 100755 --- a/tools/generate_alexa.py +++ b/tools/generate_alexa.py @@ -4,7 +4,7 @@ import json import zipfile -from generator import download, download_to_file, get_abspath_list_file, get_version +from generator import download_to_file, get_abspath_list_file, get_version def process(file, dst): diff --git a/tools/generate_majestic-million.py b/tools/generate_majestic-million.py index a44480f..13f4144 100755 --- a/tools/generate_majestic-million.py +++ b/tools/generate_majestic-million.py @@ -2,14 +2,15 @@ # -*- coding: utf-8 -*- import json -from generator import download, download_to_file, get_abspath_list_file, get_version + +from generator import download_to_file, get_abspath_list_file, get_version def process(file, dst): with open(file, newline='\n', encoding='utf-8', errors='replace') as csv_file: sites = csv_file.readlines()[:10000] - + warninglist = { 'name': 'Top 10K websites from Majestic Million', 'version': get_version(), @@ -35,4 +36,4 @@ if __name__ == '__main__': majestic_dst = 'majestic_million' download_to_file(majestic_url, majestic_file) - process(majestic_file, majestic_dst) \ No newline at end of file + process(majestic_file, majestic_dst) diff --git a/tools/generate_mozilla_certificates.py b/tools/generate_mozilla_certificates.py index 45e5412..cb103df 100755 --- a/tools/generate_mozilla_certificates.py +++ b/tools/generate_mozilla_certificates.py @@ -7,7 +7,7 @@ import json from OpenSSL.crypto import FILETYPE_PEM, load_certificate -from generator import download, download_to_file, get_abspath_list_file, get_version +from generator import download_to_file, get_abspath_list_file, get_version def gethash(cert, digest): @@ -31,7 +31,7 @@ def process(file, dst, type): warninglist = {} warninglist['name'] = 'Fingerprint of {type}'.format(type=type) - warninglist['version'] = int(datetime.date.today().strftime('%Y%m%d')) + warninglist['version'] = get_version() warninglist['description'] = "Fingerprint of {type} taken from Mozilla's lists at https://wiki.mozilla.org/CA".format( type=type) warninglist['list'] = sorted(hashes) diff --git a/tools/generate_tranco.py b/tools/generate_tranco.py index fb76746..3068f10 100755 --- a/tools/generate_tranco.py +++ b/tools/generate_tranco.py @@ -4,7 +4,7 @@ import json import zipfile -from generator import download, download_to_file, get_abspath_list_file, get_version +from generator import download_to_file, get_abspath_list_file, get_version def process(file, warninglist, dst, first_10k=False): @@ -19,10 +19,11 @@ def process(file, warninglist, dst, first_10k=False): sites = tranco.readlines() else: continue - + warninglist['type'] = 'hostname' warninglist['version'] = get_version() - warninglist['matching_attributes'] = ['hostname', 'domain', 'url', 'domain|ip'] + warninglist['matching_attributes'] = [ + 'hostname', 'domain', 'url', 'domain|ip'] for site in sites: v = site.decode('UTF-8').split(',')[1] @@ -54,4 +55,5 @@ if __name__ == '__main__': 'description': "Event contains one or more entries from the top 10K most-used sites (https://tranco-list.eu/).", 'name': "Top 10K most-used sites from Tranco" } - process(tranco_file, tranco_10k_warninglist, tranco_10k_dst, first_10k=True) + process(tranco_file, tranco_10k_warninglist, + tranco_10k_dst, first_10k=True)