add .gitignore for downloaded files, refactor code for generators: use central module, remove useless code, fix minor issues
parent c00b21de5f
commit 049475ab22

.gitignore (new file):
@@ -0,0 +1,18 @@
+# files download by generators
+alexa_top-1m.csv.zip
+amazon_ip-ranges.json
+cisco_top-1m.csv.zip
+cloudflare_ips-v4.txt
+cloudflare_ips-v6.txt
+IncludedCACertificateReportPEMCSV.csv
+majestic_million.csv
+ocsp_crl-hostnames.txt.txt
+ocsp_crl-ipv4.txt.txt
+ocsp_crl-ipv6.txt.txt
+ocsp_ocsp-hostnames.txt.txt
+ocsp_ocsp-ipv4.txt.txt
+ocsp_ocsp-ipv6.txt.txt
+PublicAllIntermediateCertsWithPEMCSV.csv
+top500.domains.csv
+top500.pages.csv
+top-1m.csv.zip

Generator for the amazon-aws list:
@@ -1,29 +1,39 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
 import json
-import datetime
-import urllib.request
 
-res = urllib.request.urlopen('https://ip-ranges.amazonaws.com/ip-ranges.json')
+from generator import download, download_to_file, get_abspath_list_file, get_version
 
-res_body = res.read()
-j = json.loads(res_body.decode("utf-8"))
-l = []
 
-for prefix in j['prefixes']:
-    l.append(prefix['ip_prefix'])
+def process(file, dst):
+    with open(file, 'r') as json_file:
+        amazon_aws_ip_list = json.load(json_file)
+    l = []
 
-for prefix in j['ipv6_prefixes']:
-    l.append(prefix['ipv6_prefix'])
+    for prefix in amazon_aws_ip_list['prefixes']:
+        l.append(prefix['ip_prefix'])
 
-warninglist = {}
-warninglist['name'] = 'List of known Amazon AWS IP address ranges'
-warninglist['version'] = int(datetime.date.today().strftime('%Y%m%d'))
-warninglist['description'] = 'Amazon AWS IP address ranges (https://ip-ranges.amazonaws.com/ip-ranges.json)'
-warninglist['type'] = 'cidr'
-warninglist['list'] = sorted(set(l))
-warninglist['matching_attributes'] = ["ip-src", "ip-dst", "domain|ip"]
+    for prefix in amazon_aws_ip_list['ipv6_prefixes']:
+        l.append(prefix['ipv6_prefix'])
 
-print(json.dumps(warninglist))
+    warninglist = {}
+    warninglist['name'] = 'List of known Amazon AWS IP address ranges'
+    warninglist['version'] = get_version()
+    warninglist['description'] = 'Amazon AWS IP address ranges (https://ip-ranges.amazonaws.com/ip-ranges.json)'
+    warninglist['type'] = 'cidr'
+    warninglist['list'] = sorted(set(l))
+    warninglist['matching_attributes'] = ["ip-src", "ip-dst", "domain|ip"]
+
+    with open(get_abspath_list_file(dst), 'w') as data_file:
+        json.dump(warninglist, data_file, indent=2, sort_keys=True)
+        data_file.write('\n')
+
+
+if __name__ == '__main__':
+    amazon_url = "https://ip-ranges.amazonaws.com/ip-ranges.json"
+    amazon_file = "amazon_ip-ranges.json"
+    amazon_dst = "amazon-aws"
+
+    download_to_file(amazon_url, amazon_file)
+    process(amazon_file, amazon_dst)

Generator for the cisco_top1000 list:
@@ -1,40 +1,44 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import requests
 import zipfile
-import datetime
 import json
 
-cisco_url = "http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"
-cisco_file = "top-1m.csv.zip"
-user_agent = {"User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}
-r = requests.get(cisco_url, headers=user_agent)
-with open(cisco_file, 'wb') as fd:
-    for chunk in r.iter_content(4096):
-        fd.write(chunk)
-with zipfile.ZipFile(cisco_file, 'r') as cisco_lists:
-    for name in cisco_lists.namelist():
-        if name == "top-1m.csv":
-            with cisco_lists.open(name) as top:
-                top1000 = top.readlines()[:1000]
-        else:
-            continue
+from generator import download, download_to_file, get_abspath_list_file, get_version
 
-cisco_warninglist = {}
-version = int(datetime.date.today().strftime('%Y%m%d'))
 
-cisco_warninglist['description'] = 'Event contains one or more entries from the top 1000 of the most used website (Cisco Umbrella).'
-d = datetime.datetime.now()
-cisco_warninglist['version'] = version
-cisco_warninglist['name'] = 'Top 1000 website from Cisco Umbrella'
-cisco_warninglist['type'] = 'hostname'
-cisco_warninglist['matching_attributes'] = ['hostname', 'domain']
-cisco_warninglist['list'] = []
+def process(file, dst):
+    with zipfile.ZipFile(file, 'r') as cisco_lists:
+        for name in cisco_lists.namelist():
+            if name == "top-1m.csv":
+                with cisco_lists.open(name) as top:
+                    top1000 = top.readlines()[:1000]
+            else:
+                continue
 
-for site in top1000:
-    v = str(site).split(',')[1]
-    cisco_warninglist['list'].append(v.rstrip())
-cisco_warninglist['list'] = sorted(set(cisco_warninglist['list']))
-print(json.dumps(cisco_warninglist))
+    warninglist = {
+        'description': 'Event contains one or more entries from the top 1000 of the most used website (Cisco Umbrella).',
+        'version': get_version(),
+        'name': 'Top 1000 website from Cisco Umbrella',
+        'type': 'hostname',
+        'matching_attributes': ['hostname', 'domain', 'url', 'domain|ip'],
+        'list': []
+    }
+
+    for site in top1000:
+        v = site.decode('UTF-8').split(',')[1]
+        warninglist['list'].append(v.strip().replace('\r\n',''))
+    warninglist['list'] = sorted(set(warninglist['list']))
+
+    with open(get_abspath_list_file(dst), 'w') as data_file:
+        json.dump(warninglist, data_file, indent=2, sort_keys=True)
+        data_file.write("\n")
+
+
+if __name__ == '__main__':
+    cisco_url = "http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"
+    cisco_file = "cisco_top-1m.csv.zip"
+    cisco_dst = 'cisco_top1000'
+
+    download_to_file(cisco_url, cisco_file)
+    process(cisco_file, cisco_dst)

Generator for the cloudflare list:
@@ -1,33 +1,42 @@
 #!/usr/bin/env python3
 
 import json
-import os
-import requests
-import datetime
-import io
 
-base_url="https://www.cloudflare.com/"
-uri_list=['ips-v4','ips-v6']
-dict=dict()
-dict['list']=list()
-def source_read_and_add(input_file):
-    output_list=list()
+from generator import download, download_to_file, get_abspath_list_file, get_version
 
-    for line in input_file.splitlines():
-        output_list.append(line)
-    return output_list
 
-for uri in uri_list:
-    url = base_url + uri
-    r=requests.get(url)
-    dict['list'] += source_read_and_add(r.text)
+def process(files, dst):
+    warninglist = {}
+    warninglist['name'] = "List of known Cloudflare IP ranges"
+    warninglist['version'] = get_version()
+    warninglist['description'] = "List of known Cloudflare IP ranges (https://www.cloudflare.com/ips/)"
+    warninglist['type'] = "cidr"
+    warninglist['list'] = []
+    warninglist['matching_attributes'] = ["ip-dst","ip-src","domain|ip"]
 
-dict['type'] = "cidr"
-dict['matching_attributes']=["ip-dst","ip-src","domain|ip"]
-dict['name']="List of known Cloudflare IP ranges"
-dict['version']= int(datetime.date.today().strftime('%Y%m%d'))
-dict['description']="List of known Cloudflare IP ranges (https://www.cloudflare.com/ips/)"
-dict['list']=list(set(dict['list']))
+    for file in files:
+        with open(file, 'r') as f:
+            ips = f.readlines()
+        for ip in ips:
+            warninglist['list'].append(ip.strip())
+    warninglist['list'] = sorted(set(warninglist['list']))
 
-print(json.dumps(dict))
+    with open(get_abspath_list_file(dst), 'w') as data_file:
+        json.dump(warninglist, data_file, indent=2, sort_keys=True)
+        data_file.write("\n")
+
+
+if __name__ == '__main__':
+    cf_base_url = "https://www.cloudflare.com/"
+    uri_list = ['ips-v4','ips-v6']
+    cf_dst = 'cloudflare'
+
+    to_process = list()
+
+    for uri in uri_list:
+        url = cf_base_url+uri
+        file = 'cloudflare_{}.txt'.format(uri)
+        download_to_file(url, file)
+        to_process.append(file)
+
+    process(to_process, cf_dst)

Generator for the covid-19 whitelists:
@@ -1,43 +1,38 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import requests
 import json
-import datetime
 
-url = 'https://raw.githubusercontent.com/krassi/covid19-related/master/whitelist-domains.txt'
-r = requests.get(url)
-whitelist = r.text
-whitelist = list(set(whitelist.split()))
+from generator import download, download_to_file, get_abspath_list_file, get_version
 
-warninglist = {
-    'name': 'Covid-19 Krassi\'s Whitelist',
-    'description': 'Krassimir\'s Covid-19 whitelist of known good Covid-19 related websites.',
-    'type': 'hostname',
-    'matching_attributes': ['domain', 'hostname', 'url'],
-    'version': int(datetime.date.today().strftime('%Y%m%d')),
-    'list': sorted(whitelist)
-}
 
-with open('../lists/covid-19-krassi-whitelist/list.json', 'w+') as data_file:
-    json.dump(warninglist, data_file, indent=2, sort_keys=True)
+def process(url, warninglist, dst):
+    whitelist = download(url).text
+    whitelist = list(set(whitelist.split()))
 
-url = 'https://raw.githubusercontent.com/Cyber-Threat-Coalition/goodlist/master/hostnames.txt'
-r = requests.get(url)
-whitelist = r.text
-whitelist = list(set(whitelist.split()))
+    warninglist['type'] = 'hostname'
+    warninglist['matching_attributes'] = ['domain', 'hostname', 'url']
+    warninglist['version'] = get_version()
+    warninglist['list'] = sorted(whitelist)
 
-warninglist = {
-    'name': 'Covid-19 Cyber Threat Coalition\'s Whitelist',
-    'description': 'The Cyber Threat Coalition\'s whitelist of COVID-19 related websites.',
-    'type': 'hostname',
-    'matching_attributes': ['domain', 'hostname', 'url'],
-    'version': int(datetime.date.today().strftime('%Y%m%d')),
-    'list': sorted(whitelist)
-}
+    with open(get_abspath_list_file(dst), 'w') as data_file:
+        json.dump(warninglist, data_file, indent=2, sort_keys=True)
+        data_file.write('\n')
 
-with open('../lists/covid-19-cyber-threat-coalition-whitelist/list.json', 'w+') as data_file:
-    json.dump(warninglist, data_file, indent=2, sort_keys=True)
+
+if __name__ == '__main__':
+    covid_krassi_url = 'https://raw.githubusercontent.com/krassi/covid19-related/master/whitelist-domains.txt'
+    covid_krassi_dst = 'covid-19-krassi-whitelist'
+    covid_krassi_warninglist = {
+        'name': 'Covid-19 Krassi\'s Whitelist',
+        'description': 'Krassimir\'s Covid-19 whitelist of known good Covid-19 related websites.'
+    }
+    covid_cyber_threat_coalition_url = 'https://raw.githubusercontent.com/Cyber-Threat-Coalition/goodlist/master/hostnames.txt'
+    covid_cyber_threat_coalition_dst = 'covid-19-cyber-threat-coalition-whitelist'
+    covid_cyber_threat_coalition_warninglist = {
+        'name': 'Covid-19 Cyber Threat Coalition\'s Whitelist',
+        'description': 'The Cyber Threat Coalition\'s whitelist of COVID-19 related websites.'
+    }
+
+    process(covid_krassi_url, covid_krassi_warninglist, covid_krassi_dst)
+    process(covid_cyber_threat_coalition_url, covid_cyber_threat_coalition_warninglist, covid_cyber_threat_coalition_dst)

Generator for the alexa list:
@@ -1,30 +1,10 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import datetime
 import json
-from os import path
 import zipfile
-from inspect import currentframe, getframeinfo
 
-import requests
-
-
-def download(url, file):
-    user_agent = {
-        "User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}
-    r = requests.get(url, headers=user_agent)
-    with open(file, 'wb') as fd:
-        for chunk in r.iter_content(4096):
-            fd.write(chunk)
-
-
-def get_abspath_list_file(dst):
-    rel_path = getframeinfo(currentframe()).filename
-    current_folder = path.dirname(path.abspath(rel_path))
-    real_path = path.join(
-        current_folder, '../lists/{dst}/list.json'.format(dst=dst))
-    return path.abspath(path.realpath(real_path))
+from generator import download, download_to_file, get_abspath_list_file, get_version
 
 
 def process(file, dst):
@@ -36,30 +16,29 @@ def process(file, dst):
         else:
             continue
 
-    alexa_warninglist = {}
-    alexa_warninglist[
-        'description'] = "Event contains one or more entries from the top 1000 of the most used website (Alexa)."
-    alexa_warninglist['version'] = int(
-        datetime.date.today().strftime('%Y%m%d'))
-    alexa_warninglist['name'] = "Top 1000 website from Alexa"
-    alexa_warninglist['type'] = 'hostname'
-    alexa_warninglist['list'] = []
-    alexa_warninglist['matching_attributes'] = ['hostname', 'domain', 'url', 'domain|ip']
+    warninglist = {
+        'description': "Event contains one or more entries from the top 1000 of the most used website (Alexa).",
+        'version': get_version(),
+        'name': "Top 1000 website from Alexa",
+        'type': 'hostname',
+        'list': [],
+        'matching_attributes': ['hostname', 'domain', 'url', 'domain|ip']
+    }
 
     for site in top1000:
         v = site.decode('UTF-8').split(',')[1]
-        alexa_warninglist['list'].append(v.rstrip())
-    alexa_warninglist['list'] = sorted(set(alexa_warninglist['list']))
+        warninglist['list'].append(v.rstrip())
+    warninglist['list'] = sorted(set(warninglist['list']))
 
     with open(get_abspath_list_file(dst), 'w') as data_file:
-        json.dump(alexa_warninglist, data_file, indent=2, sort_keys=True)
+        json.dump(warninglist, data_file, indent=2, sort_keys=True)
        data_file.write("\n")
 
 
 if __name__ == "__main__":
     alexa_url = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
-    alexa_file = "top-1m.csv.zip"
+    alexa_file = "alexa_top-1m.csv.zip"
     alexa_dst = "alexa"
 
-    download(alexa_url, alexa_file)
+    download_to_file(alexa_url, alexa_file)
     process(alexa_file, alexa_dst)

Generator for the majestic_million list:
@@ -1,40 +1,38 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 
-import datetime
-import logging
 import json
-import os
-import requests
 
-servers_url = 'http://downloads.majestic.com/majestic_million.csv'
-csv_path = 'majestic_million.csv'
-hostname_path = 'list.json'
+from generator import download, download_to_file, get_abspath_list_file, get_version
 
-if os.path.isfile(csv_path):
-    logging.warning('Not erasing local csv file')
-else:
-    req = requests.get(servers_url)
-    with open(csv_path, 'wb') as fd:
-        for chunk in req.iter_content(4096):
-            fd.write(chunk)
 
-host_list = []
-with open(csv_path, newline='\n', encoding='utf-8', errors='replace') as csv_file:
-    top10k = csv_file.readlines()[:10000]
+def process(file, dst):
 
-version = int(datetime.date.today().strftime('%Y%m%d'))
-out_list = {}
+    with open(file, newline='\n', encoding='utf-8', errors='replace') as csv_file:
+        sites = csv_file.readlines()[:10000]
 
-out_list['name'] = 'Top 10K websites from Majestic Million'
-out_list['version'] = version
-out_list['description'] = 'Event contains one or more entries from the top 10K of the most used websites (Majestic Million).'
-out_list['matching_attributes'] = ['hostname', 'domain']
-out_list['type'] = 'hostname'
-out_list['list'] = sorted(set(host_list))
+    warninglist = {
+        'name': 'Top 10K websites from Majestic Million',
+        'version': get_version(),
+        'description': 'Event contains one or more entries from the top 10K of the most used websites (Majestic Million).',
+        'matching_attributes': ['hostname', 'domain'],
+        'type': 'hostname',
+        'list': []
+    }
 
-for hostname in top10k:
-    v = hostname.split(',')[2]
-    out_list['list'].append(v.rstrip())
-out_list['list'] = sorted(set(out_list['list']))
-with open(hostname_path, 'w', newline='\n') as hostname_file:
-    hostname_file.write(json.dumps(out_list, indent=2, sort_keys=False))
+    for site in sites:
+        v = site.split(',')[2]
+        warninglist['list'].append(v.rstrip())
+    warninglist['list'] = sorted(set(warninglist['list']))
+
+    with open(get_abspath_list_file(dst), 'w') as data_file:
+        json.dump(warninglist, data_file, indent=2, sort_keys=True)
+        data_file.write("\n")
+
+
+if __name__ == '__main__':
+    majestic_url = 'http://downloads.majestic.com/majestic_million.csv'
+    majestic_file = 'majestic_million.csv'
+    majestic_dst = 'majestic_million'
+
+    download_to_file(majestic_url, majestic_file)
+    process(majestic_file, majestic_dst)

Generator for the Mozilla CA lists:
@@ -4,32 +4,16 @@
 import csv
-import datetime
 import json
-from inspect import currentframe, getframeinfo
-from os import path
 
-import requests
 from OpenSSL.crypto import FILETYPE_PEM, load_certificate
 
-
-def download(url, file):
-    r = requests.get(url)
-    with open(file, 'wb') as fd:
-        for chunk in r.iter_content(4096):
-            fd.write(chunk)
+from generator import download, download_to_file, get_abspath_list_file, get_version
 
 
 def gethash(cert, digest):
     return cert.digest(digest).decode('ASCII').replace(':', '').lower()
 
 
-def get_abspath_list_file(dst):
-    rel_path = getframeinfo(currentframe()).filename
-    current_folder = path.dirname(path.abspath(rel_path))
-    real_path = path.join(
-        current_folder, '../lists/{dst}/list.json'.format(dst=dst))
-    return path.abspath(path.realpath(real_path))
-
-
 def process(file, dst, type):
     hashes = set()
     with open(file, 'r') as f_in:
@@ -68,8 +52,8 @@ if __name__ == '__main__':
     CA_known_intermediate_file = 'PublicAllIntermediateCertsWithPEMCSV.csv'
     CA_known_intermediate_dst = 'mozilla-IntermediateCA'
 
-    download(Included_CA_url, Included_CA_file)
+    download_to_file(Included_CA_url, Included_CA_file)
     process(Included_CA_file, Included_CA_dst, 'trusted CA certificates')
-    download(CA_known_intermediate_url, CA_known_intermediate_file)
+    download_to_file(CA_known_intermediate_url, CA_known_intermediate_file)
     process(CA_known_intermediate_file, CA_known_intermediate_dst,
             'known intermedicate of trusted certificates')

Generator for the tranco list:
@@ -36,7 +36,7 @@ def process(file, warninglist, dst, first_10k=False):
 
 if __name__ == '__main__':
     tranco_url = 'https://tranco-list.eu/top-1m.csv.zip'
-    tranco_file = 'top-1m.csv.zip'
+    tranco_file = 'tranco_top-1m.csv.zip'
 
     download_to_file(tranco_url, tranco_file)
 