misp-warninglists/tools/generate-chrome-crux-1m.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import csv
import gzip
from urllib.parse import urlparse
from generator import download_to_file, get_version, write_to_file, get_abspath_source_file


def process(files, dst):
    warninglist = {
        'description': "Cached Chrome Top Million Websites - top 1 million",
        'version': get_version(),
        'name': "google-chrome-crux-1million",
        'type': 'string',
        'list': [],
        'matching_attributes': ['hostname', 'domain', 'uri', 'url']
    }

    for file in files:
        with open(get_abspath_source_file(file)) as csv_file:
            csv_reader = csv.reader(csv_file, delimiter=',')
            # Skip the CSV header row of each source file.
            next(csv_reader, None)
            for row in csv_reader:
                # Each row starts with an origin such as "https://example.com";
                # keep only the hostname for the warninglist entries.
                host = urlparse(row[0]).hostname
                warninglist['list'].append(host)

    write_to_file(warninglist, dst)


if __name__ == '__main__':
    crux_domains_url = "https://github.com/zakird/crux-top-lists/raw/main/data/global/current.csv.gz"
    crux_domains_file = "crux-top-1m.csv.gz"
    dst = 'google-chrome-crux-1million'

    # Fetch the latest CrUX top-sites dump, then build the warninglist from it.
    download_to_file(crux_domains_url, crux_domains_file, gzip_enable=True)
    process([crux_domains_file], dst)