chg: [majestic_million] numbers parameter
parent
88d388a01e
commit
49922d0635
|
@@ -14,7 +14,7 @@ python3 generate-disposal.py
|
||||||
# TODO: Google page on Wikipedia does not exist anymore
|
# TODO: Google page on Wikipedia does not exist anymore
|
||||||
# Suggestion came to use a passivetotal whois search for org:Google LLC
|
# Suggestion came to use a passivetotal whois search for org:Google LLC
|
||||||
#python3 generate-google.py > lists/google/list.json
|
#python3 generate-google.py > lists/google/list.json
|
||||||
python3 generate_majestic-million.py
|
python3 generate_majestic-million.py -n 10000
|
||||||
python3 generate-microsoft-azure.py
|
python3 generate-microsoft-azure.py
|
||||||
python3 generate_mozilla_certificates.py
|
python3 generate_mozilla_certificates.py
|
||||||
python3 generate_moz-top500.py
|
python3 generate_moz-top500.py
|
||||||
|
|
|
@@ -2,15 +2,16 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from generator import download_to_file, get_version, write_to_file, get_abspath_source_file
|
from generator import download_to_file, get_version, write_to_file, get_abspath_source_file
|
||||||
|
import argparse
|
||||||
|
|
||||||
|
|
||||||
def process(file, dst):
|
def process(file, dst, numbers):
|
||||||
|
|
||||||
with open(get_abspath_source_file(file), newline='\n', encoding='utf-8', errors='replace') as csv_file:
|
with open(get_abspath_source_file(file), newline='\n', encoding='utf-8', errors='replace') as csv_file:
|
||||||
sites = csv_file.readlines()[:10000]
|
sites = csv_file.readlines()[:numbers]
|
||||||
|
|
||||||
warninglist = {
|
warninglist = {
|
||||||
'name': 'Top 10K websites from Majestic Million',
|
'name': f'Top {numbers} websites from Majestic Million',
|
||||||
'version': get_version(),
|
'version': get_version(),
|
||||||
'description': 'Event contains one or more entries from the top 10K of the most used websites (Majestic Million).',
|
'description': 'Event contains one or more entries from the top 10K of the most used websites (Majestic Million).',
|
||||||
'matching_attributes': ['hostname', 'domain'],
|
'matching_attributes': ['hostname', 'domain'],
|
||||||
|
@@ -26,9 +27,14 @@ def process(file, dst):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
parser = argparse.ArgumentParser()
|
||||||
|
parser.add_argument("-n", help="number of website to process", required=True)
|
||||||
|
args = parser.parse_args()
|
||||||
|
|
||||||
majestic_url = 'http://downloads.majestic.com/majestic_million.csv'
|
majestic_url = 'http://downloads.majestic.com/majestic_million.csv'
|
||||||
majestic_file = 'majestic_million.csv'
|
majestic_file = 'majestic_million.csv'
|
||||||
majestic_dst = 'majestic_million'
|
majestic_dst = 'majestic_million'
|
||||||
|
|
||||||
download_to_file(majestic_url, majestic_file)
|
download_to_file(majestic_url, majestic_file)
|
||||||
process(majestic_file, majestic_dst)
|
process(majestic_file, majestic_dst, int(args.n))
|
||||||
|
|
Loading…
Reference in New Issue