New script to generate Cisco Umbrella Top 1000

pull/76/head
Robert Nixon 2018-08-10 08:47:40 -04:00 committed by GitHub
parent 0b4f1f32d4
commit ed9700b150
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 38 additions and 0 deletions

View File

@ -0,0 +1,38 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
import zipfile
import datetime
import json
# Location of the zipped Cisco Umbrella popularity list and the local file
# name the archive is saved under.
# NOTE(review): the list is fetched over plain HTTP; consider switching to
# HTTPS once the endpoint is confirmed to serve it.
cisco_url = "http://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"
cisco_file = "top-1m.csv.zip"
user_agent = {"User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}


def download_archive(url=cisco_url, destination=cisco_file):
    """Download ``url`` and write the response body to ``destination``.

    Raises ``requests.HTTPError`` on a non-2xx response so an error page is
    never written to disk and later mistaken for a zip archive.
    """
    # The timeout keeps an unattended run from blocking forever on a
    # stalled connection.
    response = requests.get(url, headers=user_agent, timeout=60)
    response.raise_for_status()
    with open(destination, 'wb') as fd:
        for chunk in response.iter_content(4096):
            fd.write(chunk)


def read_top_lines(archive, member="top-1m.csv", limit=1000):
    """Return the first ``limit`` raw lines of ``member`` inside ``archive``.

    ``archive`` is anything ``zipfile.ZipFile`` accepts (a path or a binary
    file object). Lines are returned as ``bytes`` with their line endings.

    Raises ``ValueError`` if ``member`` is not present in the archive.
    """
    with zipfile.ZipFile(archive, 'r') as cisco_lists:
        if member not in cisco_lists.namelist():
            raise ValueError("{} not found in archive".format(member))
        with cisco_lists.open(member) as top:
            # zip() stops once range() is exhausted, so only ``limit`` lines
            # are read instead of the whole file.
            return [line for _, line in zip(range(limit), top)]


def extract_domains(lines):
    """Return the sorted, de-duplicated second CSV field of each line.

    Each line may be ``bytes`` or ``str``. Blank lines and lines without a
    second comma-separated field are skipped.
    """
    domains = set()
    for raw in lines:
        text = raw.decode('utf-8') if isinstance(raw, bytes) else raw
        fields = text.strip().split(',')
        if len(fields) < 2:
            continue
        # Strip whitespace instead of slicing off a fixed number of
        # characters, so the result does not depend on the line ending.
        domain = fields[1].strip()
        if domain:
            domains.add(domain)
    return sorted(domains)


def build_warninglist(lines, today=None):
    """Build the warninglist dictionary from raw CSV ``lines``.

    ``today`` is the date used for the ``version`` field (formatted as the
    integer YYYYMMDD); it defaults to the current date.
    """
    if today is None:
        today = datetime.date.today()
    return {
        'description': "Event contains one or more entries from the top 1000 of the most used website (Cisco Umbrella).",
        'version': int(today.strftime('%Y%m%d')),
        'name': "Top 1000 website from Cisco Umbrella",
        'list': extract_domains(lines),
        'matching_attributes': ['hostname', 'domain'],
    }


def main():
    """Download the list, build the warninglist and print it as JSON."""
    download_archive()
    top1000 = read_top_lines(cisco_file)
    print(json.dumps(build_warninglist(top1000)))


if __name__ == "__main__":
    main()