commit
7a52d7bf8d
|
@ -6,15 +6,18 @@ import zipfile
|
||||||
import datetime
|
import datetime
|
||||||
import json
|
import json
|
||||||
|
|
||||||
alexa_url = "http://s3.amazonaws.com/alexa-static/top-1mcsv.zip"
|
alexa_url = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
|
||||||
alexa_file = "top-1m.csv.zip"
|
alexa_file = "top-1m.csv.zip"
|
||||||
user_agent = {"User-agent":"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}
|
user_agent = {"User-agent":"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}
|
||||||
r = requests.get(alexa_url, headers=user_agent)
|
r = requests.get(alexa_url, headers=user_agent)
|
||||||
|
with open(alexa_file, 'wb') as fd:
|
||||||
|
for chunk in r.iter_content(4096):
|
||||||
|
fd.write(chunk)
|
||||||
with zipfile.ZipFile(alexa_file, 'r') as alexa_lists:
|
with zipfile.ZipFile(alexa_file, 'r') as alexa_lists:
|
||||||
for name in alexa_lists.namelist():
|
for name in alexa_lists.namelist():
|
||||||
if name == "top-1m.csv":
|
if name == "top-1m.csv":
|
||||||
with alexa_lists.open(name) as top:
|
with alexa_lists.open(name) as top:
|
||||||
top1000 = top.readlines()[0:999]
|
top1000 = top.readlines()[:1000]
|
||||||
else:
|
else:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue