2020-07-17 09:22:34 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
import datetime
|
2020-07-21 00:31:06 +02:00
|
|
|
import json
|
2020-07-17 09:22:34 +02:00
|
|
|
from inspect import currentframe, getframeinfo
|
|
|
|
from os import path
|
2020-07-21 13:42:50 +02:00
|
|
|
import logging
|
2020-07-17 09:22:34 +02:00
|
|
|
|
|
|
|
import requests
|
2020-07-21 09:11:51 +02:00
|
|
|
from dateutil.parser import parse as parsedate
|
2020-07-17 09:22:34 +02:00
|
|
|
|
|
|
|
|
|
|
|
def download_to_file(url, file):
    """Download ``url`` to ``file`` unless the local copy is already current.

    A HEAD request is sent first and its ``Last-Modified`` header is compared
    with the modification time of ``file``. The body is only fetched when the
    remote timestamp is newer. The download is performed unconditionally when
    the header is missing or when ``file`` does not exist yet.

    :param url: address of the remote resource.
    :param file: path of the local file to create or refresh.
    """
    user_agent = {"User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}

    r = requests.head(url, headers=user_agent)

    # Keep the try bodies minimal so that errors raised by the download
    # itself are never mistaken for a missing header.
    try:
        last_modified = r.headers['Last-Modified']
    except KeyError as ex:
        # No modification date advertised: we cannot tell whether the local
        # copy is stale, so fetch it again.
        logging.warning(str(ex))
        actual_download_to_file(url, file, user_agent)
        return
    url_datetime = parsedate(last_modified).astimezone()

    try:
        file_mtime = path.getmtime(file)
    except FileNotFoundError:
        # Nothing on disk to compare against (e.g. first run).
        actual_download_to_file(url, file, user_agent)
        return
    file_datetime = datetime.datetime.fromtimestamp(file_mtime).astimezone()

    if url_datetime > file_datetime:
        actual_download_to_file(url, file, user_agent)
|
|
|
|
|
|
|
|
|
|
|
|
def actual_download_to_file(url, file, user_agent):
    """Fetch ``url`` with a GET request and write the response body to ``file``.

    :param url: address of the remote resource.
    :param file: path of the local file; opened in binary write mode, so any
        existing content is replaced.
    :param user_agent: dict passed to ``requests.get`` as the request headers.
    """
    response = requests.get(url, headers=user_agent)
    chunk_size = 4096
    with open(file, 'wb') as out:
        # writelines() writes each chunk yielded by the iterator in turn.
        out.writelines(response.iter_content(chunk_size))
|
2020-07-21 09:11:51 +02:00
|
|
|
|
2020-07-17 09:22:34 +02:00
|
|
|
|
2020-07-21 13:42:50 +02:00
|
|
|
def process_stream(url):
    """Return the meaningful lines of the resource at ``url``.

    The resource is requested with ``stream=True`` and read line by line.
    Each line is decoded as UTF-8; empty lines and lines starting with ``#``
    are dropped.

    :param url: address of the remote resource.
    :return: list of the remaining decoded lines, in their original order.
    """
    response = requests.get(url, stream=True)
    decoded = (raw.decode('utf-8') for raw in response.iter_lines())
    return [text for text in decoded if text and not text.startswith("#")]
|
2020-07-17 09:22:34 +02:00
|
|
|
|
2020-07-21 13:42:50 +02:00
|
|
|
|
2020-07-17 09:22:34 +02:00
|
|
|
def download(url):
    """Send a GET request for ``url`` with a browser-like User-agent header.

    :param url: address of the remote resource.
    :return: whatever ``requests.get`` returns for the request.
    """
    headers = {}
    headers["User-agent"] = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"
    response = requests.get(url, headers=headers)
    return response
|
|
|
|
|
|
|
|
|
|
|
|
def get_abspath_list_file(dst):
    """Return the absolute path of ``../lists/<dst>/list.json``.

    The path is resolved relative to the directory containing this source
    file, whose location is taken from the current frame.

    :param dst: name of the sub-directory of ``lists``.
    :return: absolute path with symbolic links resolved.
    """
    this_file = getframeinfo(currentframe()).filename
    base_dir = path.dirname(path.abspath(this_file))
    relative_target = '../lists/{dst}/list.json'.format(dst=dst)
    joined = path.join(base_dir, relative_target)
    resolved = path.realpath(joined)
    return path.abspath(resolved)
|
|
|
|
|
|
|
|
|
|
|
|
def get_version():
    """Return today's local date as an integer of the form ``YYYYMMDD``."""
    today = datetime.date.today()
    # Same value as int(strftime('%Y%m%d')), built from the date fields.
    return today.year * 10000 + today.month * 100 + today.day
|
2020-07-21 00:31:06 +02:00
|
|
|
|
|
|
|
|
|
|
|
def unique_sorted_warninglist(warninglist):
    """De-duplicate and sort the ``'list'`` entry of ``warninglist``.

    The mapping is modified in place and also returned.

    :param warninglist: mapping with an iterable of hashable, mutually
        comparable items under the key ``'list'``.
    :return: the same ``warninglist`` object.
    """
    entries = list(set(warninglist['list']))
    entries.sort()
    warninglist['list'] = entries
    return warninglist
|
|
|
|
|
2020-07-21 09:11:51 +02:00
|
|
|
|
2020-07-21 00:31:06 +02:00
|
|
|
def write_to_file(warninglist, dst):
    """Write ``warninglist`` as JSON to the list file belonging to ``dst``.

    The target path comes from ``get_abspath_list_file(dst)``. Before being
    serialized, the warninglist is passed through
    ``unique_sorted_warninglist``. The JSON is written with an indent of 2
    and sorted keys, followed by a single newline.

    :param warninglist: mapping to serialize.
    :param dst: name forwarded to ``get_abspath_list_file``.
    """
    target = get_abspath_list_file(dst)
    with open(target, 'w') as out:
        normalized = unique_sorted_warninglist(warninglist)
        json.dump(normalized, out, indent=2, sort_keys=True)
        out.write("\n")
|