144 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			Python
		
	
	
			
		
		
	
	
			144 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			Python
		
	
	
#!/usr/bin/env python3
 | 
						|
# -*- coding: utf-8 -*-
 | 
						|
 | 
						|
import datetime
 | 
						|
import json
 | 
						|
import logging
 | 
						|
from inspect import currentframe, getframeinfo, getmodulename, stack
 | 
						|
from os import mkdir, path
 | 
						|
 | 
						|
import requests
 | 
						|
from dateutil.parser import parse as parsedate
 | 
						|
 | 
						|
 | 
						|
def init_logging():
    """Configure the root logger to write INFO-level records to a log file.

    The log file is ``generators.log``, located one directory above the
    folder that contains this module.

    The function is idempotent: if the root logger already has a
    ``FileHandler`` for that file, no second handler is attached. Without
    this guard every additional call would add another handler and each
    record would be written to the file once per call.

    Returns:
        logging.Logger: The root logger.
    """
    rel_path = getframeinfo(currentframe()).filename
    current_folder = path.dirname(path.abspath(rel_path))
    # FileHandler stores an absolute, normalised path in ``baseFilename``;
    # normalise ours the same way so the duplicate check below is reliable.
    log_file = path.abspath(path.join(current_folder, '../generators.log'))

    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    already_attached = any(
        isinstance(handler, logging.FileHandler)
        and handler.baseFilename == log_file
        for handler in root_logger.handlers
    )
    if not already_attached:
        # Log to file
        log_formatter = logging.Formatter(
            "[%(asctime)s] %(levelname)s::%(funcName)s()::%(message)s")
        file_handler = logging.FileHandler(log_file)
        file_handler.setFormatter(log_formatter)
        root_logger.addHandler(file_handler)

    # Console logging is deliberately not enabled here; attach a
    # logging.StreamHandler with the same formatter if it is ever needed.
    return root_logger
 | 
						|
 | 
						|
 | 
						|
# Set up file logging as a side effect of importing this module.
init_logging()
 | 
						|
 | 
						|
 | 
						|
def download_to_file(url, file):
    """Download ``url`` into the tmp folder unless the local copy is current.

    A HEAD request is used to read the server's ``Last-Modified`` header,
    which is compared with the modification time of the local file. The
    file is downloaded when the server copy is newer, when the header is
    missing, when the local file does not exist, or when the freshness
    check fails for any other reason (best effort: when in doubt, download).

    Args:
        url: Address of the resource to fetch.
        file: File name, relative to the tmp folder, to store the data in.

    Raises:
        Whatever ``actual_download_to_file`` raises; errors from the
        freshness check itself are logged and do not propagate.
    """
    # Name of the calling module, used as a prefix in log messages.
    # getmodulename() returns None when the caller's file name is not a
    # recognised module file (e.g. an interactive session), so fall back
    # to a placeholder instead of crashing on None.upper().
    caller = (getmodulename(stack()[1].filename) or 'unknown').upper()

    user_agent = {
        "User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}

    # Only the freshness check lives inside the try block. The download
    # itself happens afterwards so that a failing download is reported to
    # the caller instead of being caught below and silently retried.
    needs_download = True
    try:
        # requests does not follow redirects for HEAD unless asked to,
        # whereas the later GET does; follow them here as well so both
        # requests look at the same final resource.
        r = requests.head(url, headers=user_agent, allow_redirects=True)
        url_datetime = parsedate(r.headers['Last-Modified']).astimezone()
        target = get_abspath_source_file(file)
        file_datetime = datetime.datetime.fromtimestamp(
            path.getmtime(target)).astimezone()

        if url_datetime > file_datetime:
            logging.info(
                '%s File on server is newer, so downloading update to %s',
                caller, target)
        else:
            logging.info(
                '%s File on server is older, nothing to do', caller)
            needs_download = False
    except KeyError as exc:
        logging.warning(
            '%s KeyError in the headers. the %s header was not sent by '
            'server %s. Downloading file', caller, exc, url)
    except FileNotFoundError:
        logging.info(
            "%s File didn't exist, so downloading %s from %s",
            caller, file, url)
    except Exception as exc:
        logging.warning('%s General exception occured: %s.', caller, exc)

    if needs_download:
        actual_download_to_file(url, file, user_agent)
 | 
						|
 | 
						|
 | 
						|
def actual_download_to_file(url, file, user_agent):
    """Fetch ``url`` and store the response body in the tmp folder.

    Args:
        url: Address of the resource to fetch.
        file: File name, relative to the tmp folder, to write to.
        user_agent: Mapping of HTTP headers sent with the request.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            The local file is left untouched in that case.
    """
    r = requests.get(url, headers=user_agent)
    # Check the status before opening the file: opening with 'wb' truncates
    # it, and without this check an error page (404, 500, ...) would replace
    # a previously downloaded, valid copy.
    r.raise_for_status()
    with open(get_abspath_source_file(file), 'wb') as fd:
        for chunk in r.iter_content(4096):
            fd.write(chunk)
 | 
						|
 | 
						|
 | 
						|
def process_stream(url):
    """Fetch ``url`` and return its meaningful lines.

    The response is read line by line and each line is decoded as UTF-8.
    Empty lines and lines starting with ``#`` are dropped.

    Args:
        url: Address of the text resource to read.

    Returns:
        list[str]: The remaining lines, in their original order.
    """
    response = requests.get(url, stream=True)
    decoded_lines = (raw.decode('utf-8') for raw in response.iter_lines())
    return [
        text for text in decoded_lines
        if text and not text.startswith("#")
    ]
 | 
						|
 | 
						|
 | 
						|
def download(url):
    """Issue a GET request for ``url`` with a browser-like User-agent header.

    Args:
        url: Address of the resource to fetch.

    Returns:
        The response object returned by ``requests.get``.
    """
    headers = {
        "User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0",
    }
    response = requests.get(url, headers=headers)
    return response
 | 
						|
 | 
						|
 | 
						|
def get_abspath_list_file(dst):
    """Return the absolute path of the ``list.json`` file for ``dst``.

    The path is ``../lists/<dst>/list.json`` relative to the folder that
    contains this module, with symbolic links resolved.

    Args:
        dst: Name of the warninglist folder.

    Returns:
        str: Absolute path to the list file.
    """
    this_file = getframeinfo(currentframe()).filename
    module_dir = path.dirname(path.abspath(this_file))
    relative_target = '../lists/{dst}/list.json'.format(dst=dst)
    joined = path.join(module_dir, relative_target)
    return path.abspath(path.realpath(joined))
 | 
						|
 | 
						|
 | 
						|
def get_abspath_source_file(dst):
    """Return the absolute path of ``dst`` inside the tmp folder.

    The tmp folder is ``../tmp/`` relative to the folder that contains this
    module. It is created if it does not exist yet.

    Args:
        dst: File name relative to the tmp folder.

    Returns:
        str: Absolute path to the file, with symbolic links resolved.
    """
    rel_path = getframeinfo(currentframe()).filename
    current_folder = path.dirname(path.abspath(rel_path))
    tmp_path = path.join(current_folder, '../tmp/')
    # Create the folder EAFP-style. Checking exists() first and then calling
    # mkdir() can fail with FileExistsError when another process creates the
    # folder between the two calls.
    try:
        mkdir(tmp_path)
    except FileExistsError:
        pass
    return path.abspath(path.realpath(path.join(tmp_path, '{dst}'.format(dst=dst))))
 | 
						|
 | 
						|
 | 
						|
def get_version():
    """Return today's local date encoded as the integer ``YYYYMMDD``."""
    today = datetime.date.today()
    return today.year * 10000 + today.month * 100 + today.day
 | 
						|
 | 
						|
 | 
						|
def unique_sorted_warninglist(warninglist):
    """Deduplicate and sort the ``'list'`` entry of ``warninglist`` in place.

    Args:
        warninglist: Mapping with a ``'list'`` key holding hashable,
            mutually comparable values.

    Returns:
        The same ``warninglist`` object, with ``warninglist['list']``
        replaced by a sorted list of its unique values.
    """
    unique_entries = list(set(warninglist['list']))
    unique_entries.sort()
    warninglist['list'] = unique_entries
    return warninglist
 | 
						|
 | 
						|
 | 
						|
def write_to_file(warninglist, dst):
    """Write ``warninglist`` as JSON to the list file of ``dst``.

    The ``'list'`` entry is deduplicated and sorted first. The output is
    indented by two spaces, has its keys sorted and ends with a newline.
    Failures are logged as errors and not re-raised.

    Args:
        warninglist: Mapping describing the warninglist; must contain a
            ``'list'`` key.
        dst: Name of the warninglist folder the file belongs to.
    """
    # Name of the calling module, used as a prefix in log messages.
    # getmodulename() returns None when the caller's file name is not a
    # recognised module file, so fall back to a placeholder instead of
    # crashing on None.upper().
    caller = (getmodulename(stack()[1].filename) or 'unknown').upper()

    try:
        target = get_abspath_list_file(dst)
        # Serialise before opening the file: open(..., 'w') truncates it, so
        # a failure while encoding (unsortable or unserialisable values)
        # would otherwise leave an empty or partial list file behind.
        payload = json.dumps(unique_sorted_warninglist(warninglist),
                             indent=2, sort_keys=True)
        with open(target, 'w') as data_file:
            data_file.write(payload)
            data_file.write("\n")
        logging.info('New warninglist written to {}.'.format(target))
    except Exception as exc:
        logging.error(
            '{} General exception occured: {}.'.format(caller, str(exc)))
 | 
						|
 | 
						|
 | 
						|
def main():
    """Script entry point; it only sets up logging."""
    init_logging()


if __name__ == '__main__':
    main()
 |