misp-warninglists/tools/generator.py

144 lines
4.5 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import datetime
import json
import logging
from inspect import currentframe, getframeinfo, getmodulename, stack
from os import mkdir, path
import requests
from dateutil.parser import parse as parsedate
def init_logging():
rel_path = getframeinfo(currentframe()).filename
current_folder = path.dirname(path.abspath(rel_path))
LOG_DIR = path.join(current_folder, '../generators.log')
logFormatter = logging.Formatter(
"[%(asctime)s] %(levelname)s::%(funcName)s()::%(message)s")
rootLogger = logging.getLogger()
rootLogger.setLevel(logging.INFO)
# Log to file
fileHandler = logging.FileHandler(LOG_DIR)
fileHandler.setFormatter(logFormatter)
rootLogger.addHandler(fileHandler)
# Log to console too
''' consoleHandler = logging.StreamHandler()
consoleHandler.setFormatter(logFormatter)
rootLogger.addHandler(consoleHandler) '''
return rootLogger
init_logging()
def download_to_file(url, file):
frame_records = stack()[1]
caller = getmodulename(frame_records[1]).upper()
user_agent = {
"User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}
try:
r = requests.head(url, headers=user_agent)
url_datetime = parsedate(r.headers['Last-Modified']).astimezone()
file_datetime = datetime.datetime.fromtimestamp(
path.getmtime(get_abspath_source_file(file))).astimezone()
if(url_datetime > file_datetime):
logging.info('{} File on server is newer, so downloading update to {}'.format(
caller, get_abspath_source_file(file)))
actual_download_to_file(url, file, user_agent)
else:
logging.info(
'{} File on server is older, nothing to do'.format(caller))
except KeyError as exc:
logging.warning('{} KeyError in the headers. the {} header was not sent by server {}. Downloading file'.format(
caller, str(exc), url))
actual_download_to_file(url, file, user_agent)
except FileNotFoundError as exc:
logging.info(
"{} File didn't exist, so downloading {} from {}".format(caller, file, url))
actual_download_to_file(url, file, user_agent)
except Exception as exc:
logging.warning(
'{} General exception occured: {}.'.format(caller, str(exc)))
actual_download_to_file(url, file, user_agent)
def actual_download_to_file(url, file, user_agent):
r = requests.get(url, headers=user_agent)
with open(get_abspath_source_file(file), 'wb') as fd:
for chunk in r.iter_content(4096):
fd.write(chunk)
def process_stream(url):
r = requests.get(url, stream=True)
data_list = []
for line in r.iter_lines():
v = line.decode('utf-8')
if not v.startswith("#"):
if v:
data_list.append(v)
return data_list
def download(url):
user_agent = {
"User-agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0"}
return requests.get(url, headers=user_agent)
def get_abspath_list_file(dst):
rel_path = getframeinfo(currentframe()).filename
current_folder = path.dirname(path.abspath(rel_path))
real_path = path.join(
current_folder, '../lists/{dst}/list.json'.format(dst=dst))
return path.abspath(path.realpath(real_path))
def get_abspath_source_file(dst):
rel_path = getframeinfo(currentframe()).filename
current_folder = path.dirname(path.abspath(rel_path))
tmp_path = path.join(current_folder, '../tmp/')
if not path.exists(tmp_path):
mkdir(tmp_path)
return path.abspath(path.realpath(path.join(tmp_path, '{dst}'.format(dst=dst))))
def get_version():
return int(datetime.date.today().strftime('%Y%m%d'))
def unique_sorted_warninglist(warninglist):
warninglist['list'] = sorted(set(warninglist['list']))
return warninglist
def write_to_file(warninglist, dst):
frame_records = stack()[1]
caller = getmodulename(frame_records[1]).upper()
try:
with open(get_abspath_list_file(dst), 'w') as data_file:
json.dump(unique_sorted_warninglist(warninglist),
data_file, indent=2, sort_keys=True)
data_file.write("\n")
logging.info('New warninglist written to {}.'.format(
get_abspath_list_file(dst)))
except Exception as exc:
logging.error(
'{} General exception occured: {}.'.format(caller, str(exc)))
def main():
init_logging()
if __name__ == '__main__':
main()