chg: use pathlib everywhere, remove old tmpfiles

pull/27/head
Raphaël Vinot 2018-03-22 18:33:42 +01:00
parent 247a4a26b0
commit fb195971e8
1 changed file with 24 additions and 20 deletions

View File

@ -9,13 +9,12 @@ from scrapysplashwrapper import crawl
from flask import Flask, render_template, request, session, send_file from flask import Flask, render_template, request, session, send_file
from flask_bootstrap import Bootstrap from flask_bootstrap import Bootstrap
from glob import glob
import os
from datetime import datetime from datetime import datetime
import pickle import pickle
import tempfile import tempfile
import pathlib import pathlib
import time
from zipfile import ZipFile, ZIP_DEFLATED from zipfile import ZipFile, ZIP_DEFLATED
from io import BytesIO from io import BytesIO
@ -33,10 +32,10 @@ app.config['BOOTSTRAP_SERVE_LOCAL'] = True
app.config['SESSION_COOKIE_NAME'] = 'lookyloo' app.config['SESSION_COOKIE_NAME'] = 'lookyloo'
app.debug = True app.debug = True
HAR_DIR = 'scraped' HAR_DIR = pathlib.Path('scraped')
SPLASH = 'http://127.0.0.1:8050' SPLASH = 'http://127.0.0.1:8050'
pathlib.Path(HAR_DIR).mkdir(parents=True, exist_ok=True) HAR_DIR.mkdir(parents=True, exist_ok=True)
@app.before_request @app.before_request
@ -45,11 +44,15 @@ def session_management():
session.permanent = True session.permanent = True
def cleanup_old_tmpfiles(max_age=36000):
    """Delete stale ``lookyloo*`` files from the system temporary directory.

    A file is considered stale when its last access time is more than
    ``max_age`` seconds in the past.

    Args:
        max_age: Maximum allowed age in seconds, measured from the file's
            access time. Defaults to 36000 (10 hours).

    The cleanup is best effort: entries that are not regular files, that
    vanish while being processed, or that may not be removed by the current
    user are skipped instead of raising.
    """
    now = time.time()
    for tmpfile in pathlib.Path(tempfile.gettempdir()).glob('lookyloo*'):
        # unlink() fails on directories, so only regular files are handled.
        if not tmpfile.is_file():
            continue
        try:
            if now - tmpfile.stat().st_atime > max_age:
                tmpfile.unlink()
        except (FileNotFoundError, PermissionError):
            # The temp dir is shared: another request may have removed the
            # file already, or it may belong to a different user.
            continue
def load_tree(report_dir): def load_tree(report_dir):
if session.get('tree'):
os.unlink(session.get('tree'))
session.clear() session.clear()
har_files = sorted(glob(os.path.join(HAR_DIR, report_dir, '*.har'))) har_files = sorted(report_dir.glob('*.har'))
ct = CrawledTree(har_files) ct = CrawledTree(har_files)
ct.find_parents() ct.find_parents()
ct.join_trees() ct.join_trees()
@ -74,20 +77,20 @@ def scrape():
# broken # broken
pass pass
width = len(str(len(items))) width = len(str(len(items)))
dirpath = os.path.join(HAR_DIR, datetime.now().isoformat()) dirpath = HAR_DIR / datetime.now().isoformat()
os.makedirs(dirpath) dirpath.mkdir()
for i, item in enumerate(items): for i, item in enumerate(items):
harfile = item['har'] harfile = item['har']
png = base64.b64decode(item['png']) png = base64.b64decode(item['png'])
child_frames = item['childFrames'] child_frames = item['childFrames']
html = item['html'] html = item['html']
with open(os.path.join(dirpath, '{0:0{width}}.har'.format(i, width=width)), 'w') as f: with (dirpath / '{0:0{width}}.har'.format(i, width=width)).open('w') as f:
json.dump(harfile, f) json.dump(harfile, f)
with open(os.path.join(dirpath, '{0:0{width}}.png'.format(i, width=width)), 'wb') as f: with (dirpath / '{0:0{width}}.png'.format(i, width=width)).open('wb') as f:
f.write(png) f.write(png)
with open(os.path.join(dirpath, '{0:0{width}}.html'.format(i, width=width)), 'w') as f: with (dirpath / '{0:0{width}}.html'.format(i, width=width)).open('w') as f:
f.write(html) f.write(html)
with open(os.path.join(dirpath, '{0:0{width}}.frames.json'.format(i, width=width)), 'w') as f: with (dirpath / '{0:0{width}}.frames.json'.format(i, width=width)).open('w') as f:
json.dump(child_frames, f) json.dump(child_frames, f)
return tree(0) return tree(0)
return render_template('scrape.html') return render_template('scrape.html')
@ -95,10 +98,10 @@ def scrape():
def get_report_dirs():
    """Remove empty report directories, then list the entries of HAR_DIR.

    A directory directly under ``HAR_DIR`` that contains no entries is
    treated as the leftover of a failed run and deleted.

    Returns:
        list of pathlib.Path: the entries of ``HAR_DIR`` after the cleanup,
        sorted in descending order.
    """
    # Cleanup HAR_DIR of failed runs.
    # Snapshot the listing first, since directories are removed in the loop.
    for report_dir in list(HAR_DIR.iterdir()):
        # iterdir() returns a generator, which is always truthy, so the
        # emptiness check has to go through any() instead of a bare ``not``.
        if report_dir.is_dir() and not any(report_dir.iterdir()):
            report_dir.rmdir()
    return sorted(HAR_DIR.iterdir(), reverse=True)
@app.route('/tree/hostname/<node_uuid>', methods=['GET']) @app.route('/tree/hostname/<node_uuid>', methods=['GET'])
@ -141,12 +144,13 @@ def tree(tree_id):
@app.route('/', methods=['GET']) @app.route('/', methods=['GET'])
def index(): def index():
cleanup_old_tmpfiles()
i = 0 i = 0
titles = [] titles = []
if not os.path.exists(HAR_DIR): if not HAR_DIR.exists():
os.makedirs(HAR_DIR) HAR_DIR.mkdir(parents=True)
for report_dir in get_report_dirs(): for report_dir in get_report_dirs():
har_files = sorted(glob(os.path.join(HAR_DIR, report_dir, '*.har'))) har_files = sorted(report_dir.glob('*.har'))
if not har_files: if not har_files:
continue continue
with open(har_files[0], 'r') as f: with open(har_files[0], 'r') as f: