chg: use pathlib everywhere, remove old tmpfiles

2018-03-22 18:33:42 +01:00 · 2018-03-22 18:33:42 +01:00 · fb195971e8
parent 247a4a26b0
commit fb195971e8
1 changed files with 24 additions and 20 deletions
--- a/lookyloo/init.py
+++ b/lookyloo/init.py
@ -9,13 +9,12 @@ from scrapysplashwrapper import crawl
 from flask import Flask, render_template, request, session, send_file
 from flask_bootstrap import Bootstrap

-from glob import glob
-import os
 from datetime import datetime

 import pickle
 import tempfile
 import pathlib
+import time

 from zipfile import ZipFile, ZIP_DEFLATED
 from io import BytesIO
@ -33,10 +32,10 @@ app.config['BOOTSTRAP_SERVE_LOCAL'] = True
 app.config['SESSION_COOKIE_NAME'] = 'lookyloo'
 app.debug = True

-HAR_DIR = 'scraped'
+HAR_DIR = pathlib.Path('scraped')
 SPLASH = 'http://127.0.0.1:8050'

-pathlib.Path(HAR_DIR).mkdir(parents=True, exist_ok=True)
+HAR_DIR.mkdir(parents=True, exist_ok=True)


@app.before_request
@ -45,11 +44,15 @@ def session_management():
    session.permanent = True


+def cleanup_old_tmpfiles():
+    for tmpfile in pathlib.Path(tempfile.gettempdir()).glob('lookyloo*'):  # entries in the system temp dir named 'lookyloo*'
+        if time.time() - tmpfile.stat().st_atime > 36000:  # last accessed more than 36000 s (10 h) ago
+            tmpfile.unlink()
+
+
 def load_tree(report_dir):
-    if session.get('tree'):
-        os.unlink(session.get('tree'))
    session.clear()
-    har_files = sorted(glob(os.path.join(HAR_DIR, report_dir, '*.har')))
+    har_files = sorted(report_dir.glob('*.har'))
    ct = CrawledTree(har_files)
    ct.find_parents()
    ct.join_trees()
@ -74,20 +77,20 @@ def scrape():
            # broken
            pass
        width = len(str(len(items)))
-        dirpath = os.path.join(HAR_DIR, datetime.now().isoformat())
-        os.makedirs(dirpath)
+        dirpath = HAR_DIR / datetime.now().isoformat()
+        dirpath.mkdir()
        for i, item in enumerate(items):
            harfile = item['har']
            png = base64.b64decode(item['png'])
            child_frames = item['childFrames']
            html = item['html']
-            with open(os.path.join(dirpath, '{0:0{width}}.har'.format(i, width=width)), 'w') as f:
+            with (dirpath / '{0:0{width}}.har'.format(i, width=width)).open('w') as f:
                json.dump(harfile, f)
-            with open(os.path.join(dirpath, '{0:0{width}}.png'.format(i, width=width)), 'wb') as f:
+            with (dirpath / '{0:0{width}}.png'.format(i, width=width)).open('wb') as f:
                f.write(png)
-            with open(os.path.join(dirpath, '{0:0{width}}.html'.format(i, width=width)), 'w') as f:
+            with (dirpath / '{0:0{width}}.html'.format(i, width=width)).open('w') as f:
                f.write(html)
-            with open(os.path.join(dirpath, '{0:0{width}}.frames.json'.format(i, width=width)), 'w') as f:
+            with (dirpath / '{0:0{width}}.frames.json'.format(i, width=width)).open('w') as f:
                json.dump(child_frames, f)
        return tree(0)
    return render_template('scrape.html')
@ -95,10 +98,10 @@ def scrape():

 def get_report_dirs():
    # Cleanup HAR_DIR of failed runs.
-    for report_dir in os.listdir(HAR_DIR):
-        if not os.listdir(os.path.join(HAR_DIR, report_dir)):
-            os.rmdir(os.path.join(HAR_DIR, report_dir))
-    return sorted(os.listdir(HAR_DIR), reverse=True)
+    for report_dir in HAR_DIR.iterdir():
+        if report_dir.is_dir() and not any(report_dir.iterdir()):  # iterdir() is a generator (always truthy); any() tests for emptiness
+            report_dir.rmdir()
+    return sorted(HAR_DIR.iterdir(), reverse=True)  # newest first: directory names are ISO timestamps


@app.route('/tree/hostname/<node_uuid>', methods=['GET'])
@ -141,12 +144,13 @@ def tree(tree_id):

@app.route('/', methods=['GET'])
 def index():
+    cleanup_old_tmpfiles()
    i = 0
    titles = []
-    if not os.path.exists(HAR_DIR):
-        os.makedirs(HAR_DIR)
+    if not HAR_DIR.exists():
+        HAR_DIR.mkdir(parents=True)
    for report_dir in get_report_dirs():
-        har_files = sorted(glob(os.path.join(HAR_DIR, report_dir, '*.har')))
+        har_files = sorted(report_dir.glob('*.har'))
        if not har_files:
            continue
        with open(har_files[0], 'r') as f: