mirror of https://github.com/CIRCL/PyCIRCLean
First commit with Logger object
- Made logger object
- Moved some logger related code from Groomer to Logger
- Changed logging related tests
- Filecheck tests still do not pass
pull/12/head
parent
92d1b1cd93
commit
1cf8a62f46
|
@ -204,6 +204,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
||||||
def _run_process(self, command_string, timeout=None):
|
def _run_process(self, command_string, timeout=None):
|
||||||
"""Run command_string in a subprocess, wait until it finishes."""
|
"""Run command_string in a subprocess, wait until it finishes."""
|
||||||
args = shlex.split(command_string)
|
args = shlex.split(command_string)
|
||||||
|
# TODO: log_debug_err and log_debug_out are now broken, fix
|
||||||
with open(self.log_debug_err, 'ab') as stderr, open(self.log_debug_out, 'ab') as stdout:
|
with open(self.log_debug_err, 'ab') as stderr, open(self.log_debug_out, 'ab') as stdout:
|
||||||
try:
|
try:
|
||||||
subprocess.check_call(args, stdout=stdout, stderr=stderr, timeout=timeout)
|
subprocess.check_call(args, stdout=stdout, stderr=stderr, timeout=timeout)
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#!/usr/bin/env python
|
#!/usr/bin/env python
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
from .helpers import FileBase, KittenGroomerBase, main
|
from .helpers import FileBase, KittenGroomerBase, GroomerLog, main
|
||||||
|
|
|
@ -14,7 +14,7 @@ import shutil
|
||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
import magic
|
import magic
|
||||||
from twiggy import quick_setup, log
|
import twiggy
|
||||||
|
|
||||||
|
|
||||||
class KittenGroomerError(Exception):
|
class KittenGroomerError(Exception):
|
||||||
|
@ -38,7 +38,7 @@ class FileBase(object):
|
||||||
or methods relevant to a given implementation.
|
or methods relevant to a given implementation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, src_path, dst_path):
|
def __init__(self, src_path, dst_path, logger=None):
|
||||||
"""Initialized with the source path and expected destination path."""
|
"""Initialized with the source path and expected destination path."""
|
||||||
self.src_path = src_path
|
self.src_path = src_path
|
||||||
self.dst_path = dst_path
|
self.dst_path = dst_path
|
||||||
|
@ -46,6 +46,7 @@ class FileBase(object):
|
||||||
self.log_string = ''
|
self.log_string = ''
|
||||||
self._determine_extension()
|
self._determine_extension()
|
||||||
self._determine_mimetype()
|
self._determine_mimetype()
|
||||||
|
self.logger = logger
|
||||||
|
|
||||||
def _determine_extension(self):
|
def _determine_extension(self):
|
||||||
_, ext = os.path.splitext(self.src_path)
|
_, ext = os.path.splitext(self.src_path)
|
||||||
|
@ -174,46 +175,25 @@ class FileBase(object):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
class KittenGroomerBase(object):
|
class GroomerLog(object):
|
||||||
"""Base object responsible for copy/sanitization process."""
|
"""Groomer logging object"""
|
||||||
|
|
||||||
def __init__(self, root_src, root_dst, debug=False):
|
def __init__(self, root_dir, debug=False):
|
||||||
"""Initialized with path to source and dest directories."""
|
self.log_dir_path = os.path.join(root_dir, 'logs')
|
||||||
self.src_root_dir = root_src
|
if os.path.exists(self.log_dir_path):
|
||||||
self.dst_root_dir = root_dst
|
shutil.rmtree(self.log_dir_path)
|
||||||
self.debug = debug
|
os.makedirs(self.log_dir_path)
|
||||||
self.cur_file = None
|
self.log_processing = os.path.join(self.log_dir_path, 'processing.log')
|
||||||
# Setup logs
|
self.log_content = os.path.join(self.log_dir_path, 'content.log')
|
||||||
self.log_root_dir = os.path.join(self.dst_root_dir, 'logs')
|
twiggy.quick_setup(file=self.log_processing)
|
||||||
self._safe_rmtree(self.log_root_dir)
|
self.log = twiggy.log.name('files')
|
||||||
self._safe_mkdir(self.log_root_dir)
|
if debug:
|
||||||
self.log_processing = os.path.join(self.log_root_dir, 'processing.log')
|
self.log_debug_err = os.path.join(self.log_dir_path, 'debug_stderr.log')
|
||||||
self.log_content = os.path.join(self.log_root_dir, 'content.log')
|
self.log_debug_out = os.path.join(self.log_dir_path, 'debug_stdout.log')
|
||||||
quick_setup(file=self.log_processing)
|
|
||||||
self.log_name = log.name('files')
|
|
||||||
|
|
||||||
self.tree(self.src_root_dir)
|
|
||||||
self.resources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
|
|
||||||
os.environ["PATH"] += os.pathsep + self.resources_path
|
|
||||||
|
|
||||||
if self.debug:
|
|
||||||
self.log_debug_err = os.path.join(self.log_root_dir, 'debug_stderr.log')
|
|
||||||
self.log_debug_out = os.path.join(self.log_root_dir, 'debug_stdout.log')
|
|
||||||
else:
|
else:
|
||||||
self.log_debug_err = os.devnull
|
self.log_debug_err = os.devnull
|
||||||
self.log_debug_out = os.devnull
|
self.log_debug_out = os.devnull
|
||||||
|
|
||||||
def _computehash(self, path):
|
|
||||||
"""Returns a sha256 hash of a file at a given path."""
|
|
||||||
s = hashlib.sha256()
|
|
||||||
with open(path, 'rb') as f:
|
|
||||||
while True:
|
|
||||||
buf = f.read(0x100000)
|
|
||||||
if not buf:
|
|
||||||
break
|
|
||||||
s.update(buf)
|
|
||||||
return s.hexdigest()
|
|
||||||
|
|
||||||
def tree(self, base_dir, padding=' '):
|
def tree(self, base_dir, padding=' '):
|
||||||
"""Writes a graphical tree to the log for a given directory."""
|
"""Writes a graphical tree to the log for a given directory."""
|
||||||
with open(self.log_content, 'ab') as lf:
|
with open(self.log_content, 'ab') as lf:
|
||||||
|
@ -230,6 +210,32 @@ class KittenGroomerBase(object):
|
||||||
elif os.path.isfile(curpath):
|
elif os.path.isfile(curpath):
|
||||||
lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)).encode(errors='ignore'))
|
lf.write('{}+-- {}\t- {}\n'.format(padding, f, self._computehash(curpath)).encode(errors='ignore'))
|
||||||
|
|
||||||
|
def _computehash(self, path):
|
||||||
|
"""Returns a sha256 hash of a file at a given path."""
|
||||||
|
s = hashlib.sha256()
|
||||||
|
with open(path, 'rb') as f:
|
||||||
|
while True:
|
||||||
|
buf = f.read(0x100000)
|
||||||
|
if not buf:
|
||||||
|
break
|
||||||
|
s.update(buf)
|
||||||
|
return s.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
class KittenGroomerBase(object):
|
||||||
|
"""Base object responsible for copy/sanitization process."""
|
||||||
|
|
||||||
|
def __init__(self, root_src, root_dst, debug=False):
|
||||||
|
"""Initialized with path to source and dest directories."""
|
||||||
|
self.src_root_dir = root_src
|
||||||
|
self.dst_root_dir = root_dst
|
||||||
|
self.debug = debug
|
||||||
|
self.cur_file = None
|
||||||
|
self.logger = GroomerLog(self.dst_root_dir, debug)
|
||||||
|
# Add data/ to PATH
|
||||||
|
# self.resources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
|
||||||
|
# os.environ["PATH"] += os.pathsep + self.resources_path
|
||||||
|
|
||||||
# ##### Helpers #####
|
# ##### Helpers #####
|
||||||
def _safe_rmtree(self, directory):
|
def _safe_rmtree(self, directory):
|
||||||
"""Remove a directory tree if it exists."""
|
"""Remove a directory tree if it exists."""
|
||||||
|
@ -262,6 +268,7 @@ class KittenGroomerBase(object):
|
||||||
print(e)
|
print(e)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# TODO: this isn't a private method, change and edit the groomers as well
|
||||||
def _list_all_files(self, directory):
|
def _list_all_files(self, directory):
|
||||||
"""Generator yielding path to all of the files in a directory tree."""
|
"""Generator yielding path to all of the files in a directory tree."""
|
||||||
for root, dirs, files in os.walk(directory):
|
for root, dirs, files in os.walk(directory):
|
||||||
|
@ -269,18 +276,9 @@ class KittenGroomerBase(object):
|
||||||
filepath = os.path.join(root, filename)
|
filepath = os.path.join(root, filename)
|
||||||
yield filepath
|
yield filepath
|
||||||
|
|
||||||
def _print_log(self):
|
|
||||||
"""
|
|
||||||
Print log, should be called after each file.
|
|
||||||
|
|
||||||
You probably want to reimplement it in the subclass.
|
|
||||||
"""
|
|
||||||
tmp_log = self.log_name.fields(**self.cur_file.log_details)
|
|
||||||
tmp_log.info('It did a thing.')
|
|
||||||
|
|
||||||
#######################
|
#######################
|
||||||
|
|
||||||
def processdir(self, src_dir=None, dst_dir=None):
|
def processdir(self, src_dir, dst_dir):
|
||||||
"""
|
"""
|
||||||
Implement this function in your subclass to define file processing behavior.
|
Implement this function in your subclass to define file processing behavior.
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -5,7 +5,7 @@ import os
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from kittengroomer import FileBase, KittenGroomerBase
|
from kittengroomer import FileBase, KittenGroomerBase, GroomerLog
|
||||||
from kittengroomer.helpers import ImplementationRequired
|
from kittengroomer.helpers import ImplementationRequired
|
||||||
|
|
||||||
skip = pytest.mark.skip
|
skip = pytest.mark.skip
|
||||||
|
@ -224,6 +224,12 @@ class TestFileBase:
|
||||||
# if there is no metadata to write should this work?
|
# if there is no metadata to write should this work?
|
||||||
|
|
||||||
|
|
||||||
|
class TestLogger:
|
||||||
|
@xfail
|
||||||
|
def test_tree(self, tmpdir):
|
||||||
|
GroomerLog.tree(tmpdir)
|
||||||
|
|
||||||
|
|
||||||
class TestKittenGroomerBase:
|
class TestKittenGroomerBase:
|
||||||
|
|
||||||
@fixture
|
@fixture
|
||||||
|
@ -248,15 +254,6 @@ class TestKittenGroomerBase:
|
||||||
debug=True)
|
debug=True)
|
||||||
# we should maybe protect access to self.current_file in some way?
|
# we should maybe protect access to self.current_file in some way?
|
||||||
|
|
||||||
def test_computehash(self, tmpdir):
|
|
||||||
file = tmpdir.join('test.txt')
|
|
||||||
file.write('testing')
|
|
||||||
simple_groomer = KittenGroomerBase(tmpdir.strpath, tmpdir.strpath)
|
|
||||||
simple_groomer._computehash(file.strpath)
|
|
||||||
|
|
||||||
def test_tree(self, generic_groomer):
|
|
||||||
generic_groomer.tree(generic_groomer.src_root_dir)
|
|
||||||
|
|
||||||
def test_safe_copy(self, tmpdir):
|
def test_safe_copy(self, tmpdir):
|
||||||
file = tmpdir.join('test.txt')
|
file = tmpdir.join('test.txt')
|
||||||
file.write('testing')
|
file.write('testing')
|
||||||
|
@ -278,11 +275,6 @@ class TestKittenGroomerBase:
|
||||||
assert file.strpath in files
|
assert file.strpath in files
|
||||||
assert testdir.strpath not in files
|
assert testdir.strpath not in files
|
||||||
|
|
||||||
def test_print_log(self, generic_groomer):
|
|
||||||
with pytest.raises(AttributeError):
|
|
||||||
generic_groomer._print_log()
|
|
||||||
# Kind of a bad test, but this should be implemented by the user anyway
|
|
||||||
|
|
||||||
def test_processdir(self, generic_groomer):
|
def test_processdir(self, generic_groomer):
|
||||||
with pytest.raises(ImplementationRequired):
|
with pytest.raises(ImplementationRequired):
|
||||||
generic_groomer.processdir()
|
generic_groomer.processdir(None, None)
|
||||||
|
|
Loading…
Reference in New Issue