Add comments/notes to helpers.py

pull/14/head
Dan Puttick 2017-04-10 13:18:27 +02:00
parent 67c90087ba
commit c43ac0697a
1 changed file with 7 additions and 8 deletions

View File

@@ -3,8 +3,8 @@
""" """
Contains the base objects for use when creating a sanitizer using Contains the base objects for use when creating a sanitizer using
PyCIRCLean. Subclass FileBase and KittenGroomerBase to implement your PyCIRCLean. Subclass or import from FileBase/KittenGroomerBase and implement
desired behavior. your desired behavior.
""" """
@@ -87,9 +87,9 @@ class FileBase(object):
else: else:
try: try:
mt = magic.from_file(self.src_path, mime=True) mt = magic.from_file(self.src_path, mime=True)
# Note: magic will always return something, even if it's just 'data' # Note: libmagic will always return something, even if it's just 'data'
except UnicodeEncodeError as e: except UnicodeEncodeError as e:
# FIXME: The encoding of the file is broken (possibly UTF-16) # FIXME: The encoding of the file that triggers this is broken (possibly it's UTF-16 and Python expects utf8)
# Note: one of the Travis files will trigger this exception # Note: one of the Travis files will trigger this exception
self.add_error(e, '') self.add_error(e, '')
mt = None mt = None
@@ -118,8 +118,6 @@ class FileBase(object):
@property @property
def has_mimetype(self): def has_mimetype(self):
"""True if file has a main and sub mimetype, else False.""" """True if file has a main and sub mimetype, else False."""
# TODO: broken mimetype checks should be done somewhere else.
# Should the check be by default or should we let the API consumer write it?
if not self.main_type or not self.sub_type: if not self.main_type or not self.sub_type:
return False return False
else: else:
@@ -326,19 +324,20 @@ class KittenGroomerBase(object):
def list_all_files(self, directory_path): def list_all_files(self, directory_path):
"""Generator yielding path to all of the files in a directory tree.""" """Generator yielding path to all of the files in a directory tree."""
for root, dirs, files in os.walk(directory_path): for root, dirs, files in os.walk(directory_path):
# files is a list anyway so we don't get much from using a generator here
for filename in files: for filename in files:
filepath = os.path.join(root, filename) filepath = os.path.join(root, filename)
yield filepath yield filepath
####################### #######################
# TODO: feels like this function doesn't need to exist if we move main() # TODO: if we move main() we can get rid of this as well
def processdir(self, src_dir, dst_dir): def processdir(self, src_dir, dst_dir):
"""Implement this function to define file processing behavior.""" """Implement this function to define file processing behavior."""
raise ImplementationRequired('Please implement processdir.') raise ImplementationRequired('Please implement processdir.')
# TODO: Maybe this shouldn't exist? It should probably get moved to filecheck since this isn't really API code # TODO: Should this get moved to filecheck? It isn't really API code and somebody can implement it themselves
def main(kg_implementation, description='Call a KittenGroomer implementation to process files present in the source directory and copy them to the destination directory.'): def main(kg_implementation, description='Call a KittenGroomer implementation to process files present in the source directory and copy them to the destination directory.'):
parser = argparse.ArgumentParser(prog='KittenGroomer', description=description) parser = argparse.ArgumentParser(prog='KittenGroomer', description=description)
parser.add_argument('-s', '--source', type=str, help='Source directory') parser.add_argument('-s', '--source', type=str, help='Source directory')