fix: Make sure the test suite passes

pull/38/head
Raphaël Vinot 2022-11-11 14:59:41 +01:00
parent 2eff2b7127
commit cc081a921d
4 changed files with 25 additions and 31 deletions

View File

@@ -11,7 +11,7 @@ import shutil
import time import time
import hashlib import hashlib
from pathlib import Path from pathlib import Path
from typing import Dict, List, Tuple, Callable, Optional from typing import Dict, List, Tuple, Callable, Optional, Union
import oletools.oleid # type: ignore import oletools.oleid # type: ignore
import olefile # type: ignore import olefile # type: ignore
@@ -34,7 +34,7 @@ class Config:
mimes_rtf: Tuple[str, ...] = ('rtf', 'richtext',) mimes_rtf: Tuple[str, ...] = ('rtf', 'richtext',)
mimes_pdf: Tuple[str, ...] = ('pdf', 'postscript',) mimes_pdf: Tuple[str, ...] = ('pdf', 'postscript',)
mimes_xml: Tuple[str, ...] = ('xml',) mimes_xml: Tuple[str, ...] = ('xml',)
mimes_csv: Tuple[str, ...] = ('csv','text/csv') mimes_csv: Tuple[str, ...] = ('csv', 'text/csv')
mimes_ms: Tuple[str, ...] = ('dosexec',) mimes_ms: Tuple[str, ...] = ('dosexec',)
mimes_compressed: Tuple[str, ...] = ('zip', 'rar', 'x-rar', 'bzip2', 'lzip', 'lzma', 'lzop', mimes_compressed: Tuple[str, ...] = ('zip', 'rar', 'x-rar', 'bzip2', 'lzip', 'lzma', 'lzop',
'xz', 'compress', 'gzip', 'tar',) 'xz', 'compress', 'gzip', 'tar',)
@@ -49,20 +49,20 @@ class Config:
mimes_metadata: Tuple[str, ...] = ('image/jpeg', 'image/tiff', 'image/png',) mimes_metadata: Tuple[str, ...] = ('image/jpeg', 'image/tiff', 'image/png',)
# Mimetype aliases # Mimetype aliases
aliases: Dict[str, str] = { aliases: Dict[str, Union[str, List[str]]] = {
# Win executables # Win executables
'application/x-msdos-program': 'application/x-dosexec', 'application/x-msdos-program': 'application/x-dosexec',
'application/x-dosexec': 'application/x-msdos-program', 'application/x-dosexec': 'application/x-msdos-program',
# Other apps with confusing mimetypes # Other apps with confusing mimetypes
'application/rtf': 'text/rtf', 'application/rtf': 'text/rtf',
'application/rar': 'application/x-rar', 'application/vnd.rar': 'application/x-rar',
'application/ogg': 'audio/ogg', 'application/ogg': 'audio/ogg',
'audio/ogg': 'application/ogg' 'audio/ogg': 'application/ogg'
} }
# Mime Type / Extension fix. TODO: Doesn't quite work....???? # Mime Type / Extension fix. TODO: Doesn't quite work....????
mimetypes.add_type('text/plain','.csv',False) mimetypes.add_type('text/plain', '.csv', False)
mimetypes.add_type('text/csv','.csv',False) mimetypes.add_type('text/csv', '.csv', False)
mimetypes.add_type('application/vnd.apple.numbers', '.numbers', True) mimetypes.add_type('application/vnd.apple.numbers', '.numbers', True)
mimetypes.add_type('application/vnd.apple.pages', '.pages', False) mimetypes.add_type('application/vnd.apple.pages', '.pages', False)
mimetypes.add_type('application/vnd.apple.keynote', '.keynote', False) mimetypes.add_type('application/vnd.apple.keynote', '.keynote', False)
@@ -126,11 +126,11 @@ class Config:
# In [12]: mimetypes.guess_type('toot.tar.gz', strict=False) # In [12]: mimetypes.guess_type('toot.tar.gz', strict=False)
# Out[12]: ('application/x-tar', 'gzip') # Out[12]: ('application/x-tar', 'gzip')
# It works as expected if you do mimetypes.guess_type('application/gzip', strict=False) # It works as expected if you do mimetypes.guess_type('application/gzip', strict=False)
override_ext: Dict[str, str] = {'.gz': 'application/gzip' override_ext: Dict[str, str] = {'.gz': 'application/gzip',
, '.csv': 'text/csv' #,'text/plain' ) '.csv': 'text/csv', # ,'text/plain' )
, '.numbers': 'application/vnd.apple.numbers' #,'application/zip') '.numbers': 'application/vnd.apple.numbers', # ,'application/zip')
, '.pages': 'application/vnd.apple.pages' #,'application/zip') '.pages': 'application/vnd.apple.pages', # ,'application/zip')
, '.keynote': 'application/vnd.apple.keynote' #,'application/zip') '.keynote': 'application/vnd.apple.keynote' # ,'application/zip')
} }
@@ -209,6 +209,9 @@ class File(FileBase):
expected_mimetypes = [expected_mimetype] expected_mimetypes = [expected_mimetype]
if expected_mimetype in Config.aliases: if expected_mimetype in Config.aliases:
if isinstance(Config.aliases[expected_mimetype], list):
expected_mimetypes += Config.aliases[expected_mimetype]
else:
expected_mimetypes.append(Config.aliases[expected_mimetype]) expected_mimetypes.append(Config.aliases[expected_mimetype])
if (encoding is None) and (os.path.getsize(self.src_path) == 0): if (encoding is None) and (os.path.getsize(self.src_path) == 0):
is_empty_file = True is_empty_file = True
@@ -833,12 +836,12 @@ class KittenGroomerFileCheck(KittenGroomerBase):
Performs a depth-first traversal of the file tree. Performs a depth-first traversal of the file tree.
""" """
skipped_files = ( '.Trashes', '._.Trashes', '.DS_Store', '.fseventsd', '.Spotlight-V100','System Volume Information') skipped_files = ('.Trashes', '._.Trashes', '.DS_Store', '.fseventsd', '.Spotlight-V100', 'System Volume Information')
queue = [] queue = []
for path in sorted(os.listdir(root_dir_path), key=lambda x: str.lower(x)): for path in sorted(os.listdir(root_dir_path), key=lambda x: str.lower(x)):
full_path = root_dir_path / path full_path = root_dir_path / path
filename = full_path.name filename = full_path.name
if not filename in skipped_files and not filename.startswith('._'): if filename not in skipped_files and not filename.startswith('._'):
# check for symlinks first to prevent getting trapped in infinite symlink recursion # check for symlinks first to prevent getting trapped in infinite symlink recursion
if full_path.is_symlink(): if full_path.is_symlink():
queue.append(full_path) queue.append(full_path)
@@ -849,7 +852,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
elif full_path.is_file(): elif full_path.is_file():
queue.append(full_path) queue.append(full_path)
else: else:
print("SKIPPING: "+filename) print(f"SKIPPING: {filename}")
return queue return queue
def run(self): def run(self):

View File

@@ -243,17 +243,8 @@ class FileBase(object):
mimetype = 'inode/symlink' mimetype = 'inode/symlink'
self.set_property('symlink_path', os.readlink(file_path)) self.set_property('symlink_path', os.readlink(file_path))
else: else:
try:
mt = magic.from_file(file_path, mime=True)
# libmagic always returns something, even if it's just 'data' # libmagic always returns something, even if it's just 'data'
except UnicodeEncodeError as e: mimetype = magic.from_file(file_path, mime=True)
self.add_error(e, '')
mt = None
try:
mimetype = mt.decode("utf-8") # type: ignore
except Exception:
# FIXME: what should the exception be if mimetype isn't utf-8?
mimetype = 'application/octet-stream'
return mimetype return mimetype
def _split_mimetype(self, mimetype: str) -> Tuple[Union[str, None], Union[str, None]]: def _split_mimetype(self, mimetype: str) -> Tuple[Union[str, None], Union[str, None]]:

6
poetry.lock generated
View File

@@ -197,9 +197,9 @@ lxml = "*"
[package.source] [package.source]
type = "git" type = "git"
url = "https://github.com/grierforensics/officedissector.git" url = "https://github.com/Rafiot/officedissector.git"
reference = "HEAD" reference = "HEAD"
resolved_reference = "2059a5ba08fa139362e3936578f99c4da9a9b55d" resolved_reference = "e3d9e8e155cc01180524c9b45b9fbec232206121"
[[package]] [[package]]
name = "olefile" name = "olefile"
@@ -444,7 +444,7 @@ python-versions = "*"
[metadata] [metadata]
lock-version = "1.1" lock-version = "1.1"
python-versions = "^3.8" python-versions = "^3.8"
content-hash = "31838fedc10762d58f75f2822a5a557756f2043f62ed70e3e3bb4ef806958a9b" content-hash = "dd6a8563c8232219ba31a071d662bee519adaf95317f66ae81b2022269609510"
[metadata.files] [metadata.files]
attrs = [ attrs = [

View File

@@ -26,7 +26,7 @@ pillow = "^9.3.0"
olefile = "^0.46" olefile = "^0.46"
oletools = "^0.60.1" oletools = "^0.60.1"
python-magic = "^0.4.27" python-magic = "^0.4.27"
officedissector = {git = "https://github.com/grierforensics/officedissector.git"} officedissector = {git = "https://github.com/Rafiot/officedissector.git"}
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
tox = "^3.27.0" tox = "^3.27.0"