mirror of https://github.com/CIRCL/PyCIRCLean
fix: Make sure the test suite passes
parent
2eff2b7127
commit
cc081a921d
|
@ -11,7 +11,7 @@ import shutil
|
|||
import time
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple, Callable, Optional
|
||||
from typing import Dict, List, Tuple, Callable, Optional, Union
|
||||
|
||||
import oletools.oleid # type: ignore
|
||||
import olefile # type: ignore
|
||||
|
@ -49,13 +49,13 @@ class Config:
|
|||
mimes_metadata: Tuple[str, ...] = ('image/jpeg', 'image/tiff', 'image/png',)
|
||||
|
||||
# Mimetype aliases
|
||||
aliases: Dict[str, str] = {
|
||||
aliases: Dict[str, Union[str, List[str]]] = {
|
||||
# Win executables
|
||||
'application/x-msdos-program': 'application/x-dosexec',
|
||||
'application/x-dosexec': 'application/x-msdos-program',
|
||||
# Other apps with confusing mimetypes
|
||||
'application/rtf': 'text/rtf',
|
||||
'application/rar': 'application/x-rar',
|
||||
'application/vnd.rar': 'application/x-rar',
|
||||
'application/ogg': 'audio/ogg',
|
||||
'audio/ogg': 'application/ogg'
|
||||
}
|
||||
|
@ -126,11 +126,11 @@ class Config:
|
|||
# In [12]: mimetypes.guess_type('toot.tar.gz', strict=False)
|
||||
# Out[12]: ('application/x-tar', 'gzip')
|
||||
# It works as expected if you do mimetypes.guess_type('application/gzip', strict=False)
|
||||
override_ext: Dict[str, str] = {'.gz': 'application/gzip'
|
||||
, '.csv': 'text/csv' #,'text/plain' )
|
||||
, '.numbers': 'application/vnd.apple.numbers' #,'application/zip')
|
||||
, '.pages': 'application/vnd.apple.pages' #,'application/zip')
|
||||
, '.keynote': 'application/vnd.apple.keynote' #,'application/zip')
|
||||
override_ext: Dict[str, str] = {'.gz': 'application/gzip',
|
||||
'.csv': 'text/csv', # ,'text/plain' )
|
||||
'.numbers': 'application/vnd.apple.numbers', # ,'application/zip')
|
||||
'.pages': 'application/vnd.apple.pages', # ,'application/zip')
|
||||
'.keynote': 'application/vnd.apple.keynote' # ,'application/zip')
|
||||
}
|
||||
|
||||
|
||||
|
@ -209,6 +209,9 @@ class File(FileBase):
|
|||
|
||||
expected_mimetypes = [expected_mimetype]
|
||||
if expected_mimetype in Config.aliases:
|
||||
if isinstance(Config.aliases[expected_mimetype], list):
|
||||
expected_mimetypes += Config.aliases[expected_mimetype]
|
||||
else:
|
||||
expected_mimetypes.append(Config.aliases[expected_mimetype])
|
||||
if (encoding is None) and (os.path.getsize(self.src_path) == 0):
|
||||
is_empty_file = True
|
||||
|
@ -838,7 +841,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
|||
for path in sorted(os.listdir(root_dir_path), key=lambda x: str.lower(x)):
|
||||
full_path = root_dir_path / path
|
||||
filename = full_path.name
|
||||
if not filename in skipped_files and not filename.startswith('._'):
|
||||
if filename not in skipped_files and not filename.startswith('._'):
|
||||
# check for symlinks first to prevent getting trapped in infinite symlink recursion
|
||||
if full_path.is_symlink():
|
||||
queue.append(full_path)
|
||||
|
@ -849,7 +852,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
|||
elif full_path.is_file():
|
||||
queue.append(full_path)
|
||||
else:
|
||||
print("SKIPPING: "+filename)
|
||||
print(f"SKIPPING: {filename}")
|
||||
return queue
|
||||
|
||||
def run(self):
|
||||
|
|
|
@ -243,17 +243,8 @@ class FileBase(object):
|
|||
mimetype = 'inode/symlink'
|
||||
self.set_property('symlink_path', os.readlink(file_path))
|
||||
else:
|
||||
try:
|
||||
mt = magic.from_file(file_path, mime=True)
|
||||
# libmagic always returns something, even if it's just 'data'
|
||||
except UnicodeEncodeError as e:
|
||||
self.add_error(e, '')
|
||||
mt = None
|
||||
try:
|
||||
mimetype = mt.decode("utf-8") # type: ignore
|
||||
except Exception:
|
||||
# FIXME: what should the exception be if mimetype isn't utf-8?
|
||||
mimetype = 'application/octet-stream'
|
||||
mimetype = magic.from_file(file_path, mime=True)
|
||||
return mimetype
|
||||
|
||||
def _split_mimetype(self, mimetype: str) -> Tuple[Union[str, None], Union[str, None]]:
|
||||
|
|
|
@ -197,9 +197,9 @@ lxml = "*"
|
|||
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/grierforensics/officedissector.git"
|
||||
url = "https://github.com/Rafiot/officedissector.git"
|
||||
reference = "HEAD"
|
||||
resolved_reference = "2059a5ba08fa139362e3936578f99c4da9a9b55d"
|
||||
resolved_reference = "e3d9e8e155cc01180524c9b45b9fbec232206121"
|
||||
|
||||
[[package]]
|
||||
name = "olefile"
|
||||
|
@ -444,7 +444,7 @@ python-versions = "*"
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.8"
|
||||
content-hash = "31838fedc10762d58f75f2822a5a557756f2043f62ed70e3e3bb4ef806958a9b"
|
||||
content-hash = "dd6a8563c8232219ba31a071d662bee519adaf95317f66ae81b2022269609510"
|
||||
|
||||
[metadata.files]
|
||||
attrs = [
|
||||
|
|
|
@ -26,7 +26,7 @@ pillow = "^9.3.0"
|
|||
olefile = "^0.46"
|
||||
oletools = "^0.60.1"
|
||||
python-magic = "^0.4.27"
|
||||
officedissector = {git = "https://github.com/grierforensics/officedissector.git"}
|
||||
officedissector = {git = "https://github.com/Rafiot/officedissector.git"}
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
tox = "^3.27.0"
|
||||
|
|
Loading…
Reference in New Issue