mirror of https://github.com/CIRCL/PyCIRCLean
fix: Make sure the test suite passes
parent
2eff2b7127
commit
cc081a921d
|
@ -11,7 +11,7 @@ import shutil
|
|||
import time
|
||||
import hashlib
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Tuple, Callable, Optional
|
||||
from typing import Dict, List, Tuple, Callable, Optional, Union
|
||||
|
||||
import oletools.oleid # type: ignore
|
||||
import olefile # type: ignore
|
||||
|
@ -34,7 +34,7 @@ class Config:
|
|||
mimes_rtf: Tuple[str, ...] = ('rtf', 'richtext',)
|
||||
mimes_pdf: Tuple[str, ...] = ('pdf', 'postscript',)
|
||||
mimes_xml: Tuple[str, ...] = ('xml',)
|
||||
mimes_csv: Tuple[str, ...] = ('csv','text/csv')
|
||||
mimes_csv: Tuple[str, ...] = ('csv', 'text/csv')
|
||||
mimes_ms: Tuple[str, ...] = ('dosexec',)
|
||||
mimes_compressed: Tuple[str, ...] = ('zip', 'rar', 'x-rar', 'bzip2', 'lzip', 'lzma', 'lzop',
|
||||
'xz', 'compress', 'gzip', 'tar',)
|
||||
|
@ -49,20 +49,20 @@ class Config:
|
|||
mimes_metadata: Tuple[str, ...] = ('image/jpeg', 'image/tiff', 'image/png',)
|
||||
|
||||
# Mimetype aliases
|
||||
aliases: Dict[str, str] = {
|
||||
aliases: Dict[str, Union[str, List[str]]] = {
|
||||
# Win executables
|
||||
'application/x-msdos-program': 'application/x-dosexec',
|
||||
'application/x-dosexec': 'application/x-msdos-program',
|
||||
# Other apps with confusing mimetypes
|
||||
'application/rtf': 'text/rtf',
|
||||
'application/rar': 'application/x-rar',
|
||||
'application/vnd.rar': 'application/x-rar',
|
||||
'application/ogg': 'audio/ogg',
|
||||
'audio/ogg': 'application/ogg'
|
||||
}
|
||||
|
||||
# MIME type / extension fix. TODO: this doesn't quite work yet.
|
||||
mimetypes.add_type('text/plain','.csv',False)
|
||||
mimetypes.add_type('text/csv','.csv',False)
|
||||
mimetypes.add_type('text/plain', '.csv', False)
|
||||
mimetypes.add_type('text/csv', '.csv', False)
|
||||
mimetypes.add_type('application/vnd.apple.numbers', '.numbers', True)
|
||||
mimetypes.add_type('application/vnd.apple.pages', '.pages', False)
|
||||
mimetypes.add_type('application/vnd.apple.keynote', '.keynote', False)
|
||||
|
@ -126,12 +126,12 @@ class Config:
|
|||
# In [12]: mimetypes.guess_type('toot.tar.gz', strict=False)
|
||||
# Out[12]: ('application/x-tar', 'gzip')
|
||||
# It works as expected if you do mimetypes.guess_type('application/gzip', strict=False)
|
||||
override_ext: Dict[str, str] = {'.gz': 'application/gzip'
|
||||
, '.csv': 'text/csv' #,'text/plain' )
|
||||
, '.numbers': 'application/vnd.apple.numbers' #,'application/zip')
|
||||
, '.pages': 'application/vnd.apple.pages' #,'application/zip')
|
||||
, '.keynote': 'application/vnd.apple.keynote' #,'application/zip')
|
||||
}
|
||||
override_ext: Dict[str, str] = {'.gz': 'application/gzip',
|
||||
'.csv': 'text/csv', # ,'text/plain' )
|
||||
'.numbers': 'application/vnd.apple.numbers', # ,'application/zip')
|
||||
'.pages': 'application/vnd.apple.pages', # ,'application/zip')
|
||||
'.keynote': 'application/vnd.apple.keynote' # ,'application/zip')
|
||||
}
|
||||
|
||||
|
||||
SEVENZ_PATH = '/usr/bin/7z'
|
||||
|
@ -209,7 +209,10 @@ class File(FileBase):
|
|||
|
||||
expected_mimetypes = [expected_mimetype]
|
||||
if expected_mimetype in Config.aliases:
|
||||
expected_mimetypes.append(Config.aliases[expected_mimetype])
|
||||
if isinstance(Config.aliases[expected_mimetype], list):
|
||||
expected_mimetypes += Config.aliases[expected_mimetype]
|
||||
else:
|
||||
expected_mimetypes.append(Config.aliases[expected_mimetype])
|
||||
if (encoding is None) and (os.path.getsize(self.src_path) == 0):
|
||||
is_empty_file = True
|
||||
else:
|
||||
|
@ -833,12 +836,12 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
|||
|
||||
Performs a depth-first traversal of the file tree.
|
||||
"""
|
||||
skipped_files = ( '.Trashes', '._.Trashes', '.DS_Store', '.fseventsd', '.Spotlight-V100','System Volume Information')
|
||||
skipped_files = ('.Trashes', '._.Trashes', '.DS_Store', '.fseventsd', '.Spotlight-V100', 'System Volume Information')
|
||||
queue = []
|
||||
for path in sorted(os.listdir(root_dir_path), key=lambda x: str.lower(x)):
|
||||
full_path = root_dir_path / path
|
||||
filename = full_path.name
|
||||
if not filename in skipped_files and not filename.startswith('._'):
|
||||
if filename not in skipped_files and not filename.startswith('._'):
|
||||
# check for symlinks first to prevent getting trapped in infinite symlink recursion
|
||||
if full_path.is_symlink():
|
||||
queue.append(full_path)
|
||||
|
@ -849,7 +852,7 @@ class KittenGroomerFileCheck(KittenGroomerBase):
|
|||
elif full_path.is_file():
|
||||
queue.append(full_path)
|
||||
else:
|
||||
print("SKIPPING: "+filename)
|
||||
print(f"SKIPPING: {filename}")
|
||||
return queue
|
||||
|
||||
def run(self):
|
||||
|
|
|
@ -243,17 +243,8 @@ class FileBase(object):
|
|||
mimetype = 'inode/symlink'
|
||||
self.set_property('symlink_path', os.readlink(file_path))
|
||||
else:
|
||||
try:
|
||||
mt = magic.from_file(file_path, mime=True)
|
||||
# libmagic always returns something, even if it's just 'data'
|
||||
except UnicodeEncodeError as e:
|
||||
self.add_error(e, '')
|
||||
mt = None
|
||||
try:
|
||||
mimetype = mt.decode("utf-8") # type: ignore
|
||||
except Exception:
|
||||
# FIXME: what should the exception be if mimetype isn't utf-8?
|
||||
mimetype = 'application/octet-stream'
|
||||
# libmagic always returns something, even if it's just 'data'
|
||||
mimetype = magic.from_file(file_path, mime=True)
|
||||
return mimetype
|
||||
|
||||
def _split_mimetype(self, mimetype: str) -> Tuple[Union[str, None], Union[str, None]]:
|
||||
|
|
|
@ -197,9 +197,9 @@ lxml = "*"
|
|||
|
||||
[package.source]
|
||||
type = "git"
|
||||
url = "https://github.com/grierforensics/officedissector.git"
|
||||
url = "https://github.com/Rafiot/officedissector.git"
|
||||
reference = "HEAD"
|
||||
resolved_reference = "2059a5ba08fa139362e3936578f99c4da9a9b55d"
|
||||
resolved_reference = "e3d9e8e155cc01180524c9b45b9fbec232206121"
|
||||
|
||||
[[package]]
|
||||
name = "olefile"
|
||||
|
@ -444,7 +444,7 @@ python-versions = "*"
|
|||
[metadata]
|
||||
lock-version = "1.1"
|
||||
python-versions = "^3.8"
|
||||
content-hash = "31838fedc10762d58f75f2822a5a557756f2043f62ed70e3e3bb4ef806958a9b"
|
||||
content-hash = "dd6a8563c8232219ba31a071d662bee519adaf95317f66ae81b2022269609510"
|
||||
|
||||
[metadata.files]
|
||||
attrs = [
|
||||
|
|
|
@ -26,7 +26,7 @@ pillow = "^9.3.0"
|
|||
olefile = "^0.46"
|
||||
oletools = "^0.60.1"
|
||||
python-magic = "^0.4.27"
|
||||
officedissector = {git = "https://github.com/grierforensics/officedissector.git"}
|
||||
officedissector = {git = "https://github.com/Rafiot/officedissector.git"}
|
||||
|
||||
[tool.poetry.dev-dependencies]
|
||||
tox = "^3.27.0"
|
||||
|
|
Loading…
Reference in New Issue