PyMISP/pymisp/tools/fileobject.py

88 lines
3.3 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from ..exceptions import InvalidMISPObject
from .abstractgenerator import AbstractMISPObjectGenerator
import os
from io import BytesIO
from hashlib import md5, sha1, sha256, sha512
import math
from collections import Counter
import logging
from pathlib import Path
from typing import Union, Optional
logger = logging.getLogger('pymisp')

# Optional dependencies: each import is attempted once at module load and the
# outcome is recorded in a module-level flag, so the rest of the module can
# skip the corresponding attribute instead of failing.

try:
    import pydeep  # type: ignore
    HAS_PYDEEP = True  # gates the 'ssdeep' attribute
except ImportError:
    HAS_PYDEEP = False

try:
    import magic  # type: ignore
    HAS_MAGIC = True  # gates the 'mimetype' attribute
except ImportError:
    HAS_MAGIC = False
class FileObject(AbstractMISPObjectGenerator):
    """Generator for a MISP ``file`` object built from a file on disk or an in-memory buffer.

    The whole content is held in memory; the attributes (name, size, entropy,
    hashes, sample, and optionally mimetype and ssdeep) are generated as soon
    as the instance is created.
    """

    def __init__(
        self,
        filepath: Optional[Union[Path, str]] = None,
        pseudofile: Optional[BytesIO] = None,
        filename: Optional[str] = None,
        **kwargs,
    ) -> None:
        """Load the file content and generate the object's attributes.

        :param filepath: Path of the file to read. When given, the file is
            read entirely and ``pseudofile`` is ignored.
        :param pseudofile: In-memory content, used only when ``filepath`` is
            not given. Must be a ``BytesIO`` instance.
        :param filename: Name to record for the file. When omitted, the base
            name of ``filepath`` is used.
        :param kwargs: Forwarded unchanged to the parent constructor.
        :raises InvalidMISPObject: If no name can be determined (neither
            ``filename`` nor ``filepath``), or if neither a path nor a
            ``BytesIO`` buffer is provided.
        """
        super().__init__('file', **kwargs)

        # Only warn: the object is still usable, it just lacks the attribute
        # that the missing optional dependency would have produced.
        if not HAS_PYDEEP:
            logger.warning("pydeep is missing, please install pymisp this way: pip install pymisp[fileobjects]")
        if not HAS_MAGIC:
            logger.warning("python-magic is missing, please install pymisp this way: pip install pymisp[fileobjects]")

        if filename:
            # Useful in case the file is copied with a pre-defined name by a script but we want to keep the original name
            self.__filename = filename
        elif filepath:
            self.__filename = os.path.basename(filepath)
        else:
            raise InvalidMISPObject('A file name is required (either in the path, or as a parameter).')

        if filepath:
            with open(filepath, 'rb') as f:
                self.__pseudofile = BytesIO(f.read())
        elif isinstance(pseudofile, BytesIO):
            # isinstance() alone is enough here: None is never a BytesIO.
            # WARNING: lief.parse requires a path
            self.__pseudofile = pseudofile
        else:
            raise InvalidMISPObject('File buffer (BytesIO) or a path is required.')

        # Snapshot of the raw bytes; every computed attribute is derived from it.
        self.__data = self.__pseudofile.getvalue()
        self.generate_attributes()

    def generate_attributes(self) -> None:
        """Add the file's attributes to the object.

        ``filename`` and ``size-in-bytes`` are always added. Entropy, the
        md5/sha1/sha256/sha512 digests and the ``malware-sample`` attribute are
        added only for non-empty content, and ``mimetype`` / ``ssdeep`` only
        when python-magic / pydeep could be imported.
        """
        self.add_attribute('filename', value=self.__filename)
        size = self.add_attribute('size-in-bytes', value=len(self.__data))
        if int(size.value) > 0:
            self.add_attribute('entropy', value=self.__entropy_H(self.__data))
            self.add_attribute('md5', value=md5(self.__data).hexdigest())
            self.add_attribute('sha1', value=sha1(self.__data).hexdigest())
            self.add_attribute('sha256', value=sha256(self.__data).hexdigest())
            self.add_attribute('sha512', value=sha512(self.__data).hexdigest())
            self.add_attribute('malware-sample', value=self.__filename, data=self.__pseudofile, disable_correlation=True)
            if HAS_MAGIC:
                self.add_attribute('mimetype', value=magic.from_buffer(self.__data, mime=True))
            if HAS_PYDEEP:
                self.add_attribute('ssdeep', value=pydeep.hash_buf(self.__data).decode())

    def __entropy_H(self, data: bytes) -> float:
        """Calculate the entropy of a chunk of data.

        :param data: Bytes to analyse.
        :return: Shannon entropy in bits per byte; ``0.0`` for empty input.
        """
        # NOTE: copy of the entropy function from pefile
        total = len(data)
        if total == 0:
            return 0.0

        # Count how many times each byte value occurs.
        occurrences = Counter(bytearray(data))

        entropy = 0.0
        for count in occurrences.values():
            p_x = float(count) / total
            entropy -= p_x * math.log(p_x, 2)
        return entropy