#!/usr/bin/env python3 # -*- coding: utf-8 -*- from ..exceptions import InvalidMISPObject from .abstractgenerator import AbstractMISPObjectGenerator import os from io import BytesIO from hashlib import md5, sha1, sha256, sha512 import math from collections import Counter import logging from pathlib import Path from typing import Union logger = logging.getLogger('pymisp') try: import pydeep # type: ignore HAS_PYDEEP = True except ImportError: HAS_PYDEEP = False try: import magic # type: ignore HAS_MAGIC = True except ImportError: HAS_MAGIC = False class FileObject(AbstractMISPObjectGenerator): def __init__(self, filepath: Union[Path, str] = None, pseudofile: BytesIO = None, filename: str = None, **kwargs): super().__init__('file', **kwargs) if not HAS_PYDEEP: logger.warning("Please install pydeep: pip install git+https://github.com/kbandla/pydeep.git") if not HAS_MAGIC: logger.warning("Please install python-magic: pip install python-magic.") if filename: # Useful in case the file is copied with a pre-defined name by a script but we want to keep the original name self.__filename = filename elif filepath: self.__filename = os.path.basename(filepath) else: raise InvalidMISPObject('A file name is required (either in the path, or as a parameter).') if filepath: with open(filepath, 'rb') as f: self.__pseudofile = BytesIO(f.read()) elif pseudofile and isinstance(pseudofile, BytesIO): # WARNING: lief.parse requires a path self.__pseudofile = pseudofile else: raise InvalidMISPObject('File buffer (BytesIO) or a path is required.') self.__data = self.__pseudofile.getvalue() self.generate_attributes() def generate_attributes(self): self.add_attribute('filename', value=self.__filename) size = self.add_attribute('size-in-bytes', value=len(self.__data)) if int(size.value) > 0: self.add_attribute('entropy', value=self.__entropy_H(self.__data)) self.add_attribute('md5', value=md5(self.__data).hexdigest()) self.add_attribute('sha1', value=sha1(self.__data).hexdigest()) self.add_attribute('sha256', value=sha256(self.__data).hexdigest()) self.add_attribute('sha512', value=sha512(self.__data).hexdigest()) self.add_attribute('malware-sample', value=self.__filename, data=self.__pseudofile, disable_correlation=True) if HAS_MAGIC: self.add_attribute('mimetype', value=magic.from_buffer(self.__data, mime=True)) if HAS_PYDEEP: self.add_attribute('ssdeep', value=pydeep.hash_buf(self.__data).decode()) def __entropy_H(self, data: bytes) -> float: """Calculate the entropy of a chunk of data.""" # NOTE: copy of the entropy function from pefile if len(data) == 0: return 0.0 occurrences = Counter(bytearray(data)) entropy = 0.0 for x in occurrences.values(): p_x = float(x) / len(data) entropy -= p_x * math.log(p_x, 2) return entropy