PyMISP/pymisp/tools/fileobject.py

93 lines
3.2 KiB
Python

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from pymisp.tools import MISPObjectGenerator
import os
from io import BytesIO
from hashlib import md5, sha1, sha256, sha512
import math
from collections import Counter
# pydeep supplies the ssdeep hash used by FileObject. The import is guarded so
# that this module can still be imported without it; FileObject.__init__
# checks HAS_PYDEEP and raises ImportError with install instructions.
try:
    import pydeep
    HAS_PYDEEP = True
except ImportError:
    HAS_PYDEEP = False
# Same pattern for python-magic, which FileObject uses to determine the file
# type from the raw bytes.
try:
    import magic
    HAS_MAGIC = True
except ImportError:
    HAS_MAGIC = False
class FileObject(MISPObjectGenerator):
    """Build the attributes of a MISP ``file`` object from a file's bytes.

    The content is held fully in memory, loaded either from a path on disk
    or from a caller-supplied ``BytesIO`` buffer. The size is always
    computed; entropy, digests, ssdeep and the magic file type are computed
    only when the content is non-empty.
    """

    def __init__(self, filepath=None, pseudofile=None, filename=None):
        """Load the content and compute its attributes.

        :param filepath: Path of the file to read. When truthy it takes
            precedence: ``pseudofile`` and ``filename`` are ignored and the
            reported file name is the basename of the path.
        :param pseudofile: ``BytesIO`` buffer holding the content. Only used
            when ``filepath`` is falsy.
        :param filename: File name to report alongside ``pseudofile``.
        :raises ImportError: if pydeep or python-magic could not be imported.
        :raises ValueError: if neither a path nor a ``BytesIO`` buffer is
            given. ``ValueError`` is a subclass of ``Exception``, so callers
            catching ``Exception`` keep working.
        """
        if not HAS_PYDEEP:
            raise ImportError("Please install pydeep: pip install git+https://github.com/kbandla/pydeep.git")
        if not HAS_MAGIC:
            raise ImportError("Please install python-magic: pip install python-magic.")
        if filepath:
            self.filepath = filepath
            self.filename = os.path.basename(self.filepath)
            # Read everything up front so both input modes expose the same
            # in-memory BytesIO afterwards.
            with open(filepath, 'rb') as f:
                self.pseudofile = BytesIO(f.read())
        elif pseudofile and isinstance(pseudofile, BytesIO):
            # WARNING: lief.parse requires a path
            self.filepath = None
            self.pseudofile = pseudofile
            self.filename = filename
        else:
            raise ValueError('File buffer (BytesIO) or a path is required.')
        super().__init__('file')
        # getvalue() returns the whole buffer regardless of the current
        # stream position.
        self.data = self.pseudofile.getvalue()
        self.generate_attributes()

    def generate_attributes(self):
        """Compute the attributes derived from ``self.data``.

        Always sets ``self.size``. For non-empty content it also sets
        ``entropy``, ``md5``, ``sha1``, ``sha256``, ``sha512``, ``filetype``
        and ``ssdeep``; for empty content those attributes are left unset.
        """
        self.size = len(self.data)
        if self.size == 0:
            return
        self.entropy = self.__entropy_H(self.data)
        self.md5 = md5(self.data).hexdigest()
        self.sha1 = sha1(self.data).hexdigest()
        self.sha256 = sha256(self.data).hexdigest()
        self.sha512 = sha512(self.data).hexdigest()
        self.filetype = magic.from_buffer(self.data)
        # hash_buf's result is decoded to text here, so it is presumably
        # bytes - TODO confirm against the pydeep version in use.
        self.ssdeep = pydeep.hash_buf(self.data).decode()

    def __entropy_H(self, data):
        """Return the Shannon entropy of ``data`` in bits per byte.

        Returns ``0.0`` for empty input.
        """
        # NOTE: adapted from the entropy function in pefile
        total = len(data)
        if total == 0:
            return 0.0
        occurences = Counter(bytearray(data))
        entropy = 0.0
        for count in occurences.values():
            p_x = float(count) / total
            # log2 is exact for power-of-two probabilities, unlike the
            # log(x) / log(2) quotient computed by math.log(p_x, 2).
            entropy -= p_x * math.log2(p_x)
        return entropy

    def dump(self):
        """Assemble the attribute dict and hand it to ``self._fill_object``.

        ``filename`` and ``size-in-bytes`` are always present. The entropy,
        digests and ``malware-sample`` entries are only added for non-empty
        content, because they are only computed in that case.

        :return: whatever ``self._fill_object`` returns; that method is not
            defined in this class (presumably inherited from
            ``MISPObjectGenerator`` - verify there).
        """
        file_object = {
            'filename': {'value': self.filename},
            'size-in-bytes': {'value': self.size},
        }
        if self.size > 0:
            file_object['entropy'] = {'value': self.entropy}
            file_object['ssdeep'] = {'value': self.ssdeep}
            file_object['sha512'] = {'value': self.sha512}
            file_object['md5'] = {'value': self.md5}
            file_object['sha1'] = {'value': self.sha1}
            file_object['sha256'] = {'value': self.sha256}
            file_object['malware-sample'] = {'value': '{}|{}'.format(self.filename, self.md5), 'data': self.pseudofile}
            # TODO: attributes not generated yet: authentihash, sha-224,
            # sha-384, sha512/224, sha512/256, tlsh.
        return self._fill_object(file_object)