mirror of https://github.com/MISP/PyMISP
93 lines
3.2 KiB
Python
93 lines
3.2 KiB
Python
#!/usr/bin/env python3
|
|
# -*- coding: utf-8 -*-
|
|
|
|
from pymisp.tools import MISPObjectGenerator
|
|
import os
|
|
from io import BytesIO
|
|
from hashlib import md5, sha1, sha256, sha512
|
|
import math
|
|
from collections import Counter
|
|
|
|
# Optional third-party dependencies. Each flag records whether the matching
# module could be imported; FileObject.__init__ checks both flags and raises
# ImportError when one of them is False.
try:
    import pydeep
except ImportError:
    HAS_PYDEEP = False
else:
    HAS_PYDEEP = True

try:
    import magic
except ImportError:
    HAS_MAGIC = False
else:
    HAS_MAGIC = True
|
|
|
|
|
|
class FileObject(MISPObjectGenerator):
    """Generate the attributes of a 'file' object from a path or a buffer.

    The content is taken either from a file on disk (``filepath``) or from an
    in-memory ``BytesIO`` buffer (``pseudofile``). Size, entropy, hashes, the
    libmagic file type and the ssdeep hash are computed at construction time.
    """

    def __init__(self, filepath=None, pseudofile=None, filename=None):
        """Load the file content and compute its attributes.

        :param filepath: path of the file to read; takes precedence over
            ``pseudofile`` when truthy. The file name is derived from it.
        :param pseudofile: ``BytesIO`` buffer holding the content, used only
            when ``filepath`` is not given.
        :param filename: name stored for the object when a buffer is used;
            ignored when ``filepath`` is given.
        :raises ImportError: if pydeep or python-magic is not installed.
        :raises Exception: if neither a path nor a BytesIO buffer is provided.
        """
        # Both optional dependencies are mandatory for this generator.
        requirements = (
            (HAS_PYDEEP, "Please install pydeep: pip install git+https://github.com/kbandla/pydeep.git"),
            (HAS_MAGIC, "Please install python-magic: pip install python-magic."),
        )
        for is_available, install_hint in requirements:
            if not is_available:
                raise ImportError(install_hint)

        if filepath:
            self.filepath = filepath
            self.filename = os.path.basename(self.filepath)
            with open(filepath, 'rb') as handle:
                # Keep the whole content in memory as a buffer.
                self.pseudofile = BytesIO(handle.read())
        elif pseudofile and isinstance(pseudofile, BytesIO):
            # WARNING: lief.parse requires a path
            self.filepath = None
            self.pseudofile = pseudofile
            self.filename = filename
        else:
            raise Exception('File buffer (BytesIO) or a path is required.')

        MISPObjectGenerator.__init__(self, 'file')
        self.data = self.pseudofile.getvalue()
        self.generate_attributes()

    def generate_attributes(self):
        """Compute size, entropy, hashes, file type and ssdeep of ``self.data``.

        Only ``self.size`` is set when the content is empty; every other
        attribute is left undefined in that case.
        """
        payload = self.data
        self.size = len(payload)
        if self.size == 0:
            return

        self.entropy = self.__entropy_H(payload)
        digests = (('md5', md5), ('sha1', sha1), ('sha256', sha256), ('sha512', sha512))
        for attr_name, hasher in digests:
            setattr(self, attr_name, hasher(payload).hexdigest())
        self.filetype = magic.from_buffer(payload)
        self.ssdeep = pydeep.hash_buf(payload).decode()

    def __entropy_H(self, data):
        """Calculate the entropy of a chunk of data."""
        # NOTE: copy of the entropy function from pefile

        total = len(data)
        if total == 0:
            return 0.0

        # Number of occurrences of each byte value in the chunk.
        byte_counts = Counter(bytearray(data))

        result = 0
        for count in byte_counts.values():
            probability = float(count) / total
            result -= probability * math.log(probability, 2)

        return result

    def dump(self):
        """Return the object built from the computed attributes.

        The attribute mapping is handed to ``self._fill_object``, which is not
        defined in this class (presumably inherited from MISPObjectGenerator).
        Hash-related entries and the malware sample are only included when
        the content is not empty.
        """
        fields = {
            'filename': {'value': self.filename},
            'size-in-bytes': {'value': self.size},
        }
        if self.size > 0:
            # Relation names that match the instance attribute names.
            for key in ('entropy', 'ssdeep', 'sha512', 'md5', 'sha1', 'sha256'):
                fields[key] = {'value': getattr(self, key)}
            fields['malware-sample'] = {
                'value': '{}|{}'.format(self.filename, self.md5),
                'data': self.pseudofile,
            }
            # Not populated yet: authentihash, sha-224, sha-384,
            # sha512/224, sha512/256, tlsh.
        return self._fill_object(fields)
|