From 6098cd869f07552c0d9aca0144b8d70ccdfde848 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Fri, 22 Nov 2019 17:36:24 +0100 Subject: [PATCH] chg: Make the feed generator more generic --- examples/feed-generator/generate.py | 9 +- pymisp/abstract.py | 2 +- pymisp/mispevent.py | 152 +++++++++++++++++----------- pymisp/tools/__init__.py | 1 + pymisp/tools/feed_meta_generator.py | 26 +++++ 5 files changed, 127 insertions(+), 63 deletions(-) create mode 100644 pymisp/tools/feed_meta_generator.py diff --git a/examples/feed-generator/generate.py b/examples/feed-generator/generate.py index 991b2da..174428c 100755 --- a/examples/feed-generator/generate.py +++ b/examples/feed-generator/generate.py @@ -14,9 +14,9 @@ def init(): # If we have an old settings.py file then this variable won't exist global valid_attribute_distributions try: - valid_attribute_distributions = valid_attribute_distribution_levels + valid_attribute_distributions = [int(v) for v in valid_attribute_distribution_levels] except Exception: - valid_attribute_distributions = ['0', '1', '2', '3', '4', '5'] + valid_attribute_distributions = [0, 1, 2, 3, 4, 5] return ExpandedPyMISP(url, key, ssl) @@ -64,7 +64,10 @@ if __name__ == '__main__': total = len(events) for event in events: e = misp.get_event(event.uuid, pythonify=True) - e_feed = e.to_feed() + e_feed = e.to_feed(valid_distributions=valid_attribute_distributions, with_meta=True) + if not e_feed: + print(f'Invalid distribution {e.distribution}, skipping') + continue hashes += [[h, e.uuid] for h in e_feed.pop('_hashes')] manifest.update(e_feed.pop('_manifest')) saveEvent(e_feed) diff --git a/pymisp/abstract.py b/pymisp/abstract.py index 29e027c..7ea7ced 100644 --- a/pymisp/abstract.py +++ b/pymisp/abstract.py @@ -287,7 +287,7 @@ class AbstractMISP(MutableMapping, MISPFileCache): raise Exception('Unable to export in the feed format, _fields_for_feed is missing.') to_return = {} for field in self._fields_for_feed: - if getattr(self, field, None): + if getattr(self, field, None) is not None: if field in ['timestamp', 'publish_timestamp']: to_return[field] = self._datetime_to_timestamp(getattr(self, field)) elif field == 'date': diff --git a/pymisp/mispevent.py b/pymisp/mispevent.py index e5332da..dda64fb 100644 --- a/pymisp/mispevent.py +++ b/pymisp/mispevent.py @@ -123,6 +123,25 @@ class MISPAttribute(AbstractMISP): self.ShadowAttribute = [] self.Sighting = [] + def hash_values(self, algorithm='sha512'): + """Compute the hash of every values for fast lookups""" + if algorithm not in hashlib.algorithms_available: + raise PyMISPError('The algorithm {} is not available for hashing.'.format(algorithm)) + if '|' in self.type or self.type == 'malware-sample': + hashes = [] + for v in self.value.split('|'): + h = hashlib.new(algorithm) + h.update(v.encode("utf-8")) + hashes.append(h.hexdigest()) + return hashes + else: + h = hashlib.new(algorithm) + to_encode = self.value + if not isinstance(to_encode, str): + to_encode = str(to_encode) + h.update(to_encode.encode("utf-8")) + return [h.hexdigest()] + def _to_feed(self, valid_distributions): if (hasattr(self, 'distribution') and self.distribution is not None and self.distribution not in valid_distributions): @@ -132,16 +151,6 @@ class MISPAttribute(AbstractMISP): to_return['data'] = base64.b64encode(self.data.getvalue()).decode() if self.tags: to_return['Tag'] = list(filter(None, [tag._to_feed() for tag in self.tags])) - # Compute the hash of every values for fast lookups - hashes = [] - if '|' in self.type or self.type == 'malware-sample': - hashes = [hashlib.md5(v.encode("utf-8")).hexdigest() for v in self.value.split('|')] - else: - to_encode = self.value - if not isinstance(to_encode, str): - to_encode = str(to_encode) - hashes = [hashlib.md5(to_encode.encode("utf-8")).hexdigest()] - to_return['_hashes'] = hashes return to_return @property @@ -466,56 +475,34 @@ class MISPEvent(AbstractMISP): self.RelatedEvent = [] self.ShadowAttribute = [] - def to_feed(self, date=None, uuid=None, analysis=2, threat_level_id=4, valid_distributions=[0, 1, 2, 3, 4, 5]): - """ Generate a json output for MISP Feed. - Notes: - * valid_distributions only makes sense if the distribution key is set (i.e. the event is exported from a MISP instance) - * analysis: 0 means initial, 1 ongoing, 2 completed - * threat_level_id 4 means undefine. Tags are recommended. - """ - if hasattr(self, 'distribution') and self.distribution not in valid_distributions: - raise PyMISPError('Invalid event distribution ({}). Not in {}'.format(self.distribution, ', '.join(valid_distributions))) - - if date: - self.set_date(date) - elif not hasattr(self, 'date'): + def _set_default(self): + """There are a few keys that could be set by default""" + if not hasattr(self, 'uuid'): + self.uuid = str(uuid.uuid4()) + if not hasattr(self, 'date'): self.set_date(datetime.date.today()) - if not hasattr(self, 'timestamp'): self.timestamp = datetime.datetime.timestamp(datetime.datetime.now()) - - if uuid: - self.uuid = uuid - elif not hasattr(self, 'uuid'): - self.uuid = str(uuid.uuid4()) - if not hasattr(self, 'analysis'): - self.analysis = analysis + # analysis: 0 means initial, 1 ongoing, 2 completed + self.analysis = 2 if not hasattr(self, 'threat_level_id'): - self.threat_level_id = threat_level_id + # threat_level_id 4 means undefined. Tags are recommended. + self.threat_level_id = 4 - to_return = super(MISPEvent, self)._to_feed() - to_return['Orgc'] = self.Orgc._to_feed() - to_return['Tag'] = list(filter(None, [tag._to_feed() for tag in self.tags])) + @property + def manifest(self): + required = ['info', 'Orgc'] + for r in required: + if not hasattr(self, r): + raise PyMISPError('The field {} is required to generate the event manifest.') - to_return['_hashes'] = [] + self._set_default() - if self.attributes: - to_return['Attribute'] = list(filter(None, [attribute._to_feed(valid_distributions) for attribute in self.attributes])) - # Get the hash of every values for fast lookups - for attribute in to_return['Attribute']: - to_return['_hashes'] += attribute.pop('_hashes') - - if self.objects: - to_return['Object'] = list(filter(None, [obj._to_feed(valid_distributions) for obj in self.objects])) - # Get the hash of every values for fast lookups - for obj in to_return['Object']: - to_return['_hashes'] += obj.pop('_hashes') - - to_return['_manifest'] = { + return { self.uuid: { - 'Orgc': to_return['Orgc'], - 'Tag': to_return['Tag'], + 'Orgc': self.Orgc._to_feed(), + 'Tag': list(filter(None, [tag._to_feed() for tag in self.tags])), 'info': self.info, 'date': self.date.isoformat(), 'analysis': self.analysis, @@ -523,6 +510,60 @@ class MISPEvent(AbstractMISP): 'timestamp': self._datetime_to_timestamp(self.timestamp) } } + + def attributes_hashes(self, algorithm='sha512'): + to_return = [] + for attribute in self.attributes: + to_return += attribute.hash_values(algorithm) + for obj in self.objects: + for attribute in obj.attributes: + to_return += attribute.hash_values(algorithm) + return to_return + + def to_feed(self, valid_distributions=[0, 1, 2, 3, 4, 5], with_meta=False): + """ Generate a json output for MISP Feed. + Notes: + * valid_distributions only makes sense if the distribution key is set (i.e. the event is exported from a MISP instance) + """ + required = ['info', 'Orgc'] + for r in required: + if not hasattr(self, r): + raise PyMISPError('The field {} is required to generate the event feed output.') + + if hasattr(self, 'distribution') and int(self.distribution) not in valid_distributions: + return + + self._set_default() + + to_return = super(MISPEvent, self)._to_feed() + if with_meta: + to_return['_hashes'] = [] + to_return['_manifest'] = self.manifest + + to_return['Orgc'] = self.Orgc._to_feed() + to_return['Tag'] = list(filter(None, [tag._to_feed() for tag in self.tags])) + if self.attributes: + to_return['Attribute'] = [] + for attribute in self.attributes: + if (valid_distributions and attribute.get('distribution') is not None and attribute.distribution not in valid_distributions): + continue + to_return['Attribute'].append(attribute._to_feed(valid_distributions)) + if with_meta: + to_return['_hashes'] += attribute.hash_values('md5') + + if self.objects: + to_return['Object']['Attribute'] = [] + for obj in self.objects: + if (valid_distributions and obj.get('distribution') is not None and obj.distribution not in valid_distributions): + continue + to_return['Object'] = obj._to_feed() + for attribute in obj.attributes: + if (valid_distributions and attribute.get('distribution') is not None and attribute.distribution not in valid_distributions): + continue + to_return['Object']['Attribute'].append(attribute._to_feed(valid_distributions)) + if with_meta: + to_return['_hashes'] += attribute.hash_values('md5') + return to_return @property @@ -1297,15 +1338,8 @@ class MISPObject(AbstractMISP): self.template_version = self._definition['version'] return True - def _to_feed(self, valid_distributions): - if hasattr(self, 'distribution') and self.distribution not in valid_distributions: - return False + def _to_feed(self): to_return = super(MISPObject, self)._to_feed() - to_return['Attribute'] = list(filter(None, [attribute._to_feed(valid_distributions) for attribute in self.attributes])) - # Get the hash of every values for fast lookups - to_return['_hashes'] = [] - for attribute in to_return['Attribute']: - to_return['_hashes'] += attribute.pop('_hashes') if self.references: to_return['ObjectReference'] = [reference._to_feed() for reference in self.references] return to_return diff --git a/pymisp/tools/__init__.py b/pymisp/tools/__init__.py index eb23098..6759174 100644 --- a/pymisp/tools/__init__.py +++ b/pymisp/tools/__init__.py @@ -21,3 +21,4 @@ if sys.version_info >= (3, 6): from .vehicleobject import VehicleObject # noqa from .csvloader import CSVLoader # noqa from .sshauthkeyobject import SSHAuthorizedKeysObject # noqa + from .feed_meta_generator import feed_meta_generator # noqa diff --git a/pymisp/tools/feed_meta_generator.py b/pymisp/tools/feed_meta_generator.py new file mode 100644 index 0000000..9ff53d2 --- /dev/null +++ b/pymisp/tools/feed_meta_generator.py @@ -0,0 +1,26 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +from pathlib import Path +from pymisp import MISPEvent +import json + + +def feed_meta_generator(path: Path): + manifests = {} + hashes = [] + + for f_name in path.glob('*.json'): + if str(f_name.name) == 'manifest.json': + continue + event = MISPEvent() + event.load_file(str(f_name)) + manifests.update(event.manifest) + hashes += [f'{h},{event.uuid}' for h in event.attributes_hashes('md5')] + + with (path / 'manifest.json').open('w') as f: + json.dump(manifests, f) + + with (path / 'hashes.csv').open('w') as f: + for h in hashes: + f.write(f'{h}\n')