diff --git a/stix2/__init__.py b/stix2/__init__.py
index 187d18a..4a9ec75 100644
--- a/stix2/__init__.py
+++ b/stix2/__init__.py
@@ -3,6 +3,7 @@
 # flake8: noqa
 
 from .bundle import Bundle
+from .observables import Artifact, AutonomousSystem, EmailAddress, File
 from .other import ExternalReference, KillChainPhase, MarkingDefinition, \
     GranularMarking, StatementMarking, TLPMarking
 from .sdo import AttackPattern, Campaign, CourseOfAction, Identity, Indicator, \
@@ -31,6 +32,13 @@ OBJ_MAP = {
     'vulnerability': Vulnerability,
 }
 
+OBJ_MAP_OBSERVABLE = {
+    'artifact': Artifact,
+    'autonomous-system': AutonomousSystem,
+    'email-address': EmailAddress,
+    'file': File,
+}
+
 
 def parse(data):
     """Deserialize a string or file-like object into a STIX object"""
@@ -43,9 +51,27 @@ def parse(data):
     else:
         try:
             obj_class = OBJ_MAP[obj['type']]
-            return obj_class(**obj)
         except KeyError:
             # TODO handle custom objects
-            raise ValueError("Can't parse unknown object type!")
+            raise ValueError("Can't parse unknown object type '%s'!" % obj['type'])
+        return obj_class(**obj)
 
     return obj
+
+
+def parse_observable(data, _valid_refs):
+    """Deserialize a string or file-like object into a STIX Cyber Observable
+    object.
+    """
+
+    obj = get_dict(data)
+    obj['_valid_refs'] = _valid_refs
+
+    if 'type' not in obj:
+        raise ValueError("'type' is a required field!")
+    try:
+        obj_class = OBJ_MAP_OBSERVABLE[obj['type']]
+    except KeyError:
+        # TODO handle custom objects
+        raise ValueError("Can't parse unknown object type '%s'!" % obj['type'])
+    return obj_class(**obj)
diff --git a/stix2/base.py b/stix2/base.py
index 1c90dab..c69d44d 100644
--- a/stix2/base.py
+++ b/stix2/base.py
@@ -7,8 +7,9 @@
 import datetime as dt
 import json
 
-from .exceptions import ExtraFieldsError, ImmutableError, InvalidValueError, \
-    MissingFieldsError, RevokeError, UnmodifiablePropertyError
+from .exceptions import ExtraFieldsError, ImmutableError, InvalidObjRefError, \
+    InvalidValueError, MissingFieldsError, RevokeError, \
+    UnmodifiablePropertyError
 from .utils import format_datetime, get_timestamp, NOW, parse_into_datetime
 
 __all__ = ['STIXJSONEncoder', '_STIXBase']
@@ -142,3 +143,23 @@ class _STIXBase(collections.Mapping):
         if self.revoked:
             raise RevokeError("revoke")
         return self.new_version(revoked=True)
+
+
+class Observable(_STIXBase):
+
+    def __init__(self, **kwargs):
+        self._STIXBase__valid_refs = kwargs.pop('_valid_refs', [])  # built directly: no local scope
+        super(Observable, self).__init__(**kwargs)
+
+    def _check_property(self, prop_name, prop, kwargs):
+        super(Observable, self)._check_property(prop_name, prop, kwargs)
+        if prop_name.endswith('_ref') and prop_name in kwargs:  # optional refs may be absent
+            ref = kwargs[prop_name].split('--', 1)[0]
+            if ref not in self._STIXBase__valid_refs:
+                raise InvalidObjRefError(self.__class__, prop_name, "'%s' is not a valid object in local scope" % ref)
+        if prop_name.endswith('_refs') and prop_name in kwargs:  # optional refs may be absent
+            for r in kwargs[prop_name]:
+                ref = r.split('--', 1)[0]
+                if ref not in self._STIXBase__valid_refs:
+                    raise InvalidObjRefError(self.__class__, prop_name, "'%s' is not a valid object in local scope" % ref)
+        # TODO also check the type of the object referenced, not just that the key exists
diff --git a/stix2/exceptions.py b/stix2/exceptions.py
index 20e44a7..fdd3a46 100644
--- a/stix2/exceptions.py
+++ b/stix2/exceptions.py
@@ -51,6 +51,33 @@ class ImmutableError(STIXError, ValueError):
         super(ImmutableError, self).__init__("Cannot modify properties after creation.")
 
 
+class DictionaryKeyError(STIXError, ValueError):
+    """Dictionary key does not conform to the correct format."""
+
+    def __init__(self, key, reason):
+        super(DictionaryKeyError, self).__init__()
+        self.key = key
+        self.reason = reason
+
+    def __str__(self):
+        msg = "Invalid dictionary key {0.key}: ({0.reason})."
+        return msg.format(self)
+
+
+class InvalidObjRefError(STIXError, ValueError):
+    """A STIX Cyber Observable Object contains an invalid object reference."""
+
+    def __init__(self, cls, prop_name, reason):
+        super(InvalidObjRefError, self).__init__()
+        self.cls = cls
+        self.prop_name = prop_name
+        self.reason = reason
+
+    def __str__(self):
+        msg = "Invalid object reference for '{0.cls.__name__}:{0.prop_name}': {0.reason}"
+        return msg.format(self)
+
+
 class UnmodifiablePropertyError(STIXError, ValueError):
     """Attempted to modify an unmodifiable property of object when creating a new version"""
 
diff --git a/stix2/observables.py b/stix2/observables.py
new file mode 100644
index 0000000..7cad32a
--- /dev/null
+++ b/stix2/observables.py
@@ -0,0 +1,46 @@
+"""STIX 2.0 Cyber Observable Objects"""
+
+from .base import Observable
+# from .properties import (BinaryProperty, BooleanProperty, DictionaryProperty,
+#                          HashesProperty, HexProperty, IDProperty,
+#                          IntegerProperty, ListProperty, ReferenceProperty,
+#                          StringProperty, TimestampProperty, TypeProperty)
+from .properties import BinaryProperty, HashesProperty, IntegerProperty, ObjectReferenceProperty, StringProperty, TypeProperty
+
+
+class Artifact(Observable):
+    _type = 'artifact'
+    _properties = {
+        'type': TypeProperty(_type),
+        'mime_type': StringProperty(),
+        'payload_bin': BinaryProperty(),
+        'url': StringProperty(),
+        'hashes': HashesProperty(),
+    }
+
+
+class AutonomousSystem(Observable):
+    _type = 'autonomous-system'
+    _properties = {
+        'type': TypeProperty(_type),
+        'number': IntegerProperty(),
+        'name': StringProperty(),
+        'rir': StringProperty(),
+    }
+
+
+class EmailAddress(Observable):
+    _type = 'email-address'
+    _properties = {
+        'type': TypeProperty(_type),
+        'value': StringProperty(required=True),
+        'display_name': StringProperty(),
+        'belongs_to_ref': ObjectReferenceProperty(),
+    }
+
+
+class File(Observable):
+    _type = 'file'
+    _properties = {
+        'type': TypeProperty(_type),
+    }
diff --git a/stix2/properties.py b/stix2/properties.py
index 76fe31b..57ebeca 100644
--- a/stix2/properties.py
+++ b/stix2/properties.py
@@ -1,3 +1,5 @@
+import base64
+import binascii
 import collections
 import datetime as dt
 import inspect
@@ -8,7 +10,8 @@ from dateutil import parser
 import pytz
 from six import text_type
 
-from .base import _STIXBase
+from .base import _STIXBase, Observable
+from .exceptions import DictionaryKeyError
 
 
 class Property(object):
@@ -213,6 +216,90 @@ class TimestampProperty(Property):
         return pytz.utc.localize(parsed)
 
 
+class ObservableProperty(Property):
+
+    def clean(self, value):
+        dictified = dict(value)
+        from .__init__ import parse_observable  # avoid circular import
+        for key, obj in dictified.items():
+            parsed_obj = parse_observable(obj, dictified.keys())
+            if not issubclass(type(parsed_obj), Observable):
+                raise ValueError("Objects in an observable property must be "
+                                 "Cyber Observable Objects")
+            dictified[key] = parsed_obj
+
+        return dictified
+
+
+class DictionaryProperty(Property):
+
+    def clean(self, value):
+        dictified = dict(value)
+        for k in dictified.keys():
+            if len(k) < 3:
+                raise DictionaryKeyError(k, "shorter than 3 characters")
+            elif len(k) > 256:
+                raise DictionaryKeyError(k, "longer than 256 characters")
+            if not re.match('^[a-zA-Z0-9_-]+$', k):
+                raise DictionaryKeyError(k, "contains characters other than "
+                                         "lowercase a-z, uppercase A-Z, "
+                                         "numerals 0-9, hyphen (-), or "
+                                         "underscore (_)")
+        return dictified
+
+
+HASHES_REGEX = {
+    "MD5": ("^[a-fA-F0-9]{32}$", "MD5"),
+    "MD6": ("^([a-fA-F0-9]{32}|[a-fA-F0-9]{40}|[a-fA-F0-9]{56}|[a-fA-F0-9]{64}|[a-fA-F0-9]{96}|[a-fA-F0-9]{128})$", "MD6"),  # grouped so ^/$ anchor every alternative
+    "RIPEMD160": ("^[a-fA-F0-9]{40}$", "RIPEMD-160"),
+    "SHA1": ("^[a-fA-F0-9]{40}$", "SHA-1"),
+    "SHA224": ("^[a-fA-F0-9]{56}$", "SHA-224"),
+    "SHA256": ("^[a-fA-F0-9]{64}$", "SHA-256"),
+    "SHA384": ("^[a-fA-F0-9]{96}$", "SHA-384"),
+    "SHA512": ("^[a-fA-F0-9]{128}$", "SHA-512"),
+    "SHA3224": ("^[a-fA-F0-9]{56}$", "SHA3-224"),
+    "SHA3256": ("^[a-fA-F0-9]{64}$", "SHA3-256"),
+    "SHA3384": ("^[a-fA-F0-9]{96}$", "SHA3-384"),
+    "SHA3512": ("^[a-fA-F0-9]{128}$", "SHA3-512"),
+    "SSDEEP": ("^[a-zA-Z0-9/+:.]{1,128}$", "ssdeep"),
+    "WHIRLPOOL": ("^[a-fA-F0-9]{128}$", "WHIRLPOOL"),
+}
+
+
+class HashesProperty(DictionaryProperty):
+
+    def clean(self, value):
+        clean_dict = super(HashesProperty, self).clean(value)
+        for k, v in list(clean_dict.items()):  # snapshot: keys are renamed in the loop
+            key = k.upper().replace('-', '')
+            if key in HASHES_REGEX:
+                vocab_key = HASHES_REGEX[key][1]
+                if not re.match(HASHES_REGEX[key][0], v):
+                    raise ValueError("'%s' is not a valid %s hash" % (v, vocab_key))
+                if k != vocab_key:
+                    clean_dict[vocab_key] = clean_dict[k]
+                    del clean_dict[k]
+        return clean_dict
+
+
+class BinaryProperty(Property):
+
+    def clean(self, value):
+        try:
+            base64.b64decode(value)
+        except (binascii.Error, TypeError):
+            raise ValueError("must contain a base64 encoded string")
+        return value
+
+
+class HexProperty(Property):
+
+    def clean(self, value):
+        if not re.match('^([a-fA-F0-9]{2})+$', value):
+            raise ValueError("must contain an even number of hexadecimal characters")
+        return value
+
+
 REF_REGEX = re.compile("^[a-z][a-z-]+[a-z]--[0-9a-fA-F]{8}-[0-9a-fA-F]{4}"
                        "-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
 
@@ -248,3 +335,7 @@ class SelectorProperty(Property):
         if not SELECTOR_REGEX.match(value):
             raise ValueError("must adhere to selector syntax.")
         return value
+
+
+class ObjectReferenceProperty(StringProperty):
+    pass
diff --git a/stix2/sdo.py b/stix2/sdo.py
index 693b750..ca1e5b4 100644
--- a/stix2/sdo.py
+++ b/stix2/sdo.py
@@ -3,9 +3,9 @@
 from .base import _STIXBase
 from .common import COMMON_PROPERTIES
 from .other import KillChainPhase
-from .properties import (IDProperty, IntegerProperty, ListProperty, Property,
-                         ReferenceProperty, StringProperty, TimestampProperty,
-                         TypeProperty)
+from .properties import (IDProperty, IntegerProperty, ListProperty,
+                         ObservableProperty, ReferenceProperty,
+                         StringProperty, TimestampProperty, TypeProperty)
 from .utils import NOW
 
 
@@ -126,7 +126,7 @@ class ObservedData(_STIXBase):
         'first_observed': TimestampProperty(required=True),
         'last_observed': TimestampProperty(required=True),
         'number_observed': IntegerProperty(required=True),
-        'objects': Property(),
+        'objects': ObservableProperty(),
     })
 
 
diff --git a/stix2/test/test_observed_data.py b/stix2/test/test_observed_data.py
index 52dc15b..28388c8 100644
--- a/stix2/test/test_observed_data.py
+++ b/stix2/test/test_observed_data.py
@@ -1,10 +1,12 @@
 import datetime as dt
+import re
 
 import pytest
 import pytz
 import stix2
 
 from .constants import OBSERVED_DATA_ID
+from ..exceptions import InvalidValueError
 
 EXPECTED = """{
     "created": "2016-04-06T19:58:16Z",
@@ -70,6 +72,84 @@ def test_parse_observed_data(data):
     assert odata.first_observed == dt.datetime(2015, 12, 21, 19, 0, 0, tzinfo=pytz.utc)
     assert odata.last_observed == dt.datetime(2015, 12, 21, 19, 0, 0, tzinfo=pytz.utc)
     assert odata.created_by_ref == "identity--f431f809-377b-45e0-aa1c-6a4751cae5ff"
-    # assert odata.objects["0"].type == "file"  # TODO
+    assert odata.objects["0"].type == "file"
+
+
+@pytest.mark.parametrize("data", [
+    """"0": {
+        "type": "artifact",
+        "mime_type": "image/jpeg",
+        "payload_bin": "VBORw0KGgoAAAANSUhEUgAAADI=="
+    }""",
+    """"0": {
+        "type": "artifact",
+        "mime_type": "image/jpeg",
+        "url": "https://upload.wikimedia.org/wikipedia/commons/b/b4/JPEG_example_JPG_RIP_100.jpg",
+        "hashes": {
+            "MD5": "6826f9a05da08134006557758bb3afbb"
+        }
+    }""",
+])
+def test_parse_artifact_valid(data):
+    odata_str = re.compile(r'"objects".+\},', re.DOTALL).sub('"objects": { %s },' % data, EXPECTED)
+    odata = stix2.parse(odata_str)
+    assert odata.objects["0"].type == "artifact"
+
+
+@pytest.mark.parametrize("data", [
+    """"0": {
+        "type": "artifact",
+        "mime_type": "image/jpeg",
+        "payload_bin": "abcVBORw0KGgoAAAANSUhEUgAAADI=="
+    }""",
+    """"0": {
+        "type": "artifact",
+        "mime_type": "image/jpeg",
+        "url": "https://upload.wikimedia.org/wikipedia/commons/b/b4/JPEG_example_JPG_RIP_100.jpg",
+        "hashes": {
+            "MD5": "a"
+        }
+    }""",
+])
+def test_parse_artifact_invalid(data):
+    odata_str = re.compile(r'"objects".+\},', re.DOTALL).sub('"objects": { %s },' % data, EXPECTED)
+    with pytest.raises(ValueError):
+        stix2.parse(odata_str)
+
+
+@pytest.mark.parametrize("data", [
+    """"0": {
+        "type": "autonomous-system",
+        "number": 15139,
+        "name": "Slime Industries",
+        "rir": "ARIN"
+    }""",
+])
+def test_parse_autonomous_system_valid(data):
+    odata_str = re.compile(r'"objects".+\},', re.DOTALL).sub('"objects": { %s },' % data, EXPECTED)
+    odata = stix2.parse(odata_str)
+    assert odata.objects["0"].type == "autonomous-system"
+    assert odata.objects["0"].number == 15139
+    assert odata.objects["0"].name == "Slime Industries"
+    assert odata.objects["0"].rir == "ARIN"
+
+
+@pytest.mark.parametrize("data", [
+    """"1": {
+        "type": "email-address",
+        "value": "john@example.com",
+        "display_name": "John Doe",
+        "belongs_to_ref": "0"
+    }""",
+])
+def test_parse_email_address(data):
+    odata_str = re.compile(r'\}.+\},', re.DOTALL).sub('}, %s},' % data, EXPECTED)
+    odata = stix2.parse(odata_str)
+    assert odata.objects["1"].type == "email-address"
+
+    odata_str = re.compile('"belongs_to_ref": "0"', re.DOTALL).sub('"belongs_to_ref": "3"', odata_str)
+    with pytest.raises(InvalidValueError):
+        stix2.parse(odata_str)
+
 
 # TODO: Add other examples
diff --git a/stix2/test/test_properties.py b/stix2/test/test_properties.py
index e83b2fc..246f349 100644
--- a/stix2/test/test_properties.py
+++ b/stix2/test/test_properties.py
@@ -1,8 +1,12 @@
 import pytest
 
-from stix2.properties import (BooleanProperty, IDProperty, IntegerProperty,
-                              ListProperty, Property, ReferenceProperty,
-                              StringProperty, TimestampProperty, TypeProperty)
+from stix2.exceptions import DictionaryKeyError
+from stix2.properties import (BinaryProperty, BooleanProperty,
+                              DictionaryProperty, HashesProperty, HexProperty,
+                              IDProperty, IntegerProperty, ListProperty,
+                              Property, ReferenceProperty, StringProperty,
+                              TimestampProperty, TypeProperty)
+
 from .constants import FAKE_TIME
 
 
@@ -171,3 +175,60 @@ def test_timestamp_property_invalid():
         ts_prop.clean(1)
     with pytest.raises(ValueError):
         ts_prop.clean("someday sometime")
+
+
+def test_binary_property():
+    bin_prop = BinaryProperty()
+
+    assert bin_prop.clean("TG9yZW0gSXBzdW0=")
+    with pytest.raises(ValueError):
+        bin_prop.clean("foobar")
+
+
+def test_hex_property():
+    hex_prop = HexProperty()
+
+    assert hex_prop.clean("4c6f72656d20497073756d")
+    with pytest.raises(ValueError):
+        hex_prop.clean("foobar")
+
+
+@pytest.mark.parametrize("d", [
+    {'description': 'something'},
+    [('abc', 1), ('bcd', 2), ('cde', 3)],
+])
+def test_dictionary_property_valid(d):
+    dict_prop = DictionaryProperty()
+    assert dict_prop.clean(d)
+
+
+@pytest.mark.parametrize("d", [
+    {'a': 'something'},
+    {'a'*300: 'something'},
+    {'Hey!': 'something'},
+])
+def test_dictionary_property_invalid(d):
+    dict_prop = DictionaryProperty()
+
+    with pytest.raises(DictionaryKeyError):
+        dict_prop.clean(d)
+
+
+@pytest.mark.parametrize("value", [
+    {"sha256": "6db12788c37247f2316052e142f42f4b259d6561751e5f401a1ae2a6df9c674b"},
+    [('MD5', '2dfb1bcc980200c6706feee399d41b3f'), ('RIPEMD-160', 'b3a8cd8a27c90af79b3c81754f267780f443dfef')],
+])
+def test_hashes_property_valid(value):
+    hash_prop = HashesProperty()
+    assert hash_prop.clean(value)
+
+
+@pytest.mark.parametrize("value", [
+    {"MD5": "a"},
+    {"SHA-256": "2dfb1bcc980200c6706feee399d41b3f"},
+])
+def test_hashes_property_invalid(value):
+    hash_prop = HashesProperty()
+
+    with pytest.raises(ValueError):
+        hash_prop.clean(value)