Merge pull request #2 from oasis-open/parse-cyber-observables

Parse cyber observables
stix2.1
Rich Piazza 2017-05-08 11:19:52 -04:00 committed by GitHub
commit 517533ef24
8 changed files with 365 additions and 13 deletions

View File

@ -3,6 +3,7 @@
# flake8: noqa
from .bundle import Bundle
from .observables import Artifact, AutonomousSystem, EmailAddress, File
from .other import ExternalReference, KillChainPhase, MarkingDefinition, \
GranularMarking, StatementMarking, TLPMarking
from .sdo import AttackPattern, Campaign, CourseOfAction, Identity, Indicator, \
@ -31,6 +32,13 @@ OBJ_MAP = {
'vulnerability': Vulnerability,
}
# Maps the ``type`` value of a STIX Cyber Observable Object to the class that
# parse_observable() instantiates for it. Types missing from this table are
# rejected by parse_observable() with a ValueError.
OBJ_MAP_OBSERVABLE = {
'artifact': Artifact,
'autonomous-system': AutonomousSystem,
'email-address': EmailAddress,
'file': File,
}
def parse(data):
"""Deserialize a string or file-like object into a STIX object"""
@ -43,9 +51,27 @@ def parse(data):
else:
try:
obj_class = OBJ_MAP[obj['type']]
return obj_class(**obj)
except KeyError:
# TODO handle custom objects
raise ValueError("Can't parse unknown object type!")
raise ValueError("Can't parse unknown object type '%s'!" % obj['type'])
return obj_class(**obj)
return obj
def parse_observable(data, _valid_refs):
    """Deserialize a string or file-like object into a STIX Cyber Observable
    object.

    Args:
        data: The serialized observable; it is converted to a dictionary
            with ``get_dict``.
        _valid_refs: Collection of object keys that reference properties of
            the parsed object may point at. It is handed to the observable
            class as the ``_valid_refs`` keyword argument.

    Returns:
        An instance of the class registered in ``OBJ_MAP_OBSERVABLE`` for the
        object's ``type``.

    Raises:
        ValueError: If ``type`` is missing or is not a known observable type.
    """
    # Work on a shallow copy so that injecting the private ``_valid_refs``
    # entry never leaks into a dictionary owned by the caller.
    # NOTE(review): assumes get_dict() may hand back its argument unchanged
    # when it already is a dict -- confirm against its implementation.
    obj = dict(get_dict(data))
    obj['_valid_refs'] = _valid_refs

    if 'type' not in obj:
        raise ValueError("'type' is a required field!")

    try:
        obj_class = OBJ_MAP_OBSERVABLE[obj['type']]
    except KeyError:
        # TODO handle custom objects
        raise ValueError("Can't parse unknown object type '%s'!" % obj['type'])
    return obj_class(**obj)

View File

@ -7,8 +7,9 @@ import datetime as dt
import json
from .exceptions import ExtraFieldsError, ImmutableError, InvalidValueError, \
MissingFieldsError, RevokeError, UnmodifiablePropertyError
from .exceptions import ExtraFieldsError, ImmutableError, InvalidObjRefError, \
InvalidValueError, MissingFieldsError, RevokeError, \
UnmodifiablePropertyError
from .utils import format_datetime, get_timestamp, NOW, parse_into_datetime
__all__ = ['STIXJSONEncoder', '_STIXBase']
@ -142,3 +143,23 @@ class _STIXBase(collections.Mapping):
if self.revoked:
raise RevokeError("revoke")
return self.new_version(revoked=True)
class Observable(_STIXBase):
    """Base class for STIX Cyber Observable Objects.

    On top of the checks performed by ``_STIXBase``, every property whose
    name ends in ``_ref`` or ``_refs`` is verified against the collection of
    object keys supplied through the ``_valid_refs`` keyword argument.
    """

    def __init__(self, **kwargs):
        """Create the observable.

        Args:
            **kwargs: Property values for the object. The optional private
                ``_valid_refs`` entry lists the object keys that reference
                properties may point at; it defaults to an empty list, in
                which case every reference is rejected.
        """
        # Kept under the ``_STIXBase__`` prefix and assigned before the base
        # initializer runs, exactly as before.
        # NOTE(review): presumably the prefix is what lets the assignment get
        # past _STIXBase's attribute handling -- confirm.
        self._STIXBase__valid_refs = kwargs.pop('_valid_refs', [])
        super(Observable, self).__init__(**kwargs)

    def _check_property(self, prop_name, prop, kwargs):
        """Run the base check, then validate object references.

        Raises:
            InvalidObjRefError: If a ``*_ref`` / ``*_refs`` property names an
                object key that is not among the valid references.
        """
        super(Observable, self)._check_property(prop_name, prop, kwargs)

        # An optional reference property that was not supplied has nothing to
        # validate; indexing kwargs for it would raise KeyError.
        if prop_name not in kwargs:
            return

        # The two suffixes are mutually exclusive, so normalise both shapes
        # to a list and share a single validation loop.
        if prop_name.endswith('_ref'):
            references = [kwargs[prop_name]]
        elif prop_name.endswith('_refs'):
            references = kwargs[prop_name]
        else:
            return

        for r in references:
            # Only the part before the first '--' is compared.
            ref = r.split('--', 1)[0]
            if ref not in self._STIXBase__valid_refs:
                raise InvalidObjRefError(self.__class__, prop_name, "'%s' is not a valid object in local scope" % ref)
        # TODO also check the type of the object referenced, not just that the key exists

View File

@ -51,6 +51,33 @@ class ImmutableError(STIXError, ValueError):
super(ImmutableError, self).__init__("Cannot modify properties after creation.")
class DictionaryKeyError(STIXError, ValueError):
    """Dictionary key does not conform to the correct format."""

    def __init__(self, key, reason):
        """Record the offending ``key`` and the ``reason`` it was rejected."""
        super(DictionaryKeyError, self).__init__()
        self.key = key
        self.reason = reason

    def __str__(self):
        """Return a human-readable description of the invalid key."""
        # Spelling corrected from "Invliad".
        msg = "Invalid dictionary key {0.key}: ({0.reason})."
        return msg.format(self)
class InvalidObjRefError(STIXError, ValueError):
    """Raised when a STIX Cyber Observable Object holds an invalid object
    reference.

    Attributes:
        cls: Class of the object that carries the bad reference.
        prop_name: Name of the property holding the reference.
        reason: Explanation of why the reference was rejected.
    """

    def __init__(self, cls, prop_name, reason):
        super(InvalidObjRefError, self).__init__()
        self.cls, self.prop_name, self.reason = cls, prop_name, reason

    def __str__(self):
        return "Invalid object reference for '{0.cls.__name__}:{0.prop_name}': {0.reason}".format(self)
class UnmodifiablePropertyError(STIXError, ValueError):
"""Attempted to modify an unmodifiable property of object when creating a new version"""

46
stix2/observables.py Normal file
View File

@ -0,0 +1,46 @@
"""STIX 2.0 Cyber Observable Objects"""
from .base import Observable
# from .properties import (BinaryProperty, BooleanProperty, DictionaryProperty,
# HashesProperty, HexProperty, IDProperty,
# IntegerProperty, ListProperty, ReferenceProperty,
# StringProperty, TimestampProperty, TypeProperty)
from .properties import BinaryProperty, HashesProperty, IntegerProperty, ObjectReferenceProperty, StringProperty, TypeProperty
# STIX Cyber Observable Object whose ``type`` is 'artifact'.
# ``_properties`` maps each declared property name to the Property instance
# used for it; none of them is declared with ``required=True``.
class Artifact(Observable):
_type = 'artifact'
_properties = {
'type': TypeProperty(_type),
'mime_type': StringProperty(),
# BinaryProperty: the value has to be base64-decodable.
'payload_bin': BinaryProperty(),
'url': StringProperty(),
# HashesProperty: dictionary keyed by hash algorithm name.
'hashes': HashesProperty(),
}
# STIX Cyber Observable Object whose ``type`` is 'autonomous-system'.
# ``_properties`` maps each declared property name to the Property instance
# used for it; none of them is declared with ``required=True``.
class AutonomousSystem(Observable):
_type = 'autonomous-system'
_properties = {
'type': TypeProperty(_type),
'number': IntegerProperty(),
'name': StringProperty(),
'rir': StringProperty(),
}
# STIX Cyber Observable Object whose ``type`` is 'email-address'.
# ``value`` is the only property declared with ``required=True``.
class EmailAddress(Observable):
_type = 'email-address'
_properties = {
'type': TypeProperty(_type),
'value': StringProperty(required=True),
'display_name': StringProperty(),
# The name ends in ``_ref``, so Observable._check_property validates the
# value against the object keys passed in as ``_valid_refs``.
'belongs_to_ref': ObjectReferenceProperty(),
}
# STIX Cyber Observable Object whose ``type`` is 'file'.
# Only the ``type`` property is declared here so far.
class File(Observable):
_type = 'file'
_properties = {
'type': TypeProperty(_type),
}

View File

@ -1,3 +1,5 @@
import base64
import binascii
import collections
import datetime as dt
import inspect
@ -8,7 +10,8 @@ from dateutil import parser
import pytz
from six import text_type
from .base import _STIXBase
from .base import _STIXBase, Observable
from .exceptions import DictionaryKeyError
class Property(object):
@ -213,6 +216,90 @@ class TimestampProperty(Property):
return pytz.utc.localize(parsed)
class ObservableProperty(Property):
    """Property holding a mapping of object keys to Cyber Observable
    Objects."""

    def clean(self, value):
        """Parse every entry of ``value`` into an ``Observable`` instance.

        ``value`` is first converted with ``dict()``. Each entry is parsed
        with ``parse_observable``, which receives the mapping's keys as the
        set of valid object references.

        Returns:
            A dictionary with the same keys whose values are the parsed
            observable objects.

        Raises:
            ValueError: If a parsed entry is not an ``Observable`` subclass
                instance.
        """
        observables = dict(value)
        from .__init__ import parse_observable  # avoid circular import

        for obj_key, raw_obj in observables.items():
            observable = parse_observable(raw_obj, observables.keys())
            if issubclass(type(observable), Observable):
                # Only existing keys are reassigned, so the size of the
                # mapping being iterated never changes.
                observables[obj_key] = observable
                continue
            raise ValueError("Objects in an observable property must be "
                             "Cyber Observable Objects")
        return observables
class DictionaryProperty(Property):
    """Property whose value is a dictionary with restricted key names."""

    def clean(self, value):
        """Convert ``value`` with ``dict()`` and validate every key.

        A key is accepted when it is 3 to 256 characters long and is made up
        only of ASCII letters, digits, hyphens and underscores.

        Returns:
            The resulting dictionary.

        Raises:
            DictionaryKeyError: If a key is too short, too long, or contains
                a disallowed character.
        """
        dictified = dict(value)
        for k in dictified:
            if len(k) < 3:
                raise DictionaryKeyError(k, "shorter than 3 characters")
            elif len(k) > 256:
                raise DictionaryKeyError(k, "longer than 256 characters")
            if not re.match('^[a-zA-Z0-9_-]+$', k):
                # The trailing space after "than" was missing, which ran the
                # two adjacent literals together as "thanlowercase".
                raise DictionaryKeyError(k, "contains characters other than "
                                            "lowercase a-z, uppercase A-Z, "
                                            "numerals 0-9, hyphen (-), or "
                                            "underscore (_)")
        return dictified
# Maps a normalised hash algorithm name (upper-cased, hyphens removed) to a
# pair of (validation regex, canonical vocabulary name).
HASHES_REGEX = {
    "MD5": ("^[a-fA-F0-9]{32}$", "MD5"),
    # The alternatives are grouped so that ^ and $ anchor every permitted
    # length. Ungrouped, "^" bound only to the first alternative and "$" only
    # to the last, so any value that merely started with 32 hex digits passed.
    "MD6": ("^(?:[a-fA-F0-9]{32}|[a-fA-F0-9]{40}|[a-fA-F0-9]{56}|[a-fA-F0-9]{64}|[a-fA-F0-9]{96}|[a-fA-F0-9]{128})$", "MD6"),
    "RIPEMD160": ("^[a-fA-F0-9]{40}$", "RIPEMD-160"),
    "SHA1": ("^[a-fA-F0-9]{40}$", "SHA-1"),
    "SHA224": ("^[a-fA-F0-9]{56}$", "SHA-224"),
    "SHA256": ("^[a-fA-F0-9]{64}$", "SHA-256"),
    "SHA384": ("^[a-fA-F0-9]{96}$", "SHA-384"),
    "SHA512": ("^[a-fA-F0-9]{128}$", "SHA-512"),
    "SHA3224": ("^[a-fA-F0-9]{56}$", "SHA3-224"),
    "SHA3256": ("^[a-fA-F0-9]{64}$", "SHA3-256"),
    "SHA3384": ("^[a-fA-F0-9]{96}$", "SHA3-384"),
    "SHA3512": ("^[a-fA-F0-9]{128}$", "SHA3-512"),
    "SSDEEP": ("^[a-zA-Z0-9/+:.]{1,128}$", "ssdeep"),
    "WHIRLPOOL": ("^[a-fA-F0-9]{128}$", "WHIRLPOOL"),
}
class HashesProperty(DictionaryProperty):
    """Dictionary property mapping hash algorithm names to hash values."""

    def clean(self, value):
        """Validate hash values and normalise their algorithm names.

        After the ``DictionaryProperty`` key checks, each key is upper-cased
        and stripped of hyphens, then looked up in ``HASHES_REGEX``. For a
        known algorithm the value must match the registered pattern, and the
        key is renamed to the registered vocabulary spelling. Keys that are
        not in ``HASHES_REGEX`` are left untouched.

        Returns:
            The cleaned dictionary.

        Raises:
            ValueError: If a value does not match its algorithm's pattern.
        """
        clean_dict = super(HashesProperty, self).clean(value)
        # Iterate over a snapshot: renaming a key inserts and deletes entries,
        # and mutating a dict while iterating its live view raises
        # RuntimeError on current CPython (or revisits the new key on older
        # versions).
        for k, v in list(clean_dict.items()):
            key = k.upper().replace('-', '')
            if key in HASHES_REGEX:
                vocab_key = HASHES_REGEX[key][1]
                if not re.match(HASHES_REGEX[key][0], v):
                    raise ValueError("'%s' is not a valid %s hash" % (v, vocab_key))
                if k != vocab_key:
                    clean_dict[vocab_key] = clean_dict[k]
                    del clean_dict[k]
        return clean_dict
class BinaryProperty(Property):
    """Property whose value must be base64-decodable."""

    def clean(self, value):
        """Return ``value`` unchanged if ``base64.b64decode`` accepts it.

        Raises:
            ValueError: If decoding fails with ``binascii.Error`` or
                ``TypeError``.
        """
        try:
            # The decoded bytes are discarded; decoding is only a validity
            # probe.
            base64.b64decode(value)
        except (binascii.Error, TypeError):
            raise ValueError("must contain a base64 encoded string")
        else:
            return value
class HexProperty(Property):
    """Property whose value is a string of hexadecimal digit pairs."""

    def clean(self, value):
        """Return ``value`` unchanged if it consists of one or more pairs of
        hexadecimal digits.

        Raises:
            ValueError: If ``value`` does not match that pattern.
        """
        if re.match('^([a-fA-F0-9]{2})+$', value):
            return value
        raise ValueError("must contain an even number of hexadecimal characters")
# Matches identifiers of the form ``<type>--<id>``: a type made of lowercase
# letters and hyphens (at least three characters, beginning and ending with a
# letter), then ``--``, then hexadecimal groups of 8-4-4-4-12 digits
# separated by hyphens.
REF_REGEX = re.compile("^[a-z][a-z-]+[a-z]--[0-9a-fA-F]{8}-[0-9a-fA-F]{4}"
"-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$")
@ -248,3 +335,7 @@ class SelectorProperty(Property):
if not SELECTOR_REGEX.match(value):
raise ValueError("must adhere to selector syntax.")
return value
# Marker subclass of StringProperty: it adds no behaviour of its own here.
# EmailAddress uses it for ``belongs_to_ref``; the reference itself is checked
# in Observable._check_property based on the property name's suffix.
class ObjectReferenceProperty(StringProperty):
pass

View File

@ -3,9 +3,9 @@
from .base import _STIXBase
from .common import COMMON_PROPERTIES
from .other import KillChainPhase
from .properties import (IDProperty, IntegerProperty, ListProperty, Property,
ReferenceProperty, StringProperty, TimestampProperty,
TypeProperty)
from .properties import (IDProperty, IntegerProperty, ListProperty,
ObservableProperty, ReferenceProperty,
StringProperty, TimestampProperty, TypeProperty)
from .utils import NOW
@ -126,7 +126,7 @@ class ObservedData(_STIXBase):
'first_observed': TimestampProperty(required=True),
'last_observed': TimestampProperty(required=True),
'number_observed': IntegerProperty(required=True),
'objects': Property(),
'objects': ObservableProperty(),
})

View File

@ -1,10 +1,12 @@
import datetime as dt
import re
import pytest
import pytz
import stix2
from .constants import OBSERVED_DATA_ID
from ..exceptions import InvalidValueError
EXPECTED = """{
"created": "2016-04-06T19:58:16Z",
@ -70,6 +72,84 @@ def test_parse_observed_data(data):
assert odata.first_observed == dt.datetime(2015, 12, 21, 19, 0, 0, tzinfo=pytz.utc)
assert odata.last_observed == dt.datetime(2015, 12, 21, 19, 0, 0, tzinfo=pytz.utc)
assert odata.created_by_ref == "identity--f431f809-377b-45e0-aa1c-6a4751cae5ff"
# assert odata.objects["0"].type == "file" # TODO
assert odata.objects["0"].type == "file"
@pytest.mark.parametrize("data", [
    """"0": {
"type": "artifact",
"mime_type": "image/jpeg",
"payload_bin": "VBORw0KGgoAAAANSUhEUgAAADI=="
}""",
    """"0": {
"type": "artifact",
"mime_type": "image/jpeg",
"url": "https://upload.wikimedia.org/wikipedia/commons/b/b4/JPEG_example_JPG_RIP_100.jpg",
"hashes": {
"MD5": "6826f9a05da08134006557758bb3afbb"
}
}""",
])
def test_parse_artifact_valid(data):
    """Observed data containing a valid ``artifact`` object parses."""
    # Raw string: ``\}`` is a regex escape, not a valid Python string escape.
    objects_section = re.compile(r'"objects".+\},', re.DOTALL)
    # Swap the "objects" section of EXPECTED for the parametrized object.
    odata_str = objects_section.sub('"objects": { %s },' % data, EXPECTED)
    odata = stix2.parse(odata_str)
    assert odata.objects["0"].type == "artifact"
@pytest.mark.parametrize("data", [
    """"0": {
"type": "artifact",
"mime_type": "image/jpeg",
"payload_bin": "abcVBORw0KGgoAAAANSUhEUgAAADI=="
}""",
    """"0": {
"type": "artifact",
"mime_type": "image/jpeg",
"url": "https://upload.wikimedia.org/wikipedia/commons/b/b4/JPEG_example_JPG_RIP_100.jpg",
"hashes": {
"MD5": "a"
}
}""",
])
def test_parse_artifact_invalid(data):
    """Observed data containing a malformed ``artifact`` is rejected with
    ``ValueError``."""
    # Raw string: ``\}`` is a regex escape, not a valid Python string escape.
    objects_section = re.compile(r'"objects".+\},', re.DOTALL)
    # Swap the "objects" section of EXPECTED for the parametrized object.
    odata_str = objects_section.sub('"objects": { %s },' % data, EXPECTED)
    with pytest.raises(ValueError):
        stix2.parse(odata_str)
@pytest.mark.parametrize("data", [
    """"0": {
"type": "autonomous-system",
"number": 15139,
"name": "Slime Industries",
"rir": "ARIN"
}""",
])
def test_parse_autonomous_system_valid(data):
    """Observed data containing an ``autonomous-system`` object parses and
    exposes its properties."""
    # Raw string: ``\}`` is a regex escape, not a valid Python string escape.
    objects_section = re.compile(r'"objects".+\},', re.DOTALL)
    # Swap the "objects" section of EXPECTED for the parametrized object.
    odata_str = objects_section.sub('"objects": { %s },' % data, EXPECTED)
    odata = stix2.parse(odata_str)
    assert odata.objects["0"].type == "autonomous-system"
    assert odata.objects["0"].number == 15139
    assert odata.objects["0"].name == "Slime Industries"
    assert odata.objects["0"].rir == "ARIN"
@pytest.mark.parametrize("data", [
    """"1": {
"type": "email-address",
"value": "john@example.com",
"display_name": "John Doe",
"belongs_to_ref": "0"
}""",
])
def test_parse_email_address(data):
    """An ``email-address`` object parses when ``belongs_to_ref`` names an
    existing object key, and is rejected when it does not."""
    # Raw string: ``\}`` is a regex escape, not a valid Python string escape.
    # The substitution appends the parametrized object as entry "1".
    odata_str = re.compile(r'\}.+\},', re.DOTALL).sub('}, %s},' % data, EXPECTED)
    odata = stix2.parse(odata_str)
    assert odata.objects["1"].type == "email-address"

    # Point the reference at key "3", which the test data does not define.
    odata_str = re.compile('"belongs_to_ref": "0"', re.DOTALL).sub('"belongs_to_ref": "3"', odata_str)
    with pytest.raises(InvalidValueError):
        stix2.parse(odata_str)
# TODO: Add other examples

View File

@ -1,8 +1,12 @@
import pytest
from stix2.properties import (BooleanProperty, IDProperty, IntegerProperty,
ListProperty, Property, ReferenceProperty,
StringProperty, TimestampProperty, TypeProperty)
from stix2.exceptions import DictionaryKeyError
from stix2.properties import (BinaryProperty, BooleanProperty,
DictionaryProperty, HashesProperty, HexProperty,
IDProperty, IntegerProperty, ListProperty,
Property, ReferenceProperty, StringProperty,
TimestampProperty, TypeProperty)
from .constants import FAKE_TIME
@ -171,3 +175,60 @@ def test_timestamp_property_invalid():
ts_prop.clean(1)
with pytest.raises(ValueError):
ts_prop.clean("someday sometime")
def test_binary_property():
    """BinaryProperty returns a truthy value for well-formed base64 and
    raises ValueError for malformed input."""
    prop = BinaryProperty()

    cleaned = prop.clean("TG9yZW0gSXBzdW0=")
    assert cleaned

    with pytest.raises(ValueError):
        prop.clean("foobar")
def test_hex_property():
    """HexProperty returns a truthy value for paired hex digits and raises
    ValueError for anything else."""
    prop = HexProperty()

    cleaned = prop.clean("4c6f72656d20497073756d")
    assert cleaned

    with pytest.raises(ValueError):
        prop.clean("foobar")
@pytest.mark.parametrize("d", [
    {'description': 'something'},
    [('abc', 1), ('bcd', 2), ('cde', 3)],
])
def test_dictionary_property_valid(d):
    """DictionaryProperty accepts a dict or a sequence of pairs whose keys
    are well-formed."""
    cleaned = DictionaryProperty().clean(d)
    assert cleaned
@pytest.mark.parametrize("d", [
    {'a': 'something'},
    {'a'*300: 'something'},
    {'Hey!': 'something'},
])
def test_dictionary_property_invalid(d):
    """DictionaryProperty raises DictionaryKeyError for keys that are too
    short, too long, or contain a disallowed character."""
    prop = DictionaryProperty()
    with pytest.raises(DictionaryKeyError):
        prop.clean(d)
@pytest.mark.parametrize("value", [
    {"sha256": "6db12788c37247f2316052e142f42f4b259d6561751e5f401a1ae2a6df9c674b"},
    [('MD5', '2dfb1bcc980200c6706feee399d41b3f'), ('RIPEMD-160', 'b3a8cd8a27c90af79b3c81754f267780f443dfef')],
])
def test_hashes_property_valid(value):
    """HashesProperty accepts well-formed hashes, whether given as a dict or
    as a sequence of pairs."""
    cleaned = HashesProperty().clean(value)
    assert cleaned
@pytest.mark.parametrize("value", [
    {"MD5": "a"},
    {"SHA-256": "2dfb1bcc980200c6706feee399d41b3f"},
])
def test_hashes_property_invalid(value):
    """HashesProperty raises ValueError when a hash value does not match the
    pattern of its algorithm."""
    prop = HashesProperty()
    with pytest.raises(ValueError):
        prop.clean(value)