cti-python-stix2/stix2/properties.py

439 lines
14 KiB
Python
Raw Normal View History

import base64
import binascii
import collections
import datetime as dt
import inspect
import re
2017-02-24 16:28:53 +01:00
import uuid
from dateutil import parser
import pytz
from six import text_type
from .base import _Observable, _STIXBase
from .exceptions import DictionaryKeyError
from .utils import get_dict
2017-02-24 16:28:53 +01:00
class Property(object):
    """Represent a property of STIX data type.

    Subclasses can define the following attributes as keyword arguments to
    __init__():

    - `required` - If `True`, the property must be provided when creating an
      object with that property. No default value exists for these properties.
      (Default: `False`)
    - `fixed` - This provides a constant default value. Users are free to
      provide this value explicitly when constructing an object (which allows
      you to copy *all* values from an existing object to a new object), but
      if the user provides a value other than the `fixed` value, it will raise
      an error. This is semantically equivalent to defining both:

      - a `clean()` function that checks if the value matches the fixed
        value, and
      - a `default()` function that returns the fixed value.

      (Default: `None`)

    Subclasses can also define the following functions.

    - `def clean(self, value) -> any:`

      - Return a value that is valid for this property. If `value` is not
        valid for this property, this will attempt to transform it first. If
        `value` is not valid and no such transformation is possible, it should
        raise a ValueError.

    - `def default(self):`

      - provide a default value for this property.
      - `default()` can return the special value `NOW` to use the current
        time. This is useful when several timestamps in the same object need
        to use the same default value, so calling now() for each property--
        likely several microseconds apart-- does not work.

    Subclasses can instead provide a lambda function for `default` as a keyword
    argument. `clean` should not be provided as a lambda since lambdas cannot
    raise their own exceptions.

    When instantiating Properties, `required` and `default` should not be used
    together. `default` implies that the property is required in the
    specification so this function will be used to supply a value if none is
    provided. `required` means that the user must provide this; it is required
    in the specification and we can't or don't want to create a default value.
    """

    def _default_clean(self, value):
        """Accept `value` only if it equals the fixed value.

        Raises:
            ValueError: if `value` differs from the fixed value.
        """
        if value != self._fixed_value:
            raise ValueError("must equal '{0}'.".format(self._fixed_value))
        return value

    def __init__(self, required=False, fixed=None, default=None, type=None):
        self.required = required
        self.type = type
        # Test against None instead of truthiness so that falsy constants
        # (0, False, "") are still enforced as fixed values.
        if fixed is not None:
            self._fixed_value = fixed
            # Instance attributes shadow the class-level clean()/default().
            self.clean = self._default_clean
            self.default = lambda: fixed
        if default:
            self.default = default

    def clean(self, value):
        """Return `value` unchanged; subclasses override this to validate."""
        return value

    def __call__(self, value=None):
        """Used by ListProperty to handle lists that have been defined with
        either a class or an instance.
        """
        return value
2017-02-24 16:28:53 +01:00
class ListProperty(Property):
    """Property holding a non-empty list whose items are built by `contained`."""

    def __init__(self, contained, **kwargs):
        """
        Contained should be a function which returns an object from the value.
        """
        if inspect.isclass(contained) and issubclass(contained, Property):
            # If it's a class and not an instance, instantiate it so that
            # clean() can be called on it, and ListProperty.clean() will
            # use __call__ when it appends the item.
            self.contained = contained()
        else:
            self.contained = contained
        super(ListProperty, self).__init__(**kwargs)

    def clean(self, value):
        """Validate every item of `value` and return them as a new list.

        A single string is treated as a one-element list rather than being
        iterated character by character.

        Raises:
            ValueError: if `value` is not iterable, if an item is rejected by
                the contained property's clean(), or if the result is empty.
        """
        # The ABC aliases in `collections` were removed in Python 3.10;
        # fall back to the old location only on Python 2.
        try:
            from collections.abc import Mapping
        except ImportError:
            from collections import Mapping

        try:
            iter(value)
        except TypeError:
            raise ValueError("must be an iterable.")

        # `basestring` only exists on Python 2; use `str` on Python 3.
        try:
            if isinstance(value, basestring):
                value = [value]
        except NameError:
            if isinstance(value, str):
                value = [value]

        result = []
        for item in value:
            try:
                valid = self.contained.clean(item)
            except AttributeError:
                # type of list has no clean() function (eg. built in Python types)
                # TODO Should we raise an error here?
                valid = item

            if type(self.contained) is EmbeddedObjectProperty:
                obj_type = self.contained.type
            else:
                obj_type = self.contained

            # Mappings are expanded into keyword arguments; anything else is
            # passed as a single positional argument.
            if isinstance(valid, Mapping):
                result.append(obj_type(**valid))
            else:
                result.append(obj_type(valid))

        # STIX spec forbids empty lists
        if not result:
            raise ValueError("must not be empty.")

        return result
class StringProperty(Property):
    """Property whose value is coerced to text."""

    def __init__(self, **kwargs):
        # Conversion callable applied by clean(); `text_type` comes from six.
        self.string_type = text_type
        super(StringProperty, self).__init__(**kwargs)

    def clean(self, value):
        """Return `value` converted with `self.string_type`."""
        coerce = self.string_type
        return coerce(value)
2017-02-24 16:28:53 +01:00
class TypeProperty(Property):
    """Property pinned to one constant value through the `fixed` mechanism."""

    def __init__(self, type):
        """Fix this property's value to `type`."""
        parent = super(TypeProperty, self)
        parent.__init__(fixed=type)
2017-02-24 16:28:53 +01:00
class IDProperty(Property):
    """Property for identifiers of the form `<type>--<UUID>`."""

    def __init__(self, type):
        # Every valid ID for this property begins with "<type>--".
        self.required_prefix = type + "--"
        super(IDProperty, self).__init__()

    def clean(self, value):
        """Return `value` if it has the required prefix and a parseable UUID.

        Raises:
            ValueError: if the prefix is wrong or the text after the first
                "--" cannot be parsed as a UUID.
        """
        prefix = self.required_prefix
        if not value.startswith(prefix):
            raise ValueError("must start with '{0}'.".format(prefix))

        try:
            uuid_text = value.split('--', 1)[1]
            uuid.UUID(uuid_text)
        except Exception:
            raise ValueError("must have a valid UUID after the prefix.")

        return value

    def default(self):
        """Return a new ID made of the required prefix and a random UUID."""
        generated = str(uuid.uuid4())
        return self.required_prefix + generated
2017-04-18 15:19:38 +02:00
class IntegerProperty(Property):
    """Property whose value is converted with int()."""

    def clean(self, value):
        """Return int(value).

        Raises:
            ValueError: if the conversion fails for any reason.
        """
        try:
            converted = int(value)
        except Exception:
            raise ValueError("must be an integer.")
        return converted
class FloatProperty(Property):
    """Property whose value is converted with float()."""

    def clean(self, value):
        """Return float(value).

        Raises:
            ValueError: if the conversion fails for any reason.
        """
        try:
            converted = float(value)
        except Exception:
            raise ValueError("must be a float.")
        return converted
class BooleanProperty(Property):
    """Property accepting booleans and a few common boolean spellings."""

    def clean(self, value):
        """Return the boolean that `value` represents.

        Accepted inputs are real booleans, the strings 'true'/'t' and
        'false'/'f' in any letter case, and values equal to 1 or 0 that have
        no lower() method.

        Raises:
            ValueError: if `value` matches none of the accepted forms.
        """
        if isinstance(value, bool):
            return value

        try:
            lowered = value.lower()
        except AttributeError:
            # Not string-like: fall back to numeric comparison.
            if value == 1:
                return True
            if value == 0:
                return False
        else:
            if lowered in ('true', 't'):
                return True
            if lowered in ('false', 'f'):
                return False

        raise ValueError("must be a boolean value.")
2017-04-11 18:10:55 +02:00
class TimestampProperty(Property):
    """Property holding a timestamp."""

    def clean(self, value):
        """Return a datetime for `value`.

        - datetime objects are returned unchanged.
        - date objects get a midnight UTC time component.
        - anything else is parsed with dateutil; timezone-aware results are
          converted to UTC and naive results are assumed to be UTC.

        Raises:
            ValueError: if `value` cannot be interpreted as a timestamp.
        """
        if isinstance(value, dt.date):
            if hasattr(value, 'hour'):
                return value
            # Add a time component. The tzinfo is attached to the time object
            # because datetime.combine() only accepts a `tzinfo` argument on
            # Python 3.6+, and would raise TypeError on older interpreters.
            return dt.datetime.combine(value, dt.time(0, 0, tzinfo=pytz.utc))

        # value isn't a date or datetime object so assume it's a string
        try:
            parsed = parser.parse(value)
        except TypeError:
            # Unknown format
            raise ValueError("must be a datetime object, date object, or "
                             "timestamp string in a recognizable format.")
        if parsed.tzinfo:
            return parsed.astimezone(pytz.utc)
        else:
            # Doesn't have timezone info in the string; assume UTC
            return pytz.utc.localize(parsed)
2017-04-11 18:10:55 +02:00
class ObservableProperty(Property):
    """Property holding a dictionary of Cyber Observable Objects."""

    def clean(self, value):
        """Parse every entry of `value` into an observable object.

        Returns:
            The dictionary produced by get_dict(), with each value replaced
            by the result of parse_observable().

        Raises:
            ValueError: if `value` cannot be turned into a dictionary or a
                parsed entry is not an `_Observable`.
        """
        try:
            observables = get_dict(value)
        except ValueError:
            raise ValueError("The observable property must contain a dictionary")

        # Map each key to its object's declared type; this is handed to
        # parse_observable() for every entry.
        valid_refs = {name: entry['type'] for name, entry in observables.items()}

        from .__init__ import parse_observable  # avoid circular import

        for name, raw in observables.items():
            parsed = parse_observable(raw, valid_refs)
            if not issubclass(type(parsed), _Observable):
                raise ValueError("Objects in an observable property must be "
                                 "Cyber Observable Objects")
            # Only existing keys are reassigned, so the dict keeps its size
            # while it is being iterated.
            observables[name] = parsed

        return observables
class DictionaryProperty(Property):
    """Property holding a dictionary with restricted key names."""

    def clean(self, value):
        """Return `value` as a dictionary after validating its keys.

        Keys must be 3 to 256 characters long and contain only ASCII letters,
        digits, hyphens and underscores.

        Raises:
            ValueError: if `value` cannot be turned into a dictionary.
            DictionaryKeyError: if a key breaks the length or character rules.
        """
        try:
            dictified = get_dict(value)
        except ValueError:
            raise ValueError("The dictionary property must contain a dictionary")

        for k in dictified:
            if len(k) < 3:
                raise DictionaryKeyError(k, "shorter than 3 characters")
            elif len(k) > 256:
                raise DictionaryKeyError(k, "longer than 256 characters")
            # \Z instead of $: `$` also matches just before a trailing
            # newline, which would let a key such as "abc\n" through.
            if not re.match(r'^[a-zA-Z0-9_-]+\Z', k):
                raise DictionaryKeyError(k, "contains characters other than "
                                         "lowercase a-z, uppercase A-Z, "
                                         "numerals 0-9, hyphen (-), or "
                                         "underscore (_)")
        return dictified
# Maps a normalized hash algorithm name (uppercase, hyphens removed) to a
# tuple of (regex validating the hash value, canonical vocabulary name).
# Patterns end in \Z so that a value with a trailing newline is rejected.
HASHES_REGEX = {
    "MD5": (r"^[a-fA-F0-9]{32}\Z", "MD5"),
    # The alternatives are grouped so that both anchors apply to each length;
    # ungrouped, any value starting with 32 hex characters would match.
    "MD6": (r"^(?:[a-fA-F0-9]{32}|[a-fA-F0-9]{40}|[a-fA-F0-9]{56}|[a-fA-F0-9]{64}|[a-fA-F0-9]{96}|[a-fA-F0-9]{128})\Z", "MD6"),
    "RIPEMD160": (r"^[a-fA-F0-9]{40}\Z", "RIPEMD-160"),
    "SHA1": (r"^[a-fA-F0-9]{40}\Z", "SHA-1"),
    "SHA224": (r"^[a-fA-F0-9]{56}\Z", "SHA-224"),
    "SHA256": (r"^[a-fA-F0-9]{64}\Z", "SHA-256"),
    "SHA384": (r"^[a-fA-F0-9]{96}\Z", "SHA-384"),
    "SHA512": (r"^[a-fA-F0-9]{128}\Z", "SHA-512"),
    "SHA3224": (r"^[a-fA-F0-9]{56}\Z", "SHA3-224"),
    "SHA3256": (r"^[a-fA-F0-9]{64}\Z", "SHA3-256"),
    "SHA3384": (r"^[a-fA-F0-9]{96}\Z", "SHA3-384"),
    "SHA3512": (r"^[a-fA-F0-9]{128}\Z", "SHA3-512"),
    "SSDEEP": (r"^[a-zA-Z0-9/+:.]{1,128}\Z", "ssdeep"),
    "WHIRLPOOL": (r"^[a-fA-F0-9]{128}\Z", "WHIRLPOOL"),
}
class HashesProperty(DictionaryProperty):
    """Dictionary property mapping hash algorithm names to hash values."""

    def clean(self, value):
        """Validate known hash values and normalize their algorithm names.

        Keys are matched against HASHES_REGEX case-insensitively and ignoring
        hyphens. A matching key has its value checked against the algorithm's
        pattern and is renamed to the canonical vocabulary spelling. Keys that
        match no known algorithm are left untouched.

        Raises:
            ValueError: if a value does not match its algorithm's pattern
                (or for the reasons DictionaryProperty.clean() raises).
        """
        clean_dict = super(HashesProperty, self).clean(value)
        # Iterate over a snapshot: keys are inserted and deleted below, which
        # is not safe while iterating a live dict view on Python 3.
        for k, v in list(clean_dict.items()):
            key = k.upper().replace('-', '')
            if key in HASHES_REGEX:
                vocab_key = HASHES_REGEX[key][1]
                if not re.match(HASHES_REGEX[key][0], v):
                    raise ValueError("'%s' is not a valid %s hash" % (v, vocab_key))
                if k != vocab_key:
                    clean_dict[vocab_key] = clean_dict[k]
                    del clean_dict[k]
        return clean_dict
class BinaryProperty(Property):
    """Property holding base64-encoded data."""

    def clean(self, value):
        """Return `value` unchanged if base64.b64decode() accepts it.

        Raises:
            ValueError: if decoding raises binascii.Error or TypeError.
        """
        try:
            base64.b64decode(value)
        except (binascii.Error, TypeError):
            raise ValueError("must contain a base64 encoded string")
        else:
            # The decoded bytes are discarded; only the original is returned.
            return value
class HexProperty(Property):
    """Property holding a string of hexadecimal byte pairs."""

    def clean(self, value):
        """Return `value` if it consists of one or more pairs of hex digits.

        Raises:
            ValueError: if `value` is empty, has an odd number of characters,
                or contains non-hexadecimal characters.
        """
        # \Z instead of $: `$` also matches just before a trailing newline,
        # which would let a value such as "ab\n" through.
        if not re.match(r'^([a-fA-F0-9]{2})+\Z', value):
            raise ValueError("must contain an even number of hexadecimal characters")
        return value
# Matches "<object-type>--<UUID>": a lowercase, optionally hyphenated type of
# at least three characters, then a UUID in 8-4-4-4-12 hexadecimal form.
# Ends in \Z rather than $ so that a trailing newline is not accepted.
REF_REGEX = re.compile(r"^[a-z][a-z-]+[a-z]--[0-9a-fA-F]{8}-[0-9a-fA-F]{4}"
                       r"-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\Z")
class ReferenceProperty(Property):
    """Property holding a reference (an ID string) to another object."""

    def __init__(self, required=False, type=None):
        """
        references sometimes must be to a specific object type
        """
        self.type = type
        super(ReferenceProperty, self).__init__(required, type=type)

    def clean(self, value):
        """Return the referenced ID after validating its format.

        A `_STIXBase` instance is replaced by its `id` attribute first.

        Raises:
            ValueError: if a specific type is required and the ID is not of
                that type, or if the ID does not match REF_REGEX.
        """
        if isinstance(value, _STIXBase):
            value = value.id
        if self.type:
            # Compare against the complete "<type>--" prefix. A bare type
            # prefix would let e.g. type "attack" accept "attack-pattern--...".
            if self.type.endswith('--'):
                required_prefix = self.type
            else:
                required_prefix = self.type + '--'
            if not value.startswith(required_prefix):
                raise ValueError("must start with '{0}'.".format(self.type))
        if not REF_REGEX.match(value):
            raise ValueError("must match <object-type>--<guid>.")
        return value
# Matches a selector: a leading name of 3-250 characters followed by any
# number of dot-separated components, each either a list index such as "[0]"
# or a name of 1-250 characters. Names use lowercase letters, digits, "_" and
# "-". Ends in \Z rather than $ so that a trailing newline is not accepted.
SELECTOR_REGEX = re.compile(r"^[a-z0-9_-]{3,250}(\.(\[\d+\]|[a-z0-9_-]{1,250}))*\Z")
class SelectorProperty(Property):
    """Property holding a selector string."""

    def __init__(self, type=None):
        # `type` is accepted but deliberately not forwarded to the base class.
        super(SelectorProperty, self).__init__()

    def clean(self, value):
        """Return `value` if it matches SELECTOR_REGEX.

        Raises:
            ValueError: if `value` does not match the selector pattern.
        """
        if SELECTOR_REGEX.match(value):
            return value
        raise ValueError("must adhere to selector syntax.")
2017-05-04 00:19:30 +02:00
class ObjectReferenceProperty(StringProperty):
    """String property that records which types it is allowed to reference."""

    def __init__(self, valid_types=None, **kwargs):
        # A truthy value that is not exactly a list is wrapped in a list;
        # lists and falsy values (including None) are stored as given.
        needs_wrapping = bool(valid_types) and type(valid_types) is not list
        self.valid_types = [valid_types] if needs_wrapping else valid_types
        super(ObjectReferenceProperty, self).__init__(**kwargs)
class EmbeddedObjectProperty(Property):
    """Property holding an instance of a given class."""

    def __init__(self, type, required=False):
        self.type = type
        super(EmbeddedObjectProperty, self).__init__(required, type=type)

    def clean(self, value):
        """Return `value` as an instance of `self.type`.

        A plain dict is expanded into keyword arguments for `self.type`; an
        existing instance of `self.type` is returned as is.

        Raises:
            ValueError: if `value` is neither a dict nor such an instance.
        """
        expected = self.type
        if type(value) is dict:
            return expected(**value)
        if isinstance(value, expected):
            return value
        raise ValueError("must be of type %s." % expected.__name__)
class EnumProperty(StringProperty):
    """String property restricted to a set of allowed values."""

    def __init__(self, allowed, **kwargs):
        """Store `allowed` as a list of permitted values.

        `allowed` may be a list, any other iterable of values, or a single
        string naming the only permitted value.
        """
        if isinstance(allowed, (str, text_type)):
            # list("abc") would yield ['a', 'b', 'c']; treat a lone string as
            # one allowed value instead of splitting it into characters.
            allowed = [allowed]
        elif type(allowed) is not list:
            allowed = list(allowed)
        self.allowed = allowed
        super(EnumProperty, self).__init__(**kwargs)

    def clean(self, value):
        """Return `value` as text if it is one of the allowed values.

        Raises:
            ValueError: if the converted value is not in `self.allowed`.
        """
        value = super(EnumProperty, self).clean(value)
        if value not in self.allowed:
            raise ValueError("value '%s' is not valid for this enumeration." % value)
        return self.string_type(value)
2017-05-12 17:22:23 +02:00
class ExtensionsProperty(DictionaryProperty):
    """Dictionary property whose values are extension objects."""

    def __init__(self, enclosing_type=None, required=False):
        # Key used to look up the applicable extension classes in EXT_MAP.
        self.enclosing_type = enclosing_type
        super(ExtensionsProperty, self).__init__(required)

    def clean(self, value):
        """Return the extensions dictionary with its values instantiated.

        Each key must name an extension registered in EXT_MAP for the
        enclosing type. Plain dict values are expanded into keyword arguments
        for the extension class; values that are already exactly of that
        class are kept.

        Raises:
            ValueError: if `value` cannot be turned into a dictionary, the
                enclosing type has no registered extensions, a key is not a
                registered extension name, or a value is neither a dict nor
                an instance of the expected class.
        """
        try:
            extensions = get_dict(value)
        except ValueError:
            raise ValueError("The extensions property must contain a dictionary")

        from .__init__ import EXT_MAP  # avoid circular import

        if self.enclosing_type not in EXT_MAP:
            raise ValueError("The enclosing type has no extensions defined")
        known_extensions = EXT_MAP[self.enclosing_type]

        for ext_name, ext_value in extensions.items():
            if ext_name not in known_extensions:
                raise ValueError("The key used in the extensions dictionary is not an extension type name")

            ext_class = known_extensions[ext_name]
            if type(ext_value) is dict:
                extensions[ext_name] = ext_class(**ext_value)
            elif type(ext_value) is ext_class:
                extensions[ext_name] = ext_value
            else:
                raise ValueError("Cannot determine extension type.")

        return extensions