import base64 import binascii import collections import datetime as dt import inspect import re import uuid from dateutil import parser import pytz from six import text_type from .base import _Observable, _STIXBase from .exceptions import DictionaryKeyError from .utils import get_dict class Property(object): """Represent a property of STIX data type. Subclasses can define the following attributes as keyword arguments to __init__(): - `required` - If `True`, the property must be provided when creating an object with that property. No default value exists for these properties. (Default: `False`) - `fixed` - This provides a constant default value. Users are free to provide this value explicity when constructing an object (which allows you to copy *all* values from an existing object to a new object), but if the user provides a value other than the `fixed` value, it will raise an error. This is semantically equivalent to defining both: - a `clean()` function that checks if the value matches the fixed value, and - a `default()` function that returns the fixed value. (Default: `None`) Subclasses can also define the following functions. - `def clean(self, value) -> any:` - Return a value that is valid for this property. If `value` is not valid for this property, this will attempt to transform it first. If `value` is not valid and no such transformation is possible, it should raise a ValueError. - `def default(self):` - provide a default value for this property. - `default()` can return the special value `NOW` to use the current time. This is useful when several timestamps in the same object need to use the same default value, so calling now() for each property-- likely several microseconds apart-- does not work. Subclasses can instead provide a lambda function for `default` as a keyword argument. `clean` should not be provided as a lambda since lambdas cannot raise their own exceptions. When instantiating Properties, `required` and `default` should not be used together. `default` implies that the property is required in the specification so this function will be used to supply a value if none is provided. `required` means that the user must provide this; it is required in the specification and we can't or don't want to create a default value. """ def _default_clean(self, value): if value != self._fixed_value: raise ValueError("must equal '{0}'.".format(self._fixed_value)) return value def __init__(self, required=False, fixed=None, default=None, type=None): self.required = required self.type = type if fixed: self._fixed_value = fixed self.clean = self._default_clean self.default = lambda: fixed if default: self.default = default def clean(self, value): return value def __call__(self, value=None): """Used by ListProperty to handle lists that have been defined with either a class or an instance. """ return value class ListProperty(Property): def __init__(self, contained, **kwargs): """ Contained should be a function which returns an object from the value. """ if inspect.isclass(contained) and issubclass(contained, Property): # If it's a class and not an instance, instantiate it so that # clean() can be called on it, and ListProperty.clean() will # use __call__ when it appends the item. self.contained = contained() else: self.contained = contained super(ListProperty, self).__init__(**kwargs) def clean(self, value): try: iter(value) except TypeError: raise ValueError("must be an iterable.") try: if isinstance(value, basestring): value = [value] except NameError: if isinstance(value, str): value = [value] result = [] for item in value: try: valid = self.contained.clean(item) except ValueError: raise except AttributeError: # type of list has no clean() function (eg. built in Python types) # TODO Should we raise an error here? valid = item if type(self.contained) is EmbeddedObjectProperty: obj_type = self.contained.type else: obj_type = self.contained if isinstance(valid, collections.Mapping): result.append(obj_type(**valid)) else: result.append(obj_type(valid)) # STIX spec forbids empty lists if len(result) < 1: raise ValueError("must not be empty.") return result class StringProperty(Property): def __init__(self, **kwargs): self.string_type = text_type super(StringProperty, self).__init__(**kwargs) def clean(self, value): return self.string_type(value) class TypeProperty(Property): def __init__(self, type): super(TypeProperty, self).__init__(fixed=type) class IDProperty(Property): def __init__(self, type): self.required_prefix = type + "--" super(IDProperty, self).__init__() def clean(self, value): if not value.startswith(self.required_prefix): raise ValueError("must start with '{0}'.".format(self.required_prefix)) try: uuid.UUID(value.split('--', 1)[1]) except Exception: raise ValueError("must have a valid UUID after the prefix.") return value def default(self): return self.required_prefix + str(uuid.uuid4()) class IntegerProperty(Property): def clean(self, value): try: return int(value) except Exception: raise ValueError("must be an integer.") class FloatProperty(Property): def clean(self, value): try: return float(value) except Exception: raise ValueError("must be a float.") class BooleanProperty(Property): def clean(self, value): if isinstance(value, bool): return value trues = ['true', 't'] falses = ['false', 'f'] try: if value.lower() in trues: return True if value.lower() in falses: return False except AttributeError: if value == 1: return True if value == 0: return False raise ValueError("must be a boolean value.") class TimestampProperty(Property): def clean(self, value): if isinstance(value, dt.date): if hasattr(value, 'hour'): return value else: # Add a time component return dt.datetime.combine(value, dt.time(), tzinfo=pytz.utc) # value isn't a date or datetime object so assume it's a string try: parsed = parser.parse(value) except TypeError: # Unknown format raise ValueError("must be a datetime object, date object, or " "timestamp string in a recognizable format.") if parsed.tzinfo: return parsed.astimezone(pytz.utc) else: # Doesn't have timezone info in the string; assume UTC return pytz.utc.localize(parsed) class ObservableProperty(Property): def clean(self, value): try: dictified = get_dict(value) except ValueError: raise ValueError("The observable property must contain a dictionary") if dictified == {}: raise ValueError("The dictionary property must contain a non-empty dictionary") valid_refs = dict((k, v['type']) for (k, v) in dictified.items()) from .__init__ import parse_observable # avoid circular import for key, obj in dictified.items(): parsed_obj = parse_observable(obj, valid_refs) if not issubclass(type(parsed_obj), _Observable): raise ValueError("Objects in an observable property must be " "Cyber Observable Objects") dictified[key] = parsed_obj return dictified class DictionaryProperty(Property): def clean(self, value): try: dictified = get_dict(value) except ValueError: raise ValueError("The dictionary property must contain a dictionary") if dictified == {}: raise ValueError("The dictionary property must contain a non-empty dictionary") for k in dictified.keys(): if len(k) < 3: raise DictionaryKeyError(k, "shorter than 3 characters") elif len(k) > 256: raise DictionaryKeyError(k, "longer than 256 characters") if not re.match('^[a-zA-Z0-9_-]+$', k): raise DictionaryKeyError(k, "contains characters other than" "lowercase a-z, uppercase A-Z, " "numerals 0-9, hyphen (-), or " "underscore (_)") return dictified HASHES_REGEX = { "MD5": ("^[a-fA-F0-9]{32}$", "MD5"), "MD6": ("^[a-fA-F0-9]{32}|[a-fA-F0-9]{40}|[a-fA-F0-9]{56}|[a-fA-F0-9]{64}|[a-fA-F0-9]{96}|[a-fA-F0-9]{128}$", "MD6"), "RIPEMD160": ("^[a-fA-F0-9]{40}$", "RIPEMD-160"), "SHA1": ("^[a-fA-F0-9]{40}$", "SHA-1"), "SHA224": ("^[a-fA-F0-9]{56}$", "SHA-224"), "SHA256": ("^[a-fA-F0-9]{64}$", "SHA-256"), "SHA384": ("^[a-fA-F0-9]{96}$", "SHA-384"), "SHA512": ("^[a-fA-F0-9]{128}$", "SHA-512"), "SHA3224": ("^[a-fA-F0-9]{56}$", "SHA3-224"), "SHA3256": ("^[a-fA-F0-9]{64}$", "SHA3-256"), "SHA3384": ("^[a-fA-F0-9]{96}$", "SHA3-384"), "SHA3512": ("^[a-fA-F0-9]{128}$", "SHA3-512"), "SSDEEP": ("^[a-zA-Z0-9/+:.]{1,128}$", "ssdeep"), "WHIRLPOOL": ("^[a-fA-F0-9]{128}$", "WHIRLPOOL"), } class HashesProperty(DictionaryProperty): def clean(self, value): clean_dict = super(HashesProperty, self).clean(value) for k, v in clean_dict.items(): key = k.upper().replace('-', '') if key in HASHES_REGEX: vocab_key = HASHES_REGEX[key][1] if not re.match(HASHES_REGEX[key][0], v): raise ValueError("'%s' is not a valid %s hash" % (v, vocab_key)) if k != vocab_key: clean_dict[vocab_key] = clean_dict[k] del clean_dict[k] return clean_dict class BinaryProperty(Property): def clean(self, value): try: base64.b64decode(value) except (binascii.Error, TypeError): raise ValueError("must contain a base64 encoded string") return value class HexProperty(Property): def clean(self, value): if not re.match('^([a-fA-F0-9]{2})+$', value): raise ValueError("must contain an even number of hexadecimal characters") return value REF_REGEX = re.compile("^[a-z][a-z-]+[a-z]--[0-9a-fA-F]{8}-[0-9a-fA-F]{4}" "-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$") class ReferenceProperty(Property): def __init__(self, required=False, type=None): """ references sometimes must be to a specific object type """ self.type = type super(ReferenceProperty, self).__init__(required, type=type) def clean(self, value): if isinstance(value, _STIXBase): value = value.id if self.type: if not value.startswith(self.type): raise ValueError("must start with '{0}'.".format(self.type)) if not REF_REGEX.match(value): raise ValueError("must match --.") return value SELECTOR_REGEX = re.compile("^[a-z0-9_-]{3,250}(\\.(\\[\\d+\\]|[a-z0-9_-]{1,250}))*$") class SelectorProperty(Property): def __init__(self, type=None): # ignore type super(SelectorProperty, self).__init__() def clean(self, value): if not SELECTOR_REGEX.match(value): raise ValueError("must adhere to selector syntax.") return value class ObjectReferenceProperty(StringProperty): def __init__(self, valid_types=None, **kwargs): if valid_types and type(valid_types) is not list: valid_types = [valid_types] self.valid_types = valid_types super(ObjectReferenceProperty, self).__init__(**kwargs) class EmbeddedObjectProperty(Property): def __init__(self, type, required=False): self.type = type super(EmbeddedObjectProperty, self).__init__(required, type=type) def clean(self, value): if type(value) is dict: value = self.type(**value) elif not isinstance(value, self.type): raise ValueError("must be of type %s." % self.type.__name__) return value class EnumProperty(StringProperty): def __init__(self, allowed, **kwargs): if type(allowed) is not list: allowed = list(allowed) self.allowed = allowed super(EnumProperty, self).__init__(**kwargs) def clean(self, value): value = super(EnumProperty, self).clean(value) if value not in self.allowed: raise ValueError("value '%s' is not valid for this enumeration." % value) return self.string_type(value) class ExtensionsProperty(DictionaryProperty): def __init__(self, enclosing_type=None, required=False): self.enclosing_type = enclosing_type super(ExtensionsProperty, self).__init__(required) def clean(self, value): try: dictified = get_dict(value) except ValueError: raise ValueError("The extensions property must contain a dictionary") if dictified == {}: raise ValueError("The dictionary property must contain a non-empty dictionary") from .__init__ import EXT_MAP # avoid circular import if self.enclosing_type in EXT_MAP: specific_type_map = EXT_MAP[self.enclosing_type] for key, subvalue in dictified.items(): if key in specific_type_map: cls = specific_type_map[key] if type(subvalue) is dict: dictified[key] = cls(**subvalue) elif type(subvalue) is cls: dictified[key] = subvalue else: raise ValueError("Cannot determine extension type.") else: raise ValueError("The key used in the extensions dictionary is not an extension type name") else: raise ValueError("The enclosing type has no extensions defined") return dictified