cti-python-stix2/stix2/parsing.py

145 lines
5.9 KiB
Python

"""STIX2 Core parsing methods."""
import copy
from . import registry
from .exceptions import ParseError
from .utils import _get_dict, detect_spec_version
def parse(data, allow_custom=False, version=None):
"""Convert a string, dict or file-like object into a STIX object.
Args:
data (str, dict, file-like object): The STIX 2 content to be parsed.
allow_custom (bool): Whether to allow custom properties as well unknown
custom objects. Note that unknown custom objects cannot be parsed
into STIX objects, and will be returned as is. Default: False.
version (str): If present, it forces the parser to use the version
provided. Otherwise, the library will make the best effort based
on checking the "spec_version" property. If none of the above are
possible, it will use the default version specified by the library.
Returns:
An instantiated Python STIX object.
Warnings:
'allow_custom=True' will allow for the return of any supplied STIX
dict(s) that cannot be found to map to any known STIX object types
(both STIX2 domain objects or defined custom STIX2 objects); NO
validation is done. This is done to allow the processing of possibly
unknown custom STIX objects (example scenario: I need to query a
third-party TAXII endpoint that could provide custom STIX objects that
I don't know about ahead of time)
"""
# convert STIX object to dict, if not already
obj = _get_dict(data)
# convert dict to full python-stix2 obj
obj = dict_to_stix2(obj, allow_custom, version)
return obj
def dict_to_stix2(stix_dict, allow_custom=False, version=None):
"""convert dictionary to full python-stix2 object
Args:
stix_dict (dict): a python dictionary of a STIX object
that (presumably) is semantically correct to be parsed
into a full python-stix2 obj
allow_custom (bool): Whether to allow custom properties as well
unknown custom objects. Note that unknown custom objects cannot
be parsed into STIX objects, and will be returned as is.
Default: False.
version (str): If present, it forces the parser to use the version
provided. Otherwise, the library will make the best effort based
on checking the "spec_version" property. If none of the above are
possible, it will use the default version specified by the library.
Returns:
An instantiated Python STIX object
Warnings:
'allow_custom=True' will allow for the return of any supplied STIX
dict(s) that cannot be found to map to any known STIX object types
(both STIX2 domain objects or defined custom STIX2 objects); NO
validation is done. This is done to allow the processing of
possibly unknown custom STIX objects (example scenario: I need to
query a third-party TAXII endpoint that could provide custom STIX
objects that I don't know about ahead of time)
"""
if 'type' not in stix_dict:
raise ParseError("Can't parse object with no 'type' property: %s" % str(stix_dict))
if not version:
version = detect_spec_version(stix_dict)
obj_type = stix_dict["type"]
obj_class = registry.class_for_type(obj_type, version, "objects") \
or registry.class_for_type(obj_type, version, "observables")
if not obj_class:
if allow_custom:
# flag allows for unknown custom objects too, but will not
# be parsed into STIX object, returned as is
return stix_dict
for key_id, ext_def in stix_dict.get('extensions', {}).items():
if key_id.startswith('extension-definition--') and ext_def.get('extension_type', None):
# prevents ParseError for unregistered objects when
# 'is_new_object' or 'is_extension_so' are set to True and allow_custom=False
return stix_dict
raise ParseError("Can't parse unknown object type '%s'! For custom types, use the CustomObject decorator." % stix_dict['type'])
return obj_class(allow_custom=allow_custom, **stix_dict)
def parse_observable(data, _valid_refs=None, allow_custom=False, version=None):
"""Deserialize a string or file-like object into a STIX Cyber Observable
object.
Args:
data (str, dict, file-like object): The STIX2 content to be parsed.
_valid_refs: A list of object references valid for the scope of the
object being parsed. Use empty list if no valid refs are present.
allow_custom (bool): Whether to allow custom properties or not.
Default: False.
version (str): If present, it forces the parser to use the version
provided. Otherwise, the default version specified by the library
will be used.
Returns:
An instantiated Python STIX Cyber Observable object.
"""
obj = _get_dict(data)
if 'type' not in obj:
raise ParseError("Can't parse observable with no 'type' property: %s" % str(obj))
# get deep copy since we are going modify the dict and might
# modify the original dict as _get_dict() does not return new
# dict when passed a dict
obj = copy.deepcopy(obj)
obj['_valid_refs'] = _valid_refs or []
if not version:
version = detect_spec_version(obj)
obj_type = obj["type"]
obj_class = registry.class_for_type(obj_type, version, "observables")
if not obj_class:
if allow_custom:
# flag allows for unknown custom objects too, but will not
# be parsed into STIX observable object, just returned as is
return obj
raise ParseError(
"Can't parse unknown observable type '%s'! For custom observables, "
"use the CustomObservable decorator." % obj['type'],
)
return obj_class(allow_custom=allow_custom, **obj)