From 827f622c045d7350d9dbbbc36f9899f692c51e43 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Wed, 17 Mar 2021 15:01:49 -0400 Subject: [PATCH 1/5] provide ability to stream STIX output to fp ``.write()`` file-like object --- stix2/base.py | 32 ++++++++++++++++++++++++++++- stix2/serialization.py | 38 +++++++++++++++++++++++++++++++++++ stix2/test/v20/test_bundle.py | 22 ++++++++++++++++++++ stix2/test/v21/test_bundle.py | 22 ++++++++++++++++++++ 4 files changed, 113 insertions(+), 1 deletion(-) diff --git a/stix2/base.py b/stix2/base.py index b158f06..03d8ea4 100644 --- a/stix2/base.py +++ b/stix2/base.py @@ -17,7 +17,8 @@ from .exceptions import ( from .markings import _MarkingsMixin from .markings.utils import validate from .serialization import ( - STIXJSONEncoder, STIXJSONIncludeOptionalDefaultsEncoder, serialize, + STIXJSONEncoder, STIXJSONIncludeOptionalDefaultsEncoder, fp_serialize, + serialize, ) from .utils import NOW, PREFIX_21_REGEX, get_timestamp from .versioning import new_version as _new_version @@ -260,6 +261,35 @@ class _STIXBase(Mapping): """ return serialize(self, *args, **kwargs) + def fp_serialize(self, *args, **kwargs): + """ + Serialize a STIX object to a file-like supporting object. + + Examples: + >>> import stix2 + >>> identity = stix2.Identity(name='Example Corp.', identity_class='organization') + >>> print(identity.serialize(sort_keys=True)) + {"created": "2018-06-08T19:03:54.066Z", ... "name": "Example Corp.", "type": "identity"} + >>> print(identity.serialize(sort_keys=True, indent=4)) + { + "created": "2018-06-08T19:03:54.066Z", + "id": "identity--d7f3e25a-ba1c-447a-ab71-6434b092b05e", + "identity_class": "organization", + "modified": "2018-06-08T19:03:54.066Z", + "name": "Example Corp.", + "type": "identity" + } + >>> with open("example.json", mode="w", encoding="utf-8") as f: + ... identity.fp_serialize(f, pretty=True) + + Returns: + None + + See Also: + ``stix2.serialization.fp_serialize`` for options. 
+ """ + fp_serialize(self, *args, **kwargs) + class _DomainObject(_STIXBase, _MarkingsMixin): pass diff --git a/stix2/serialization.py b/stix2/serialization.py index 7488eb5..8822f33 100644 --- a/stix2/serialization.py +++ b/stix2/serialization.py @@ -85,6 +85,44 @@ def serialize(obj, pretty=False, include_optional_defaults=False, **kwargs): return json.dumps(obj, cls=STIXJSONEncoder, **kwargs) +def fp_serialize(obj, fp, pretty=False, include_optional_defaults=False, **kwargs): + """ + Serialize a STIX object as a stream to file-like supporting object. + + Args: + obj: The STIX object to be serialized. + fp: A ``.write()``-supporting file-like object. + pretty (bool): If True, output properties following the STIX specs + formatting. This includes indentation. Refer to notes for more + details. (Default: ``False``) + include_optional_defaults (bool): Determines whether to include + optional properties set to the default value defined in the spec. + **kwargs: The arguments for a json.dumps() call. + + Returns: + None + + Note: + The argument ``pretty=True`` will output the STIX object following + spec order. Using this argument greatly impacts object serialization + performance. If your use case is centered across machine-to-machine + operation it is recommended to set ``pretty=False``. + + When ``pretty=True`` the following key-value pairs will be added or + overridden: indent=4, separators=(",", ": "), item_sort_key=sort_by. + """ + if pretty: + def sort_by(element): + return find_property_index(obj, *element) + + kwargs.update({'indent': 4, 'separators': (',', ': '), 'item_sort_key': sort_by}) + + if include_optional_defaults: + json.dump(obj, fp, cls=STIXJSONIncludeOptionalDefaultsEncoder, **kwargs) + else: + json.dump(obj, fp, cls=STIXJSONEncoder, **kwargs) + + def _find(seq, val): """ Search sequence 'seq' for val. 
This behaves like str.find(): if not found, diff --git a/stix2/test/v20/test_bundle.py b/stix2/test/v20/test_bundle.py index f53d0cb..fed91e1 100644 --- a/stix2/test/v20/test_bundle.py +++ b/stix2/test/v20/test_bundle.py @@ -1,3 +1,4 @@ +import io import json import pytest @@ -113,6 +114,27 @@ def test_bundle_id_must_start_with_bundle(): assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'." +def test_create_bundle_fp_serialize_true(indicator, malware, relationship): + bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship]) + buffer = io.StringIO() + + bundle.fp_serialize(buffer, pretty=True) + + assert str(bundle) == EXPECTED_BUNDLE + assert bundle.serialize(pretty=True) == EXPECTED_BUNDLE + assert buffer.getvalue() == EXPECTED_BUNDLE + + +def test_create_bundle_fp_serialize_false(indicator, malware, relationship): + bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship]) + buffer = io.StringIO() + + bundle.fp_serialize(buffer, sort_keys=True) + + assert bundle.serialize(sort_keys=True) == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True) + assert buffer.getvalue() == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True) + + def test_create_bundle1(indicator, malware, relationship): bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship]) diff --git a/stix2/test/v21/test_bundle.py b/stix2/test/v21/test_bundle.py index 4e30c84..07014c6 100644 --- a/stix2/test/v21/test_bundle.py +++ b/stix2/test/v21/test_bundle.py @@ -1,3 +1,4 @@ +import io import json import pytest @@ -123,6 +124,27 @@ def test_bundle_id_must_start_with_bundle(): assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'." 
+def test_create_bundle_fp_serialize_true(indicator, malware, relationship): + bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship]) + buffer = io.StringIO() + + bundle.fp_serialize(buffer, pretty=True) + + assert str(bundle) == EXPECTED_BUNDLE + assert bundle.serialize(pretty=True) == EXPECTED_BUNDLE + assert buffer.getvalue() == EXPECTED_BUNDLE + + +def test_create_bundle_fp_serialize_false(indicator, malware, relationship): + bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship]) + buffer = io.StringIO() + + bundle.fp_serialize(buffer, sort_keys=True) + + assert bundle.serialize(sort_keys=True) == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True) + assert buffer.getvalue() == json.dumps(json.loads(EXPECTED_BUNDLE), sort_keys=True) + + def test_create_bundle1(indicator, malware, relationship): bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship]) From 2ea9c0c63c1e40e9ecbf78dddf296e2bec28992a Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Wed, 17 Mar 2021 15:15:26 -0400 Subject: [PATCH 2/5] use it on filesystem.py data sink --- stix2/datastore/filesystem.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/stix2/datastore/filesystem.py b/stix2/datastore/filesystem.py index d844115..2209116 100644 --- a/stix2/datastore/filesystem.py +++ b/stix2/datastore/filesystem.py @@ -13,7 +13,7 @@ from stix2.datastore import ( ) from stix2.datastore.filters import Filter, FilterSet, apply_common_filters from stix2.parsing import parse -from stix2.serialization import serialize +from stix2.serialization import fp_serialize from stix2.utils import format_datetime, get_type_from_id, parse_into_datetime @@ -584,9 +584,8 @@ class FileSystemSink(DataSink): if os.path.isfile(file_path): raise DataSourceError("Attempted to overwrite file (!) 
at: {}".format(file_path)) - with io.open(file_path, 'w', encoding=encoding) as f: - stix_obj = serialize(stix_obj, pretty=True, encoding=encoding, ensure_ascii=False) - f.write(stix_obj) + with io.open(file_path, mode='w', encoding=encoding) as f: + fp_serialize(stix_obj, f, pretty=True, encoding=encoding, ensure_ascii=False) def add(self, stix_data=None, version=None): """Add STIX objects to file directory. From 922de111ed965bb0f1b24c78d77a4e9dccacc7d8 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Thu, 18 Mar 2021 10:14:36 -0400 Subject: [PATCH 3/5] minor tweaks to docstrings --- stix2/base.py | 2 +- stix2/serialization.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/stix2/base.py b/stix2/base.py index 03d8ea4..2c48ef6 100644 --- a/stix2/base.py +++ b/stix2/base.py @@ -263,7 +263,7 @@ class _STIXBase(Mapping): def fp_serialize(self, *args, **kwargs): """ - Serialize a STIX object to a file-like supporting object. + Serialize a STIX object to ``fp`` (a text stream file-like supporting object). Examples: >>> import stix2 diff --git a/stix2/serialization.py b/stix2/serialization.py index 8822f33..660bba6 100644 --- a/stix2/serialization.py +++ b/stix2/serialization.py @@ -87,11 +87,11 @@ def serialize(obj, pretty=False, include_optional_defaults=False, **kwargs): def fp_serialize(obj, fp, pretty=False, include_optional_defaults=False, **kwargs): """ - Serialize a STIX object as a stream to file-like supporting object. + Serialize a STIX object to ``fp`` (a text stream file-like supporting object). Args: obj: The STIX object to be serialized. - fp: A ``.write()``-supporting file-like object. + fp: A text stream file-like object supporting ``.write()``. pretty (bool): If True, output properties following the STIX specs formatting. This includes indentation. Refer to notes for more details. 
(Default: ``False``) From c2d360d22345475ee39ca642735c4a2e338f8b50 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Thu, 18 Mar 2021 18:08:31 -0400 Subject: [PATCH 4/5] apply fp_serialize() changes on main serialize() method --- stix2/serialization.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/stix2/serialization.py b/stix2/serialization.py index 660bba6..2784d39 100644 --- a/stix2/serialization.py +++ b/stix2/serialization.py @@ -2,6 +2,7 @@ import copy import datetime as dt +import io import simplejson as json @@ -73,16 +74,9 @@ def serialize(obj, pretty=False, include_optional_defaults=False, **kwargs): When ``pretty=True`` the following key-value pairs will be added or overridden: indent=4, separators=(",", ": "), item_sort_key=sort_by. """ - if pretty: - def sort_by(element): - return find_property_index(obj, *element) - - kwargs.update({'indent': 4, 'separators': (',', ': '), 'item_sort_key': sort_by}) - - if include_optional_defaults: - return json.dumps(obj, cls=STIXJSONIncludeOptionalDefaultsEncoder, **kwargs) - else: - return json.dumps(obj, cls=STIXJSONEncoder, **kwargs) + with io.StringIO() as fp: + fp_serialize(obj, fp, pretty, include_optional_defaults, **kwargs) + return fp.getvalue() def fp_serialize(obj, fp, pretty=False, include_optional_defaults=False, **kwargs): From 19196654c5137def1dada0b3b4a8c95d5842b322 Mon Sep 17 00:00:00 2001 From: Emmanuelle Vargas-Gonzalez Date: Fri, 19 Mar 2021 15:31:01 -0400 Subject: [PATCH 5/5] Apply suggestions from code review Co-authored-by: Chris Lenk --- stix2/test/v20/test_bundle.py | 4 ++-- stix2/test/v21/test_bundle.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/stix2/test/v20/test_bundle.py b/stix2/test/v20/test_bundle.py index fed91e1..ac5d239 100644 --- a/stix2/test/v20/test_bundle.py +++ b/stix2/test/v20/test_bundle.py @@ -114,7 +114,7 @@ def test_bundle_id_must_start_with_bundle(): assert str(excinfo.value) == "Invalid value 
for Bundle 'id': must start with 'bundle--'." -def test_create_bundle_fp_serialize_true(indicator, malware, relationship): +def test_create_bundle_fp_serialize_pretty(indicator, malware, relationship): bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship]) buffer = io.StringIO() @@ -125,7 +125,7 @@ def test_create_bundle_fp_serialize_true(indicator, malware, relationship): assert buffer.getvalue() == EXPECTED_BUNDLE -def test_create_bundle_fp_serialize_false(indicator, malware, relationship): +def test_create_bundle_fp_serialize_nonpretty(indicator, malware, relationship): bundle = stix2.v20.Bundle(objects=[indicator, malware, relationship]) buffer = io.StringIO() diff --git a/stix2/test/v21/test_bundle.py b/stix2/test/v21/test_bundle.py index 07014c6..1cf30d0 100644 --- a/stix2/test/v21/test_bundle.py +++ b/stix2/test/v21/test_bundle.py @@ -124,7 +124,7 @@ def test_bundle_id_must_start_with_bundle(): assert str(excinfo.value) == "Invalid value for Bundle 'id': must start with 'bundle--'." -def test_create_bundle_fp_serialize_true(indicator, malware, relationship): +def test_create_bundle_fp_serialize_pretty(indicator, malware, relationship): bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship]) buffer = io.StringIO() @@ -135,7 +135,7 @@ def test_create_bundle_fp_serialize_true(indicator, malware, relationship): assert buffer.getvalue() == EXPECTED_BUNDLE -def test_create_bundle_fp_serialize_false(indicator, malware, relationship): +def test_create_bundle_fp_serialize_nonpretty(indicator, malware, relationship): bundle = stix2.v21.Bundle(objects=[indicator, malware, relationship]) buffer = io.StringIO()