From 5820fa0845761851f4829ccba45b2afc31b51c57 Mon Sep 17 00:00:00 2001 From: Greg Back Date: Wed, 14 Mar 2018 10:06:03 -0500 Subject: [PATCH 1/7] GH-138: ignore invalid JSON files. --- stix2/datastore/filesystem.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/stix2/datastore/filesystem.py b/stix2/datastore/filesystem.py index 26d0c58..a00c6e1 100644 --- a/stix2/datastore/filesystem.py +++ b/stix2/datastore/filesystem.py @@ -303,7 +303,11 @@ class FileSystemSource(DataSource): for file_ in files: if not id_ or id_ == file_.split(".")[0]: # have to load into memory regardless to evaluate other filters - stix_obj = json.load(open(os.path.join(root, file_))) + try: + stix_obj = json.load(open(os.path.join(root, file_))) + except UnicodeDecodeError: # likely not a JSON file + # TODO: log a warning somehow? (os.path.abspath(file_))) + continue if stix_obj.get('type', '') == 'bundle': stix_obj = stix_obj['objects'][0] # check against other filters, add if match From af14cd4f881241dfd7373618c3352142faba3c02 Mon Sep 17 00:00:00 2001 From: = Date: Wed, 14 Mar 2018 16:28:44 -0400 Subject: [PATCH 2/7] more type checking of filesystem json files; added corresponding tests --- stix2/datastore/filesystem.py | 20 +++++++++---- stix2/test/test_filesystem.py | 56 ++++++++++++++++++++++++++++++++++- 2 files changed, 70 insertions(+), 6 deletions(-) diff --git a/stix2/datastore/filesystem.py b/stix2/datastore/filesystem.py index a00c6e1..438e706 100644 --- a/stix2/datastore/filesystem.py +++ b/stix2/datastore/filesystem.py @@ -301,15 +301,25 @@ class FileSystemSource(DataSource): for path in include_paths: for root, dirs, files in os.walk(path): for file_ in files: + if not file_.endswith(".json"): + # skip non '.json' files as more likely to be random non-STIX files + continue + if not id_ or id_ == file_.split(".")[0]: # have to load into memory regardless to evaluate other filters try: stix_obj = json.load(open(os.path.join(root, file_))) - except UnicodeDecodeError: # likely not a JSON file - # TODO: log a warning somehow? (os.path.abspath(file_))) - continue - if stix_obj.get('type', '') == 'bundle': - stix_obj = stix_obj['objects'][0] + + if stix_obj["type"] == "bundle": + stix_obj = stix_obj["objects"][0] + + stix_obj["type"] + stix_obj["id"] + + except (UnicodeDecodeError, ValueError, KeyError) as e: # likely not a JSON file + print("filesytem TypeError raised") + raise TypeError("STIX JSON object at '{0}' could either not be parsed to JSON or was not valid STIX JSON".format(os.path.join(root, file_))) + # check against other filters, add if match all_data.extend(apply_common_filters([stix_obj], query)) diff --git a/stix2/test/test_filesystem.py b/stix2/test/test_filesystem.py index 020fee5..64d0e31 100644 --- a/stix2/test/test_filesystem.py +++ b/stix2/test/test_filesystem.py @@ -1,4 +1,5 @@ import os +import json import shutil import pytest @@ -44,6 +45,39 @@ def fs_sink(): # remove campaign dir shutil.rmtree(os.path.join(FS_PATH, "campaign"), True) +@pytest.fixture +def bad_json_files(): + # create erroneous JSON files for tests to make sure handled gracefully + + with open(os.path.join(FS_PATH, "indicator", "indicator--test-non-json.txt"), "w") as f: + f.write("Im not a JSON file") + + with open(os.path.join(FS_PATH, "indicator", "indicator--test-bad-json.json"), "w") as f: + f.write("Im not a JSON formatted file") + + yield True # dummy yield so can have teardown + + os.remove(os.path.join(FS_PATH, "indicator", "indicator--test-non-json.txt")) + os.remove(os.path.join(FS_PATH, "indicator", "indicator--test-bad-json.json")) + +@pytest.fixture +def bad_stix_files(): + # create erroneous STIX JSON files for tests to make sure handled correctly + + # bad STIX object + stix_obj = { + "id": "indicator--test-bad-stix", + "spec_version": "2.0" + # no "type" field + } + + with open(os.path.join(FS_PATH, "indicator", "indicator--test-non-stix.json"), "w") as f: + f.write(json.dumps(stix_obj)) + + yield True # dummy yield so can have teardown + + os.remove(os.path.join(FS_PATH, "indicator", "indicator--test-non-stix.json")) + @pytest.fixture(scope='module') def rel_fs_store(): @@ -76,6 +110,26 @@ def test_filesystem_sink_nonexistent_folder(): assert "for STIX data does not exist" in str(excinfo) +def test_filesystem_source_bad_json_file(fs_source, bad_json_files): + # this tests the handling of two bad json files + # - one file should just be skipped (silently) as its a ".txt" extension + # - one file should be parsed and raise Exception bc its not JSON + try: + bad_json_indicator = fs_source.get("indicator--test-bad-json") + except TypeError as e: + assert "indicator--test-bad-json" in str(e) + assert "could either not be parsed to JSON or was not valid STIX JSON" in str(e) + + +def test_filesystem_source_bad_stix_file(fs_source, bad_stix_files): + # this tests handling of bad STIX json object + try: + bad_stix_indicator = fs_source.get("indicator--test-non-stix") + except TypeError as e: + assert "indicator--test-non-stix" in str(e) + assert "could either not be parsed to JSON or was not valid STIX JSON" in str(e) + + def test_filesytem_source_get_object(fs_source): # get object mal = fs_source.get("malware--6b616fc1-1505-48e3-8b2c-0d19337bff38") @@ -470,4 +524,4 @@ def test_related_to_by_target(rel_fs_store): assert len(resp) == 2 assert any(x['id'] == CAMPAIGN_ID for x in resp) - assert any(x['id'] == INDICATOR_ID for x in resp) + assert any(x['id'] == INDICATOR_ID for x in resp) \ No newline at end of file From 6f762e7ea053ee6db90e91d1da19199307584155 Mon Sep 17 00:00:00 2001 From: = Date: Wed, 14 Mar 2018 16:32:31 -0400 Subject: [PATCH 3/7] woops forgot file, recommit of: more type checking of filesystem json files; added corresponding tests --- stix2/datastore/filesystem.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stix2/datastore/filesystem.py b/stix2/datastore/filesystem.py index 438e706..35e3a54 100644 --- a/stix2/datastore/filesystem.py +++ b/stix2/datastore/filesystem.py @@ -313,10 +313,11 @@ class FileSystemSource(DataSource): if stix_obj["type"] == "bundle": stix_obj = stix_obj["objects"][0] + # naive STIX check stix_obj["type"] stix_obj["id"] - except (UnicodeDecodeError, ValueError, KeyError) as e: # likely not a JSON file + except (ValueError, KeyError) as e: # likely not a JSON file print("filesytem TypeError raised") raise TypeError("STIX JSON object at '{0}' could either not be parsed to JSON or was not valid STIX JSON".format(os.path.join(root, file_))) From f4558c09587734cce269b07bdb398842634407da Mon Sep 17 00:00:00 2001 From: = Date: Wed, 14 Mar 2018 19:09:25 -0400 Subject: [PATCH 4/7] pre-commit errors --- stix2/datastore/filesystem.py | 7 ++++--- stix2/test/test_filesystem.py | 20 +++++++++++--------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/stix2/datastore/filesystem.py b/stix2/datastore/filesystem.py index 35e3a54..b525932 100644 --- a/stix2/datastore/filesystem.py +++ b/stix2/datastore/filesystem.py @@ -313,13 +313,14 @@ class FileSystemSource(DataSource): if stix_obj["type"] == "bundle": stix_obj = stix_obj["objects"][0] - # naive STIX check + # naive STIX type checking stix_obj["type"] stix_obj["id"] - except (ValueError, KeyError) as e: # likely not a JSON file + except (ValueError, KeyError): # likely not a JSON file print("filesytem TypeError raised") - raise TypeError("STIX JSON object at '{0}' could either not be parsed to JSON or was not valid STIX JSON".format(os.path.join(root, file_))) + raise TypeError("STIX JSON object at '{0}' could either not be parsed to " + "JSON or was not valid STIX JSON".format(os.path.join(root, file_))) # check against other filters, add if match all_data.extend(apply_common_filters([stix_obj], query)) diff --git a/stix2/test/test_filesystem.py b/stix2/test/test_filesystem.py index 64d0e31..4176322 100644 --- a/stix2/test/test_filesystem.py +++ b/stix2/test/test_filesystem.py @@ -1,5 +1,5 @@ -import os import json +import os import shutil import pytest @@ -45,21 +45,23 @@ def fs_sink(): # remove campaign dir shutil.rmtree(os.path.join(FS_PATH, "campaign"), True) + @pytest.fixture def bad_json_files(): # create erroneous JSON files for tests to make sure handled gracefully - with open(os.path.join(FS_PATH, "indicator", "indicator--test-non-json.txt"), "w") as f: + with open(os.path.join(FS_PATH, "indicator", "indicator--test-non-json.txt"), "w+") as f: f.write("Im not a JSON file") - with open(os.path.join(FS_PATH, "indicator", "indicator--test-bad-json.json"), "w") as f: + with open(os.path.join(FS_PATH, "indicator", "indicator--test-bad-json.json"), "w+") as f: f.write("Im not a JSON formatted file") - yield True # dummy yield so can have teardown + yield True # dummy yield so can have teardown os.remove(os.path.join(FS_PATH, "indicator", "indicator--test-non-json.txt")) os.remove(os.path.join(FS_PATH, "indicator", "indicator--test-bad-json.json")) + @pytest.fixture def bad_stix_files(): # create erroneous STIX JSON files for tests to make sure handled correctly @@ -71,10 +73,10 @@ def bad_stix_files(): # no "type" field } - with open(os.path.join(FS_PATH, "indicator", "indicator--test-non-stix.json"), "w") as f: + with open(os.path.join(FS_PATH, "indicator", "indicator--test-non-stix.json"), "w+") as f: f.write(json.dumps(stix_obj)) - yield True # dummy yield so can have teardown + yield True # dummy yield so can have teardown os.remove(os.path.join(FS_PATH, "indicator", "indicator--test-non-stix.json")) @@ -115,7 +117,7 @@ def test_filesystem_source_bad_json_file(fs_source, bad_json_files): # - one file should just be skipped (silently) as its a ".txt" extension # - one file should be parsed and raise Exception bc its not JSON try: - bad_json_indicator = fs_source.get("indicator--test-bad-json") + fs_source.get("indicator--test-bad-json") except TypeError as e: assert "indicator--test-bad-json" in str(e) assert "could either not be parsed to JSON or was not valid STIX JSON" in str(e) @@ -124,7 +126,7 @@ def test_filesystem_source_bad_json_file(fs_source, bad_json_files): def test_filesystem_source_bad_stix_file(fs_source, bad_stix_files): # this tests handling of bad STIX json object try: - bad_stix_indicator = fs_source.get("indicator--test-non-stix") + fs_source.get("indicator--test-non-stix") except TypeError as e: assert "indicator--test-non-stix" in str(e) assert "could either not be parsed to JSON or was not valid STIX JSON" in str(e) @@ -524,4 +526,4 @@ def test_related_to_by_target(rel_fs_store): assert len(resp) == 2 assert any(x['id'] == CAMPAIGN_ID for x in resp) - assert any(x['id'] == INDICATOR_ID for x in resp) \ No newline at end of file + assert any(x['id'] == INDICATOR_ID for x in resp) From 2fbde05e6c20effa0272b9017f38a2341189627b Mon Sep 17 00:00:00 2001 From: = Date: Wed, 14 Mar 2018 19:34:07 -0400 Subject: [PATCH 5/7] putting test files in stix type folder that exists on git (bc there are files in it) --- stix2/test/test_filesystem.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/stix2/test/test_filesystem.py b/stix2/test/test_filesystem.py index 4176322..f59136e 100644 --- a/stix2/test/test_filesystem.py +++ b/stix2/test/test_filesystem.py @@ -50,16 +50,16 @@ def fs_sink(): def bad_json_files(): # create erroneous JSON files for tests to make sure handled gracefully - with open(os.path.join(FS_PATH, "indicator", "indicator--test-non-json.txt"), "w+") as f: + with open(os.path.join(FS_PATH, "intrusion-set", "intrusion-set--test-non-json.txt"), "w+") as f: f.write("Im not a JSON file") - with open(os.path.join(FS_PATH, "indicator", "indicator--test-bad-json.json"), "w+") as f: + with open(os.path.join(FS_PATH, "intrusion-set", "intrusion-set--test-bad-json.json"), "w+") as f: f.write("Im not a JSON formatted file") yield True # dummy yield so can have teardown - os.remove(os.path.join(FS_PATH, "indicator", "indicator--test-non-json.txt")) - os.remove(os.path.join(FS_PATH, "indicator", "indicator--test-bad-json.json")) + os.remove(os.path.join(FS_PATH, "intrusion-set", "intrusion-set--test-non-json.txt")) + os.remove(os.path.join(FS_PATH, "intrusion-set", "intrusion-set--test-bad-json.json")) @pytest.fixture @@ -68,17 +68,17 @@ def bad_stix_files(): # bad STIX object stix_obj = { - "id": "indicator--test-bad-stix", + "id": "intrusion-set--test-bad-stix", "spec_version": "2.0" # no "type" field } - with open(os.path.join(FS_PATH, "indicator", "indicator--test-non-stix.json"), "w+") as f: + with open(os.path.join(FS_PATH, "intrusion-set", "intrusion-set--test-non-stix.json"), "w+") as f: f.write(json.dumps(stix_obj)) yield True # dummy yield so can have teardown - os.remove(os.path.join(FS_PATH, "indicator", "indicator--test-non-stix.json")) + os.remove(os.path.join(FS_PATH, "intrusion-set", "intrusion-set--test-non-stix.json")) @pytest.fixture(scope='module') @@ -117,18 +117,18 @@ def test_filesystem_source_bad_json_file(fs_source, bad_json_files): # - one file should just be skipped (silently) as its a ".txt" extension # - one file should be parsed and raise Exception bc its not JSON try: - fs_source.get("indicator--test-bad-json") + fs_source.get("intrusion-set--test-bad-json") except TypeError as e: - assert "indicator--test-bad-json" in str(e) + assert "intrusion-set--test-bad-json" in str(e) assert "could either not be parsed to JSON or was not valid STIX JSON" in str(e) def test_filesystem_source_bad_stix_file(fs_source, bad_stix_files): # this tests handling of bad STIX json object try: - fs_source.get("indicator--test-non-stix") + fs_source.get("intrusion-set--test-non-stix") except TypeError as e: - assert "indicator--test-non-stix" in str(e) + assert "intrusion-set--test-non-stix" in str(e) assert "could either not be parsed to JSON or was not valid STIX JSON" in str(e) From 017df285f9472275482597b1e0e3a6ee8124e8fa Mon Sep 17 00:00:00 2001 From: = Date: Thu, 15 Mar 2018 16:11:22 -0400 Subject: [PATCH 6/7] so pip installs will not include test data directory --- setup.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/setup.py b/setup.py index fa68616..b234473 100644 --- a/setup.py +++ b/setup.py @@ -54,4 +54,7 @@ setup( 'stix2-patterns', 'taxii2-client', ], + exclude_package_data={ + 'test': ['stix2_data'] + }, ) From 1f5876d4209fa3b04d4e8447a5c3c7620b01a322 Mon Sep 17 00:00:00 2001 From: Michael K Date: Fri, 16 Mar 2018 10:57:31 -0400 Subject: [PATCH 7/7] Undo last commit Removing directive that is not needed, test packages are already excluded correctly. --- setup.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/setup.py b/setup.py index b234473..fa68616 100644 --- a/setup.py +++ b/setup.py @@ -54,7 +54,4 @@ setup( 'stix2-patterns', 'taxii2-client', ], - exclude_package_data={ - 'test': ['stix2_data'] - }, )