From a3d10561220e1faa79e50ef6078d24c43a652a73 Mon Sep 17 00:00:00 2001
From: Michael Chisholm
Date: Tue, 24 Aug 2021 00:15:50 -0400
Subject: [PATCH 1/2] Fix TAXIICollectionSource.query() to propagate HTTPErrors instead of ignoring them. Special-cased 416, since that can occur naturally while paging with TAXII 2.0.

---
 stix2/datastore/taxii.py | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/stix2/datastore/taxii.py b/stix2/datastore/taxii.py
index 9ad6df9..84e8c35 100644
--- a/stix2/datastore/taxii.py
+++ b/stix2/datastore/taxii.py
@@ -296,19 +296,10 @@ class TAXIICollectionSource(DataSource):
 
         # query TAXII collection
         all_data = []
+        paged_request = tcv21.as_pages if isinstance(self.collection, tcv21.Collection) else tcv20.as_pages
         try:
-            paged_request = tcv21.as_pages if isinstance(self.collection, tcv21.Collection) else tcv20.as_pages
-
             for resource in paged_request(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):
                 all_data.extend(resource.get("objects", []))
-
-            # deduplicate data (before filtering as reduces wasted filtering)
-            all_data = deduplicate(all_data)
-
-            # apply local (CompositeDataSource, TAXIICollectionSource and query) filters
-            query.remove(taxii_filters)
-            all_data = list(apply_common_filters(all_data, query))
-
         except HTTPError as e:
             # if resources not found or access is denied from TAXII server, return empty list
             if e.response.status_code == 404:
@@ -317,6 +308,19 @@ class TAXIICollectionSource(DataSource):
                     " the supplied TAXII Collection object are either not found or access is"
                     " denied. Received error: ", e,
                 )
+            elif e.response.status_code != 416:
+                # TAXII 2.0 paging can result in a 416 (Range Not Satisfiable)
+                # if the server isn't sending Content-Range headers, so the
+                # pager just goes until it runs out of pages. So 416 can't be
+                # treated as a real error, just an end-of-pages condition.
+                raise
+
+        # deduplicate data (before filtering as reduces wasted filtering)
+        all_data = deduplicate(all_data)
+
+        # apply local (CompositeDataSource, TAXIICollectionSource and query) filters
+        query.remove(taxii_filters)
+        all_data = list(apply_common_filters(all_data, query))
 
         # parse python STIX objects from the STIX object dicts
         stix_objs = [parse(stix_obj_dict, allow_custom=self.allow_custom, version=version) for stix_obj_dict in all_data]

From a4ce0222bf8ef6bb04c70f9958fb7cdd7ca8f4aa Mon Sep 17 00:00:00 2001
From: Michael Chisholm
Date: Tue, 24 Aug 2021 15:04:05 -0400
Subject: [PATCH 2/2] Move and edit a comment about HTTP status code 416 to reduce confusion.

---
 stix2/datastore/taxii.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/stix2/datastore/taxii.py b/stix2/datastore/taxii.py
index 84e8c35..04b3c83 100644
--- a/stix2/datastore/taxii.py
+++ b/stix2/datastore/taxii.py
@@ -308,11 +308,13 @@ class TAXIICollectionSource(DataSource):
                     " the supplied TAXII Collection object are either not found or access is"
                     " denied. Received error: ", e,
                 )
+
+            # TAXII 2.0 paging can result in a 416 (Range Not Satisfiable) if
+            # the server isn't sending Content-Range headers, so the pager just
+            # goes until it runs out of pages. So 416 can't be treated as a
+            # real error, just an end-of-pages condition. For other codes,
+            # propagate the exception.
             elif e.response.status_code != 416:
-                # TAXII 2.0 paging can result in a 416 (Range Not Satisfiable)
-                # if the server isn't sending Content-Range headers, so the
-                # pager just goes until it runs out of pages. So 416 can't be
-                # treated as a real error, just an end-of-pages condition.
                 raise
 
         # deduplicate data (before filtering as reduces wasted filtering)