Fix TAXIICollectionSource.query() to propagate HTTPErrors instead of ignoring them.

HTTP 416 (Range Not Satisfiable) is special-cased, since it can
occur naturally while paging with TAXII 2.0.
pull/1/head
Michael Chisholm 2021-08-24 00:15:50 -04:00
parent 6e7e9dd832
commit a3d1056122
1 changed file with 14 additions and 10 deletions

View File

@ -296,19 +296,10 @@ class TAXIICollectionSource(DataSource):
# query TAXII collection
all_data = []
paged_request = tcv21.as_pages if isinstance(self.collection, tcv21.Collection) else tcv20.as_pages
try:
paged_request = tcv21.as_pages if isinstance(self.collection, tcv21.Collection) else tcv20.as_pages
for resource in paged_request(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):
all_data.extend(resource.get("objects", []))
# deduplicate data (before filtering as reduces wasted filtering)
all_data = deduplicate(all_data)
# apply local (CompositeDataSource, TAXIICollectionSource and query) filters
query.remove(taxii_filters)
all_data = list(apply_common_filters(all_data, query))
except HTTPError as e:
# if resources not found or access is denied from TAXII server, return empty list
if e.response.status_code == 404:
@ -317,6 +308,19 @@ class TAXIICollectionSource(DataSource):
" the supplied TAXII Collection object are either not found or access is"
" denied. Received error: ", e,
)
elif e.response.status_code != 416:
# TAXII 2.0 paging can result in a 416 (Range Not Satisfiable)
# if the server isn't sending Content-Range headers, so the
# pager just goes until it runs out of pages. So 416 can't be
# treated as a real error, just an end-of-pages condition.
raise
# deduplicate data (before filtering as reduces wasted filtering)
all_data = deduplicate(all_data)
# apply local (CompositeDataSource, TAXIICollectionSource and query) filters
query.remove(taxii_filters)
all_data = list(apply_common_filters(all_data, query))
# parse python STIX objects from the STIX object dicts
stix_objs = [parse(stix_obj_dict, allow_custom=self.allow_custom, version=version) for stix_obj_dict in all_data]