Merge pull request #480 from emmanvg/479-pagination-taxii-datastore

Support Pagination in TAXII DataStore
pull/1/head
Chris Lenk 2021-03-15 09:33:42 -04:00 committed by GitHub
commit ccf3b1a0fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 39 additions and 15 deletions

View File

@ -60,7 +60,7 @@ setup(
'Bug Tracker': 'https://github.com/oasis-open/cti-python-stix2/issues/',
},
extras_require={
'taxii': ['taxii2-client>=2.2.1'],
'taxii': ['taxii2-client>=2.3.0'],
'semantic': ['haversine', 'rapidfuzz'],
},
)

View File

@ -12,6 +12,8 @@ from stix2.parsing import parse
from stix2.utils import deduplicate
try:
from taxii2client import v20 as tcv20
from taxii2client import v21 as tcv21
from taxii2client.exceptions import ValidationError
_taxii2_client = True
except ImportError:
@ -33,9 +35,12 @@ class TAXIICollectionStore(DataStoreMixin):
side(retrieving data) and False for TAXIICollectionSink
side(pushing data). However, when parameter is supplied, it will
be applied to both TAXIICollectionSource/Sink.
items_per_page (int): How many STIX objects to request per call
to the TAXII server. The value can be tuned, but the server may override
it if its internal limit is exceeded. Used by TAXIICollectionSource.
"""
def __init__(self, collection, allow_custom=None):
def __init__(self, collection, allow_custom=None, items_per_page=5000):
if allow_custom is None:
allow_custom_source = True
allow_custom_sink = False
@ -43,7 +48,7 @@ class TAXIICollectionStore(DataStoreMixin):
allow_custom_sink = allow_custom_source = allow_custom
super(TAXIICollectionStore, self).__init__(
source=TAXIICollectionSource(collection, allow_custom=allow_custom_source),
source=TAXIICollectionSource(collection, allow_custom=allow_custom_source, items_per_page=items_per_page),
sink=TAXIICollectionSink(collection, allow_custom=allow_custom_sink),
)
@ -144,9 +149,12 @@ class TAXIICollectionSource(DataSource):
collection (taxii2.Collection): TAXII Collection instance
allow_custom (bool): Whether to allow custom STIX content in the
objects retrieved from the TAXII Collection. Default: True
items_per_page (int): How many STIX objects to request per call
to the TAXII server. The value can be tuned, but the server may override
it if its internal limit is exceeded.
"""
def __init__(self, collection, allow_custom=True):
def __init__(self, collection, allow_custom=True, items_per_page=5000):
super(TAXIICollectionSource, self).__init__()
if not _taxii2_client:
raise ImportError("taxii2client library is required for usage of TAXIICollectionSource")
@ -167,6 +175,7 @@ class TAXIICollectionSource(DataSource):
)
self.allow_custom = allow_custom
self.items_per_page = items_per_page
def get(self, stix_id, version=None, _composite_filters=None):
"""Retrieve STIX object from local/remote STIX Collection
@ -286,8 +295,12 @@ class TAXIICollectionSource(DataSource):
taxii_filters_dict = dict((f.property, f.value) for f in taxii_filters)
# query TAXII collection
all_data = []
try:
all_data = self.collection.get_objects(**taxii_filters_dict).get('objects', [])
paged_request = tcv21.as_pages if isinstance(self.collection, tcv21.Collection) else tcv20.as_pages
for resource in paged_request(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):
all_data.extend(resource.get("objects", []))
# deduplicate data (before filtering as reduces wasted filtering)
all_data = deduplicate(all_data)

View File

@ -4,7 +4,7 @@ from medallion.filters.basic_filter import BasicFilter
import pytest
from requests.models import Response
from taxii2client.common import _filter_kwargs_to_query_params
from taxii2client.v20 import Collection
from taxii2client.v20 import MEDIA_TYPE_STIX_V20, Collection
import stix2
from stix2.datastore import DataSourceError
@ -34,12 +34,12 @@ class MockTAXIICollectionEndpoint(Collection):
{
"date_added": get_timestamp(),
"id": object["id"],
"media_type": "application/stix+json;version=2.1",
"media_type": "application/stix+json;version=2.0",
"version": object.get("modified", object.get("created", get_timestamp())),
},
)
def get_objects(self, **filter_kwargs):
def get_objects(self, accept=MEDIA_TYPE_STIX_V20, start=0, per_request=0, **filter_kwargs):
self._verify_can_read()
query_params = _filter_kwargs_to_query_params(filter_kwargs)
assert isinstance(query_params, dict)
@ -51,7 +51,12 @@ class MockTAXIICollectionEndpoint(Collection):
100,
)[0]
if objs:
return stix2.v20.Bundle(objects=objs)
resp = Response()
resp.status_code = 200
resp.headers["Content-Range"] = f"items 0-{len(objs)}/{len(objs)}"
resp.encoding = "utf-8"
resp._content = bytes(stix2.v20.Bundle(objects=objs).serialize(ensure_ascii=False), resp.encoding)
return resp
else:
resp = Response()
resp.status_code = 404

View File

@ -28,14 +28,14 @@ class MockTAXIICollectionEndpoint(Collection):
self._verify_can_write()
if isinstance(bundle, str):
bundle = json.loads(bundle)
for object in bundle.get("objects", []):
self.objects.append(object)
for obj in bundle.get("objects", []):
self.objects.append(obj)
self.manifests.append(
{
"date_added": get_timestamp(),
"id": object["id"],
"id": obj["id"],
"media_type": "application/stix+json;version=2.1",
"version": object.get("modified", object.get("created", get_timestamp())),
"version": obj.get("modified", obj.get("created", get_timestamp())),
},
)
@ -51,7 +51,10 @@ class MockTAXIICollectionEndpoint(Collection):
100,
)[0]
if objs:
return stix2.v21.Bundle(objects=objs)
return {
"objects": objs,
"more": False,
}
else:
resp = Response()
resp.status_code = 404
@ -75,7 +78,10 @@ class MockTAXIICollectionEndpoint(Collection):
else:
filtered_objects = []
if filtered_objects:
return stix2.v21.Bundle(objects=filtered_objects)
return {
"objects": filtered_objects,
"more": False,
}
else:
resp = Response()
resp.status_code = 404