Merge pull request #480 from emmanvg/479-pagination-taxii-datastore
Support Pagination in TAXII DataStore
pull/1/head
commit
ccf3b1a0fc
2
setup.py
2
setup.py
|
@ -60,7 +60,7 @@ setup(
|
||||||
'Bug Tracker': 'https://github.com/oasis-open/cti-python-stix2/issues/',
|
'Bug Tracker': 'https://github.com/oasis-open/cti-python-stix2/issues/',
|
||||||
},
|
},
|
||||||
extras_require={
|
extras_require={
|
||||||
'taxii': ['taxii2-client>=2.2.1'],
|
'taxii': ['taxii2-client>=2.3.0'],
|
||||||
'semantic': ['haversine', 'rapidfuzz'],
|
'semantic': ['haversine', 'rapidfuzz'],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
|
@ -12,6 +12,8 @@ from stix2.parsing import parse
|
||||||
from stix2.utils import deduplicate
|
from stix2.utils import deduplicate
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
from taxii2client import v20 as tcv20
|
||||||
|
from taxii2client import v21 as tcv21
|
||||||
from taxii2client.exceptions import ValidationError
|
from taxii2client.exceptions import ValidationError
|
||||||
_taxii2_client = True
|
_taxii2_client = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -33,9 +35,12 @@ class TAXIICollectionStore(DataStoreMixin):
|
||||||
side(retrieving data) and False for TAXIICollectionSink
|
side(retrieving data) and False for TAXIICollectionSink
|
||||||
side(pushing data). However, when parameter is supplied, it will
|
side(pushing data). However, when parameter is supplied, it will
|
||||||
be applied to both TAXIICollectionSource/Sink.
|
be applied to both TAXIICollectionSource/Sink.
|
||||||
|
items_per_page (int): How many STIX objects to request per call
|
||||||
|
to TAXII Server. The value can be tuned, but servers may override
|
||||||
|
if their internal limit is surpassed. Used by TAXIICollectionSource
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, collection, allow_custom=None):
|
def __init__(self, collection, allow_custom=None, items_per_page=5000):
|
||||||
if allow_custom is None:
|
if allow_custom is None:
|
||||||
allow_custom_source = True
|
allow_custom_source = True
|
||||||
allow_custom_sink = False
|
allow_custom_sink = False
|
||||||
|
@ -43,7 +48,7 @@ class TAXIICollectionStore(DataStoreMixin):
|
||||||
allow_custom_sink = allow_custom_source = allow_custom
|
allow_custom_sink = allow_custom_source = allow_custom
|
||||||
|
|
||||||
super(TAXIICollectionStore, self).__init__(
|
super(TAXIICollectionStore, self).__init__(
|
||||||
source=TAXIICollectionSource(collection, allow_custom=allow_custom_source),
|
source=TAXIICollectionSource(collection, allow_custom=allow_custom_source, items_per_page=items_per_page),
|
||||||
sink=TAXIICollectionSink(collection, allow_custom=allow_custom_sink),
|
sink=TAXIICollectionSink(collection, allow_custom=allow_custom_sink),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -144,9 +149,12 @@ class TAXIICollectionSource(DataSource):
|
||||||
collection (taxii2.Collection): TAXII Collection instance
|
collection (taxii2.Collection): TAXII Collection instance
|
||||||
allow_custom (bool): Whether to allow custom STIX content to be
|
allow_custom (bool): Whether to allow custom STIX content to be
|
||||||
added to the FileSystemSink. Default: True
|
added to the FileSystemSink. Default: True
|
||||||
|
items_per_page (int): How many STIX objects to request per call
|
||||||
|
to TAXII Server. The value can be tuned, but servers may override
|
||||||
|
if their internal limit is surpassed.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, collection, allow_custom=True):
|
def __init__(self, collection, allow_custom=True, items_per_page=5000):
|
||||||
super(TAXIICollectionSource, self).__init__()
|
super(TAXIICollectionSource, self).__init__()
|
||||||
if not _taxii2_client:
|
if not _taxii2_client:
|
||||||
raise ImportError("taxii2client library is required for usage of TAXIICollectionSource")
|
raise ImportError("taxii2client library is required for usage of TAXIICollectionSource")
|
||||||
|
@ -167,6 +175,7 @@ class TAXIICollectionSource(DataSource):
|
||||||
)
|
)
|
||||||
|
|
||||||
self.allow_custom = allow_custom
|
self.allow_custom = allow_custom
|
||||||
|
self.items_per_page = items_per_page
|
||||||
|
|
||||||
def get(self, stix_id, version=None, _composite_filters=None):
|
def get(self, stix_id, version=None, _composite_filters=None):
|
||||||
"""Retrieve STIX object from local/remote STIX Collection
|
"""Retrieve STIX object from local/remote STIX Collection
|
||||||
|
@ -286,8 +295,12 @@ class TAXIICollectionSource(DataSource):
|
||||||
taxii_filters_dict = dict((f.property, f.value) for f in taxii_filters)
|
taxii_filters_dict = dict((f.property, f.value) for f in taxii_filters)
|
||||||
|
|
||||||
# query TAXII collection
|
# query TAXII collection
|
||||||
|
all_data = []
|
||||||
try:
|
try:
|
||||||
all_data = self.collection.get_objects(**taxii_filters_dict).get('objects', [])
|
paged_request = tcv21.as_pages if isinstance(self.collection, tcv21.Collection) else tcv20.as_pages
|
||||||
|
|
||||||
|
for resource in paged_request(self.collection.get_objects, per_request=self.items_per_page, **taxii_filters_dict):
|
||||||
|
all_data.extend(resource.get("objects", []))
|
||||||
|
|
||||||
# deduplicate data (before filtering as reduces wasted filtering)
|
# deduplicate data (before filtering as reduces wasted filtering)
|
||||||
all_data = deduplicate(all_data)
|
all_data = deduplicate(all_data)
|
||||||
|
|
|
@ -4,7 +4,7 @@ from medallion.filters.basic_filter import BasicFilter
|
||||||
import pytest
|
import pytest
|
||||||
from requests.models import Response
|
from requests.models import Response
|
||||||
from taxii2client.common import _filter_kwargs_to_query_params
|
from taxii2client.common import _filter_kwargs_to_query_params
|
||||||
from taxii2client.v20 import Collection
|
from taxii2client.v20 import MEDIA_TYPE_STIX_V20, Collection
|
||||||
|
|
||||||
import stix2
|
import stix2
|
||||||
from stix2.datastore import DataSourceError
|
from stix2.datastore import DataSourceError
|
||||||
|
@ -34,12 +34,12 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
{
|
{
|
||||||
"date_added": get_timestamp(),
|
"date_added": get_timestamp(),
|
||||||
"id": object["id"],
|
"id": object["id"],
|
||||||
"media_type": "application/stix+json;version=2.1",
|
"media_type": "application/stix+json;version=2.0",
|
||||||
"version": object.get("modified", object.get("created", get_timestamp())),
|
"version": object.get("modified", object.get("created", get_timestamp())),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_objects(self, **filter_kwargs):
|
def get_objects(self, accept=MEDIA_TYPE_STIX_V20, start=0, per_request=0, **filter_kwargs):
|
||||||
self._verify_can_read()
|
self._verify_can_read()
|
||||||
query_params = _filter_kwargs_to_query_params(filter_kwargs)
|
query_params = _filter_kwargs_to_query_params(filter_kwargs)
|
||||||
assert isinstance(query_params, dict)
|
assert isinstance(query_params, dict)
|
||||||
|
@ -51,7 +51,12 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
100,
|
100,
|
||||||
)[0]
|
)[0]
|
||||||
if objs:
|
if objs:
|
||||||
return stix2.v20.Bundle(objects=objs)
|
resp = Response()
|
||||||
|
resp.status_code = 200
|
||||||
|
resp.headers["Content-Range"] = f"items 0-{len(objs)}/{len(objs)}"
|
||||||
|
resp.encoding = "utf-8"
|
||||||
|
resp._content = bytes(stix2.v20.Bundle(objects=objs).serialize(ensure_ascii=False), resp.encoding)
|
||||||
|
return resp
|
||||||
else:
|
else:
|
||||||
resp = Response()
|
resp = Response()
|
||||||
resp.status_code = 404
|
resp.status_code = 404
|
||||||
|
|
|
@ -28,14 +28,14 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
self._verify_can_write()
|
self._verify_can_write()
|
||||||
if isinstance(bundle, str):
|
if isinstance(bundle, str):
|
||||||
bundle = json.loads(bundle)
|
bundle = json.loads(bundle)
|
||||||
for object in bundle.get("objects", []):
|
for obj in bundle.get("objects", []):
|
||||||
self.objects.append(object)
|
self.objects.append(obj)
|
||||||
self.manifests.append(
|
self.manifests.append(
|
||||||
{
|
{
|
||||||
"date_added": get_timestamp(),
|
"date_added": get_timestamp(),
|
||||||
"id": object["id"],
|
"id": obj["id"],
|
||||||
"media_type": "application/stix+json;version=2.1",
|
"media_type": "application/stix+json;version=2.1",
|
||||||
"version": object.get("modified", object.get("created", get_timestamp())),
|
"version": obj.get("modified", obj.get("created", get_timestamp())),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -51,7 +51,10 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
100,
|
100,
|
||||||
)[0]
|
)[0]
|
||||||
if objs:
|
if objs:
|
||||||
return stix2.v21.Bundle(objects=objs)
|
return {
|
||||||
|
"objects": objs,
|
||||||
|
"more": False,
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
resp = Response()
|
resp = Response()
|
||||||
resp.status_code = 404
|
resp.status_code = 404
|
||||||
|
@ -75,7 +78,10 @@ class MockTAXIICollectionEndpoint(Collection):
|
||||||
else:
|
else:
|
||||||
filtered_objects = []
|
filtered_objects = []
|
||||||
if filtered_objects:
|
if filtered_objects:
|
||||||
return stix2.v21.Bundle(objects=filtered_objects)
|
return {
|
||||||
|
"objects": filtered_objects,
|
||||||
|
"more": False,
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
resp = Response()
|
resp = Response()
|
||||||
resp.status_code = 404
|
resp.status_code = 404
|
||||||
|
|
Loading…
Reference in New Issue