Merge pull request #228 from chisholm/multi_version_filesystem_store

Multi version filesystem store, take 2
master
Chris Lenk 2018-12-03 07:29:07 -05:00 committed by GitHub
commit 522e9cedd0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
57 changed files with 1163 additions and 282 deletions

View File

@ -3,13 +3,405 @@ Python STIX 2.0 FileSystem Source/Sink
"""
import errno
import json
import os
import stat
import pytz
import six
from stix2.base import _STIXBase
from stix2.core import Bundle, parse
from stix2.datastore import DataSink, DataSource, DataStoreMixin
from stix2.datastore.filters import Filter, FilterSet, apply_common_filters
from stix2.utils import deduplicate, get_class_hierarchy_names
from stix2.utils import get_type_from_id, is_marking
def _timestamp2filename(timestamp):
"""
Encapsulates a way to create unique filenames based on an object's
"modified" property value. This should not include an extension.
:param timestamp: A timestamp, as a datetime.datetime object.
"""
# Different times will only produce different file names if all timestamps
# are in the same time zone! So if timestamp is timezone-aware convert
# to UTC just to be safe. If naive, just use as-is.
if timestamp.tzinfo is not None:
timestamp = timestamp.astimezone(pytz.utc)
return timestamp.strftime("%Y%m%d%H%M%S%f")
class AuthSet(object):
"""
Represents either a whitelist or blacklist of values, where/what we
must/must not search to find objects which match a query. (Maybe "AuthSet"
isn't the right name, but determining authorization is a typical context in
which black/white lists are used.)
The set may be empty. For a whitelist, this means you mustn't search
anywhere, which means the query was impossible to match, so you can skip
searching altogether. For a blacklist, this means nothing is excluded
and you must search everywhere.
"""
BLACK = 0
WHITE = 1
def __init__(self, allowed, prohibited):
"""
Initialize this AuthSet from the given sets of allowed and/or
prohibited values. The type of set (black or white) is determined
from the allowed and/or prohibited values given.
:param allowed: A set of allowed values (or None if no allow filters
were found in the query)
:param prohibited: A set of prohibited values (not None)
"""
if allowed is None:
self.__values = prohibited
self.__type = AuthSet.BLACK
else:
# There was at least one allow filter, so create a whitelist. But
# any matching prohibited values create a combination of conditions
# which can never match. So exclude those.
self.__values = allowed - prohibited
self.__type = AuthSet.WHITE
@property
def values(self):
"""
Get the values in this white/blacklist, as a set.
"""
return self.__values
@property
def auth_type(self):
"""
Get the type of set: AuthSet.WHITE or AuthSet.BLACK.
"""
return self.__type
def __repr__(self):
return "{}list: {}".format(
"white" if self.auth_type == AuthSet.WHITE else "black",
self.values
)
# A fixed, reusable AuthSet which accepts anything. It came in handy.
_AUTHSET_ANY = AuthSet(None, set())
def _update_allow(allow_set, value):
"""
Updates the given set of "allow" values. The first time an update to the
set occurs, the value(s) are added. Thereafter, since all filters are
implicitly AND'd, the given values are intersected with the existing allow
set, which may remove values. At the end, it may even wind up empty.
:param allow_set: The allow set, or None
:param value: The value(s) to add (single value, or iterable of values)
:return: The updated allow set (not None)
"""
adding_seq = hasattr(value, "__iter__") and \
not isinstance(value, six.string_types)
if allow_set is None:
allow_set = set()
if adding_seq:
allow_set.update(value)
else:
allow_set.add(value)
else:
# strangely, the "&=" operator requires a set on the RHS
# whereas the method allows any iterable.
if adding_seq:
allow_set.intersection_update(value)
else:
allow_set.intersection_update({value})
return allow_set
def _find_search_optimizations(filters):
    """
    Scan a query for "type" and "id" filters and turn them into white/black
    lists which let the filesystem search skip directories and files.

    :param filters: An iterable of filter objects representing a query
    :return: A 2-tuple of AuthSet objects: the first is for object types, and
        the second is for object IDs.
    """
    # What is allowed and what is prohibited are gathered independently and
    # only combined at the end. None means "no allow filter seen so far".
    type_allow = None
    id_allow = None
    type_deny = set()
    id_deny = set()

    for flt in filters:
        if flt.property == "type":
            if flt.op == "!=":
                type_deny.add(flt.value)
            elif flt.op in ("=", "in"):
                type_allow = _update_allow(type_allow, flt.value)

        elif flt.property == "id":
            if flt.op == "!=":
                id_deny.add(flt.value)
            elif flt.op == "=":
                # An ID embeds its object type, so allowing an ID also
                # constrains the allowed types.
                id_allow = _update_allow(id_allow, flt.value)
                type_allow = _update_allow(
                    type_allow, get_type_from_id(flt.value)
                )
            elif flt.op == "in":
                id_allow = _update_allow(id_allow, flt.value)
                type_allow = _update_allow(
                    type_allow,
                    (get_type_from_id(one_id) for one_id in flt.value)
                )

    type_auth = AuthSet(type_allow, type_deny)
    id_auth = AuthSet(id_allow, id_deny)

    # Second pass, run on the final whitelists (which already have the
    # prohibitions applied): keep only types that some allowed ID has, then
    # keep only IDs whose type is still allowed.
    both_white = (type_auth.auth_type == AuthSet.WHITE
                  and id_auth.auth_type == AuthSet.WHITE)
    if both_white:
        id_types = set(get_type_from_id(one_id) for one_id in id_auth.values)
        type_auth.values.intersection_update(id_types)

        kept_ids = [
            one_id for one_id in id_auth.values
            if get_type_from_id(one_id) in type_auth.values
        ]
        id_auth.values.intersection_update(kept_ids)

    return type_auth, id_auth
def _get_matching_dir_entries(parent_dir, auth_set, st_mode_test=None, ext=""):
    """
    List the entries of one directory (not its subdirectories) which satisfy
    a black/whitelist on names, and optionally an entry-type test and an
    extension.

    :param parent_dir: The directory to search
    :param auth_set: an AuthSet instance, which represents a black/whitelist
        filter on filenames
    :param st_mode_test: A callable used to filter on the kind of directory
        entry (e.g. only directories, or only regular files). It receives the
        st_mode field of the entry's stat() result and returns True to keep
        the entry, False to drop it. The stat module functions, such as
        stat.S_ISREG, fit directly. If None, entries are not stat()'ed and
        no filtering on entry kind is done.
    :param ext: Ties auth_set values to directory entry names and filters by
        extension: it is appended to auth_set values to form entry names, and
        stripped from entry names to form auth_set values, so auth_set only
        needs to hold "basenames". Only entries with this extension are
        returned. If not empty, it MUST start with ".". The default, the
        empty string, compares names directly and applies no extension
        filtering.
    :return: A list of matching directory entries, as bare names without any
        path info.
    :raises OSError: If there are errors accessing directory contents or
        stat()'ing files
    """
    def entry_qualifies(name):
        # Without a type test every entry qualifies, and no stat() is done.
        if not st_mode_test:
            return True
        try:
            return st_mode_test(os.stat(os.path.join(parent_dir, name)).st_mode)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
            # file-not-found is ok: the entry just doesn't qualify
            return False

    if auth_set.auth_type == AuthSet.WHITE:
        # Whitelist: probe each candidate name directly rather than listing
        # the directory.
        candidates = (basename + ext for basename in auth_set.values)
        return [name for name in candidates if entry_qualifies(name)]

    # Blacklist: list the directory and discard what is excluded.
    matches = []
    for entry in os.listdir(parent_dir):
        if ext:
            basename, entry_ext = os.path.splitext(entry)
            if entry_ext != ext:
                continue
        else:
            basename = entry

        if basename not in auth_set.values and entry_qualifies(entry):
            matches.append(entry)

    return matches
def _check_object_from_file(query, filepath, allow_custom, version):
"""
Read a STIX object from the given file, and check it against the given
filters.
:param query: Iterable of filters
:param filepath: Path to file to read
:param allow_custom: Whether to allow custom properties as well unknown
custom objects.
:param version: Which STIX2 version to use. (e.g. "2.0", "2.1"). If None,
use latest version.
:return: The (parsed) STIX object, if the object passes the filters. If
not, None is returned.
:raises TypeError: If the file had invalid JSON
:raises IOError: If there are problems opening/reading the file
:raises stix2.exceptions.STIXError: If there were problems creating a STIX
object from the JSON
"""
try:
with open(filepath, "r") as f:
stix_json = json.load(f)
except ValueError: # not a JSON file
raise TypeError(
"STIX JSON object at '{0}' could either not be parsed "
"to JSON or was not valid STIX JSON".format(
filepath))
stix_obj = parse(stix_json, allow_custom, version)
if stix_obj["type"] == "bundle":
stix_obj = stix_obj["objects"][0]
# check against other filters, add if match
result = next(apply_common_filters([stix_obj], query), None)
return result
def _search_versioned(query, type_path, auth_ids, allow_custom, version):
    """
    Search the directory holding the data of one versioned STIX object type
    (i.e. not markings), and collect the objects which match the query.

    :param query: The query to match against
    :param type_path: The directory with type-specific STIX object files
    :param auth_ids: Search optimization based on object ID
    :param allow_custom: Whether to allow custom properties as well as
        unknown custom objects.
    :param version: Which STIX2 version to use. (e.g. "2.0", "2.1"). If None,
        use latest version.
    :return: A list of all matching objects
    :raises TypeError, stix2.exceptions.STIXError: If any objects had invalid
        content
    :raises IOError, OSError: If there were any problems opening/reading files
    """
    def candidate_paths():
        # Generated lazily, so each directory is listed only once the files
        # yielded before it have been processed.
        id_dirs = _get_matching_dir_entries(type_path, auth_ids,
                                            stat.S_ISDIR)
        for id_dir in id_dirs:
            id_path = os.path.join(type_path, id_dir)

            # Every regular .json file in an ID directory is one version.
            # The general-purpose matcher is used with an accept-everything
            # AuthSet, for the sake of its file type check.
            version_files = _get_matching_dir_entries(id_path, _AUTHSET_ANY,
                                                      stat.S_ISREG, ".json")
            for version_file in version_files:
                yield os.path.join(id_path, version_file)

        # Backward compatibility: plain files named after object IDs,
        # directly inside the type directory.
        id_files = _get_matching_dir_entries(type_path, auth_ids,
                                             stat.S_ISREG, ".json")
        for id_file in id_files:
            yield os.path.join(type_path, id_file)

    matches = []
    for candidate in candidate_paths():
        try:
            stix_obj = _check_object_from_file(query, candidate,
                                               allow_custom, version)
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            # file-not-found is ok, just skip
            continue

        if stix_obj:
            matches.append(stix_obj)

    return matches
def _search_markings(query, markings_path, auth_ids, allow_custom, version):
    """
    Search the directory holding markings data, and collect the markings
    which match the query.

    :param query: The query to match against
    :param markings_path: The directory with STIX markings files
    :param auth_ids: Search optimization based on object ID
    :param allow_custom: Whether to allow custom properties as well as
        unknown custom objects.
    :param version: Which STIX2 version to use. (e.g. "2.0", "2.1"). If None,
        use latest version.
    :return: A list of all matching objects
    :raises TypeError, stix2.exceptions.STIXError: If any objects had invalid
        content
    :raises IOError, OSError: If there were any problems opening/reading files
    """
    matches = []

    # Markings are stored as regular .json files named after their IDs,
    # directly inside the markings directory.
    marking_files = _get_matching_dir_entries(markings_path, auth_ids,
                                              stat.S_ISREG, ".json")
    for marking_file in marking_files:
        marking_path = os.path.join(markings_path, marking_file)
        try:
            marking = _check_object_from_file(query, marking_path,
                                              allow_custom, version)
        except IOError as e:
            if e.errno == errno.ENOENT:
                # file-not-found is ok, just skip
                continue
            raise

        if marking:
            matches.append(marking)

    return matches
class FileSystemStore(DataStoreMixin):
@ -77,15 +469,23 @@ class FileSystemSink(DataSink):
def _check_path_and_write(self, stix_obj):
"""Write the given STIX object to a file in the STIX file directory.
"""
path = os.path.join(self._stix_dir, stix_obj["type"], stix_obj["id"] + ".json")
type_dir = os.path.join(self._stix_dir, stix_obj["type"])
if is_marking(stix_obj):
filename = stix_obj["id"]
obj_dir = type_dir
else:
filename = _timestamp2filename(stix_obj["modified"])
obj_dir = os.path.join(type_dir, stix_obj["id"])
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))
file_path = os.path.join(obj_dir, filename + ".json")
if not os.path.exists(obj_dir):
os.makedirs(obj_dir)
if self.bundlify:
stix_obj = Bundle(stix_obj, allow_custom=self.allow_custom)
with open(path, "w") as f:
with open(file_path, "w") as f:
f.write(str(stix_obj))
def add(self, stix_data=None, version=None):
@ -104,25 +504,18 @@ class FileSystemSink(DataSink):
the Bundle contained, but not the Bundle itself.
"""
if any(x in ('STIXDomainObject', 'STIXRelationshipObject', 'MarkingDefinition')
for x in get_class_hierarchy_names(stix_data)):
if isinstance(stix_data, Bundle):
# recursively add individual STIX objects
for stix_obj in stix_data.get("objects", []):
self.add(stix_obj, version=version)
elif isinstance(stix_data, _STIXBase):
# adding python STIX object
self._check_path_and_write(stix_data)
elif isinstance(stix_data, (str, dict)):
stix_data = parse(stix_data, allow_custom=self.allow_custom, version=version)
if stix_data["type"] == "bundle":
# extract STIX objects
for stix_obj in stix_data.get("objects", []):
self.add(stix_obj, version=version)
else:
# adding json-formatted STIX
self._check_path_and_write(stix_data,)
elif isinstance(stix_data, Bundle):
# recursively add individual STIX objects
for stix_obj in stix_data.get("objects", []):
self.add(stix_obj, version=version)
self.add(stix_data, version=version)
elif isinstance(stix_data, list):
# recursively add individual STIX objects
@ -176,12 +569,15 @@ class FileSystemSource(DataSource):
a python STIX object and then returned
"""
query = [Filter("id", "=", stix_id)]
all_data = self.query(query=query, version=version, _composite_filters=_composite_filters)
all_data = self.all_versions(stix_id, version=version, _composite_filters=_composite_filters)
if all_data:
stix_obj = sorted(all_data, key=lambda k: k['modified'])[0]
if is_marking(stix_id):
# Markings are unversioned; there shouldn't be more than one
# result.
stix_obj = all_data[0]
else:
stix_obj = sorted(all_data, key=lambda k: k['modified'])[-1]
else:
stix_obj = None
@ -206,7 +602,8 @@ class FileSystemSource(DataSource):
a python STIX objects and then returned
"""
return [self.get(stix_id=stix_id, version=version, _composite_filters=_composite_filters)]
query = [Filter("id", "=", stix_id)]
return self.query(query, version=version, _composite_filters=_composite_filters)
def query(self, query=None, version=None, _composite_filters=None):
"""Search and retrieve STIX objects based on the complete query.
@ -239,105 +636,20 @@ class FileSystemSource(DataSource):
if _composite_filters:
query.add(_composite_filters)
# extract any filters that are for "type" or "id" , as we can then do
# filtering before reading in the STIX objects. A STIX 'type' filter
# can reduce the query to a single sub-directory. A STIX 'id' filter
# allows for the fast checking of the file names versus loading it.
file_filters = self._parse_file_filters(query)
auth_types, auth_ids = _find_search_optimizations(query)
# establish which subdirectories can be avoided in query
# by decluding as many as possible. A filter with "type" as the property
# means that certain STIX object types can be ruled out, and thus
# the corresponding subdirectories as well
include_paths = []
declude_paths = []
if "type" in [filter.property for filter in file_filters]:
for filter in file_filters:
if filter.property == "type":
if filter.op == "=":
include_paths.append(os.path.join(self._stix_dir, filter.value))
elif filter.op == "!=":
declude_paths.append(os.path.join(self._stix_dir, filter.value))
type_dirs = _get_matching_dir_entries(self._stix_dir, auth_types,
stat.S_ISDIR)
for type_dir in type_dirs:
type_path = os.path.join(self._stix_dir, type_dir)
if type_dir == "marking-definition":
type_results = _search_markings(query, type_path, auth_ids,
self.allow_custom, version)
else:
# have to walk entire STIX directory
include_paths.append(self._stix_dir)
type_results = _search_versioned(query, type_path, auth_ids,
self.allow_custom, version)
# if a user specifies a "type" filter like "type = <stix-object_type>",
# the filter is reducing the search space to single stix object types
# (and thus single directories). This makes such a filter more powerful
# than "type != <stix-object_type>" bc the latter is subtracting
# only one type of stix object type (and thus only one directory),
# As such the former type of filters are given preference over the latter;
# i.e. if both exist in a query, that latter type will be ignored
all_data.extend(type_results)
if not include_paths:
# user has specified types that are not wanted (i.e. "!=")
# so query will look in all STIX directories that are not
# the specified type. Compile correct dir paths
for dir in os.listdir(self._stix_dir):
if os.path.abspath(os.path.join(self._stix_dir, dir)) not in declude_paths:
include_paths.append(os.path.abspath(os.path.join(self._stix_dir, dir)))
# grab stix object ID as well - if present in filters, as
# may forgo the loading of STIX content into memory
if "id" in [filter.property for filter in file_filters]:
for filter in file_filters:
if filter.property == "id" and filter.op == "=":
id_ = filter.value
break
else:
id_ = None
else:
id_ = None
# now iterate through all STIX objs
for path in include_paths:
for root, dirs, files in os.walk(path):
for file_ in files:
if not file_.endswith(".json"):
# skip non '.json' files as more likely to be random non-STIX files
continue
if not id_ or id_ == file_.split(".")[0]:
# have to load into memory regardless to evaluate other filters
try:
stix_obj = json.load(open(os.path.join(root, file_)))
if stix_obj["type"] == "bundle":
stix_obj = stix_obj["objects"][0]
# naive STIX type checking
stix_obj["type"]
stix_obj["id"]
except (ValueError, KeyError): # likely not a JSON file
raise TypeError("STIX JSON object at '{0}' could either not be parsed to "
"JSON or was not valid STIX JSON".format(os.path.join(root, file_)))
# check against other filters, add if match
all_data.extend(apply_common_filters([stix_obj], query))
all_data = deduplicate(all_data)
# parse python STIX objects from the STIX object dicts
stix_objs = [parse(stix_obj_dict, allow_custom=self.allow_custom, version=version) for stix_obj_dict in all_data]
return stix_objs
def _parse_file_filters(self, query):
"""Extract STIX common filters.
Possibly speeds up querying STIX objects from the file system.
Extracts filters that are for the "id" and "type" property of
a STIX object. As the file directory is organized by STIX
object type with filenames that are equivalent to the STIX
object ID, these filters can be used first to reduce the
search space of a FileSystemStore (or FileSystemSink).
"""
file_filters = []
for filter_ in query:
if filter_.property == "id" or filter_.property == "type":
file_filters.append(filter_)
return file_filters
return all_data

View File

@ -6,18 +6,16 @@ Filters for Python STIX 2.0 DataSources, DataSinks, DataStores
import collections
from datetime import datetime
from stix2.utils import format_datetime
import six
import stix2.utils
"""Supported filter operations"""
FILTER_OPS = ['=', '!=', 'in', '>', '<', '>=', '<=', 'contains']
"""Supported filter value types"""
FILTER_VALUE_TYPES = [bool, dict, float, int, list, str, tuple]
try:
FILTER_VALUE_TYPES.append(unicode)
except NameError:
# Python 3 doesn't need to worry about unicode
pass
FILTER_VALUE_TYPES = (bool, dict, float, int, list, tuple, six.string_types,
datetime)
def _check_filter_components(prop, op, value):
@ -36,7 +34,7 @@ def _check_filter_components(prop, op, value):
# check filter operator is supported
raise ValueError("Filter operator '%s' not supported for specified property: '%s'" % (op, prop))
if type(value) not in FILTER_VALUE_TYPES:
if not isinstance(value, FILTER_VALUE_TYPES):
# check filter value type is supported
raise TypeError("Filter value of '%s' is not supported. The type must be a Python immutable type or dictionary" % type(value))
@ -69,10 +67,6 @@ class Filter(collections.namedtuple("Filter", ['property', 'op', 'value'])):
if isinstance(value, list):
value = tuple(value)
if isinstance(value, datetime):
# if value is a datetime obj, convert to str
value = format_datetime(value)
_check_filter_components(prop, op, value)
self = super(Filter, cls).__new__(cls, prop, op, value)
@ -88,31 +82,33 @@ class Filter(collections.namedtuple("Filter", ['property', 'op', 'value'])):
True if property matches the filter,
False otherwise.
"""
if isinstance(stix_obj_property, datetime):
# if a datetime obj, convert to str format before comparison
# NOTE: this check seems like it should be done upstream
# but will put here for now
stix_obj_property = format_datetime(stix_obj_property)
# If filtering on a timestamp property and the filter value is a string,
# try to convert the filter value to a datetime instance.
if isinstance(stix_obj_property, datetime) and \
isinstance(self.value, six.string_types):
filter_value = stix2.utils.parse_into_datetime(self.value)
else:
filter_value = self.value
if self.op == "=":
return stix_obj_property == self.value
return stix_obj_property == filter_value
elif self.op == "!=":
return stix_obj_property != self.value
return stix_obj_property != filter_value
elif self.op == "in":
return stix_obj_property in self.value
return stix_obj_property in filter_value
elif self.op == "contains":
if isinstance(self.value, dict):
return self.value in stix_obj_property.values()
if isinstance(filter_value, dict):
return filter_value in stix_obj_property.values()
else:
return self.value in stix_obj_property
return filter_value in stix_obj_property
elif self.op == ">":
return stix_obj_property > self.value
return stix_obj_property > filter_value
elif self.op == "<":
return stix_obj_property < self.value
return stix_obj_property < filter_value
elif self.op == ">=":
return stix_obj_property >= self.value
return stix_obj_property >= filter_value
elif self.op == "<=":
return stix_obj_property <= self.value
return stix_obj_property <= filter_value
else:
raise ValueError("Filter operator: {0} not supported for specified property: {1}".format(self.op, self.property))

View File

@ -10,6 +10,7 @@ from stix2.base import _STIXBase
from stix2.core import Bundle, parse
from stix2.datastore import DataSink, DataSource, DataStoreMixin
from stix2.datastore.filters import FilterSet, apply_common_filters
from stix2.utils import is_marking
def _add(store, stix_data=None, allow_custom=True, version=None):
@ -43,7 +44,7 @@ def _add(store, stix_data=None, allow_custom=True, version=None):
# Map ID directly to the object, if it is a marking. Otherwise,
# map to a family, so we can track multiple versions.
if _is_marking(stix_obj):
if is_marking(stix_obj):
store._data[stix_obj["id"]] = stix_obj
else:
@ -56,22 +57,6 @@ def _add(store, stix_data=None, allow_custom=True, version=None):
obj_family.add(stix_obj)
def _is_marking(obj_or_id):
    """Tell whether an object, or an object ID, belongs to a marking
    definition.

    :param obj_or_id: A STIX object or object ID as a string.
    :return: True if a marking definition, False otherwise.
    """
    # Objects (STIX objects or plain dicts) carry their ID under "id";
    # anything else is taken to be the ID itself.
    is_object = isinstance(obj_or_id, (_STIXBase, dict))
    id_ = obj_or_id["id"] if is_object else obj_or_id

    return id_.startswith("marking-definition--")
class _ObjectFamily(object):
"""
An internal implementation detail of memory sources/sinks/stores.
@ -255,7 +240,7 @@ class MemorySource(DataSource):
"""
stix_obj = None
if _is_marking(stix_id):
if is_marking(stix_id):
stix_obj = self._data.get(stix_id)
else:
object_family = self._data.get(stix_id)
@ -291,7 +276,7 @@ class MemorySource(DataSource):
"""
results = []
stix_objs_to_filter = None
if _is_marking(stix_id):
if is_marking(stix_id):
stix_obj = self._data.get(stix_id)
if stix_obj:
stix_objs_to_filter = [stix_obj]

View File

@ -0,0 +1,11 @@
{
"type": "identity",
"id": "identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5",
"created": "2017-06-01T00:00:00.000Z",
"modified": "2018-11-01T23:24:48.446Z",
"name": "The MITRE Corporation",
"identity_class": "organization",
"labels": [
"version two"
]
}

View File

@ -1,34 +1,27 @@
{
"id": "bundle--f64de948-7067-4534-8018-85f03d470625",
"objects": [
{
"created": "2017-05-31T21:32:58.226477Z",
"type": "malware",
"id": "malware--6b616fc1-1505-48e3-8b2c-0d19337bff38",
"created_by_ref": "identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5",
"created": "2017-05-31T21:32:58.226Z",
"modified": "2018-11-16T22:54:20.390Z",
"name": "Rover",
"description": "Rover is malware suspected of being used for espionage purposes. It was used in 2015 in a targeted email sent to an Indian Ambassador to Afghanistan.[[Citation: Palo Alto Rover]]",
"labels": [
"version four"
],
"external_references": [
{
"external_id": "S0090",
"source_name": "mitre-attack",
"url": "https://attack.mitre.org/wiki/Software/S0090"
"url": "https://attack.mitre.org/wiki/Software/S0090",
"external_id": "S0090"
},
{
"description": "Ray, V., Hayashi, K. (2016, February 29). New Malware \u2018Rover\u2019 Targets Indian Ambassador to Afghanistan. Retrieved February 29, 2016.",
"source_name": "Palo Alto Rover",
"description": "Ray, V., Hayashi, K. (2016, February 29). New Malware \u2018Rover\u2019 Targets Indian Ambassador to Afghanistan. Retrieved February 29, 2016.",
"url": "http://researchcenter.paloaltonetworks.com/2016/02/new-malware-rover-targets-indian-ambassador-to-afghanistan/"
}
],
"id": "malware--6b616fc1-1505-48e3-8b2c-0d19337bff38",
"labels": [
"malware"
],
"modified": "2017-05-31T21:32:58.226477Z",
"name": "Rover",
"object_marking_refs": [
"marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168"
],
"type": "malware"
}
],
"spec_version": "2.0",
"type": "bundle"
]
}

View File

@ -0,0 +1,34 @@
{
"id": "bundle--f64de948-7067-4534-8018-85f03d470625",
"objects": [
{
"created": "2017-05-31T21:32:58.226477Z",
"created_by_ref": "identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5",
"description": "Rover is malware suspected of being used for espionage purposes. It was used in 2015 in a targeted email sent to an Indian Ambassador to Afghanistan.[[Citation: Palo Alto Rover]]",
"external_references": [
{
"external_id": "S0090",
"source_name": "mitre-attack",
"url": "https://attack.mitre.org/wiki/Software/S0090"
},
{
"description": "Ray, V., Hayashi, K. (2016, February 29). New Malware \u2018Rover\u2019 Targets Indian Ambassador to Afghanistan. Retrieved February 29, 2016.",
"source_name": "Palo Alto Rover",
"url": "http://researchcenter.paloaltonetworks.com/2016/02/new-malware-rover-targets-indian-ambassador-to-afghanistan/"
}
],
"id": "malware--6b616fc1-1505-48e3-8b2c-0d19337bff38",
"labels": [
"malware"
],
"modified": "2017-05-31T21:32:58.226477Z",
"name": "Rover",
"object_marking_refs": [
"marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168"
],
"type": "malware"
}
],
"spec_version": "2.0",
"type": "bundle"
}

View File

@ -0,0 +1,27 @@
{
"type": "malware",
"id": "malware--6b616fc1-1505-48e3-8b2c-0d19337bff38",
"created_by_ref": "identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5",
"created": "2017-05-31T21:32:58.226Z",
"modified": "2018-11-01T23:24:48.456Z",
"name": "Rover",
"description": "Rover is malware suspected of being used for espionage purposes. It was used in 2015 in a targeted email sent to an Indian Ambassador to Afghanistan.[[Citation: Palo Alto Rover]]",
"labels": [
"version two"
],
"external_references": [
{
"source_name": "mitre-attack",
"url": "https://attack.mitre.org/wiki/Software/S0090",
"external_id": "S0090"
},
{
"source_name": "Palo Alto Rover",
"description": "Ray, V., Hayashi, K. (2016, February 29). New Malware \u2018Rover\u2019 Targets Indian Ambassador to Afghanistan. Retrieved February 29, 2016.",
"url": "http://researchcenter.paloaltonetworks.com/2016/02/new-malware-rover-targets-indian-ambassador-to-afghanistan/"
}
],
"object_marking_refs": [
"marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168"
]
}

View File

@ -0,0 +1,27 @@
{
"type": "malware",
"id": "malware--6b616fc1-1505-48e3-8b2c-0d19337bff38",
"created_by_ref": "identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5",
"created": "2017-05-31T21:32:58.226Z",
"modified": "2018-11-01T23:24:48.457Z",
"name": "Rover",
"description": "Rover is malware suspected of being used for espionage purposes. It was used in 2015 in a targeted email sent to an Indian Ambassador to Afghanistan.[[Citation: Palo Alto Rover]]",
"labels": [
"version three"
],
"external_references": [
{
"source_name": "mitre-attack",
"url": "https://attack.mitre.org/wiki/Software/S0090",
"external_id": "S0090"
},
{
"source_name": "Palo Alto Rover",
"description": "Ray, V., Hayashi, K. (2016, February 29). New Malware \u2018Rover\u2019 Targets Indian Ambassador to Afghanistan. Retrieved February 29, 2016.",
"url": "http://researchcenter.paloaltonetworks.com/2016/02/new-malware-rover-targets-indian-ambassador-to-afghanistan/"
}
],
"object_marking_refs": [
"marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168"
]
}

View File

@ -1,12 +1,21 @@
import datetime
import errno
import json
import os
import shutil
import stat
import pytest
import pytz
from stix2 import (Bundle, Campaign, CustomObject, FileSystemSink,
FileSystemSource, FileSystemStore, Filter, Identity,
Indicator, Malware, Relationship, properties)
Indicator, Malware, MarkingDefinition, Relationship,
TLPMarking, parse, properties)
from stix2.datastore.filesystem import (AuthSet, _find_search_optimizations,
_get_matching_dir_entries,
_timestamp2filename)
from stix2.exceptions import STIXError
from stix2.test.constants import (CAMPAIGN_ID, CAMPAIGN_KWARGS, IDENTITY_ID,
IDENTITY_KWARGS, INDICATOR_ID,
INDICATOR_KWARGS, MALWARE_ID, MALWARE_KWARGS,
@ -97,7 +106,20 @@ def rel_fs_store():
yield fs
for o in stix_objs:
os.remove(os.path.join(FS_PATH, o.type, o.id + '.json'))
filepath = os.path.join(FS_PATH, o.type, o.id,
_timestamp2filename(o.modified) + '.json')
# Some test-scoped fixtures (e.g. fs_store) delete all campaigns, so by
# the time this module-scoped fixture tears itself down, it may find
# its campaigns already gone, which causes not-found errors.
try:
os.remove(filepath)
except OSError as e:
# 3 is the ERROR_PATH_NOT_FOUND windows error code. Which has an
# errno symbolic value, but not the windows meaning...
if e.errno in (errno.ENOENT, 3):
continue
raise
def test_filesystem_source_nonexistent_folder():
@ -127,32 +149,36 @@ def test_filesystem_source_bad_stix_file(fs_source, bad_stix_files):
# this tests handling of bad STIX json object
try:
fs_source.get("intrusion-set--test-non-stix")
except TypeError as e:
assert "intrusion-set--test-non-stix" in str(e)
assert "could either not be parsed to JSON or was not valid STIX JSON" in str(e)
except STIXError as e:
assert "Can't parse object with no 'type' property" in str(e)
def test_filesytem_source_get_object(fs_source):
# get object
def test_filesystem_source_get_object(fs_source):
# get (latest) object
mal = fs_source.get("malware--6b616fc1-1505-48e3-8b2c-0d19337bff38")
assert mal.id == "malware--6b616fc1-1505-48e3-8b2c-0d19337bff38"
assert mal.name == "Rover"
assert mal.modified == datetime.datetime(2018, 11, 16, 22, 54, 20, 390000,
pytz.utc)
def test_filesytem_source_get_nonexistent_object(fs_source):
def test_filesystem_source_get_nonexistent_object(fs_source):
ind = fs_source.get("indicator--6b616fc1-1505-48e3-8b2c-0d19337bff38")
assert ind is None
def test_filesytem_source_all_versions(fs_source):
# all versions - (currently not a true all versions call as FileSystem cant have multiple versions)
id_ = fs_source.get("identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5")
assert id_.id == "identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5"
assert id_.name == "The MITRE Corporation"
assert id_.type == "identity"
def test_filesystem_source_all_versions(fs_source):
ids = fs_source.all_versions(
"identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5"
)
assert len(ids) == 2
assert all(id_.id == "identity--c78cb6e5-0c4b-4611-8297-d1b8b55e40b5"
for id_ in ids)
assert all(id_.name == "The MITRE Corporation" for id_ in ids)
assert all(id_.type == "identity" for id_ in ids)
def test_filesytem_source_query_single(fs_source):
def test_filesystem_source_query_single(fs_source):
# query2
is_2 = fs_source.query([Filter("external_references.external_id", '=', "T1027")])
assert len(is_2) == 1
@ -174,6 +200,24 @@ def test_filesytem_source_query_multiple(fs_source):
assert len(is_1.external_references) == 4
def test_filesystem_source_backward_compatible(fs_source):
    """Query for an object version stored outside an "ID" directory.

    Filters on type, ID and the exact "modified" timestamp, and expects
    exactly one matching malware object, labelled "version four".
    """
    malware_id = "malware--6b616fc1-1505-48e3-8b2c-0d19337bff38"
    expected_modified = datetime.datetime(
        2018, 11, 16, 22, 54, 20, 390000, pytz.utc
    )

    query = [
        Filter("type", "=", "malware"),
        Filter("id", "=", malware_id),
        Filter("modified", "=", expected_modified),
    ]
    matches = fs_source.query(query)

    assert len(matches) == 1

    match = matches[0]
    assert match.type == "malware"
    assert match.id == malware_id
    assert match.modified == expected_modified
    assert match.labels == ["version four"]
def test_filesystem_sink_add_python_stix_object(fs_sink, fs_source):
# add python stix object
camp1 = Campaign(name="Hannibal",
@ -182,14 +226,16 @@ def test_filesystem_sink_add_python_stix_object(fs_sink, fs_source):
fs_sink.add(camp1)
assert os.path.exists(os.path.join(FS_PATH, "campaign", camp1.id + ".json"))
filepath = os.path.join(FS_PATH, "campaign", camp1.id,
_timestamp2filename(camp1.modified) + ".json")
assert os.path.exists(filepath)
camp1_r = fs_source.get(camp1.id)
assert camp1_r.id == camp1.id
assert camp1_r.name == "Hannibal"
assert "War Elephant" in camp1_r.aliases
os.remove(os.path.join(FS_PATH, "campaign", camp1_r.id + ".json"))
os.remove(filepath)
def test_filesystem_sink_add_stix_object_dict(fs_sink, fs_source):
@ -200,19 +246,30 @@ def test_filesystem_sink_add_stix_object_dict(fs_sink, fs_source):
"objective": "German and French Intelligence Services",
"aliases": ["Purple Robes"],
"id": "campaign--8e2e2d2b-17d4-4cbf-938f-98ee46b3cd3f",
"created": "2017-05-31T21:31:53.197755Z"
"created": "2017-05-31T21:31:53.197755Z",
"modified": "2017-05-31T21:31:53.197755Z"
}
fs_sink.add(camp2)
assert os.path.exists(os.path.join(FS_PATH, "campaign", camp2["id"] + ".json"))
# Need to get the exact "modified" timestamp which would have been
# in effect at the time the object was saved to the sink, which determines
# the filename it would have been saved as. It may not be exactly the same
# as what's in the dict, since the parsing process can enforce a precision
# constraint (e.g. truncate to milliseconds), which results in a slightly
# different name.
camp2obj = parse(camp2)
filepath = os.path.join(FS_PATH, "campaign", camp2obj["id"],
_timestamp2filename(camp2obj["modified"]) + ".json")
assert os.path.exists(filepath)
camp2_r = fs_source.get(camp2["id"])
assert camp2_r.id == camp2["id"]
assert camp2_r.name == camp2["name"]
assert "Purple Robes" in camp2_r.aliases
os.remove(os.path.join(FS_PATH, "campaign", camp2_r.id + ".json"))
os.remove(filepath)
def test_filesystem_sink_add_stix_bundle_dict(fs_sink, fs_source):
@ -228,53 +285,74 @@ def test_filesystem_sink_add_stix_bundle_dict(fs_sink, fs_source):
"objective": "Bulgarian, Albanian and Romanian Intelligence Services",
"aliases": ["Huns"],
"id": "campaign--b8f86161-ccae-49de-973a-4ca320c62478",
"created": "2017-05-31T21:31:53.197755Z"
"created": "2017-05-31T21:31:53.197755Z",
"modified": "2017-05-31T21:31:53.197755Z"
}
]
}
fs_sink.add(bund)
assert os.path.exists(os.path.join(FS_PATH, "campaign", bund["objects"][0]["id"] + ".json"))
camp_obj = parse(bund["objects"][0])
filepath = os.path.join(FS_PATH, "campaign", camp_obj["id"],
_timestamp2filename(camp_obj["modified"]) + ".json")
assert os.path.exists(filepath)
camp3_r = fs_source.get(bund["objects"][0]["id"])
assert camp3_r.id == bund["objects"][0]["id"]
assert camp3_r.name == bund["objects"][0]["name"]
assert "Huns" in camp3_r.aliases
os.remove(os.path.join(FS_PATH, "campaign", camp3_r.id + ".json"))
os.remove(filepath)
def test_filesystem_sink_add_json_stix_object(fs_sink, fs_source):
# add json-encoded stix obj
camp4 = '{"type": "campaign", "id":"campaign--6a6ca372-ba07-42cc-81ef-9840fc1f963d",'\
' "created":"2017-05-31T21:31:53.197755Z", "name": "Ghengis Khan", "objective": "China and Russian infrastructure"}'
' "created":"2017-05-31T21:31:53.197755Z",'\
' "modified":"2017-05-31T21:31:53.197755Z",'\
' "name": "Ghengis Khan", "objective": "China and Russian infrastructure"}'
fs_sink.add(camp4)
assert os.path.exists(os.path.join(FS_PATH, "campaign", "campaign--6a6ca372-ba07-42cc-81ef-9840fc1f963d" + ".json"))
camp4obj = parse(camp4)
filepath = os.path.join(FS_PATH, "campaign",
"campaign--6a6ca372-ba07-42cc-81ef-9840fc1f963d",
_timestamp2filename(camp4obj["modified"]) + ".json")
assert os.path.exists(filepath)
camp4_r = fs_source.get("campaign--6a6ca372-ba07-42cc-81ef-9840fc1f963d")
assert camp4_r.id == "campaign--6a6ca372-ba07-42cc-81ef-9840fc1f963d"
assert camp4_r.name == "Ghengis Khan"
os.remove(os.path.join(FS_PATH, "campaign", camp4_r.id + ".json"))
os.remove(filepath)
def test_filesystem_sink_json_stix_bundle(fs_sink, fs_source):
# add json-encoded stix bundle
bund2 = '{"type": "bundle", "id": "bundle--3d267103-8475-4d8f-b321-35ec6eccfa37",' \
' "spec_version": "2.0", "objects": [{"type": "campaign", "id": "campaign--2c03b8bf-82ee-433e-9918-ca2cb6e9534b",' \
' "created":"2017-05-31T21:31:53.197755Z", "name": "Spartacus", "objective": "Oppressive regimes of Africa and Middle East"}]}'
' "created":"2017-05-31T21:31:53.197755Z",'\
' "modified":"2017-05-31T21:31:53.197755Z",'\
' "name": "Spartacus", "objective": "Oppressive regimes of Africa and Middle East"}]}'
fs_sink.add(bund2)
assert os.path.exists(os.path.join(FS_PATH, "campaign", "campaign--2c03b8bf-82ee-433e-9918-ca2cb6e9534b" + ".json"))
bund2obj = parse(bund2)
camp_obj = bund2obj["objects"][0]
filepath = os.path.join(FS_PATH, "campaign",
"campaign--2c03b8bf-82ee-433e-9918-ca2cb6e9534b",
_timestamp2filename(camp_obj["modified"]) + ".json")
assert os.path.exists(filepath)
camp5_r = fs_source.get("campaign--2c03b8bf-82ee-433e-9918-ca2cb6e9534b")
assert camp5_r.id == "campaign--2c03b8bf-82ee-433e-9918-ca2cb6e9534b"
assert camp5_r.name == "Spartacus"
os.remove(os.path.join(FS_PATH, "campaign", camp5_r.id + ".json"))
os.remove(filepath)
def test_filesystem_sink_add_objects_list(fs_sink, fs_source):
@ -289,13 +367,23 @@ def test_filesystem_sink_add_objects_list(fs_sink, fs_source):
"objective": "Central and Eastern Europe military commands and departments",
"aliases": ["The Frenchmen"],
"id": "campaign--122818b6-1112-4fb0-b11b-b111107ca70a",
"created": "2017-05-31T21:31:53.197755Z"
"created": "2017-05-31T21:31:53.197755Z",
"modified": "2017-05-31T21:31:53.197755Z"
}
fs_sink.add([camp6, camp7])
assert os.path.exists(os.path.join(FS_PATH, "campaign", camp6.id + ".json"))
assert os.path.exists(os.path.join(FS_PATH, "campaign", "campaign--122818b6-1112-4fb0-b11b-b111107ca70a" + ".json"))
camp7obj = parse(camp7)
camp6filepath = os.path.join(FS_PATH, "campaign", camp6.id,
_timestamp2filename(camp6["modified"]) +
".json")
camp7filepath = os.path.join(
FS_PATH, "campaign", "campaign--122818b6-1112-4fb0-b11b-b111107ca70a",
_timestamp2filename(camp7obj["modified"]) + ".json")
assert os.path.exists(camp6filepath)
assert os.path.exists(camp7filepath)
camp6_r = fs_source.get(camp6.id)
assert camp6_r.id == camp6.id
@ -306,8 +394,24 @@ def test_filesystem_sink_add_objects_list(fs_sink, fs_source):
assert "The Frenchmen" in camp7_r.aliases
# remove all added objects
os.remove(os.path.join(FS_PATH, "campaign", camp6_r.id + ".json"))
os.remove(os.path.join(FS_PATH, "campaign", camp7_r.id + ".json"))
os.remove(camp6filepath)
os.remove(camp7filepath)
def test_filesystem_sink_marking(fs_sink):
    """Add a TLP green marking definition to the sink and check its file.

    The file is expected directly under the "marking-definition" directory,
    named after the marking's ID; it is removed again afterwards.
    """
    tlp_green = TLPMarking(tlp="green")
    marking = MarkingDefinition(definition_type="tlp", definition=tlp_green)

    fs_sink.add(marking)

    expected_name = marking["id"] + ".json"
    marking_filepath = os.path.join(
        FS_PATH, "marking-definition", expected_name
    )
    assert os.path.exists(marking_filepath)

    # Clean up so the stored file does not leak into other tests.
    os.remove(marking_filepath)
def test_filesystem_store_get_stored_as_bundle(fs_store):
@ -323,8 +427,9 @@ def test_filesystem_store_get_stored_as_object(fs_store):
def test_filesystem_store_all_versions(fs_store):
# all versions() - (note at this time, all_versions() is still not applicable to FileSystem, as only one version is ever stored)
rel = fs_store.all_versions("relationship--70dc6b5c-c524-429e-a6ab-0dd40f0482c1")[0]
rels = fs_store.all_versions("relationship--70dc6b5c-c524-429e-a6ab-0dd40f0482c1")
assert len(rels) == 1
rel = rels[0]
assert rel.id == "relationship--70dc6b5c-c524-429e-a6ab-0dd40f0482c1"
assert rel.type == "relationship"
@ -347,7 +452,7 @@ def test_filesystem_store_query_single_filter(fs_store):
def test_filesystem_store_empty_query(fs_store):
results = fs_store.query() # returns all
assert len(results) == 26
assert len(results) == 30
assert "tool--242f3da3-4425-4d11-8f5c-b842886da966" in [obj.id for obj in results]
assert "marking-definition--fa42a846-8d90-4e51-bc29-71d5b4802168" in [obj.id for obj in results]
@ -361,7 +466,7 @@ def test_filesystem_store_query_multiple_filters(fs_store):
def test_filesystem_store_query_dont_include_type_folder(fs_store):
results = fs_store.query(Filter("type", "!=", "tool"))
assert len(results) == 24
assert len(results) == 28
def test_filesystem_store_add(fs_store):
@ -375,8 +480,11 @@ def test_filesystem_store_add(fs_store):
assert camp1_r.id == camp1.id
assert camp1_r.name == camp1.name
filepath = os.path.join(FS_PATH, "campaign", camp1_r.id,
_timestamp2filename(camp1_r.modified) + ".json")
# remove
os.remove(os.path.join(FS_PATH, "campaign", camp1_r.id + ".json"))
os.remove(filepath)
def test_filesystem_store_add_as_bundle():
@ -387,7 +495,10 @@ def test_filesystem_store_add_as_bundle():
aliases=["Ragnar"])
fs_store.add(camp1)
with open(os.path.join(FS_PATH, "campaign", camp1.id + ".json")) as bundle_file:
filepath = os.path.join(FS_PATH, "campaign", camp1.id,
_timestamp2filename(camp1.modified) + ".json")
with open(filepath) as bundle_file:
assert '"type": "bundle"' in bundle_file.read()
camp1_r = fs_store.get(camp1.id)
@ -412,6 +523,26 @@ def test_filesystem_store_add_invalid_object(fs_store):
assert 'JSON formatted STIX bundle' in str(excinfo.value)
def test_filesystem_store_add_marking(fs_store):
    """Add a TLP green marking definition to the store and read it back.

    Checks that the file lands directly under the "marking-definition"
    directory (named after the marking's ID) and that the retrieved object
    carries the same ID and TLP level. The file is removed afterwards.
    """
    tlp_green = TLPMarking(tlp="green")
    marking = MarkingDefinition(definition_type="tlp", definition=tlp_green)

    fs_store.add(marking)

    expected_name = marking["id"] + ".json"
    marking_filepath = os.path.join(
        FS_PATH, "marking-definition", expected_name
    )
    assert os.path.exists(marking_filepath)

    retrieved = fs_store.get(marking["id"])
    assert retrieved["id"] == marking["id"]
    assert retrieved["definition"]["tlp"] == "green"

    # Clean up so the stored file does not leak into other tests.
    os.remove(marking_filepath)
def test_filesystem_object_with_custom_property(fs_store):
camp = Campaign(name="Scipio Africanus",
objective="Defeat the Carthaginians",
@ -527,3 +658,357 @@ def test_related_to_by_target(rel_fs_store):
assert len(resp) == 2
assert any(x['id'] == CAMPAIGN_ID for x in resp)
assert any(x['id'] == INDICATOR_ID for x in resp)
def test_auth_set_white1():
    """An allowed set with nothing prohibited is expected to be a whitelist."""
    allowed = {"A"}
    result = AuthSet(allowed, set())

    assert result.auth_type == AuthSet.WHITE
    assert result.values == {"A"}
def test_auth_set_white2():
    """Empty allowed and prohibited sets are expected to give an empty
    whitelist (as opposed to a blacklist).
    """
    result = AuthSet(set(), set())

    assert result.auth_type == AuthSet.WHITE
    assert len(result.values) == 0
def test_auth_set_white3():
    """Prohibited values are expected to be removed from the whitelist."""
    allowed = {"A", "B"}
    prohibited = {"B", "C"}
    result = AuthSet(allowed, prohibited)

    assert result.auth_type == AuthSet.WHITE
    assert result.values == {"A"}
def test_auth_set_black1():
    """Passing None for the allowed set is expected to give a blacklist of
    the prohibited values.
    """
    prohibited = {"B", "C"}
    result = AuthSet(None, prohibited)

    assert result.auth_type == AuthSet.BLACK
    assert result.values == {"B", "C"}
def test_optimize_types1():
    """A single type equality filter: that type is whitelisted, and the ID
    auth set is an empty blacklist.
    """
    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "=", "foo"),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert type_auth.values == {"foo"}

    assert id_auth.auth_type == AuthSet.BLACK
    assert len(id_auth.values) == 0
def test_optimize_types2():
    """Two different type equality filters: the type whitelist is empty."""
    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "=", "foo"),
        Filter("type", "=", "bar"),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert len(type_auth.values) == 0

    assert id_auth.auth_type == AuthSet.BLACK
    assert len(id_auth.values) == 0
def test_optimize_types3():
    """Two overlapping "in" type filters: only the common types are
    whitelisted.
    """
    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "in", ["A", "B", "C"]),
        Filter("type", "in", ["B", "C", "D"]),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert type_auth.values == {"B", "C"}

    assert id_auth.auth_type == AuthSet.BLACK
    assert len(id_auth.values) == 0
def test_optimize_types4():
    """Two disjoint "in" type filters: the type whitelist is empty."""
    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "in", ["A", "B", "C"]),
        Filter("type", "in", ["D", "E", "F"]),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert len(type_auth.values) == 0

    assert id_auth.auth_type == AuthSet.BLACK
    assert len(id_auth.values) == 0
def test_optimize_types5():
    """An "in" type filter combined with "!=": the excluded type is dropped
    from the whitelist.
    """
    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "in", ["foo", "bar"]),
        Filter("type", "!=", "bar"),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert type_auth.values == {"foo"}

    assert id_auth.auth_type == AuthSet.BLACK
    assert len(id_auth.values) == 0
def test_optimize_types6():
    """Only "!=" type filters: the excluded types form a blacklist."""
    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "!=", "foo"),
        Filter("type", "!=", "bar"),
    ])

    assert type_auth.auth_type == AuthSet.BLACK
    assert type_auth.values == {"foo", "bar"}

    assert id_auth.auth_type == AuthSet.BLACK
    assert len(id_auth.values) == 0
def test_optimize_types7():
    """"=" and "!=" on the same type: the type whitelist is empty."""
    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "=", "foo"),
        Filter("type", "!=", "foo"),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert len(type_auth.values) == 0

    assert id_auth.auth_type == AuthSet.BLACK
    assert len(id_auth.values) == 0
def test_optimize_types8():
    """No filters at all: both auth sets are empty blacklists."""
    type_auth, id_auth = _find_search_optimizations([])

    assert type_auth.auth_type == AuthSet.BLACK
    assert len(type_auth.values) == 0

    assert id_auth.auth_type == AuthSet.BLACK
    assert len(id_auth.values) == 0
def test_optimize_types_ids1():
    """An ID equality filter narrows an "in" type filter to the ID's type."""
    foo_id = "foo--00000000-0000-0000-0000-000000000000"

    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "in", ["foo", "bar"]),
        Filter("id", "=", foo_id),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert type_auth.values == {"foo"}

    assert id_auth.auth_type == AuthSet.WHITE
    assert id_auth.values == {"foo--00000000-0000-0000-0000-000000000000"}
def test_optimize_types_ids2():
    """A type filter and an ID of a different type: both whitelists are
    empty.
    """
    bar_id = "bar--00000000-0000-0000-0000-000000000000"

    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "=", "foo"),
        Filter("id", "=", bar_id),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert len(type_auth.values) == 0

    assert id_auth.auth_type == AuthSet.WHITE
    assert len(id_auth.values) == 0
def test_optimize_types_ids3():
    """An ID "!=" filter blacklists that ID and leaves the type whitelist
    untouched.
    """
    bar_id = "bar--00000000-0000-0000-0000-000000000000"

    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "in", ["foo", "bar"]),
        Filter("id", "!=", bar_id),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert type_auth.values == {"foo", "bar"}

    assert id_auth.auth_type == AuthSet.BLACK
    assert id_auth.values == {"bar--00000000-0000-0000-0000-000000000000"}
def test_optimize_types_ids4():
    """"in" filters on both type and ID: each whitelist keeps only entries
    consistent with the other.
    """
    candidate_ids = [
        "B--00000000-0000-0000-0000-000000000000",
        "C--00000000-0000-0000-0000-000000000000",
        "D--00000000-0000-0000-0000-000000000000",
    ]

    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "in", ["A", "B", "C"]),
        Filter("id", "in", candidate_ids),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert type_auth.values == {"B", "C"}

    assert id_auth.auth_type == AuthSet.WHITE
    assert id_auth.values == {
        "B--00000000-0000-0000-0000-000000000000",
        "C--00000000-0000-0000-0000-000000000000",
    }
def test_optimize_types_ids5():
    """"in" and "!=" filters on both type and ID: only type B and its ID
    remain whitelisted.
    """
    candidate_ids = [
        "B--00000000-0000-0000-0000-000000000000",
        "C--00000000-0000-0000-0000-000000000000",
        "D--00000000-0000-0000-0000-000000000000",
    ]

    type_auth, id_auth = _find_search_optimizations([
        Filter("type", "in", ["A", "B", "C"]),
        Filter("type", "!=", "C"),
        Filter("id", "in", candidate_ids),
        Filter("id", "!=", "D--00000000-0000-0000-0000-000000000000"),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert type_auth.values == {"B"}

    assert id_auth.auth_type == AuthSet.WHITE
    assert id_auth.values == {"B--00000000-0000-0000-0000-000000000000"}
def test_optimize_types_ids6():
    """An ID equality filter alone: the ID's type is whitelisted too."""
    a_id = "A--00000000-0000-0000-0000-000000000000"

    type_auth, id_auth = _find_search_optimizations([
        Filter("id", "=", a_id),
    ])

    assert type_auth.auth_type == AuthSet.WHITE
    assert type_auth.values == {"A"}

    assert id_auth.auth_type == AuthSet.WHITE
    assert id_auth.values == {"A--00000000-0000-0000-0000-000000000000"}
def test_search_auth_set_white1():
    """Search FS_PATH with a whitelist, once for directories and once for
    regular files.

    Only "attack-pattern" is expected among directories; no regular file is
    expected to match.
    """
    whitelist = AuthSet({"attack-pattern", "doesntexist"}, set())

    dir_matches = _get_matching_dir_entries(FS_PATH, whitelist, stat.S_ISDIR)
    assert dir_matches == ["attack-pattern"]

    file_matches = _get_matching_dir_entries(FS_PATH, whitelist, stat.S_ISREG)
    assert len(file_matches) == 0
def test_search_auth_set_white2():
    """Search the "malware" directory with both allowed and prohibited IDs.

    Only the allowed ID which is not also prohibited is expected to match.
    """
    allowed = {
        "malware--6b616fc1-1505-48e3-8b2c-0d19337bff38",
        "malware--92ec0cbd-2c30-44a2-b270-73f4ec949841",
    }
    prohibited = {
        "malware--92ec0cbd-2c30-44a2-b270-73f4ec949841",
        "malware--96b08451-b27a-4ff6-893f-790e26393a8e",
        "doesntexist",
    }
    auth = AuthSet(allowed, prohibited)

    malware_dir = os.path.join(FS_PATH, "malware")
    matches = _get_matching_dir_entries(malware_dir, auth, stat.S_ISDIR)

    assert matches == ["malware--6b616fc1-1505-48e3-8b2c-0d19337bff38"]
def test_search_auth_set_white3():
    """Search one malware ID directory for whitelisted ".json" version files.

    The whitelist holds names without extension; the match is expected to
    come back with the ".json" extension attached.
    """
    whitelist = AuthSet({"20170531213258226477", "doesntexist"}, set())

    id_dir = os.path.join(
        FS_PATH, "malware", "malware--6b616fc1-1505-48e3-8b2c-0d19337bff38"
    )
    matches = _get_matching_dir_entries(
        id_dir, whitelist, stat.S_ISREG, ".json"
    )

    assert matches == ["20170531213258226477.json"]
def test_search_auth_set_black1():
    """Search the "tool" directory with a blacklist.

    Every directory except the blacklisted tool ID is expected to match.
    """
    prohibited = {"tool--242f3da3-4425-4d11-8f5c-b842886da966", "doesntexist"}
    blacklist = AuthSet(None, prohibited)

    tool_dir = os.path.join(FS_PATH, "tool")
    matches = _get_matching_dir_entries(tool_dir, blacklist, stat.S_ISDIR)

    # Result order is not asserted, so compare as a set.
    assert set(matches) == {"tool--03342581-f790-4f03-ba41-e82e67392e23"}
def test_search_auth_set_white_empty():
    """An empty whitelist is expected to match no directory entries."""
    empty_whitelist = AuthSet(set(), set())

    matches = _get_matching_dir_entries(
        FS_PATH, empty_whitelist, stat.S_ISDIR
    )

    assert len(matches) == 0
def test_search_auth_set_black_empty(rel_fs_store):
    """An empty blacklist is expected to match every type directory."""
    # The rel_fs_store fixture is requested only so that it has run: it adds
    # "campaign", which makes the set of type directories predictable.
    empty_blacklist = AuthSet(None, set())

    matches = _get_matching_dir_entries(
        FS_PATH, empty_blacklist, stat.S_ISDIR
    )

    # Should get all dirs
    expected_dirs = {
        "attack-pattern",
        "campaign",
        "course-of-action",
        "identity",
        "indicator",
        "intrusion-set",
        "malware",
        "marking-definition",
        "relationship",
        "tool",
    }
    assert set(matches) == expected_dirs
def test_timestamp2filename_naive():
    """A naive timestamp is expected to be formatted as-is, down to
    microseconds.
    """
    naive = datetime.datetime(2010, 6, 15, 8, 30, 10, 1234)

    assert _timestamp2filename(naive) == "20100615083010001234"
def test_timestamp2filename_tz():
    """A timezone-aware timestamp is expected to be named by its UTC time."""
    # One hour west of UTC (i.e. an hour earlier), so 07:30:10 local time
    # corresponds to 08:30:10 UTC.
    one_hour_west = pytz.FixedOffset(-60)
    local_time = datetime.datetime(
        2010, 6, 15, 7, 30, 10, 1234, one_hour_west
    )

    assert _timestamp2filename(local_time) == "20100615083010001234"

View File

@ -196,8 +196,8 @@ def test_apply_common_filters3():
assert len(resp) == 3
resp = list(apply_common_filters(real_stix_objs, [filters[3]]))
assert resp[0].id == real_stix_objs[0].id
assert len(resp) == 3
assert resp[0].id == real_stix_objs[0].id
def test_apply_common_filters4():
@ -338,14 +338,6 @@ def test_datetime_filter_behavior():
filter_with_dt_obj = Filter("created", "=", parse_into_datetime("2016-02-14T00:00:00.000Z", "millisecond"))
filter_with_str = Filter("created", "=", "2016-02-14T00:00:00.000Z")
# check that filter value is converted from datetime to str
assert isinstance(filter_with_dt_obj.value, str)
# compare datetime string to filter w/ datetime obj
resp = list(apply_common_filters(stix_objs, [filter_with_dt_obj]))
assert len(resp) == 1
assert resp[0]["id"] == "vulnerability--ee916c28-c7a4-4d0d-ad56-a8d357f89fef"
# compare datetime obj to filter w/ datetime obj
resp = list(apply_common_filters(real_stix_objs, [filter_with_dt_obj]))
assert len(resp) == 1

View File

@ -7,6 +7,8 @@ import json
from dateutil import parser
import pytz
import stix2.base
from .exceptions import (InvalidValueError, RevokeError,
UnmodifiablePropertyError)
@ -364,3 +366,20 @@ def remove_custom_stix(stix_obj):
def get_type_from_id(stix_id):
return stix_id.split('--', 1)[0]
def is_marking(obj_or_id):
    """Tell whether an object, or an object ID, refers to a marking
    definition.

    :param obj_or_id: A STIX object (a ``_STIXBase`` instance or a plain
        dict) or a STIX object ID as a string.
    :return: True if a marking definition, False otherwise.
    """
    if not isinstance(obj_or_id, (stix2.base._STIXBase, dict)):
        # Anything that is not an object is treated as a string ID.
        return obj_or_id.startswith("marking-definition--")

    # Objects are identified by their "type" property.
    return obj_or_id["type"] == "marking-definition"