"""Python STIX2 FileSystem Source/Sink"""

import errno
import io
import json
import os
import re
import stat

import six

from stix2 import v20, v21
from stix2.base import _STIXBase
from stix2.core import parse
from stix2.datastore import (
    DataSink, DataSource, DataSourceError, DataStoreMixin,
)
from stix2.datastore.filters import Filter, FilterSet, apply_common_filters
from stix2.utils import format_datetime, get_type_from_id


def _timestamp2filename(timestamp):
    """
    Encapsulates a way to create unique filenames based on an object's
    "modified" property value. This should not include an extension.

    Args:
        timestamp: A timestamp, as a datetime.datetime object.

    """
    # The format_datetime will determine the correct level of precision.
    ts = format_datetime(timestamp)
    ts = re.sub(r"[-T:\.Z ]", "", ts)
    return ts
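
# Illustrative sketch (not part of the library API; assumes format_datetime
# renders this value with millisecond precision):
#
#     >>> import datetime
#     >>> _timestamp2filename(datetime.datetime(2018, 11, 28, 22, 51, 0, 123000))
#     '20181128225100123'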


class AuthSet(object):
    """
    Represents either a whitelist or blacklist of values, where/what we
    must/must not search to find objects which match a query. (Maybe
    "AuthSet" isn't the right name, but determining authorization is a
    typical context in which black/white lists are used.)

    The set may be empty. For a whitelist, this means you mustn't search
    anywhere, which means the query was impossible to match, so you can skip
    searching altogether. For a blacklist, this means nothing is excluded
    and you must search everywhere.

    """
    BLACK = 0
    WHITE = 1

    def __init__(self, allowed, prohibited):
        """
        Initialize this AuthSet from the given sets of allowed and/or
        prohibited values. The type of set (black or white) is determined
        from the allowed and/or prohibited values given.

        Args:
            allowed: A set of allowed values (or None if no allow filters
                were found in the query)
            prohibited: A set of prohibited values (not None)

        """
        if allowed is None:
            self.__values = prohibited
            self.__type = AuthSet.BLACK

        else:
            # There was at least one allow filter, so create a whitelist. But
            # any matching prohibited values create a combination of conditions
            # which can never match. So exclude those.
            self.__values = allowed - prohibited
            self.__type = AuthSet.WHITE

    @property
    def values(self):
        """
        Get the values in this white/blacklist, as a set.
        """
        return self.__values

    @property
    def auth_type(self):
        """
        Get the type of set: AuthSet.WHITE or AuthSet.BLACK.
        """
        return self.__type

    def __repr__(self):
        return "{}list: {}".format(
            "white" if self.auth_type == AuthSet.WHITE else "black",
            self.values,
        )


# A fixed, reusable AuthSet which accepts anything. It came in handy.
_AUTHSET_ANY = AuthSet(None, set())
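
# Hedged illustration of the two AuthSet flavors (a doctest-style sketch,
# not a library guarantee):
#
#     >>> AuthSet({"indicator", "malware"}, {"malware"})
#     whitelist: {'indicator'}
#     >>> AuthSet(None, {"malware"})
#     blacklist: {'malware'}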


def _update_allow(allow_set, value):
    """
    Updates the given set of "allow" values. The first time an update to the
    set occurs, the value(s) are added. Thereafter, since all filters are
    implicitly AND'd, the given values are intersected with the existing allow
    set, which may remove values. At the end, it may even wind up empty.

    Args:
        allow_set: The allow set, or None
        value: The value(s) to add (single value, or iterable of values)

    Returns:
        The updated allow set (not None)

    """
    adding_seq = hasattr(value, "__iter__") and \
        not isinstance(value, six.string_types)

    if allow_set is None:
        allow_set = set()
        if adding_seq:
            allow_set.update(value)
        else:
            allow_set.add(value)
    else:
        # Strangely, the "&=" operator requires a set on the RHS,
        # whereas the method allows any iterable.
        if adding_seq:
            allow_set.intersection_update(value)
        else:
            allow_set.intersection_update({value})

    return allow_set
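
# Hedged sketch of the implicit-AND semantics (assumes one call per query
# filter, as done in _find_search_optimizations below):
#
#     >>> s = _update_allow(None, "indicator")           # first filter: add
#     >>> s = _update_allow(s, ["indicator", "tool"])    # later filters: intersect
#     >>> s
#     {'indicator'}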


def _find_search_optimizations(filters):
    """
    Searches through all the filters, and creates white/blacklists of types
    and IDs, which can be used to optimize the filesystem search.

    Args:
        filters: An iterable of filter objects representing a query

    Returns:
        A 2-tuple of AuthSet objects: the first is for object types, and
        the second is for object IDs.

    """
    # The basic approach to this is to determine what is allowed and
    # prohibited, independently, and then combine them to create the final
    # white/blacklists.
    allowed_types = allowed_ids = None
    prohibited_types = set()
    prohibited_ids = set()

    for filter_ in filters:
        if filter_.property == "type":
            if filter_.op in ("=", "in"):
                allowed_types = _update_allow(allowed_types, filter_.value)
            elif filter_.op == "!=":
                prohibited_types.add(filter_.value)

        elif filter_.property == "id":
            if filter_.op == "=":
                # An "allow" ID filter implies a type filter too, since IDs
                # contain types within them.
                allowed_ids = _update_allow(allowed_ids, filter_.value)
                allowed_types = _update_allow(
                    allowed_types,
                    get_type_from_id(filter_.value),
                )
            elif filter_.op == "!=":
                prohibited_ids.add(filter_.value)
            elif filter_.op == "in":
                allowed_ids = _update_allow(allowed_ids, filter_.value)
                allowed_types = _update_allow(
                    allowed_types, (
                        get_type_from_id(id_) for id_ in filter_.value
                    ),
                )

    opt_types = AuthSet(allowed_types, prohibited_types)
    opt_ids = AuthSet(allowed_ids, prohibited_ids)

    # If we have both type and ID whitelists, perform a type-based intersection
    # on them, to further optimize. (Some of the cross-property constraints
    # occur above; this is essentially a second pass which operates on the
    # final whitelists, which among other things, incorporates any of the
    # prohibitions found above.)
    if opt_types.auth_type == AuthSet.WHITE and \
            opt_ids.auth_type == AuthSet.WHITE:

        opt_types.values.intersection_update(
            get_type_from_id(id_) for id_ in opt_ids.values
        )

        opt_ids.values.intersection_update(
            id_ for id_ in opt_ids.values
            if get_type_from_id(id_) in opt_types.values
        )

    return opt_types, opt_ids
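
# Hedged example (the ID below is hypothetical): for the query
#
#     [Filter("type", "in", ["indicator", "malware"]),
#      Filter("id", "=", "indicator--11111111-1111-4111-8111-111111111111")]
#
# the type whitelist collapses to {"indicator"} and the ID whitelist holds
# the single ID, so only one type directory needs to be searched.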


def _get_matching_dir_entries(parent_dir, auth_set, st_mode_test=None, ext=""):
    """
    Search a directory (non-recursively), and find entries which match the
    given criteria.

    Args:
        parent_dir: The directory to search
        auth_set: an AuthSet instance, which represents a black/whitelist
            filter on filenames
        st_mode_test: A callable allowing filtering based on the type of
            directory entry. E.g. just get directories, or just get files.
            It will be passed the st_mode field of a stat() structure and
            should return True to include the file, or False to exclude it.
            An easy thing to do is pass one of the stat module functions,
            e.g. stat.S_ISREG. If None, don't filter based on entry type.
        ext: Determines how names from auth_set match up to directory
            entries, and allows filtering by extension. The extension is
            added to auth_set values to obtain directory entries; it is
            removed from directory entries to obtain auth_set values. In
            this way, auth_set may be treated as having only "basenames" of
            the entries. Only entries having the given extension will be
            included in the results. If not empty, the extension MUST
            include a leading ".". The default is the empty string, which
            will result in direct comparisons, and no extension-based
            filtering.

    Returns:
        (list): A list of directory entries matching the criteria. These
            will not have any path info included; they will just be bare
            names.

    Raises:
        OSError: If there are errors accessing directory contents or
            stat()'ing files

    """
    results = []
    if auth_set.auth_type == AuthSet.WHITE:
        for value in auth_set.values:
            filename = value + ext
            try:
                if st_mode_test:
                    s = os.stat(os.path.join(parent_dir, filename))
                    type_pass = st_mode_test(s.st_mode)
                else:
                    type_pass = True

                if type_pass:
                    results.append(filename)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
                # else, file-not-found is ok, just skip

    else:  # auth_set is a blacklist
        for entry in os.listdir(parent_dir):
            if ext:
                auth_name, this_ext = os.path.splitext(entry)
                if this_ext != ext:
                    continue
            else:
                auth_name = entry

            if auth_name in auth_set.values:
                continue

            try:
                if st_mode_test:
                    s = os.stat(os.path.join(parent_dir, entry))
                    type_pass = st_mode_test(s.st_mode)
                else:
                    type_pass = True

                if type_pass:
                    results.append(entry)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
                # else, file-not-found is ok, just skip

    return results
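
# Hedged usage sketch (the path is hypothetical): list every regular ".json"
# file in a directory, with no name-based filtering, via the accept-anything
# AuthSet:
#
#     _get_matching_dir_entries("/data/stix/indicator", _AUTHSET_ANY,
#                               stat.S_ISREG, ".json")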


def _check_object_from_file(query, filepath, allow_custom, version, encoding):
    """
    Read a STIX object from the given file, and check it against the given
    filters.

    Args:
        query: Iterable of filters
        filepath (str): Path to file to read
        allow_custom (bool): Whether to allow custom properties as well as
            unknown custom objects.
        version (str): If present, it forces the parser to use the version
            provided. Otherwise, the library will make the best effort based
            on checking the "spec_version" property.
        encoding (str): The encoding to use when reading a file from the
            filesystem.

    Returns:
        The (parsed) STIX object, if the object passes the filters. If
        not, None is returned.

    Raises:
        TypeError: If the file had invalid JSON
        IOError: If there are problems opening/reading the file
        stix2.exceptions.STIXError: If there were problems creating a STIX
            object from the JSON

    """
    try:
        with io.open(filepath, "r", encoding=encoding) as f:
            stix_json = json.load(f)
    except ValueError:  # not a JSON file
        raise TypeError(
            "STIX JSON object at '{0}' could either not be parsed "
            "to JSON or was not valid STIX JSON".format(filepath),
        )

    stix_obj = parse(stix_json, allow_custom, version)

    if stix_obj["type"] == "bundle":
        stix_obj = stix_obj["objects"][0]

    # check against other filters, add if match
    result = next(apply_common_filters([stix_obj], query), None)

    return result


def _is_versioned_type_dir(type_path, type_name):
    """
    Try to detect whether the given directory is for a versioned type of
    STIX object. This is done by looking for a directory whose name is a
    STIX ID of the appropriate type. If found, treat this type as
    versioned. This doesn't work when a versioned type directory is empty
    (it will be mis-classified as unversioned), but this detection is only
    necessary when reading/querying data. If a directory is empty, you'll
    get no results either way.

    Args:
        type_path: A path to a directory containing one type of STIX object.
        type_name: The STIX type name.

    Returns:
        True if the directory looks like it contains versioned objects;
        False if not.

    Raises:
        OSError: If there are errors accessing directory contents or
            stat()'ing files

    """
    id_regex = re.compile(
        r"^" + re.escape(type_name) +
        r"--[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}"
        r"-[0-9a-f]{12}$",
        re.I,
    )

    for entry in os.listdir(type_path):
        s = os.stat(os.path.join(type_path, entry))
        if stat.S_ISDIR(s.st_mode) and id_regex.match(entry):
            is_versioned = True
            break
    else:
        is_versioned = False

    return is_versioned
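
# Hedged sketch of the two on-disk layouts this check distinguishes (names
# are hypothetical):
#
#     indicator/indicator--<uuid>/20181128225100123.json    # versioned
#     marking-definition/marking-definition--<uuid>.json    # unversioned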


def _search_versioned(query, type_path, auth_ids, allow_custom, version, encoding):
    """
    Searches the given directory, which contains data for STIX objects of a
    particular versioned type, and returns any which match the query.

    Args:
        query: The query to match against
        type_path: The directory with type-specific STIX object files
        auth_ids: Search optimization based on object ID
        allow_custom (bool): Whether to allow custom properties as well as
            unknown custom objects.
        version (str): If present, it forces the parser to use the version
            provided. Otherwise, the library will make the best effort based
            on checking the "spec_version" property.
        encoding (str): The encoding to use when reading a file from the
            filesystem.

    Returns:
        A list of all matching objects

    Raises:
        stix2.exceptions.STIXError: If any objects had invalid content
        TypeError: If any objects had invalid content
        IOError: If there were any problems opening/reading files
        OSError: If there were any problems opening/reading files

    """
    results = []
    id_dirs = _get_matching_dir_entries(
        type_path, auth_ids,
        stat.S_ISDIR,
    )
    for id_dir in id_dirs:
        id_path = os.path.join(type_path, id_dir)

        # This leverages a more sophisticated function to do a simple thing:
        # get all the JSON files from a directory. I guess it does give us
        # file type checking, ensuring we only get regular files.
        version_files = _get_matching_dir_entries(
            id_path, _AUTHSET_ANY,
            stat.S_ISREG, ".json",
        )
        for version_file in version_files:
            version_path = os.path.join(id_path, version_file)

            try:
                stix_obj = _check_object_from_file(
                    query, version_path,
                    allow_custom, version,
                    encoding,
                )
                if stix_obj:
                    results.append(stix_obj)
            except IOError as e:
                if e.errno != errno.ENOENT:
                    raise
                # else, file-not-found is ok, just skip

    # For backward-compatibility, also search for plain files named after
    # object IDs, in the type directory.
    backcompat_results = _search_unversioned(
        query, type_path, auth_ids, allow_custom, version, encoding,
    )
    results.extend(backcompat_results)

    return results


def _search_unversioned(
    query, type_path, auth_ids, allow_custom, version, encoding,
):
    """
    Searches the given directory, which contains unversioned data, and
    returns any objects which match the query.

    Args:
        query: The query to match against
        type_path: The directory with STIX files of unversioned type
        auth_ids: Search optimization based on object ID
        allow_custom (bool): Whether to allow custom properties as well as
            unknown custom objects.
        version (str): If present, it forces the parser to use the version
            provided. Otherwise, the library will make the best effort based
            on checking the "spec_version" property.
        encoding (str): The encoding to use when reading a file from the
            filesystem.

    Returns:
        A list of all matching objects

    Raises:
        stix2.exceptions.STIXError: If any objects had invalid content
        TypeError: If any objects had invalid content
        IOError: If there were any problems opening/reading files
        OSError: If there were any problems opening/reading files

    """
    results = []
    id_files = _get_matching_dir_entries(
        type_path, auth_ids, stat.S_ISREG,
        ".json",
    )
    for id_file in id_files:
        id_path = os.path.join(type_path, id_file)

        try:
            stix_obj = _check_object_from_file(
                query, id_path, allow_custom,
                version, encoding,
            )
            if stix_obj:
                results.append(stix_obj)
        except IOError as e:
            if e.errno != errno.ENOENT:
                raise
            # else, file-not-found is ok, just skip

    return results


class FileSystemStore(DataStoreMixin):
    """Interface to a file directory of STIX objects.

    FileSystemStore is a wrapper around a paired FileSystemSink
    and FileSystemSource.

    Args:
        stix_dir (str): path to directory of STIX objects
        allow_custom (bool): whether to allow custom STIX content to be
            pushed/retrieved. Defaults to True for the FileSystemSource
            side (retrieving data) and False for the FileSystemSink side
            (pushing data). However, when this parameter is supplied, it
            will be applied to both the FileSystemSource and FileSystemSink.
        bundlify (bool): whether to wrap objects in bundles when saving
            them. Default: False.
        encoding (str): The encoding to use when reading a file from the
            filesystem.

    Attributes:
        source (FileSystemSource): FileSystemSource
        sink (FileSystemSink): FileSystemSink

    """
    def __init__(self, stix_dir, allow_custom=None, bundlify=False, encoding='utf-8'):
        if allow_custom is None:
            allow_custom_source = True
            allow_custom_sink = False
        else:
            allow_custom_sink = allow_custom_source = allow_custom

        super(FileSystemStore, self).__init__(
            source=FileSystemSource(stix_dir=stix_dir, allow_custom=allow_custom_source, encoding=encoding),
            sink=FileSystemSink(stix_dir=stix_dir, allow_custom=allow_custom_sink, bundlify=bundlify),
        )
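
# Hedged usage sketch (the directory and the "indicator" object are
# hypothetical):
#
#     store = FileSystemStore("/data/stix")
#     store.add(indicator)              # written under /data/stix/indicator/
#     latest = store.get(indicator.id)  # newest version is returned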


class FileSystemSink(DataSink):
    """Interface for adding/pushing STIX objects to a file directory of
    STIX objects.

    Can be paired with a FileSystemSource, together as the two
    components of a FileSystemStore.

    Args:
        stix_dir (str): path to directory of STIX objects.
        allow_custom (bool): Whether to allow custom STIX content to be
            added to the FileSystemSink. Default: False
        bundlify (bool): Whether to wrap objects in bundles when saving
            them. Default: False.

    """
    def __init__(self, stix_dir, allow_custom=False, bundlify=False):
        super(FileSystemSink, self).__init__()
        self._stix_dir = os.path.abspath(stix_dir)
        self.allow_custom = allow_custom
        self.bundlify = bundlify

        if not os.path.exists(self._stix_dir):
            raise ValueError("directory path for STIX data does not exist")

    @property
    def stix_dir(self):
        return self._stix_dir

    def _check_path_and_write(self, stix_obj, encoding='utf-8'):
        """Write the given STIX object to a file in the STIX file directory.
        """
        type_dir = os.path.join(self._stix_dir, stix_obj["type"])

        # All versioned objects should have a "modified" property.
        if "modified" in stix_obj:
            filename = _timestamp2filename(stix_obj["modified"])
            obj_dir = os.path.join(type_dir, stix_obj["id"])
        else:
            filename = stix_obj["id"]
            obj_dir = type_dir

        file_path = os.path.join(obj_dir, filename + ".json")

        if not os.path.exists(obj_dir):
            os.makedirs(obj_dir)

        if self.bundlify:
            if 'spec_version' in stix_obj:
                # Assuming future specs will allow multiple SDO/SRO versions
                # in a single bundle, we won't need to check this and can
                # just use the latest supported Bundle version.
                stix_obj = v21.Bundle(stix_obj, allow_custom=self.allow_custom)
            else:
                stix_obj = v20.Bundle(stix_obj, allow_custom=self.allow_custom)

        if os.path.isfile(file_path):
            raise DataSourceError("Attempted to overwrite file (!) at: {}".format(file_path))
        else:
            with io.open(file_path, 'w', encoding=encoding) as f:
                stix_obj = stix_obj.serialize(pretty=True, encoding=encoding, ensure_ascii=False)
                f.write(stix_obj)

    def add(self, stix_data=None, version=None):
        """Add STIX objects to file directory.

        Args:
            stix_data (STIX object OR dict OR str OR list): valid STIX 2.0
                or 2.1 content in a STIX object (or list of), dict (or list
                of), or a STIX JSON encoded string.
            version (str): If present, it forces the parser to use the
                version provided. Otherwise, the library will make the best
                effort based on checking the "spec_version" property.

        Note:
            ``stix_data`` can be a Bundle object, but each object in it will
            be saved separately; you will be able to retrieve any of the
            objects the Bundle contained, but not the Bundle itself.

        """
        if isinstance(stix_data, (v20.Bundle, v21.Bundle)):
            # recursively add individual STIX objects
            for stix_obj in stix_data.get("objects", []):
                self.add(stix_obj, version=version)

        elif isinstance(stix_data, _STIXBase):
            # adding python STIX object
            self._check_path_and_write(stix_data)

        elif isinstance(stix_data, (str, dict)):
            stix_data = parse(stix_data, allow_custom=self.allow_custom, version=version)
            self.add(stix_data, version=version)

        elif isinstance(stix_data, list):
            # recursively add individual STIX objects, passing the version
            # through so string/dict members are parsed consistently
            for stix_obj in stix_data:
                self.add(stix_obj, version=version)

        else:
            raise TypeError(
                "stix_data must be a STIX object (or list of), "
                "JSON formatted STIX (or list of), "
                "or a JSON formatted STIX bundle",
            )
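
# Hedged usage sketch (the sink directory and "indicator" are hypothetical):
#
#     sink = FileSystemSink("/data/stix", bundlify=True)
#     sink.add(indicator)   # each object is written to its own .json file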


class FileSystemSource(DataSource):
    """Interface for searching/retrieving STIX objects from a STIX object
    file directory.

    Can be paired with a FileSystemSink, together as the two
    components of a FileSystemStore.

    Args:
        stix_dir (str): path to directory of STIX objects
        allow_custom (bool): Whether to allow custom STIX content to be
            retrieved from the FileSystemSource. Default: True
        encoding (str): The encoding to use when reading a file from the
            filesystem.

    """
    def __init__(self, stix_dir, allow_custom=True, encoding='utf-8'):
        super(FileSystemSource, self).__init__()
        self._stix_dir = os.path.abspath(stix_dir)
        self.allow_custom = allow_custom
        self.encoding = encoding

        if not os.path.exists(self._stix_dir):
            raise ValueError("directory path for STIX data does not exist: %s" % self._stix_dir)

    @property
    def stix_dir(self):
        return self._stix_dir

    def get(self, stix_id, version=None, _composite_filters=None):
        """Retrieve STIX object from file directory via STIX ID.

        Args:
            stix_id (str): The STIX ID of the STIX object to be retrieved.
            _composite_filters (FilterSet): collection of filters passed
                from the parent CompositeDataSource, not user supplied
            version (str): If present, it forces the parser to use the
                version provided. Otherwise, the library will make the best
                effort based on checking the "spec_version" property.

        Returns:
            (STIX object): STIX object that has the supplied STIX ID.
                The STIX object is loaded from its json file, parsed into
                a python STIX object and then returned.

        """
        all_data = self.all_versions(stix_id, version=version, _composite_filters=_composite_filters)

        if all_data:
            # Simple check for a versioned STIX type: see if the objects
            # have a "modified" property. (Need only check one, since they
            # are all of the same type.)
            is_versioned = "modified" in all_data[0]
            if is_versioned:
                stix_obj = sorted(all_data, key=lambda k: k['modified'])[-1]
            else:
                stix_obj = all_data[0]
        else:
            stix_obj = None

        return stix_obj

    def all_versions(self, stix_id, version=None, _composite_filters=None):
        """Retrieve STIX object from file directory via STIX ID, all
        versions.

        Args:
            stix_id (str): The STIX ID of the STIX objects to be retrieved.
            _composite_filters (FilterSet): collection of filters passed
                from the parent CompositeDataSource, not user supplied
            version (str): If present, it forces the parser to use the
                version provided. Otherwise, the library will make the best
                effort based on checking the "spec_version" property.

        Returns:
            (list): of STIX objects that have the supplied STIX ID.
                The STIX objects are loaded from their json files, parsed
                into python STIX objects and then returned.

        """
        query = [Filter("id", "=", stix_id)]
        return self.query(query, version=version, _composite_filters=_composite_filters)

    def query(self, query=None, version=None, _composite_filters=None):
        """Search and retrieve STIX objects based on the complete query.

        A "complete query" includes the filters from the query, the filters
        attached to this FileSystemSource, and any filters passed from a
        CompositeDataSource (i.e. _composite_filters).

        Args:
            query (list): list of filters to search on
            _composite_filters (FilterSet): collection of filters passed
                from the CompositeDataSource, not user supplied
            version (str): If present, it forces the parser to use the
                version provided. Otherwise, the library will make the best
                effort based on checking the "spec_version" property.

        Returns:
            (list): list of STIX objects that match the supplied query.
                The STIX objects are loaded from their json files, parsed
                into python STIX objects and then returned.

        """
        all_data = []
        query = FilterSet(query)

        # combine all query filters
        if self.filters:
            query.add(self.filters)
        if _composite_filters:
            query.add(_composite_filters)

        auth_types, auth_ids = _find_search_optimizations(query)
        type_dirs = _get_matching_dir_entries(
            self._stix_dir, auth_types,
            stat.S_ISDIR,
        )
        for type_dir in type_dirs:
            type_path = os.path.join(self._stix_dir, type_dir)
            type_is_versioned = _is_versioned_type_dir(type_path, type_dir)
            if type_is_versioned:
                type_results = _search_versioned(
                    query, type_path, auth_ids,
                    self.allow_custom, version,
                    self.encoding,
                )
            else:
                type_results = _search_unversioned(
                    query, type_path, auth_ids,
                    self.allow_custom, version,
                    self.encoding,
                )
            all_data.extend(type_results)

        return all_data
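
# Hedged end-to-end sketch (the path and filter are hypothetical):
#
#     source = FileSystemSource("/data/stix")
#     indicators = source.query([Filter("type", "=", "indicator")])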