2020-09-11 19:54:13 +02:00
|
|
|
"""Python APIs for STIX 2 Pattern Semantic Equivalence.
|
|
|
|
|
|
|
|
.. autosummary::
|
2020-10-16 23:12:52 +02:00
|
|
|
:toctree: pattern
|
2020-09-11 19:54:13 +02:00
|
|
|
|
|
|
|
compare
|
|
|
|
transform
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
2021-01-15 18:34:10 +01:00
|
|
|
from ... import pattern_visitor
|
|
|
|
from ...version import DEFAULT_VERSION
|
|
|
|
from .compare.observation import observation_expression_cmp
|
|
|
|
from .transform import ChainTransformer, SettleTransformer
|
|
|
|
from .transform.observation import (
|
2020-08-13 23:44:42 +02:00
|
|
|
AbsorptionTransformer, CanonicalizeComparisonExpressionsTransformer,
|
|
|
|
DNFTransformer, FlattenTransformer, OrderDedupeTransformer,
|
2020-08-11 00:33:26 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
# Lazy-initialize
|
|
|
|
_pattern_canonicalizer = None
|
|
|
|
|
|
|
|
|
|
|
|
def _get_pattern_canonicalizer():
    """
    Return the shared canonicalization transformer for STIX patterns.

    The transformer is built on the first call and stored in the module-level
    ``_pattern_canonicalizer`` variable; later calls return that same object.

    Returns:
        The transformer
    """

    global _pattern_canonicalizer

    # Already built on an earlier call: hand back the cached instance.
    if _pattern_canonicalizer:
        return _pattern_canonicalizer

    # The transformers are either stateless or hold no state which changes
    # from one use to the next, so a single set of instances can be created
    # once and shared by every caller.
    comparison_canonicalizer = CanonicalizeComparisonExpressionsTransformer()

    # Flatten, order/dedupe and absorb are chained together and wrapped in a
    # SettleTransformer.  The same wrapped instance is used both before and
    # after the DNF step below.
    settled_simplifier = SettleTransformer(
        ChainTransformer(
            FlattenTransformer(),
            OrderDedupeTransformer(),
            AbsorptionTransformer(),
        ),
    )

    _pattern_canonicalizer = ChainTransformer(
        comparison_canonicalizer,
        settled_simplifier,
        DNFTransformer(),
        settled_simplifier,
    )

    return _pattern_canonicalizer
|
|
|
|
|
|
|
|
|
2021-01-15 18:34:10 +01:00
|
|
|
def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
    """
    Determine whether two STIX patterns are semantically equivalent.

    Both patterns are parsed, run through the shared canonicalization
    transformer, and the canonical forms are then compared.

    Args:
        pattern1: The first STIX pattern
        pattern2: The second STIX pattern
        stix_version: The STIX version to use for pattern parsing, as a string
            ("2.0", "2.1", etc).  Defaults to library-wide default version.

    Returns:
        True if the patterns are semantically equivalent; False if not
    """
    # Parse both inputs up front, before any canonicalization work begins.
    parsed_asts = [
        pattern_visitor.create_pattern_object(text, version=stix_version)
        for text in (pattern1, pattern2)
    ]

    canonicalizer = _get_pattern_canonicalizer()

    # transform() yields a pair; only the first element (the transformed AST)
    # is needed here, the second is discarded.
    canon_first, canon_second = [
        canon_ast
        for canon_ast, _ in map(canonicalizer.transform, parsed_asts)
    ]

    # The comparator returns 0 when the two canonical forms compare equal.
    return observation_expression_cmp(canon_first, canon_second) == 0
|
2020-08-13 23:09:04 +02:00
|
|
|
|
|
|
|
|
2020-08-15 01:55:00 +02:00
|
|
|
def find_equivalent_patterns(
    search_pattern, patterns, stix_version=DEFAULT_VERSION,
):
    """
    Yield the patterns from an iterable which are equivalent to a given one.

    The search pattern is parsed and canonicalized a single time, which makes
    this cheaper than calling equivalent_patterns() repeatedly in a loop.
    Because this is a generator working over an input iterable, patterns can
    be "streamed" in and the matching ones are streamed back out.

    Args:
        search_pattern: A search pattern as a string
        patterns: An iterable over patterns as strings
        stix_version: The STIX version to use for pattern parsing, as a string
            ("2.0", "2.1", etc).  Defaults to library-wide default version.

    Returns:
        A generator iterator producing the semantically equivalent patterns
    """
    canonicalizer = _get_pattern_canonicalizer_for_search = None  # noqa: F841
    del _get_pattern_canonicalizer_for_search

    # Canonical form of the search pattern: computed once, reused for every
    # candidate below.
    search_ast = pattern_visitor.create_pattern_object(
        search_pattern, version=stix_version,
    )
    canonicalizer = _get_pattern_canonicalizer()
    canon_search_ast, _ = canonicalizer.transform(search_ast)

    for candidate in patterns:
        candidate_ast = pattern_visitor.create_pattern_object(
            candidate, version=stix_version,
        )
        canon_candidate_ast, _ = canonicalizer.transform(candidate_ast)

        # A non-zero comparison result means the canonical forms differ.
        if observation_expression_cmp(
            canon_search_ast, canon_candidate_ast,
        ) != 0:
            continue

        # Yield the caller's original pattern, not its canonical form.
        yield candidate
|