cti-python-stix2/stix2/equivalence/pattern/__init__.py

"""Python APIs for STIX 2 Pattern Semantic Equivalence.
.. autosummary::
:toctree: pattern
compare
transform
|
"""
from ... import pattern_visitor
from ...version import DEFAULT_VERSION
from .compare.observation import observation_expression_cmp
from .transform import ChainTransformer, SettleTransformer
from .transform.observation import (
    AbsorptionTransformer, NormalizeComparisonExpressionsTransformer,
    DNFTransformer, FlattenTransformer, OrderDedupeTransformer,
)

# Lazy-initialize
_pattern_normalizer = None


def _get_pattern_normalizer():
    """
    Get a normalization transformer for STIX patterns.

    Returns:
        The transformer
    """
    # The transformers are either stateless or contain no state which changes
    # with each use.  So we can set up the transformers once and keep reusing
    # them.
    global _pattern_normalizer

    if not _pattern_normalizer:
        normalize_comp_expr = \
            NormalizeComparisonExpressionsTransformer()

        obs_expr_flatten = FlattenTransformer()
        obs_expr_order = OrderDedupeTransformer()
        obs_expr_absorb = AbsorptionTransformer()
        obs_simplify = ChainTransformer(
            obs_expr_flatten, obs_expr_order, obs_expr_absorb,
        )
        obs_settle_simplify = SettleTransformer(obs_simplify)

        obs_dnf = DNFTransformer()

        _pattern_normalizer = ChainTransformer(
            normalize_comp_expr,
            obs_settle_simplify, obs_dnf, obs_settle_simplify,
        )

    return _pattern_normalizer
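

# Taken together, the chain above first normalizes the comparison expressions,
# then repeatedly flattens, orders, dedupes, and absorbs observation
# expressions until they settle, rewrites the result into disjunctive normal
# form, and settles the simplifications once more.  Comparing two patterns
# for semantic equivalence therefore reduces to comparing their normalized
# ASTs, which is what the public functions below do.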


def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
    """
    Determine whether two STIX patterns are semantically equivalent.

    Args:
        pattern1: The first STIX pattern
        pattern2: The second STIX pattern
        stix_version: The STIX version to use for pattern parsing, as a string
            ("2.0", "2.1", etc).  Defaults to library-wide default version.

    Returns:
        True if the patterns are semantically equivalent; False if not
    """
    patt_ast1 = pattern_visitor.create_pattern_object(
        pattern1, version=stix_version,
    )
    patt_ast2 = pattern_visitor.create_pattern_object(
        pattern2, version=stix_version,
    )

    pattern_normalizer = _get_pattern_normalizer()

    norm_patt1, _ = pattern_normalizer.transform(patt_ast1)
    norm_patt2, _ = pattern_normalizer.transform(patt_ast2)

    result = observation_expression_cmp(norm_patt1, norm_patt2)

    return result == 0
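

# A minimal usage sketch with illustrative example patterns (the pattern
# strings are assumptions for demonstration, not taken from this module):
#
#     equivalent_patterns(
#         "[ipv4-addr:value = '198.51.100.1'] OR "
#         "[ipv4-addr:value = '198.51.100.1']",
#         "[ipv4-addr:value = '198.51.100.1']",
#     )
#
# The duplicated OR branch should be deduplicated during normalization, so
# the call should return True even though the two pattern strings differ.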


def find_equivalent_patterns(
    search_pattern, patterns, stix_version=DEFAULT_VERSION,
):
    """
    Find patterns from a sequence which are equivalent to a given pattern.

    This is more efficient than using equivalent_patterns() in a loop, because
    it doesn't re-normalize the search pattern over and over.  This works
    on an input iterable and is implemented as a generator of matches.  So you
    can "stream" patterns in and matching patterns will be streamed out.

    Args:
        search_pattern: A search pattern as a string
        patterns: An iterable over patterns as strings
        stix_version: The STIX version to use for pattern parsing, as a string
            ("2.0", "2.1", etc).  Defaults to library-wide default version.

    Returns:
        A generator iterator producing the semantically equivalent patterns
    """
    search_pattern_ast = pattern_visitor.create_pattern_object(
        search_pattern, version=stix_version,
    )

    pattern_normalizer = _get_pattern_normalizer()

    norm_search_pattern_ast, _ = pattern_normalizer.transform(
        search_pattern_ast,
    )

    for pattern in patterns:
        pattern_ast = pattern_visitor.create_pattern_object(
            pattern, version=stix_version,
        )
        norm_pattern_ast, _ = pattern_normalizer.transform(pattern_ast)

        result = observation_expression_cmp(
            norm_search_pattern_ast, norm_pattern_ast,
        )

        if result == 0:
            yield pattern
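

# A minimal usage sketch with illustrative example patterns (the candidate
# strings are assumptions for demonstration, not taken from this module).
# Matches are yielded lazily, so they can be consumed as a stream:
#
#     candidates = [
#         "[ipv4-addr:value = '198.51.100.1']",
#         "[domain-name:value = 'example.com']",
#     ]
#     for match in find_equivalent_patterns(
#         "[ipv4-addr:value = '198.51.100.1'] OR "
#         "[ipv4-addr:value = '198.51.100.1']",
#         candidates,
#     ):
#         print(match)
#
# Only the first candidate should be yielded: the duplicated OR branch in the
# search pattern should be deduplicated during normalization, making it
# equivalent to the plain ipv4-addr pattern but not to the domain-name one.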