Add a find_equivalent_patterns() function and unit tests, in case a user
wants a more efficient search capability.  (It is more efficient than
calling equivalent_patterns() over and over in a loop, because it doesn't
repeatedly re-canonicalize the search pattern.)
pull/1/head
Michael Chisholm 2020-08-13 17:09:04 -04:00
parent bd5635f5be
commit 16a8c544ac
2 changed files with 69 additions and 1 deletions

View File

@ -70,3 +70,37 @@ def equivalent_patterns(pattern1, pattern2):
result = observation_expression_cmp(canon_patt1, canon_patt2) result = observation_expression_cmp(canon_patt1, canon_patt2)
return result == 0 return result == 0
def find_equivalent_patterns(search_pattern, patterns):
    """
    Yield the patterns from an iterable which are equivalent to a given
    search pattern.

    The search pattern is parsed and canonicalized a single time, and the
    resulting AST is reused for every comparison.  That makes this cheaper
    than calling equivalent_patterns() once per candidate, which would redo
    that work for the search pattern on each call.

    This is a generator function: candidates are pulled from the input
    iterable one at a time, and each match is yielded as soon as it is
    found, so patterns can be "streamed" through it.  No work (including
    parsing the search pattern) is done until iteration begins.

    :param search_pattern: The pattern to search for, as a string
    :param patterns: An iterable over candidate patterns, as strings
    :return: A generator iterator producing, in input order, the candidate
        patterns which compare as equivalent to the search pattern
    """
    # Prepare the search pattern once, up front.
    parsed_target = stix2.pattern_visitor.create_pattern_object(
        search_pattern,
    )
    canonicalizer = _get_pattern_canonicalizer()
    canon_target, _ = canonicalizer.transform(parsed_target)

    for candidate in patterns:
        parsed_candidate = stix2.pattern_visitor.create_pattern_object(
            candidate,
        )
        canon_candidate, _ = canonicalizer.transform(parsed_candidate)

        # A comparison result of zero means the canonical forms are equal.
        if observation_expression_cmp(canon_target, canon_candidate) != 0:
            continue

        yield candidate

View File

@ -1,5 +1,12 @@
import pytest import pytest
from stix2.equivalence.patterns import equivalent_patterns from stix2.equivalence.patterns import (
equivalent_patterns, find_equivalent_patterns
)
# # # #
# # Observation expression equivalence tests # #
# # # #
@pytest.mark.parametrize( @pytest.mark.parametrize(
@ -569,3 +576,30 @@ def test_comp_special_canonicalization_win_reg_key(patt1, patt2):
) )
def test_comp_special_canonicalization_win_reg_key_not_equivalent(patt1, patt2): def test_comp_special_canonicalization_win_reg_key_not_equivalent(patt1, patt2):
assert not equivalent_patterns(patt1, patt2) assert not equivalent_patterns(patt1, patt2)
# # # #
# # find_equivalent_patterns() tests # #
# # # #
def test_find_equivalent_patterns():
    """
    Check that find_equivalent_patterns() yields exactly the candidates
    expected to match the search pattern, in their original order.
    """
    target = "[a:b=1]"

    # Each entry pairs a candidate pattern with whether it is expected to be
    # reported as equivalent to the target.
    cases = [
        ("[a:b=2]", False),
        ("[a:b=1]", True),
        ("[a:b=1] WITHIN 1 SECONDS", False),
        ("[a:b=1] OR ([a:b=2] AND [a:b=1])", True),
        ("[(a:b=2 OR a:b=1) AND a:b=1]", True),
        ("[c:d=1]", False),
        ("[a:b>1]", False),
    ]
    candidates = [patt for patt, _ in cases]
    expected = [patt for patt, should_match in cases if should_match]

    matches = list(find_equivalent_patterns(target, candidates))

    assert matches == expected