From 16a8c544ac3aba5fc6ab6fdf46c3c9f6db2670c8 Mon Sep 17 00:00:00 2001
From: Michael Chisholm
Date: Thu, 13 Aug 2020 17:09:04 -0400
Subject: [PATCH] Add a find_equivalent_patterns() function and unit tests, in case a user wants a more efficient search capability. (It is more efficient than calling equivalent_patterns() over and over in a loop, because it doesn't repeatedly re-canonicalize the search pattern.)

---
 stix2/equivalence/patterns/__init__.py | 34 ++++++++++++++++++++++++
 stix2/test/test_pattern_equivalence.py | 36 +++++++++++++++++++++++++-
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/stix2/equivalence/patterns/__init__.py b/stix2/equivalence/patterns/__init__.py
index 9965c35..0d0aa2a 100644
--- a/stix2/equivalence/patterns/__init__.py
+++ b/stix2/equivalence/patterns/__init__.py
@@ -70,3 +70,37 @@ def equivalent_patterns(pattern1, pattern2):
     result = observation_expression_cmp(canon_patt1, canon_patt2)
 
     return result == 0
+
+
+def find_equivalent_patterns(search_pattern, patterns):
+    """
+    Find patterns from a sequence which are equivalent to a given pattern.
+    This is more efficient than using equivalent_patterns() in a loop, because
+    it doesn't re-canonicalize the search pattern over and over. This works
+    on an input iterable and is implemented as a generator of matches. So you
+    can "stream" patterns in and matching patterns will be streamed out.
+
+    :param search_pattern: A search pattern as a string
+    :param patterns: An iterable over patterns as strings
+    :return: A generator iterator producing the semantically equivalent
+        patterns
+    """
+    search_pattern_ast = stix2.pattern_visitor.create_pattern_object(
+        search_pattern
+    )
+
+    pattern_canonicalizer = _get_pattern_canonicalizer()
+    canon_search_pattern_ast, _ = pattern_canonicalizer.transform(
+        search_pattern_ast
+    )
+
+    for pattern in patterns:
+        pattern_ast = stix2.pattern_visitor.create_pattern_object(pattern)
+        canon_pattern_ast, _ = pattern_canonicalizer.transform(pattern_ast)
+
+        result = observation_expression_cmp(
+            canon_search_pattern_ast, canon_pattern_ast
+        )
+
+        if result == 0:
+            yield pattern
diff --git a/stix2/test/test_pattern_equivalence.py b/stix2/test/test_pattern_equivalence.py
index 73eca58..0488358 100644
--- a/stix2/test/test_pattern_equivalence.py
+++ b/stix2/test/test_pattern_equivalence.py
@@ -1,5 +1,12 @@
 import pytest
-from stix2.equivalence.patterns import equivalent_patterns
+from stix2.equivalence.patterns import (
+    equivalent_patterns, find_equivalent_patterns
+)
+
+
+# # # #
+# # Observation expression equivalence tests # #
+# # # #
 
 
 @pytest.mark.parametrize(
@@ -569,3 +576,30 @@ def test_comp_special_canonicalization_win_reg_key(patt1, patt2):
 )
 def test_comp_special_canonicalization_win_reg_key_not_equivalent(patt1, patt2):
     assert not equivalent_patterns(patt1, patt2)
+
+
+# # # #
+# # find_equivalent_patterns() tests # #
+# # # #
+
+def test_find_equivalent_patterns():
+    search_pattern = "[a:b=1]"
+    other_patterns = [
+        "[a:b=2]",
+        "[a:b=1]",
+        "[a:b=1] WITHIN 1 SECONDS",
+        "[a:b=1] OR ([a:b=2] AND [a:b=1])",
+        "[(a:b=2 OR a:b=1) AND a:b=1]",
+        "[c:d=1]",
+        "[a:b>1]"
+    ]
+
+    result = list(
+        find_equivalent_patterns(search_pattern, other_patterns)
+    )
+
+    assert result == [
+        "[a:b=1]",
+        "[a:b=1] OR ([a:b=2] AND [a:b=1])",
+        "[(a:b=2 OR a:b=1) AND a:b=1]",
+    ]