From bd5635f5be4f30ccef5b8e86f5454de0ab675c75 Mon Sep 17 00:00:00 2001
From: Michael Chisholm
Date: Thu, 13 Aug 2020 16:46:25 -0400
Subject: [PATCH] Add some unit tests for pattern equivalence.

---
Reviewer notes (free text between the three-dash line and the diff; it is
not part of the commit and the added file below is left untouched):

  * Every test is parametrized over (patt1, patt2) pattern-string pairs and
    makes a single assertion on equivalent_patterns(patt1, patt2). Tests
    whose name ends in "_not_equivalent" assert a falsy result; all other
    tests assert a truthy result.
  * test_obs_* pairs combine whole bracketed expressions with AND, OR and
    FOLLOWEDBY, some carrying WITHIN / REPEATS qualifiers. test_comp_*
    pairs vary only the comparisons inside one pair of square brackets.
  * Name fragments describe the kind of pair being compared:
      dupe     a repeated operand vs. a single copy of it
      flatten  redundant parentheses / nested same-operator groups
      order    the same operands in a different order
      absorb   patterns shaped like "X OR (X AND Y)" vs. plain "X"
      dnf      a factored pattern vs. its distributed (expanded) form
    test_comp_special_canonicalization_* cover ipv4-addr:value and
    ipv6-addr:value (prefix-length and address-spelling variants) and
    windows-registry-key (letter case of key, values[..].name and
    values[*].data).
  * Expectations worth noticing when reading the data:
      - in the test_obs_* cases a duplicated operand is expected to
        collapse under OR, but not under AND or FOLLOWEDBY; in the
        test_comp_* cases it is expected to collapse under both AND and OR;
      - swapping the operands of FOLLOWEDBY is expected to break
        equivalence, swapping the operands of AND / OR is not;
      - 'aaa' vs 'AAA' is expected equivalent for windows-registry-key
        key and values[..].name, while 'foo' vs 'FOO' in values[*].data
        is expected NOT equivalent.
  * The ipv4 / ipv6 cases using '/-5', '/99' and 'foo' compare a pattern
    with an identical copy of itself.
  * Only the comparison-expression half of the file has a banner comment;
    the test_obs_* half above it has none.
  * Running the file needs pytest and
    stix2.equivalence.patterns.equivalent_patterns.

 stix2/test/test_pattern_equivalence.py | 571 +++++++++++++++++++++++++
 1 file changed, 571 insertions(+)
 create mode 100644 stix2/test/test_pattern_equivalence.py

diff --git a/stix2/test/test_pattern_equivalence.py b/stix2/test/test_pattern_equivalence.py
new file mode 100644
index 0000000..73eca58
--- /dev/null
+++ b/stix2/test/test_pattern_equivalence.py
@@ -0,0 +1,571 @@
+import pytest
+from stix2.equivalence.patterns import equivalent_patterns
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1] OR [a:b=1]",
+            "[a:b=1]"
+        ),
+        (
+            "[a:b=1] OR [a:b=1] OR [a:b=1]",
+            "[a:b=1]"
+        ),
+    ]
+)
+def test_obs_dupe_equivalent(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1] AND [a:b=1]",
+            "[a:b=1]"
+        ),
+        (
+            "[a:b=1] FOLLOWEDBY [a:b=1]",
+            "[a:b=1]"
+        ),
+    ]
+)
+def test_obs_dupe_not_equivalent(patt1, patt2):
+    assert not equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        ("[a:b=1]", "([a:b=1])"),
+        ("(((([a:b=1]))))", "([a:b=1])"),
+        (
+            "[a:b=1] AND ([a:b=2] AND [a:b=3])",
+            "[a:b=1] AND [a:b=2] AND [a:b=3]",
+        ),
+        (
+            "([a:b=1] AND [a:b=2]) AND [a:b=3]",
+            "[a:b=1] AND ([a:b=2] AND [a:b=3])",
+        ),
+        (
+            "[a:b=1] OR ([a:b=2] OR [a:b=3])",
+            "[a:b=1] OR [a:b=2] OR [a:b=3]",
+        ),
+        (
+            "([a:b=1] OR [a:b=2]) OR [a:b=3]",
+            "[a:b=1] OR ([a:b=2] OR [a:b=3])",
+        ),
+        (
+            "[a:b=1] FOLLOWEDBY ([a:b=2] FOLLOWEDBY [a:b=3])",
+            "[a:b=1] FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=3]",
+        ),
+        (
+            "([a:b=1] FOLLOWEDBY [a:b=2]) FOLLOWEDBY [a:b=3]",
+            "[a:b=1] FOLLOWEDBY ([a:b=2] FOLLOWEDBY [a:b=3])",
+        ),
+        (
+            "[a:b=1] AND ([a:b=2] AND ([a:b=3] AND [a:b=4])) AND ([a:b=5])",
+            "([a:b=1] AND ([a:b=2] AND [a:b=3]) AND ([a:b=4] AND [a:b=5]))",
+        )
+    ]
+)
+def test_obs_flatten_equivalent(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "([a:b=1] AND [a:b=2]) OR [a:b=3]",
+            "[a:b=1] AND ([a:b=2] OR [a:b=3])",
+        ),
+        (
+            "([a:b=1] OR [a:b=2]) FOLLOWEDBY [a:b=3]",
+            "[a:b=1] OR ([a:b=2] FOLLOWEDBY [a:b=3])",
+        ),
+        ("[a:b=1]", "([a:b=1]) REPEATS 2 TIMES"),
+        ("(((([a:b=1]))))", "([a:b=1] REPEATS 2 TIMES)"),
+        (
+            "[a:b=1] AND ([a:b=2] AND [a:b=3]) WITHIN 2 SECONDS",
+            "[a:b=1] WITHIN 2 SECONDS AND [a:b=2] AND [a:b=3]",
+        ),
+        (
+            "[a:b=1] OR ([a:b=2] OR [a:b=3]) WITHIN 2 SECONDS",
+            "[a:b=1] WITHIN 2 SECONDS OR [a:b=2] OR [a:b=3]",
+        ),
+        (
+            "[a:b=1] FOLLOWEDBY ([a:b=2] FOLLOWEDBY [a:b=3]) WITHIN 2 SECONDS",
+            "[a:b=1] WITHIN 2 SECONDS FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=3]",
+        ),
+    ]
+)
+def test_obs_flatten_not_equivalent(patt1, patt2):
+    assert not equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1] AND [a:b=2]",
+            "[a:b=2] AND [a:b=1]"
+        ),
+        (
+            "[a:b=1] OR [a:b=2]",
+            "[a:b=2] OR [a:b=1]"
+        ),
+        (
+            "[a:b=1] OR ([a:b=2] AND [a:b=3])",
+            "([a:b=3] AND [a:b=2]) OR [a:b=1]"
+        ),
+        (
+            "[a:b=1] WITHIN 2 SECONDS AND [a:b=2] REPEATS 2 TIMES",
+            "[a:b=2] REPEATS 2 TIMES AND [a:b=1] WITHIN 2 SECONDS"
+        )
+    ]
+)
+def test_obs_order_equivalent(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1] FOLLOWEDBY [a:b=2]",
+            "[a:b=2] FOLLOWEDBY [a:b=1]"
+        ),
+        (
+            "[a:b=1] WITHIN 2 SECONDS AND [a:b=2] REPEATS 2 TIMES",
+            "[a:b=1] REPEATS 2 TIMES AND [a:b=2] WITHIN 2 SECONDS"
+        )
+    ]
+)
+def test_obs_order_not_equivalent(patt1, patt2):
+    assert not equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1] OR ([a:b=1] AND [a:b=2])",
+            "[a:b=1]"
+        ),
+        (
+            "[a:b=1] OR ([a:b=1] FOLLOWEDBY [a:b=2])",
+            "[a:b=1]"
+        ),
+        (
+            "([a:b=3] AND [a:b=1]) OR ([a:b=1] AND [a:b=2] AND [a:b=3])",
+            "[a:b=3] AND [a:b=1]"
+        ),
+        (
+            "([a:b=1] FOLLOWEDBY [a:b=3]) OR ([a:b=4] FOLLOWEDBY [a:b=1] FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=3])",
+            "[a:b=1] FOLLOWEDBY [a:b=3]"
+        ),
+        (
+            "([a:b=1] FOLLOWEDBY [a:b=2]) OR (([a:b=1] FOLLOWEDBY [a:b=2]) AND [a:b=3])",
+            "[a:b=1] FOLLOWEDBY [a:b=2]"
+        ),
+        (
+            "([a:b=1] AND [a:b=2]) OR (([a:b=1] AND [a:b=2]) FOLLOWEDBY [a:b=3])",
+            "[a:b=1] AND [a:b=2]"
+        ),
+    ]
+)
+def test_obs_absorb_equivalent(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "([a:b=1] AND [a:b=2]) OR ([a:b=2] AND [a:b=3] AND [a:b=4])",
+            "[a:b=1] AND [a:b=2]"
+        ),
+        (
+            "([a:b=2] FOLLOWEDBY [a:b=1]) OR ([a:b=1] FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=3])",
+            "[a:b=2] FOLLOWEDBY [a:b=1]"
+        )
+    ]
+)
+def test_obs_absorb_not_equivalent(patt1, patt2):
+    assert not equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1] AND ([a:b=2] OR [a:b=3])",
+            "([a:b=1] AND [a:b=2]) OR ([a:b=1] AND [a:b=3])"
+        ),
+        (
+            "[a:b=1] FOLLOWEDBY ([a:b=2] OR [a:b=3])",
+            "([a:b=1] FOLLOWEDBY [a:b=2]) OR ([a:b=1] FOLLOWEDBY [a:b=3])"
+        ),
+        (
+            "[a:b=1] AND ([a:b=2] AND ([a:b=3] OR [a:b=4]))",
+            "([a:b=1] AND [a:b=2] AND [a:b=3]) OR ([a:b=1] AND [a:b=2] AND [a:b=4])"
+        ),
+        (
+            "[a:b=1] FOLLOWEDBY ([a:b=2] FOLLOWEDBY ([a:b=3] OR [a:b=4]))",
+            "([a:b=1] FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=3]) OR ([a:b=1] FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=4])"
+        ),
+        (
+            "([a:b=1] OR [a:b=2]) AND ([a:b=3] OR [a:b=4])",
+            "([a:b=1] AND [a:b=3]) OR ([a:b=1] AND [a:b=4]) OR ([a:b=2] AND [a:b=3]) OR ([a:b=2] AND [a:b=4])"
+        ),
+        (
+            "([a:b=1] OR [a:b=2]) FOLLOWEDBY ([a:b=3] OR [a:b=4])",
+            "([a:b=1] FOLLOWEDBY [a:b=3]) OR ([a:b=1] FOLLOWEDBY [a:b=4]) OR ([a:b=2] FOLLOWEDBY [a:b=3]) OR ([a:b=2] FOLLOWEDBY [a:b=4])"
+        ),
+    ]
+)
+def test_obs_dnf_equivalent(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1] AND [a:b=2]",
+            "[a:b=1] OR [a:b=2]"
+        ),
+        (
+            "[a:b=1] AND ([a:b=2] OR [a:b=3])",
+            "([a:b=1] AND [a:b=2]) OR [a:b=3]"
+        ),
+        (
+            "[a:b=1] WITHIN 2 SECONDS",
+            "[a:b=1] REPEATS 2 TIMES"
+        )
+    ]
+)
+def test_obs_not_equivalent(patt1, patt2):
+    assert not equivalent_patterns(patt1, patt2)
+
+
+# # # #
+# # Comparison expression equivalence tests # #
+# # # #
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1 AND a:b=1]",
+            "[a:b=1]"
+        ),
+        (
+            "[a:b=1 AND a:b=1 AND a:b=1]",
+            "[a:b=1]"
+        ),
+        (
+            "[a:b=1 OR a:b=1]",
+            "[a:b=1]"
+        ),
+        (
+            "[a:b=1 OR a:b=1 OR a:b=1]",
+            "[a:b=1]"
+        )
+    ]
+)
+def test_comp_dupe_equivalent(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[(a:b=1)]",
+            "[a:b=1]"
+        ),
+        (
+            "[(((((a:b=1)))))]",
+            "[(a:b=1)]"
+        ),
+        (
+            "[a:b=1 AND (a:b=2 AND a:b=3)]",
+            "[(a:b=1 AND a:b=2) AND a:b=3]"
+        ),
+        (
+            "[a:b=1 OR (a:b=2 OR a:b=3)]",
+            "[(a:b=1 OR a:b=2) OR a:b=3]"
+        ),
+        (
+            "[(((a:b=1 AND ((a:b=2) AND a:b=3) AND (a:b=4))))]",
+            "[a:b=1 AND a:b=2 AND a:b=3 AND a:b=4]"
+        ),
+        (
+            "[(((a:b=1 OR ((a:b=2) OR a:b=3) OR (a:b=4))))]",
+            "[a:b=1 OR a:b=2 OR a:b=3 OR a:b=4]"
+        ),
+    ]
+)
+def test_comp_flatten_equivalent(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1 AND a:b=2]",
+            "[a:b=2 AND a:b=1]"
+        ),
+        (
+            "[a:b=1 OR a:b=2]",
+            "[a:b=2 OR a:b=1]"
+        ),
+        (
+            "[(a:b=1 OR a:b=2) AND a:b=3]",
+            "[a:b=3 AND (a:b=2 OR a:b=1)]",
+        )
+    ]
+)
+def test_comp_order_equivalent(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1 OR (a:b=1 AND a:b=2)]",
+            "[a:b=1]"
+        ),
+        (
+            "[a:b=1 AND (a:b=1 OR a:b=2)]",
+            "[a:b=1]"
+        ),
+        (
+            "[(a:b=1 AND a:b=2) OR (a:b=3 AND a:b=2 AND a:b=1)]",
+            "[a:b=1 AND a:b=2]"
+        ),
+        (
+            "[(a:b=1 OR a:b=2) AND (a:b=3 OR a:b=2 OR a:b=1)]",
+            "[a:b=1 OR a:b=2]"
+        )
+    ]
+)
+def test_comp_absorb_equivalent(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1 OR (a:b=2 AND a:b=3)]",
+            "[(a:b=1 OR a:b=2) AND (a:b=1 OR a:b=3)]"
+        ),
+        (
+            "[a:b=1 AND (a:b=2 OR a:b=3)]",
+            "[(a:b=1 AND a:b=2) OR (a:b=1 AND a:b=3)]"
+        ),
+        (
+            "[(a:b=1 AND a:b=2) OR (a:b=3 AND a:b=4)]",
+            "[(a:b=1 OR a:b=3) AND (a:b=1 OR a:b=4) AND (a:b=2 OR a:b=3) AND (a:b=2 OR a:b=4)]"
+        ),
+        (
+            "[(a:b=1 OR a:b=2) AND (a:b=3 OR a:b=4)]",
+            "[(a:b=1 AND a:b=3) OR (a:b=1 AND a:b=4) OR (a:b=2 AND a:b=3) OR (a:b=2 AND a:b=4)]"
+        ),
+        (
+            "[a:b=1 AND (a:b=2 AND (a:b=3 OR a:b=4))]",
+            "[(a:b=1 AND a:b=2 AND a:b=3) OR (a:b=1 AND a:b=2 AND a:b=4)]"
+        )
+    ]
+)
+def test_comp_dnf_equivalent(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[a:b=1]",
+            "[a:b=2]"
+        ),
+        (
+            "[a:b=1 AND a:b=2]",
+            "[a:b=1 OR a:b=2]"
+        ),
+        (
+            "[(a:b=1 AND a:b=2) OR a:b=3]",
+            "[a:b=1 AND (a:b=2 OR a:b=3)]"
+        ),
+    ]
+)
+def test_comp_not_equivalent(patt1, patt2):
+    assert not equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[ipv4-addr:value='1.2.3.4/32']",
+            "[ipv4-addr:value='1.2.3.4']"
+        ),
+        (
+            "[ipv4-addr:value='1.2.3.4/24']",
+            "[ipv4-addr:value='1.2.3.0/24']"
+        ),
+        (
+            "[ipv4-addr:value='1.2.255.4/23']",
+            "[ipv4-addr:value='1.2.254.0/23']"
+        ),
+        (
+            "[ipv4-addr:value='1.2.255.4/20']",
+            "[ipv4-addr:value='1.2.240.0/20']"
+        ),
+        (
+            "[ipv4-addr:value='1.2.255.4/0']",
+            "[ipv4-addr:value='0.0.0.0/0']"
+        ),
+        (
+            "[ipv4-addr:value='01.02.03.04']",
+            "[ipv4-addr:value='1.2.3.4']"
+        ),
+        (
+            "[ipv4-addr:value='1.2.3.4/-5']",
+            "[ipv4-addr:value='1.2.3.4/-5']"
+        ),
+        (
+            "[ipv4-addr:value='1.2.3.4/99']",
+            "[ipv4-addr:value='1.2.3.4/99']"
+        ),
+        (
+            "[ipv4-addr:value='foo']",
+            "[ipv4-addr:value='foo']"
+        ),
+    ]
+)
+def test_comp_special_canonicalization_ipv4(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[ipv4-addr:value='1.2.3.4']",
+            "[ipv4-addr:value='1.2.3.5']"
+        ),
+        (
+            "[ipv4-addr:value='1.2.3.4/1']",
+            "[ipv4-addr:value='1.2.3.4/2']"
+        ),
+        (
+            "[ipv4-addr:value='foo']",
+            "[ipv4-addr:value='bar']"
+        ),
+    ]
+)
+def test_comp_special_canonicalization_ipv4_not_equivalent(patt1, patt2):
+    assert not equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8/128']",
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8']"
+        ),
+        (
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8/112']",
+            "[ipv6-addr:value='1:2:3:4:5:6:7:0/112']"
+        ),
+        (
+            "[ipv6-addr:value='1:2:3:4:5:6:ffff:8/111']",
+            "[ipv6-addr:value='1:2:3:4:5:6:fffe:0/111']"
+        ),
+        (
+            "[ipv6-addr:value='1:2:3:4:5:6:ffff:8/104']",
+            "[ipv6-addr:value='1:2:3:4:5:6:ff00:0/104']"
+        ),
+        (
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8/0']",
+            "[ipv6-addr:value='0:0:0:0:0:0:0:0/0']"
+        ),
+        (
+            "[ipv6-addr:value='0001:0000:0000:0000:0000:0000:0000:0001']",
+            "[ipv6-addr:value='1::1']"
+        ),
+        (
+            "[ipv6-addr:value='0000:0000:0000:0000:0000:0000:0000:0000']",
+            "[ipv6-addr:value='::']"
+        ),
+        (
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8/-5']",
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8/-5']"
+        ),
+        (
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8/99']",
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8/99']"
+        ),
+        (
+            "[ipv6-addr:value='foo']",
+            "[ipv6-addr:value='foo']"
+        ),
+    ]
+)
+def test_comp_special_canonicalization_ipv6(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8']",
+            "[ipv6-addr:value='1:2:3:4:5:6:7:9']",
+        ),
+        (
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8/1']",
+            "[ipv6-addr:value='1:2:3:4:5:6:7:8/2']",
+        ),
+        (
+            "[ipv6-addr:value='foo']",
+            "[ipv6-addr:value='bar']",
+        ),
+    ]
+)
+def test_comp_special_canonicalization_ipv6_not_equivalent(patt1, patt2):
+    assert not equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[windows-registry-key:key = 'aaa']",
+            "[windows-registry-key:key = 'AAA']",
+        ),
+        (
+            "[windows-registry-key:values[0].name = 'aaa']",
+            "[windows-registry-key:values[0].name = 'AAA']",
+        ),
+        (
+            "[windows-registry-key:values[*].name = 'aaa']",
+            "[windows-registry-key:values[*].name = 'AAA']",
+        ),
+    ]
+)
+def test_comp_special_canonicalization_win_reg_key(patt1, patt2):
+    assert equivalent_patterns(patt1, patt2)
+
+
+@pytest.mark.parametrize(
+    "patt1, patt2", [
+        (
+            "[windows-registry-key:key='foo']",
+            "[windows-registry-key:key='bar']",
+        ),
+        (
+            "[windows-registry-key:values[0].name='foo']",
+            "[windows-registry-key:values[0].name='bar']",
+        ),
+        (
+            "[windows-registry-key:values[*].name='foo']",
+            "[windows-registry-key:values[*].name='bar']",
+        ),
+        (
+            "[windows-registry-key:values[*].data='foo']",
+            "[windows-registry-key:values[*].data='FOO']",
+        ),
+    ]
+)
+def test_comp_special_canonicalization_win_reg_key_not_equivalent(patt1, patt2):
+    assert not equivalent_patterns(patt1, patt2)