# cti-python-stix2/stix2/test/test_pattern_equivalence.py
# Tests for the STIX pattern equivalence helpers in stix2.equivalence.patterns.

import pytest

from stix2.equivalence.patterns import (
    equivalent_patterns, find_equivalent_patterns,
)
# # # #
# # Observation expression equivalence tests # #
# # # #
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1] OR [a:b=1]",
            "[a:b=1]",
        ),
        (
            "[a:b=1] OR [a:b=1] OR [a:b=1]",
            "[a:b=1]",
        ),
    ],
)
def test_obs_dupe_equivalent(patt1, patt2):
    """Repeating one observation expression under OR is equivalent to it alone."""
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1] AND [a:b=1]",
            "[a:b=1]",
        ),
        (
            "[a:b=1] FOLLOWEDBY [a:b=1]",
            "[a:b=1]",
        ),
    ],
)
def test_obs_dupe_not_equivalent(patt1, patt2):
    """Repeating one observation under AND or FOLLOWEDBY is NOT equivalent to it alone.

    Unlike OR, duplicates joined by these operators must not be collapsed.
    """
    assert not equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        ("[a:b=1]", "([a:b=1])"),
        ("(((([a:b=1]))))", "([a:b=1])"),
        (
            "[a:b=1] AND ([a:b=2] AND [a:b=3])",
            "[a:b=1] AND [a:b=2] AND [a:b=3]",
        ),
        (
            "([a:b=1] AND [a:b=2]) AND [a:b=3]",
            "[a:b=1] AND ([a:b=2] AND [a:b=3])",
        ),
        (
            "[a:b=1] OR ([a:b=2] OR [a:b=3])",
            "[a:b=1] OR [a:b=2] OR [a:b=3]",
        ),
        (
            "([a:b=1] OR [a:b=2]) OR [a:b=3]",
            "[a:b=1] OR ([a:b=2] OR [a:b=3])",
        ),
        (
            "[a:b=1] FOLLOWEDBY ([a:b=2] FOLLOWEDBY [a:b=3])",
            "[a:b=1] FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=3]",
        ),
        (
            "([a:b=1] FOLLOWEDBY [a:b=2]) FOLLOWEDBY [a:b=3]",
            "[a:b=1] FOLLOWEDBY ([a:b=2] FOLLOWEDBY [a:b=3])",
        ),
        (
            "[a:b=1] AND ([a:b=2] AND ([a:b=3] AND [a:b=4])) AND ([a:b=5])",
            "([a:b=1] AND ([a:b=2] AND [a:b=3]) AND ([a:b=4] AND [a:b=5]))",
        ),
    ],
)
def test_obs_flatten_equivalent(patt1, patt2):
    """Redundant parentheses and regrouping within one operator do not matter.

    Covers nested parentheses around a single observation and different
    groupings of a chain that uses only AND, only OR, or only FOLLOWEDBY.
    """
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "([a:b=1] AND [a:b=2]) OR [a:b=3]",
            "[a:b=1] AND ([a:b=2] OR [a:b=3])",
        ),
        (
            "([a:b=1] OR [a:b=2]) FOLLOWEDBY [a:b=3]",
            "[a:b=1] OR ([a:b=2] FOLLOWEDBY [a:b=3])",
        ),
        ("[a:b=1]", "([a:b=1]) REPEATS 2 TIMES"),
        ("(((([a:b=1]))))", "([a:b=1] REPEATS 2 TIMES)"),
        (
            "[a:b=1] AND ([a:b=2] AND [a:b=3]) WITHIN 2 SECONDS",
            "[a:b=1] WITHIN 2 SECONDS AND [a:b=2] AND [a:b=3]",
        ),
        (
            "[a:b=1] OR ([a:b=2] OR [a:b=3]) WITHIN 2 SECONDS",
            "[a:b=1] WITHIN 2 SECONDS OR [a:b=2] OR [a:b=3]",
        ),
        (
            "[a:b=1] FOLLOWEDBY ([a:b=2] FOLLOWEDBY [a:b=3]) WITHIN 2 SECONDS",
            "[a:b=1] WITHIN 2 SECONDS FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=3]",
        ),
    ],
)
def test_obs_flatten_not_equivalent(patt1, patt2):
    """Regrouping that changes meaning must not be treated as flattening.

    Covers regrouping across different operators, adding a REPEATS
    qualifier, and attaching a WITHIN qualifier to a different
    sub-expression.
    """
    assert not equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1] AND [a:b=2]",
            "[a:b=2] AND [a:b=1]",
        ),
        (
            "[a:b=1] OR [a:b=2]",
            "[a:b=2] OR [a:b=1]",
        ),
        (
            "[a:b=1] OR ([a:b=2] AND [a:b=3])",
            "([a:b=3] AND [a:b=2]) OR [a:b=1]",
        ),
        (
            "[a:b=1] WITHIN 2 SECONDS AND [a:b=2] REPEATS 2 TIMES",
            "[a:b=2] REPEATS 2 TIMES AND [a:b=1] WITHIN 2 SECONDS",
        ),
    ],
)
def test_obs_order_equivalent(patt1, patt2):
    """Operand order of AND and OR is irrelevant.

    The last case reorders operands together with the qualifiers attached
    to them.
    """
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1] FOLLOWEDBY [a:b=2]",
            "[a:b=2] FOLLOWEDBY [a:b=1]",
        ),
        (
            "[a:b=1] WITHIN 2 SECONDS AND [a:b=2] REPEATS 2 TIMES",
            "[a:b=1] REPEATS 2 TIMES AND [a:b=2] WITHIN 2 SECONDS",
        ),
    ],
)
def test_obs_order_not_equivalent(patt1, patt2):
    """Swapping FOLLOWEDBY operands, or swapping qualifiers between operands, is not equivalent."""
    assert not equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1] OR ([a:b=1] AND [a:b=2])",
            "[a:b=1]",
        ),
        (
            "[a:b=1] OR ([a:b=1] FOLLOWEDBY [a:b=2])",
            "[a:b=1]",
        ),
        (
            "([a:b=3] AND [a:b=1]) OR ([a:b=1] AND [a:b=2] AND [a:b=3])",
            "[a:b=3] AND [a:b=1]",
        ),
        (
            "([a:b=1] FOLLOWEDBY [a:b=3]) OR ([a:b=4] FOLLOWEDBY [a:b=1] FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=3])",
            "[a:b=1] FOLLOWEDBY [a:b=3]",
        ),
        (
            "([a:b=1] FOLLOWEDBY [a:b=2]) OR (([a:b=1] FOLLOWEDBY [a:b=2]) AND [a:b=3])",
            "[a:b=1] FOLLOWEDBY [a:b=2]",
        ),
        (
            "([a:b=1] AND [a:b=2]) OR (([a:b=1] AND [a:b=2]) FOLLOWEDBY [a:b=3])",
            "[a:b=1] AND [a:b=2]",
        ),
    ],
)
def test_obs_absorb_equivalent(patt1, patt2):
    """An OR branch that already contains another branch is absorbed by it.

    In each case the longer AND/FOLLOWEDBY branch includes every operand
    of the shorter branch (in the same relative order for FOLLOWEDBY), so
    the whole pattern reduces to the shorter branch.
    """
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "([a:b=1] AND [a:b=2]) OR ([a:b=2] AND [a:b=3] AND [a:b=4])",
            "[a:b=1] AND [a:b=2]",
        ),
        (
            "([a:b=2] FOLLOWEDBY [a:b=1]) OR ([a:b=1] FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=3])",
            "[a:b=2] FOLLOWEDBY [a:b=1]",
        ),
    ],
)
def test_obs_absorb_not_equivalent(patt1, patt2):
    """Absorption must not apply when the longer branch does not contain the shorter one.

    The AND case lacks one operand of the shorter branch; the FOLLOWEDBY
    case has the shared operands in the opposite order.
    """
    assert not equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1] AND ([a:b=2] OR [a:b=3])",
            "([a:b=1] AND [a:b=2]) OR ([a:b=1] AND [a:b=3])",
        ),
        (
            "[a:b=1] FOLLOWEDBY ([a:b=2] OR [a:b=3])",
            "([a:b=1] FOLLOWEDBY [a:b=2]) OR ([a:b=1] FOLLOWEDBY [a:b=3])",
        ),
        (
            "[a:b=1] AND ([a:b=2] AND ([a:b=3] OR [a:b=4]))",
            "([a:b=1] AND [a:b=2] AND [a:b=3]) OR ([a:b=1] AND [a:b=2] AND [a:b=4])",
        ),
        (
            "[a:b=1] FOLLOWEDBY ([a:b=2] FOLLOWEDBY ([a:b=3] OR [a:b=4]))",
            "([a:b=1] FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=3]) OR ([a:b=1] FOLLOWEDBY [a:b=2] FOLLOWEDBY [a:b=4])",
        ),
        (
            "([a:b=1] OR [a:b=2]) AND ([a:b=3] OR [a:b=4])",
            "([a:b=1] AND [a:b=3]) OR ([a:b=1] AND [a:b=4]) OR ([a:b=2] AND [a:b=3]) OR ([a:b=2] AND [a:b=4])",
        ),
        (
            "([a:b=1] OR [a:b=2]) FOLLOWEDBY ([a:b=3] OR [a:b=4])",
            "([a:b=1] FOLLOWEDBY [a:b=3]) OR ([a:b=1] FOLLOWEDBY [a:b=4]) OR ([a:b=2] FOLLOWEDBY [a:b=3]) OR ([a:b=2] FOLLOWEDBY [a:b=4])",
        ),
    ],
)
def test_obs_dnf_equivalent(patt1, patt2):
    """Distributing AND or FOLLOWEDBY over OR yields an equivalent pattern.

    Each pair holds a compact pattern and its expansion into an OR of
    AND/FOLLOWEDBY chains.
    """
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1] AND [a:b=2]",
            "[a:b=1] OR [a:b=2]",
        ),
        (
            "[a:b=1] AND ([a:b=2] OR [a:b=3])",
            "([a:b=1] AND [a:b=2]) OR [a:b=3]",
        ),
        (
            "[a:b=1] WITHIN 2 SECONDS",
            "[a:b=1] REPEATS 2 TIMES",
        ),
    ],
)
def test_obs_not_equivalent(patt1, patt2):
    """Different operators, groupings, or qualifiers give non-equivalent patterns."""
    assert not equivalent_patterns(patt1, patt2)
# # # #
# # Comparison expression equivalence tests # #
# # # #
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1 AND a:b=1]",
            "[a:b=1]",
        ),
        (
            "[a:b=1 AND a:b=1 AND a:b=1]",
            "[a:b=1]",
        ),
        (
            "[a:b=1 OR a:b=1]",
            "[a:b=1]",
        ),
        (
            "[a:b=1 OR a:b=1 OR a:b=1]",
            "[a:b=1]",
        ),
    ],
)
def test_comp_dupe_equivalent(patt1, patt2):
    """Inside one observation, a comparison repeated under AND or OR equals it alone."""
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[(a:b=1)]",
            "[a:b=1]",
        ),
        (
            "[(((((a:b=1)))))]",
            "[(a:b=1)]",
        ),
        (
            "[a:b=1 AND (a:b=2 AND a:b=3)]",
            "[(a:b=1 AND a:b=2) AND a:b=3]",
        ),
        (
            "[a:b=1 OR (a:b=2 OR a:b=3)]",
            "[(a:b=1 OR a:b=2) OR a:b=3]",
        ),
        (
            "[(((a:b=1 AND ((a:b=2) AND a:b=3) AND (a:b=4))))]",
            "[a:b=1 AND a:b=2 AND a:b=3 AND a:b=4]",
        ),
        (
            "[(((a:b=1 OR ((a:b=2) OR a:b=3) OR (a:b=4))))]",
            "[a:b=1 OR a:b=2 OR a:b=3 OR a:b=4]",
        ),
    ],
)
def test_comp_flatten_equivalent(patt1, patt2):
    """Redundant parentheses and regrouping of same-operator comparisons do not matter."""
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1 AND a:b=2]",
            "[a:b=2 AND a:b=1]",
        ),
        (
            "[a:b=1 OR a:b=2]",
            "[a:b=2 OR a:b=1]",
        ),
        (
            "[(a:b=1 OR a:b=2) AND a:b=3]",
            "[a:b=3 AND (a:b=2 OR a:b=1)]",
        ),
    ],
)
def test_comp_order_equivalent(patt1, patt2):
    """Operand order of AND and OR between comparisons is irrelevant."""
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1 OR (a:b=1 AND a:b=2)]",
            "[a:b=1]",
        ),
        (
            "[a:b=1 AND (a:b=1 OR a:b=2)]",
            "[a:b=1]",
        ),
        (
            "[(a:b=1 AND a:b=2) OR (a:b=3 AND a:b=2 AND a:b=1)]",
            "[a:b=1 AND a:b=2]",
        ),
        (
            "[(a:b=1 OR a:b=2) AND (a:b=3 OR a:b=2 OR a:b=1)]",
            "[a:b=1 OR a:b=2]",
        ),
    ],
)
def test_comp_absorb_equivalent(patt1, patt2):
    """Absorption holds for comparisons in both directions.

    Covers ``A OR (A AND B)`` and ``A AND (A OR B)``, each reducing to
    ``A``, including cases where ``A`` is itself a compound expression.
    """
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1 OR (a:b=2 AND a:b=3)]",
            "[(a:b=1 OR a:b=2) AND (a:b=1 OR a:b=3)]",
        ),
        (
            "[a:b=1 AND (a:b=2 OR a:b=3)]",
            "[(a:b=1 AND a:b=2) OR (a:b=1 AND a:b=3)]",
        ),
        (
            "[(a:b=1 AND a:b=2) OR (a:b=3 AND a:b=4)]",
            "[(a:b=1 OR a:b=3) AND (a:b=1 OR a:b=4) AND (a:b=2 OR a:b=3) AND (a:b=2 OR a:b=4)]",
        ),
        (
            "[(a:b=1 OR a:b=2) AND (a:b=3 OR a:b=4)]",
            "[(a:b=1 AND a:b=3) OR (a:b=1 AND a:b=4) OR (a:b=2 AND a:b=3) OR (a:b=2 AND a:b=4)]",
        ),
        (
            "[a:b=1 AND (a:b=2 AND (a:b=3 OR a:b=4))]",
            "[(a:b=1 AND a:b=2 AND a:b=3) OR (a:b=1 AND a:b=2 AND a:b=4)]",
        ),
    ],
)
def test_comp_dnf_equivalent(patt1, patt2):
    """Distributing AND over OR, or OR over AND, between comparisons preserves equivalence."""
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[a:b=1]",
            "[a:b=2]",
        ),
        (
            "[a:b=1 AND a:b=2]",
            "[a:b=1 OR a:b=2]",
        ),
        (
            "[(a:b=1 AND a:b=2) OR a:b=3]",
            "[a:b=1 AND (a:b=2 OR a:b=3)]",
        ),
    ],
)
def test_comp_not_equivalent(patt1, patt2):
    """Different values, operators, or groupings give non-equivalent comparisons."""
    assert not equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[ipv4-addr:value='1.2.3.4/32']",
            "[ipv4-addr:value='1.2.3.4']",
        ),
        (
            "[ipv4-addr:value='1.2.3.4/24']",
            "[ipv4-addr:value='1.2.3.0/24']",
        ),
        (
            "[ipv4-addr:value='1.2.255.4/23']",
            "[ipv4-addr:value='1.2.254.0/23']",
        ),
        (
            "[ipv4-addr:value='1.2.255.4/20']",
            "[ipv4-addr:value='1.2.240.0/20']",
        ),
        (
            "[ipv4-addr:value='1.2.255.4/0']",
            "[ipv4-addr:value='0.0.0.0/0']",
        ),
        (
            "[ipv4-addr:value='01.02.03.04']",
            "[ipv4-addr:value='1.2.3.4']",
        ),
        (
            "[ipv4-addr:value='1.2.3.4/-5']",
            "[ipv4-addr:value='1.2.3.4/-5']",
        ),
        (
            "[ipv4-addr:value='1.2.3.4/99']",
            "[ipv4-addr:value='1.2.3.4/99']",
        ),
        (
            "[ipv4-addr:value='foo']",
            "[ipv4-addr:value='bar']".replace("bar", "foo"),
        ),
    ],
)
def test_comp_special_canonicalization_ipv4(patt1, patt2):
    """IPv4 address values are compared in a canonical form.

    Covers a /32 prefix versus the bare address, host bits set below the
    prefix length, leading zeros in octets, and values that are not valid
    addresses or prefixes (which must still equal themselves).
    """
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[ipv4-addr:value='1.2.3.4']",
            "[ipv4-addr:value='1.2.3.5']",
        ),
        (
            "[ipv4-addr:value='1.2.3.4/1']",
            "[ipv4-addr:value='1.2.3.4/2']",
        ),
        (
            "[ipv4-addr:value='foo']",
            "[ipv4-addr:value='bar']",
        ),
    ],
)
def test_comp_special_canonicalization_ipv4_not_equivalent(patt1, patt2):
    """Different IPv4 addresses, prefix lengths, or invalid values stay distinct."""
    assert not equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[ipv6-addr:value='1:2:3:4:5:6:7:8/128']",
            "[ipv6-addr:value='1:2:3:4:5:6:7:8']",
        ),
        (
            "[ipv6-addr:value='1:2:3:4:5:6:7:8/112']",
            "[ipv6-addr:value='1:2:3:4:5:6:7:0/112']",
        ),
        (
            "[ipv6-addr:value='1:2:3:4:5:6:ffff:8/111']",
            "[ipv6-addr:value='1:2:3:4:5:6:fffe:0/111']",
        ),
        (
            "[ipv6-addr:value='1:2:3:4:5:6:ffff:8/104']",
            "[ipv6-addr:value='1:2:3:4:5:6:ff00:0/104']",
        ),
        (
            "[ipv6-addr:value='1:2:3:4:5:6:7:8/0']",
            "[ipv6-addr:value='0:0:0:0:0:0:0:0/0']",
        ),
        (
            "[ipv6-addr:value='0001:0000:0000:0000:0000:0000:0000:0001']",
            "[ipv6-addr:value='1::1']",
        ),
        (
            "[ipv6-addr:value='0000:0000:0000:0000:0000:0000:0000:0000']",
            "[ipv6-addr:value='::']",
        ),
        (
            "[ipv6-addr:value='1:2:3:4:5:6:7:8/-5']",
            "[ipv6-addr:value='1:2:3:4:5:6:7:8/-5']",
        ),
        (
            "[ipv6-addr:value='1:2:3:4:5:6:7:8/99']",
            "[ipv6-addr:value='1:2:3:4:5:6:7:8/99']",
        ),
        (
            "[ipv6-addr:value='foo']",
            "[ipv6-addr:value='foo']",
        ),
    ],
)
def test_comp_special_canonicalization_ipv6(patt1, patt2):
    """IPv6 address values are compared in a canonical form.

    Covers a /128 prefix versus the bare address, host bits set below the
    prefix length, full versus compressed notation, and out-of-range or
    non-address values compared with themselves.
    """
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[ipv6-addr:value='1:2:3:4:5:6:7:8']",
            "[ipv6-addr:value='1:2:3:4:5:6:7:9']",
        ),
        (
            "[ipv6-addr:value='1:2:3:4:5:6:7:8/1']",
            "[ipv6-addr:value='1:2:3:4:5:6:7:8/2']",
        ),
        (
            "[ipv6-addr:value='foo']",
            "[ipv6-addr:value='bar']",
        ),
    ],
)
def test_comp_special_canonicalization_ipv6_not_equivalent(patt1, patt2):
    """Different IPv6 addresses, prefix lengths, or invalid values stay distinct."""
    assert not equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[windows-registry-key:key = 'aaa']",
            "[windows-registry-key:key = 'AAA']",
        ),
        (
            "[windows-registry-key:values[0].name = 'aaa']",
            "[windows-registry-key:values[0].name = 'AAA']",
        ),
        (
            "[windows-registry-key:values[*].name = 'aaa']",
            "[windows-registry-key:values[*].name = 'AAA']",
        ),
    ],
)
def test_comp_special_canonicalization_win_reg_key(patt1, patt2):
    """Registry ``key`` and ``values[...].name`` are equivalent regardless of letter case."""
    assert equivalent_patterns(patt1, patt2)
@pytest.mark.parametrize(
    "patt1, patt2", [
        (
            "[windows-registry-key:key='foo']",
            "[windows-registry-key:key='bar']",
        ),
        (
            "[windows-registry-key:values[0].name='foo']",
            "[windows-registry-key:values[0].name='bar']",
        ),
        (
            "[windows-registry-key:values[*].name='foo']",
            "[windows-registry-key:values[*].name='bar']",
        ),
        (
            "[windows-registry-key:values[*].data='foo']",
            "[windows-registry-key:values[*].data='FOO']",
        ),
    ],
)
def test_comp_special_canonicalization_win_reg_key_not_equivalent(patt1, patt2):
    """Different registry keys or value names are not equivalent.

    The last case checks that ``values[*].data`` differing only in letter
    case is NOT treated as equivalent.
    """
    assert not equivalent_patterns(patt1, patt2)
# # # #
# # find_equivalent_patterns() tests # #
# # # #
def test_find_equivalent_patterns():
    """find_equivalent_patterns() returns only the equivalent candidates, in input order."""
    search_pattern = "[a:b=1]"
    other_patterns = [
        "[a:b=2]",
        "[a:b=1]",
        "[a:b=1] WITHIN 1 SECONDS",
        "[a:b=1] OR ([a:b=2] AND [a:b=1])",
        "[(a:b=2 OR a:b=1) AND a:b=1]",
        "[c:d=1]",
        "[a:b>1]",
    ]

    # list() materialises the returned iterable for the comparison below.
    result = list(
        find_equivalent_patterns(search_pattern, other_patterns),
    )

    assert result == [
        "[a:b=1]",
        "[a:b=1] OR ([a:b=2] AND [a:b=1])",
        "[(a:b=2 OR a:b=1) AND a:b=1]",
    ]