Fix pattern semantic equivalence bug: comparison expression

DNF transform could trigger an error from the AST code which
does the SCO type checks on comparison expression ANDs.  The
transform had been ignoring SCO types.  Now, it will catch
the exception and drop the AND from the pattern.  Added a
couple new unit tests for this.
pull/1/head
Michael Chisholm 2021-12-23 00:59:41 -05:00
parent 81550cab92
commit 3086b7ab5b
3 changed files with 45 additions and 22 deletions

View File

@ -13,8 +13,8 @@ from stix2.equivalence.pattern.transform.specials import (
ipv4_addr, ipv6_addr, windows_reg_key,
)
from stix2.patterns import (
AndBooleanExpression, OrBooleanExpression, ParentheticalExpression,
_BooleanExpression, _ComparisonExpression,
AndBooleanExpression, ObjectPath, OrBooleanExpression,
ParentheticalExpression, _BooleanExpression, _ComparisonExpression,
)
@ -22,12 +22,6 @@ def _dupe_ast(ast):
"""
Create a duplicate of the given AST.
Note:
The comparison expression "leaves", i.e. simple <path> <op> <value>
comparisons are currently not duplicated. I don't think it's necessary
as of this writing; they are never changed. But revisit this if/when
necessary.
Args:
ast: The AST to duplicate
@ -45,9 +39,13 @@ def _dupe_ast(ast):
])
elif isinstance(ast, _ComparisonExpression):
# Change this to create a dupe, if we ever need to change simple
# comparison expressions as part of normalization.
result = ast
# Maybe we go as far as duping the ObjectPath object too?
new_object_path = ObjectPath(
ast.lhs.object_type_name, ast.lhs.property_path,
)
result = _ComparisonExpression(
ast.operator, new_object_path, ast.rhs, ast.negated,
)
else:
raise TypeError("Can't duplicate " + type(ast).__name__)
@ -333,17 +331,33 @@ class DNFTransformer(ComparisonExpressionTransformer):
other_children.append(child)
if or_children:
distributed_children = [
AndBooleanExpression([
# Make dupes: distribution implies adding repetition, and
# we should ensure each repetition is independent of the
# others.
_dupe_ast(sub_ast) for sub_ast in itertools.chain(
other_children, prod_seq,
)
])
distributed_and_arg_sets = (
itertools.chain(other_children, prod_seq)
for prod_seq in itertools.product(*or_children)
]
)
# The AST implementation will error if AND boolean comparison
# operands have no common SCO types. We need to handle that here.
# The following will drop AND's with no common SCO types, which is
# harmless (since they're impossible patterns and couldn't match
# anything anyway). It also acts as a nice simplification of the
# pattern. If the original AND node was legal (operands had at
# least one SCO type in common), it is guaranteed that there will
# be at least one legal distributed AND node (distributed_children
# below will not wind up empty).
distributed_children = []
for and_arg_set in distributed_and_arg_sets:
try:
and_node = AndBooleanExpression(
# Make dupes: distribution implies adding repetition,
# and we should ensure each repetition is independent
# of the others.
_dupe_ast(arg) for arg in and_arg_set
)
except ValueError:
pass
else:
distributed_children.append(and_node)
# Need to recursively continue to distribute AND over OR in
# any of our new sub-expressions which need it. This causes

View File

@ -505,7 +505,7 @@ class _BooleanExpression(_PatternExpression):
def __init__(self, operator, operands):
self.operator = operator
self.operands = list(operands)
for arg in operands:
for arg in self.operands:
if not hasattr(self, "root_types"):
self.root_types = arg.root_types
elif operator == "AND":

View File

@ -384,6 +384,15 @@ def test_comp_absorb_equivalent(patt1, patt2):
"[a:b=1 AND (a:b=2 AND (a:b=3 OR a:b=4))]",
"[(a:b=1 AND a:b=2 AND a:b=3) OR (a:b=1 AND a:b=2 AND a:b=4)]",
),
# Some tests with different SCO types
(
"[(a:b=1 OR b:c=1) AND (b:d=1 OR c:d=1)]",
"[b:c=1 AND b:d=1]",
),
(
"[(a:b=1 OR b:c=1) AND (b:d=1 OR c:d=1)]",
"[(z:y=1 OR b:c=1) AND (b:d=1 OR x:w=1 OR v:u=1)]",
),
],
)
def test_comp_dnf_equivalent(patt1, patt2):