Merge pull request #490 from chisholm/rename_canonical
Change canonicalization to normalization

commit f9ca68458a
@@ -14,17 +14,17 @@ from ...version import DEFAULT_VERSION
 from .compare.observation import observation_expression_cmp
 from .transform import ChainTransformer, SettleTransformer
 from .transform.observation import (
-    AbsorptionTransformer, CanonicalizeComparisonExpressionsTransformer,
-    DNFTransformer, FlattenTransformer, OrderDedupeTransformer,
+    AbsorptionTransformer, DNFTransformer, FlattenTransformer,
+    NormalizeComparisonExpressionsTransformer, OrderDedupeTransformer,
 )

 # Lazy-initialize
-_pattern_canonicalizer = None
+_pattern_normalizer = None


-def _get_pattern_canonicalizer():
+def _get_pattern_normalizer():
     """
-    Get a canonicalization transformer for STIX patterns.
+    Get a normalization transformer for STIX patterns.

     Returns:
         The transformer
@@ -33,11 +33,11 @@ def _get_pattern_canonicalizer():
     # The transformers are either stateless or contain no state which changes
     # with each use. So we can setup the transformers once and keep reusing
     # them.
-    global _pattern_canonicalizer
+    global _pattern_normalizer

-    if not _pattern_canonicalizer:
-        canonicalize_comp_expr = \
-            CanonicalizeComparisonExpressionsTransformer()
+    if not _pattern_normalizer:
+        normalize_comp_expr = \
+            NormalizeComparisonExpressionsTransformer()

         obs_expr_flatten = FlattenTransformer()
         obs_expr_order = OrderDedupeTransformer()
@@ -49,12 +49,12 @@ def _get_pattern_canonicalizer():

         obs_dnf = DNFTransformer()

-        _pattern_canonicalizer = ChainTransformer(
-            canonicalize_comp_expr,
+        _pattern_normalizer = ChainTransformer(
+            normalize_comp_expr,
             obs_settle_simplify, obs_dnf, obs_settle_simplify,
         )

-    return _pattern_canonicalizer
+    return _pattern_normalizer


 def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
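For context: the renamed _get_pattern_normalizer() lazily builds one ChainTransformer and reuses it; a chain applies its sub-transformers once in order, while a "settle" step re-applies a transformer until the AST stops changing. The sketch below is a simplified, hypothetical illustration of that chain-and-settle idea, not the library's implementation.

    class SimpleChain:
        """Apply each transformer once, in order (illustrative only)."""
        def __init__(self, *transformers):
            self._transformers = transformers

        def transform(self, ast):
            changed_any = False
            for t in self._transformers:
                ast, changed = t.transform(ast)
                changed_any = changed_any or changed
            return ast, changed_any

    class SimpleSettle:
        """Re-apply a transformer until it reports no further change (illustrative only)."""
        def __init__(self, transformer):
            self._transformer = transformer

        def transform(self, ast):
            changed_any = False
            while True:
                ast, changed = self._transformer.transform(ast)
                if not changed:
                    return ast, changed_any
                changed_any = True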
@@ -77,11 +77,11 @@ def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
         pattern2, version=stix_version,
     )

-    pattern_canonicalizer = _get_pattern_canonicalizer()
-    canon_patt1, _ = pattern_canonicalizer.transform(patt_ast1)
-    canon_patt2, _ = pattern_canonicalizer.transform(patt_ast2)
+    pattern_normalizer = _get_pattern_normalizer()
+    norm_patt1, _ = pattern_normalizer.transform(patt_ast1)
+    norm_patt2, _ = pattern_normalizer.transform(patt_ast2)

-    result = observation_expression_cmp(canon_patt1, canon_patt2)
+    result = observation_expression_cmp(norm_patt1, norm_patt2)

     return result == 0

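equivalent_patterns() parses both patterns, normalizes each AST with the shared normalizer, and reports whether the normalized ASTs compare equal. A minimal usage sketch (the import path and the example patterns are assumptions for illustration, not taken from this patch):

    from stix2.equivalence.pattern import equivalent_patterns

    # Normalization makes ordering and duplication irrelevant, so these two
    # patterns should compare as equivalent.
    print(equivalent_patterns(
        "[ipv4-addr:value = '1.2.3.4'] OR [ipv4-addr:value = '5.6.7.8']",
        "[ipv4-addr:value = '5.6.7.8'] OR [ipv4-addr:value = '1.2.3.4']",
    ))  # expected: True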
@@ -92,7 +92,7 @@ def find_equivalent_patterns(
     """
     Find patterns from a sequence which are equivalent to a given pattern.
     This is more efficient than using equivalent_patterns() in a loop, because
-    it doesn't re-canonicalize the search pattern over and over. This works
+    it doesn't re-normalize the search pattern over and over. This works
     on an input iterable and is implemented as a generator of matches. So you
     can "stream" patterns in and matching patterns will be streamed out.

@@ -109,8 +109,8 @@ def find_equivalent_patterns(
         search_pattern, version=stix_version,
     )

-    pattern_canonicalizer = _get_pattern_canonicalizer()
-    canon_search_pattern_ast, _ = pattern_canonicalizer.transform(
+    pattern_normalizer = _get_pattern_normalizer()
+    norm_search_pattern_ast, _ = pattern_normalizer.transform(
         search_pattern_ast,
     )

@@ -118,10 +118,10 @@ def find_equivalent_patterns(
         pattern_ast = pattern_visitor.create_pattern_object(
             pattern, version=stix_version,
         )
-        canon_pattern_ast, _ = pattern_canonicalizer.transform(pattern_ast)
+        norm_pattern_ast, _ = pattern_normalizer.transform(pattern_ast)

         result = observation_expression_cmp(
-            canon_search_pattern_ast, canon_pattern_ast,
+            norm_search_pattern_ast, norm_pattern_ast,
         )

         if result == 0:
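As the docstring above notes, find_equivalent_patterns() normalizes the search pattern once, then normalizes and compares each candidate as it arrives, yielding matches lazily. A usage sketch (import path and sample patterns are illustrative assumptions):

    from stix2.equivalence.pattern import find_equivalent_patterns

    search = "[file:name = 'foo.dll']"
    candidates = [
        "[file:name = 'foo.dll'] OR [file:name = 'foo.dll']",  # dedupes to the search pattern
        "[file:name = 'bar.dll']",
    ]

    # Matches are yielded one at a time, so candidates can be streamed in.
    for match in find_equivalent_patterns(search, candidates):
        print(match)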
@@ -346,7 +346,7 @@ def comparison_expression_cmp(expr1, expr2):
     """
     Compare two comparison expressions. This is sensitive to the order of the
     expressions' sub-components. To achieve an order-insensitive comparison,
-    the ASTs must be canonically ordered first.
+    the sub-component ASTs must be ordered first.

     Args:
         expr1: The first comparison expression
@@ -62,7 +62,7 @@ def observation_expression_cmp(expr1, expr2):
     """
     Compare two observation expression ASTs. This is sensitive to the order of
     the expressions' sub-components. To achieve an order-insensitive
-    comparison, the ASTs must be canonically ordered first.
+    comparison, the sub-component ASTs must be ordered first.

     Args:
         expr1: The first observation expression
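Both cmp functions follow the three-way convention (negative, zero, or positive) and are order-sensitive, which is why the pipeline orders sub-components before comparing. A standalone illustration of that reasoning in plain Python (not library code):

    def three_way_cmp(a, b):
        # classic cmp convention: negative, zero, or positive
        return (a > b) - (a < b)

    x = ["B", "A"]
    y = ["A", "B"]
    print(three_way_cmp(x, y) == 0)                  # False: order-sensitive
    print(three_way_cmp(sorted(x), sorted(y)) == 0)  # True once both are ordered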
@@ -46,7 +46,7 @@ def _dupe_ast(ast):

     elif isinstance(ast, _ComparisonExpression):
         # Change this to create a dupe, if we ever need to change simple
-        # comparison expressions as part of canonicalization.
+        # comparison expressions as part of normalization.
         result = ast

     else:
@@ -147,9 +147,8 @@ class OrderDedupeTransformer(
     ComparisonExpressionTransformer,
 ):
     """
-    Canonically order the children of all nodes in the AST. Because the
-    deduping algorithm is based on sorted data, this transformation also does
-    deduping.
+    Order the children of all nodes in the AST. Because the deduping algorithm
+    is based on sorted data, this transformation also does deduping.

     E.g.:
         A and A => A
@@ -234,7 +234,7 @@ class OrderDedupeTransformer(
     ObservationExpressionTransformer,
 ):
     """
-    Canonically order AND/OR expressions, and dedupe ORs. E.g.:
+    Order AND/OR expressions, and dedupe ORs. E.g.:

         A or A => A
         B or A => A or B
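The reason ordering and deduping are combined, as the docstrings above describe, is that sorting makes equal children adjacent, so duplicates can be dropped in the same pass. A simplified sketch of that idea (hypothetical helper, not the library's implementation):

    import itertools

    def order_and_dedupe(children, key):
        # Sorting makes equal children adjacent, so groupby() can drop the
        # duplicates while walking the sorted sequence once.
        ordered = sorted(children, key=key)
        return [next(group) for _, group in itertools.groupby(ordered, key=key)]

    print(order_and_dedupe(["B", "A", "A"], key=str))  # ['A', 'B']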
@@ -489,11 +489,11 @@ class DNFTransformer(ObservationExpressionTransformer):
         return self.__transform(ast)


-class CanonicalizeComparisonExpressionsTransformer(
+class NormalizeComparisonExpressionsTransformer(
     ObservationExpressionTransformer,
 ):
     """
-    Canonicalize all comparison expressions.
+    Normalize all comparison expressions.
     """
     def __init__(self):
         comp_flatten = CFlattenTransformer()
@@ -504,13 +504,13 @@ class CanonicalizeComparisonExpressionsTransformer(

         comp_special = SpecialValueCanonicalization()
         comp_dnf = CDNFTransformer()
-        self.__comp_canonicalize = ChainTransformer(
+        self.__comp_normalize = ChainTransformer(
             comp_special, settle_simplify, comp_dnf, settle_simplify,
         )

     def transform_observation(self, ast):
         comp_expr = ast.operand
-        canon_comp_expr, changed = self.__comp_canonicalize.transform(comp_expr)
-        ast.operand = canon_comp_expr
+        norm_comp_expr, changed = self.__comp_normalize.transform(comp_expr)
+        ast.operand = norm_comp_expr

         return ast, changed
@@ -1,5 +1,5 @@
 """
-Some simple comparison expression canonicalization functions.
+Some simple comparison expression normalization functions.
 """
 import socket
