Merge pull request #490 from chisholm/rename_canonical

Change canonicalization to normalization
pull/1/head
Chris Lenk 2021-02-14 21:08:10 -05:00 committed by GitHub
commit f9ca68458a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 33 additions and 34 deletions

View File

@ -14,17 +14,17 @@ from ...version import DEFAULT_VERSION
from .compare.observation import observation_expression_cmp from .compare.observation import observation_expression_cmp
from .transform import ChainTransformer, SettleTransformer from .transform import ChainTransformer, SettleTransformer
from .transform.observation import ( from .transform.observation import (
AbsorptionTransformer, CanonicalizeComparisonExpressionsTransformer, AbsorptionTransformer, DNFTransformer, FlattenTransformer,
DNFTransformer, FlattenTransformer, OrderDedupeTransformer, NormalizeComparisonExpressionsTransformer, OrderDedupeTransformer,
) )
# Lazy-initialize # Lazy-initialize
_pattern_canonicalizer = None _pattern_normalizer = None
def _get_pattern_canonicalizer(): def _get_pattern_normalizer():
""" """
Get a canonicalization transformer for STIX patterns. Get a normalization transformer for STIX patterns.
Returns: Returns:
The transformer The transformer
@ -33,11 +33,11 @@ def _get_pattern_canonicalizer():
# The transformers are either stateless or contain no state which changes # The transformers are either stateless or contain no state which changes
# with each use. So we can setup the transformers once and keep reusing # with each use. So we can setup the transformers once and keep reusing
# them. # them.
global _pattern_canonicalizer global _pattern_normalizer
if not _pattern_canonicalizer: if not _pattern_normalizer:
canonicalize_comp_expr = \ normalize_comp_expr = \
CanonicalizeComparisonExpressionsTransformer() NormalizeComparisonExpressionsTransformer()
obs_expr_flatten = FlattenTransformer() obs_expr_flatten = FlattenTransformer()
obs_expr_order = OrderDedupeTransformer() obs_expr_order = OrderDedupeTransformer()
@ -49,12 +49,12 @@ def _get_pattern_canonicalizer():
obs_dnf = DNFTransformer() obs_dnf = DNFTransformer()
_pattern_canonicalizer = ChainTransformer( _pattern_normalizer = ChainTransformer(
canonicalize_comp_expr, normalize_comp_expr,
obs_settle_simplify, obs_dnf, obs_settle_simplify, obs_settle_simplify, obs_dnf, obs_settle_simplify,
) )
return _pattern_canonicalizer return _pattern_normalizer
def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION): def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
@ -77,11 +77,11 @@ def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
pattern2, version=stix_version, pattern2, version=stix_version,
) )
pattern_canonicalizer = _get_pattern_canonicalizer() pattern_normalizer = _get_pattern_normalizer()
canon_patt1, _ = pattern_canonicalizer.transform(patt_ast1) norm_patt1, _ = pattern_normalizer.transform(patt_ast1)
canon_patt2, _ = pattern_canonicalizer.transform(patt_ast2) norm_patt2, _ = pattern_normalizer.transform(patt_ast2)
result = observation_expression_cmp(canon_patt1, canon_patt2) result = observation_expression_cmp(norm_patt1, norm_patt2)
return result == 0 return result == 0
@ -92,7 +92,7 @@ def find_equivalent_patterns(
""" """
Find patterns from a sequence which are equivalent to a given pattern. Find patterns from a sequence which are equivalent to a given pattern.
This is more efficient than using equivalent_patterns() in a loop, because This is more efficient than using equivalent_patterns() in a loop, because
it doesn't re-canonicalize the search pattern over and over. This works it doesn't re-normalize the search pattern over and over. This works
on an input iterable and is implemented as a generator of matches. So you on an input iterable and is implemented as a generator of matches. So you
can "stream" patterns in and matching patterns will be streamed out. can "stream" patterns in and matching patterns will be streamed out.
@ -109,8 +109,8 @@ def find_equivalent_patterns(
search_pattern, version=stix_version, search_pattern, version=stix_version,
) )
pattern_canonicalizer = _get_pattern_canonicalizer() pattern_normalizer = _get_pattern_normalizer()
canon_search_pattern_ast, _ = pattern_canonicalizer.transform( norm_search_pattern_ast, _ = pattern_normalizer.transform(
search_pattern_ast, search_pattern_ast,
) )
@ -118,10 +118,10 @@ def find_equivalent_patterns(
pattern_ast = pattern_visitor.create_pattern_object( pattern_ast = pattern_visitor.create_pattern_object(
pattern, version=stix_version, pattern, version=stix_version,
) )
canon_pattern_ast, _ = pattern_canonicalizer.transform(pattern_ast) norm_pattern_ast, _ = pattern_normalizer.transform(pattern_ast)
result = observation_expression_cmp( result = observation_expression_cmp(
canon_search_pattern_ast, canon_pattern_ast, norm_search_pattern_ast, norm_pattern_ast,
) )
if result == 0: if result == 0:

View File

@ -346,7 +346,7 @@ def comparison_expression_cmp(expr1, expr2):
""" """
Compare two comparison expressions. This is sensitive to the order of the Compare two comparison expressions. This is sensitive to the order of the
expressions' sub-components. To achieve an order-insensitive comparison, expressions' sub-components. To achieve an order-insensitive comparison,
the ASTs must be canonically ordered first. the sub-component ASTs must be ordered first.
Args: Args:
expr1: The first comparison expression expr1: The first comparison expression

View File

@ -62,7 +62,7 @@ def observation_expression_cmp(expr1, expr2):
""" """
Compare two observation expression ASTs. This is sensitive to the order of Compare two observation expression ASTs. This is sensitive to the order of
the expressions' sub-components. To achieve an order-insensitive the expressions' sub-components. To achieve an order-insensitive
comparison, the ASTs must be canonically ordered first. comparison, the sub-component ASTs must be ordered first.
Args: Args:
expr1: The first observation expression expr1: The first observation expression

View File

@ -46,7 +46,7 @@ def _dupe_ast(ast):
elif isinstance(ast, _ComparisonExpression): elif isinstance(ast, _ComparisonExpression):
# Change this to create a dupe, if we ever need to change simple # Change this to create a dupe, if we ever need to change simple
# comparison expressions as part of canonicalization. # comparison expressions as part of normalization.
result = ast result = ast
else: else:
@ -147,9 +147,8 @@ class OrderDedupeTransformer(
ComparisonExpressionTransformer, ComparisonExpressionTransformer,
): ):
""" """
Canonically order the children of all nodes in the AST. Because the Order the children of all nodes in the AST. Because the deduping algorithm
deduping algorithm is based on sorted data, this transformation also does is based on sorted data, this transformation also does deduping.
deduping.
E.g.: E.g.:
A and A => A A and A => A

View File

@ -234,7 +234,7 @@ class OrderDedupeTransformer(
ObservationExpressionTransformer, ObservationExpressionTransformer,
): ):
""" """
Canonically order AND/OR expressions, and dedupe ORs. E.g.: Order AND/OR expressions, and dedupe ORs. E.g.:
A or A => A A or A => A
B or A => A or B B or A => A or B
@ -489,11 +489,11 @@ class DNFTransformer(ObservationExpressionTransformer):
return self.__transform(ast) return self.__transform(ast)
class CanonicalizeComparisonExpressionsTransformer( class NormalizeComparisonExpressionsTransformer(
ObservationExpressionTransformer, ObservationExpressionTransformer,
): ):
""" """
Canonicalize all comparison expressions. Normalize all comparison expressions.
""" """
def __init__(self): def __init__(self):
comp_flatten = CFlattenTransformer() comp_flatten = CFlattenTransformer()
@ -504,13 +504,13 @@ class CanonicalizeComparisonExpressionsTransformer(
comp_special = SpecialValueCanonicalization() comp_special = SpecialValueCanonicalization()
comp_dnf = CDNFTransformer() comp_dnf = CDNFTransformer()
self.__comp_canonicalize = ChainTransformer( self.__comp_normalize = ChainTransformer(
comp_special, settle_simplify, comp_dnf, settle_simplify, comp_special, settle_simplify, comp_dnf, settle_simplify,
) )
def transform_observation(self, ast): def transform_observation(self, ast):
comp_expr = ast.operand comp_expr = ast.operand
canon_comp_expr, changed = self.__comp_canonicalize.transform(comp_expr) norm_comp_expr, changed = self.__comp_normalize.transform(comp_expr)
ast.operand = canon_comp_expr ast.operand = norm_comp_expr
return ast, changed return ast, changed

View File

@ -1,5 +1,5 @@
""" """
Some simple comparison expression canonicalization functions. Some simple comparison expression normalization functions.
""" """
import socket import socket