Merge pull request #490 from chisholm/rename_canonical

Change canonicalization to normalization
pull/1/head
Chris Lenk 2021-02-14 21:08:10 -05:00 committed by GitHub
commit f9ca68458a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 33 additions and 34 deletions

View File

@ -14,17 +14,17 @@ from ...version import DEFAULT_VERSION
from .compare.observation import observation_expression_cmp
from .transform import ChainTransformer, SettleTransformer
from .transform.observation import (
AbsorptionTransformer, CanonicalizeComparisonExpressionsTransformer,
DNFTransformer, FlattenTransformer, OrderDedupeTransformer,
AbsorptionTransformer, DNFTransformer, FlattenTransformer,
NormalizeComparisonExpressionsTransformer, OrderDedupeTransformer,
)
# Lazy-initialize
_pattern_canonicalizer = None
_pattern_normalizer = None
def _get_pattern_canonicalizer():
def _get_pattern_normalizer():
"""
Get a canonicalization transformer for STIX patterns.
Get a normalization transformer for STIX patterns.
Returns:
The transformer
@ -33,11 +33,11 @@ def _get_pattern_canonicalizer():
# The transformers are either stateless or contain no state which changes
# with each use.  So we can set up the transformers once and keep reusing
# them.
global _pattern_canonicalizer
global _pattern_normalizer
if not _pattern_canonicalizer:
canonicalize_comp_expr = \
CanonicalizeComparisonExpressionsTransformer()
if not _pattern_normalizer:
normalize_comp_expr = \
NormalizeComparisonExpressionsTransformer()
obs_expr_flatten = FlattenTransformer()
obs_expr_order = OrderDedupeTransformer()
@ -49,12 +49,12 @@ def _get_pattern_canonicalizer():
obs_dnf = DNFTransformer()
_pattern_canonicalizer = ChainTransformer(
canonicalize_comp_expr,
_pattern_normalizer = ChainTransformer(
normalize_comp_expr,
obs_settle_simplify, obs_dnf, obs_settle_simplify,
)
return _pattern_canonicalizer
return _pattern_normalizer
def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
@ -77,11 +77,11 @@ def equivalent_patterns(pattern1, pattern2, stix_version=DEFAULT_VERSION):
pattern2, version=stix_version,
)
pattern_canonicalizer = _get_pattern_canonicalizer()
canon_patt1, _ = pattern_canonicalizer.transform(patt_ast1)
canon_patt2, _ = pattern_canonicalizer.transform(patt_ast2)
pattern_normalizer = _get_pattern_normalizer()
norm_patt1, _ = pattern_normalizer.transform(patt_ast1)
norm_patt2, _ = pattern_normalizer.transform(patt_ast2)
result = observation_expression_cmp(canon_patt1, canon_patt2)
result = observation_expression_cmp(norm_patt1, norm_patt2)
return result == 0
@ -92,7 +92,7 @@ def find_equivalent_patterns(
"""
Find patterns from a sequence which are equivalent to a given pattern.
This is more efficient than using equivalent_patterns() in a loop, because
it doesn't re-canonicalize the search pattern over and over. This works
it doesn't re-normalize the search pattern over and over. This works
on an input iterable and is implemented as a generator of matches. So you
can "stream" patterns in and matching patterns will be streamed out.
@ -109,8 +109,8 @@ def find_equivalent_patterns(
search_pattern, version=stix_version,
)
pattern_canonicalizer = _get_pattern_canonicalizer()
canon_search_pattern_ast, _ = pattern_canonicalizer.transform(
pattern_normalizer = _get_pattern_normalizer()
norm_search_pattern_ast, _ = pattern_normalizer.transform(
search_pattern_ast,
)
@ -118,10 +118,10 @@ def find_equivalent_patterns(
pattern_ast = pattern_visitor.create_pattern_object(
pattern, version=stix_version,
)
canon_pattern_ast, _ = pattern_canonicalizer.transform(pattern_ast)
norm_pattern_ast, _ = pattern_normalizer.transform(pattern_ast)
result = observation_expression_cmp(
canon_search_pattern_ast, canon_pattern_ast,
norm_search_pattern_ast, norm_pattern_ast,
)
if result == 0:

View File

@ -346,7 +346,7 @@ def comparison_expression_cmp(expr1, expr2):
"""
Compare two comparison expressions. This is sensitive to the order of the
expressions' sub-components. To achieve an order-insensitive comparison,
the ASTs must be canonically ordered first.
the sub-component ASTs must be ordered first.
Args:
expr1: The first comparison expression

View File

@ -62,7 +62,7 @@ def observation_expression_cmp(expr1, expr2):
"""
Compare two observation expression ASTs. This is sensitive to the order of
the expressions' sub-components. To achieve an order-insensitive
comparison, the ASTs must be canonically ordered first.
comparison, the sub-component ASTs must be ordered first.
Args:
expr1: The first observation expression

View File

@ -46,7 +46,7 @@ def _dupe_ast(ast):
elif isinstance(ast, _ComparisonExpression):
# Change this to create a dupe, if we ever need to change simple
# comparison expressions as part of canonicalization.
# comparison expressions as part of normalization.
result = ast
else:
@ -147,9 +147,8 @@ class OrderDedupeTransformer(
ComparisonExpressionTransformer,
):
"""
Canonically order the children of all nodes in the AST. Because the
deduping algorithm is based on sorted data, this transformation also does
deduping.
Order the children of all nodes in the AST. Because the deduping algorithm
is based on sorted data, this transformation also does deduping.
E.g.:
A and A => A

View File

@ -234,7 +234,7 @@ class OrderDedupeTransformer(
ObservationExpressionTransformer,
):
"""
Canonically order AND/OR expressions, and dedupe ORs. E.g.:
Order AND/OR expressions, and dedupe ORs. E.g.:
A or A => A
B or A => A or B
@ -489,11 +489,11 @@ class DNFTransformer(ObservationExpressionTransformer):
return self.__transform(ast)
class CanonicalizeComparisonExpressionsTransformer(
class NormalizeComparisonExpressionsTransformer(
ObservationExpressionTransformer,
):
"""
Canonicalize all comparison expressions.
Normalize all comparison expressions.
"""
def __init__(self):
comp_flatten = CFlattenTransformer()
@ -504,13 +504,13 @@ class CanonicalizeComparisonExpressionsTransformer(
comp_special = SpecialValueCanonicalization()
comp_dnf = CDNFTransformer()
self.__comp_canonicalize = ChainTransformer(
self.__comp_normalize = ChainTransformer(
comp_special, settle_simplify, comp_dnf, settle_simplify,
)
def transform_observation(self, ast):
comp_expr = ast.operand
canon_comp_expr, changed = self.__comp_canonicalize.transform(comp_expr)
ast.operand = canon_comp_expr
norm_comp_expr, changed = self.__comp_normalize.transform(comp_expr)
ast.operand = norm_comp_expr
return ast, changed

View File

@ -1,5 +1,5 @@
"""
Some simple comparison expression canonicalization functions.
Some simple comparison expression normalization functions.
"""
import socket