From 7955a41997ef9e0f77a3ebbc90993b480096ffb4 Mon Sep 17 00:00:00 2001 From: Michael Chisholm Date: Wed, 20 May 2020 15:06:53 -0400 Subject: [PATCH] Drop python-dateutil as a dependency and switch to the builtin datetime module for parsing timestamps. Dateutil proved too slow. --- setup.py | 1 - stix2/pattern_visitor.py | 6 +++++- stix2/patterns.py | 2 +- stix2/test/v20/test_attack_pattern.py | 26 +++++++++++++------------- stix2/test/v20/test_properties.py | 4 +--- stix2/test/v20/test_utils.py | 12 +++++------- stix2/test/v21/test_attack_pattern.py | 26 +++++++++++++------------- stix2/test/v21/test_properties.py | 4 +--- stix2/test/v21/test_utils.py | 12 +++++------- stix2/utils.py | 7 +++++-- 10 files changed, 49 insertions(+), 51 deletions(-) diff --git a/setup.py b/setup.py index b028ffe..d4daa16 100644 --- a/setup.py +++ b/setup.py @@ -52,7 +52,6 @@ setup( packages=find_packages(exclude=['*.test', '*.test.*']), install_requires=[ 'enum34 ; python_version<"3.4"', - 'python-dateutil', 'pytz', 'requests', 'simplejson', diff --git a/stix2/pattern_visitor.py b/stix2/pattern_visitor.py index c0a0fdb..317ffa1 100644 --- a/stix2/pattern_visitor.py +++ b/stix2/pattern_visitor.py @@ -310,7 +310,11 @@ class STIXPatternVisitorForSTIX2(): elif node.symbol.type == self.parser_class.BoolLiteral: return BooleanConstant(node.getText()) elif node.symbol.type == self.parser_class.TimestampLiteral: - return TimestampConstant(node.getText()) + value = node.getText() + # STIX 2.1 uses a special timestamp literal syntax + if value.startswith("t"): + value = value[2:-1] + return TimestampConstant(value) else: return node diff --git a/stix2/patterns.py b/stix2/patterns.py index a44f68e..6592335 100644 --- a/stix2/patterns.py +++ b/stix2/patterns.py @@ -228,7 +228,7 @@ def make_constant(value): try: return parse_into_datetime(value) - except ValueError: + except (ValueError, TypeError): pass if isinstance(value, str): diff --git a/stix2/test/v20/test_attack_pattern.py b/stix2/test/v20/test_attack_pattern.py index 8d35e52..cc24fa9 100644 --- a/stix2/test/v20/test_attack_pattern.py +++ b/stix2/test/v20/test_attack_pattern.py @@ -4,6 +4,7 @@ import pytest import pytz import stix2 +import stix2.exceptions from .constants import ATTACK_PATTERN_ID @@ -83,19 +84,18 @@ def test_attack_pattern_invalid_labels(): def test_overly_precise_timestamps(): - ap = stix2.v20.AttackPattern( - id=ATTACK_PATTERN_ID, - created="2016-05-12T08:17:27.0000342Z", - modified="2016-05-12T08:17:27.000287Z", - name="Spear Phishing", - external_references=[{ - "source_name": "capec", - "external_id": "CAPEC-163", - }], - description="...", - ) - - assert str(ap) == EXPECTED + with pytest.raises(stix2.exceptions.InvalidValueError): + stix2.v20.AttackPattern( + id=ATTACK_PATTERN_ID, + created="2016-05-12T08:17:27.0000342Z", + modified="2016-05-12T08:17:27.000287Z", + name="Spear Phishing", + external_references=[{ + "source_name": "capec", + "external_id": "CAPEC-163", + }], + description="...", + ) def test_less_precise_timestamps(): diff --git a/stix2/test/v20/test_properties.py b/stix2/test/v20/test_properties.py index 1d1474a..802d865 100644 --- a/stix2/test/v20/test_properties.py +++ b/stix2/test/v20/test_properties.py @@ -300,8 +300,6 @@ def test_reference_property_specific_type(): @pytest.mark.parametrize( "value", [ '2017-01-01T12:34:56Z', - '2017-01-01 12:34:56', - 'Jan 1 2017 12:34:56', ], ) def test_timestamp_property_valid(value): @@ -311,7 +309,7 @@ def test_timestamp_property_valid(value): def test_timestamp_property_invalid(): ts_prop = TimestampProperty() - with pytest.raises(ValueError): + with pytest.raises(TypeError): ts_prop.clean(1) with pytest.raises(ValueError): ts_prop.clean("someday sometime") diff --git a/stix2/test/v20/test_utils.py b/stix2/test/v20/test_utils.py index 0433fd5..67750de 100644 --- a/stix2/test/v20/test_utils.py +++ b/stix2/test/v20/test_utils.py @@ -35,8 +35,6 @@ def test_timestamp_formatting(dttm, timestamp): (dt.datetime(2017, 1, 1, 0, tzinfo=pytz.utc), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), (dt.date(2017, 1, 1), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), ('2017-01-01T00:00:00Z', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), - ('2017-01-01T02:00:00+2:00', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), - ('2017-01-01T00:00:00', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), ], ) def test_parse_datetime(timestamp, dttm): @@ -45,11 +43,11 @@ def test_parse_datetime(timestamp, dttm): @pytest.mark.parametrize( 'timestamp, dttm, precision', [ - ('2017-01-01T01:02:03.000001', dt.datetime(2017, 1, 1, 1, 2, 3, 0, tzinfo=pytz.utc), 'millisecond'), - ('2017-01-01T01:02:03.001', dt.datetime(2017, 1, 1, 1, 2, 3, 1000, tzinfo=pytz.utc), 'millisecond'), - ('2017-01-01T01:02:03.1', dt.datetime(2017, 1, 1, 1, 2, 3, 100000, tzinfo=pytz.utc), 'millisecond'), - ('2017-01-01T01:02:03.45', dt.datetime(2017, 1, 1, 1, 2, 3, 450000, tzinfo=pytz.utc), 'millisecond'), - ('2017-01-01T01:02:03.45', dt.datetime(2017, 1, 1, 1, 2, 3, tzinfo=pytz.utc), 'second'), + ('2017-01-01T01:02:03.000001Z', dt.datetime(2017, 1, 1, 1, 2, 3, 0, tzinfo=pytz.utc), 'millisecond'), + ('2017-01-01T01:02:03.001Z', dt.datetime(2017, 1, 1, 1, 2, 3, 1000, tzinfo=pytz.utc), 'millisecond'), + ('2017-01-01T01:02:03.1Z', dt.datetime(2017, 1, 1, 1, 2, 3, 100000, tzinfo=pytz.utc), 'millisecond'), + ('2017-01-01T01:02:03.45Z', dt.datetime(2017, 1, 1, 1, 2, 3, 450000, tzinfo=pytz.utc), 'millisecond'), + ('2017-01-01T01:02:03.45Z', dt.datetime(2017, 1, 1, 1, 2, 3, tzinfo=pytz.utc), 'second'), ], ) def test_parse_datetime_precision(timestamp, dttm, precision): diff --git a/stix2/test/v21/test_attack_pattern.py b/stix2/test/v21/test_attack_pattern.py index b826f1e..0beda64 100644 --- a/stix2/test/v21/test_attack_pattern.py +++ b/stix2/test/v21/test_attack_pattern.py @@ -4,6 +4,7 @@ import pytest import pytz import stix2 +import stix2.exceptions from .constants import ATTACK_PATTERN_ID @@ -86,19 +87,18 @@ def test_attack_pattern_invalid_labels(): def test_overly_precise_timestamps(): - ap = stix2.v21.AttackPattern( - id=ATTACK_PATTERN_ID, - created="2016-05-12T08:17:27.000000342Z", - modified="2016-05-12T08:17:27.000000287Z", - name="Spear Phishing", - external_references=[{ - "source_name": "capec", - "external_id": "CAPEC-163", - }], - description="...", - ) - - assert str(ap) == EXPECTED + with pytest.raises(stix2.exceptions.InvalidValueError): + stix2.v21.AttackPattern( + id=ATTACK_PATTERN_ID, + created="2016-05-12T08:17:27.000000342Z", + modified="2016-05-12T08:17:27.000000287Z", + name="Spear Phishing", + external_references=[{ + "source_name": "capec", + "external_id": "CAPEC-163", + }], + description="...", + ) def test_less_precise_timestamps(): diff --git a/stix2/test/v21/test_properties.py b/stix2/test/v21/test_properties.py index 31dd941..84e87c4 100644 --- a/stix2/test/v21/test_properties.py +++ b/stix2/test/v21/test_properties.py @@ -303,8 +303,6 @@ def test_reference_property_specific_type(): @pytest.mark.parametrize( "value", [ '2017-01-01T12:34:56Z', - '2017-01-01 12:34:56', - 'Jan 1 2017 12:34:56', ], ) def test_timestamp_property_valid(value): @@ -314,7 +312,7 @@ def test_timestamp_property_valid(value): def test_timestamp_property_invalid(): ts_prop = TimestampProperty() - with pytest.raises(ValueError): + with pytest.raises(TypeError): ts_prop.clean(1) with pytest.raises(ValueError): ts_prop.clean("someday sometime") diff --git a/stix2/test/v21/test_utils.py b/stix2/test/v21/test_utils.py index 5cf88e4..f81c93f 100644 --- a/stix2/test/v21/test_utils.py +++ b/stix2/test/v21/test_utils.py @@ -35,8 +35,6 @@ def test_timestamp_formatting(dttm, timestamp): (dt.datetime(2017, 1, 1, 0, tzinfo=pytz.utc), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), (dt.date(2017, 1, 1), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), ('2017-01-01T00:00:00Z', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), - ('2017-01-01T02:00:00+2:00', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), - ('2017-01-01T00:00:00', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), ], ) def test_parse_datetime(timestamp, dttm): @@ -45,11 +43,11 @@ def test_parse_datetime(timestamp, dttm): @pytest.mark.parametrize( 'timestamp, dttm, precision', [ - ('2017-01-01T01:02:03.000001', dt.datetime(2017, 1, 1, 1, 2, 3, 0, tzinfo=pytz.utc), 'millisecond'), - ('2017-01-01T01:02:03.001', dt.datetime(2017, 1, 1, 1, 2, 3, 1000, tzinfo=pytz.utc), 'millisecond'), - ('2017-01-01T01:02:03.1', dt.datetime(2017, 1, 1, 1, 2, 3, 100000, tzinfo=pytz.utc), 'millisecond'), - ('2017-01-01T01:02:03.45', dt.datetime(2017, 1, 1, 1, 2, 3, 450000, tzinfo=pytz.utc), 'millisecond'), - ('2017-01-01T01:02:03.45', dt.datetime(2017, 1, 1, 1, 2, 3, tzinfo=pytz.utc), 'second'), + ('2017-01-01T01:02:03.000001Z', dt.datetime(2017, 1, 1, 1, 2, 3, 0, tzinfo=pytz.utc), 'millisecond'), + ('2017-01-01T01:02:03.001Z', dt.datetime(2017, 1, 1, 1, 2, 3, 1000, tzinfo=pytz.utc), 'millisecond'), + ('2017-01-01T01:02:03.1Z', dt.datetime(2017, 1, 1, 1, 2, 3, 100000, tzinfo=pytz.utc), 'millisecond'), + ('2017-01-01T01:02:03.45Z', dt.datetime(2017, 1, 1, 1, 2, 3, 450000, tzinfo=pytz.utc), 'millisecond'), + ('2017-01-01T01:02:03.45Z', dt.datetime(2017, 1, 1, 1, 2, 3, tzinfo=pytz.utc), 'second'), ], ) def test_parse_datetime_precision(timestamp, dttm, precision): diff --git a/stix2/utils.py b/stix2/utils.py index 47e89a1..766fd4b 100644 --- a/stix2/utils.py +++ b/stix2/utils.py @@ -10,7 +10,6 @@ import enum import json import re -from dateutil import parser import pytz import six @@ -32,6 +31,9 @@ TYPE_REGEX = re.compile(r'^\-?[a-z0-9]+(-[a-z0-9]+)*\-?$') TYPE_21_REGEX = re.compile(r'^([a-z][a-z0-9]*)+(-[a-z0-9]+)*\-?$') PREFIX_21_REGEX = re.compile(r'^[a-z].*') +_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ" +_TIMESTAMP_FORMAT_FRAC = "%Y-%m-%dT%H:%M:%S.%fZ" + class Precision(enum.Enum): """ @@ -252,8 +254,9 @@ def parse_into_datetime( ts = dt.datetime.combine(value, dt.time(0, 0, tzinfo=pytz.utc)) else: # value isn't a date or datetime object so assume it's a string + fmt = _TIMESTAMP_FORMAT_FRAC if "." in value else _TIMESTAMP_FORMAT try: - parsed = parser.parse(value) + parsed = dt.datetime.strptime(value, fmt) except (TypeError, ValueError): # Unknown format raise ValueError(