Drop python-dateutil as a dependency and switch to the built-in
datetime module for parsing timestamps. python-dateutil proved too
slow.
master^2^2
Michael Chisholm 2020-05-20 15:06:53 -04:00
parent 33e07edf3b
commit 7955a41997
10 changed files with 49 additions and 51 deletions

View File

@ -52,7 +52,6 @@ setup(
packages=find_packages(exclude=['*.test', '*.test.*']), packages=find_packages(exclude=['*.test', '*.test.*']),
install_requires=[ install_requires=[
'enum34 ; python_version<"3.4"', 'enum34 ; python_version<"3.4"',
'python-dateutil',
'pytz', 'pytz',
'requests', 'requests',
'simplejson', 'simplejson',

View File

@ -310,7 +310,11 @@ class STIXPatternVisitorForSTIX2():
elif node.symbol.type == self.parser_class.BoolLiteral: elif node.symbol.type == self.parser_class.BoolLiteral:
return BooleanConstant(node.getText()) return BooleanConstant(node.getText())
elif node.symbol.type == self.parser_class.TimestampLiteral: elif node.symbol.type == self.parser_class.TimestampLiteral:
return TimestampConstant(node.getText()) value = node.getText()
# STIX 2.1 uses a special timestamp literal syntax
if value.startswith("t"):
value = value[2:-1]
return TimestampConstant(value)
else: else:
return node return node

View File

@ -228,7 +228,7 @@ def make_constant(value):
try: try:
return parse_into_datetime(value) return parse_into_datetime(value)
except ValueError: except (ValueError, TypeError):
pass pass
if isinstance(value, str): if isinstance(value, str):

View File

@ -4,6 +4,7 @@ import pytest
import pytz import pytz
import stix2 import stix2
import stix2.exceptions
from .constants import ATTACK_PATTERN_ID from .constants import ATTACK_PATTERN_ID
@ -83,19 +84,18 @@ def test_attack_pattern_invalid_labels():
def test_overly_precise_timestamps(): def test_overly_precise_timestamps():
ap = stix2.v20.AttackPattern( with pytest.raises(stix2.exceptions.InvalidValueError):
id=ATTACK_PATTERN_ID, stix2.v20.AttackPattern(
created="2016-05-12T08:17:27.0000342Z", id=ATTACK_PATTERN_ID,
modified="2016-05-12T08:17:27.000287Z", created="2016-05-12T08:17:27.0000342Z",
name="Spear Phishing", modified="2016-05-12T08:17:27.000287Z",
external_references=[{ name="Spear Phishing",
"source_name": "capec", external_references=[{
"external_id": "CAPEC-163", "source_name": "capec",
}], "external_id": "CAPEC-163",
description="...", }],
) description="...",
)
assert str(ap) == EXPECTED
def test_less_precise_timestamps(): def test_less_precise_timestamps():

View File

@ -300,8 +300,6 @@ def test_reference_property_specific_type():
@pytest.mark.parametrize( @pytest.mark.parametrize(
"value", [ "value", [
'2017-01-01T12:34:56Z', '2017-01-01T12:34:56Z',
'2017-01-01 12:34:56',
'Jan 1 2017 12:34:56',
], ],
) )
def test_timestamp_property_valid(value): def test_timestamp_property_valid(value):
@ -311,7 +309,7 @@ def test_timestamp_property_valid(value):
def test_timestamp_property_invalid(): def test_timestamp_property_invalid():
ts_prop = TimestampProperty() ts_prop = TimestampProperty()
with pytest.raises(ValueError): with pytest.raises(TypeError):
ts_prop.clean(1) ts_prop.clean(1)
with pytest.raises(ValueError): with pytest.raises(ValueError):
ts_prop.clean("someday sometime") ts_prop.clean("someday sometime")

View File

@ -35,8 +35,6 @@ def test_timestamp_formatting(dttm, timestamp):
(dt.datetime(2017, 1, 1, 0, tzinfo=pytz.utc), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), (dt.datetime(2017, 1, 1, 0, tzinfo=pytz.utc), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)),
(dt.date(2017, 1, 1), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), (dt.date(2017, 1, 1), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)),
('2017-01-01T00:00:00Z', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), ('2017-01-01T00:00:00Z', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)),
('2017-01-01T02:00:00+2:00', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)),
('2017-01-01T00:00:00', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)),
], ],
) )
def test_parse_datetime(timestamp, dttm): def test_parse_datetime(timestamp, dttm):
@ -45,11 +43,11 @@ def test_parse_datetime(timestamp, dttm):
@pytest.mark.parametrize( @pytest.mark.parametrize(
'timestamp, dttm, precision', [ 'timestamp, dttm, precision', [
('2017-01-01T01:02:03.000001', dt.datetime(2017, 1, 1, 1, 2, 3, 0, tzinfo=pytz.utc), 'millisecond'), ('2017-01-01T01:02:03.000001Z', dt.datetime(2017, 1, 1, 1, 2, 3, 0, tzinfo=pytz.utc), 'millisecond'),
('2017-01-01T01:02:03.001', dt.datetime(2017, 1, 1, 1, 2, 3, 1000, tzinfo=pytz.utc), 'millisecond'), ('2017-01-01T01:02:03.001Z', dt.datetime(2017, 1, 1, 1, 2, 3, 1000, tzinfo=pytz.utc), 'millisecond'),
('2017-01-01T01:02:03.1', dt.datetime(2017, 1, 1, 1, 2, 3, 100000, tzinfo=pytz.utc), 'millisecond'), ('2017-01-01T01:02:03.1Z', dt.datetime(2017, 1, 1, 1, 2, 3, 100000, tzinfo=pytz.utc), 'millisecond'),
('2017-01-01T01:02:03.45', dt.datetime(2017, 1, 1, 1, 2, 3, 450000, tzinfo=pytz.utc), 'millisecond'), ('2017-01-01T01:02:03.45Z', dt.datetime(2017, 1, 1, 1, 2, 3, 450000, tzinfo=pytz.utc), 'millisecond'),
('2017-01-01T01:02:03.45', dt.datetime(2017, 1, 1, 1, 2, 3, tzinfo=pytz.utc), 'second'), ('2017-01-01T01:02:03.45Z', dt.datetime(2017, 1, 1, 1, 2, 3, tzinfo=pytz.utc), 'second'),
], ],
) )
def test_parse_datetime_precision(timestamp, dttm, precision): def test_parse_datetime_precision(timestamp, dttm, precision):

View File

@ -4,6 +4,7 @@ import pytest
import pytz import pytz
import stix2 import stix2
import stix2.exceptions
from .constants import ATTACK_PATTERN_ID from .constants import ATTACK_PATTERN_ID
@ -86,19 +87,18 @@ def test_attack_pattern_invalid_labels():
def test_overly_precise_timestamps(): def test_overly_precise_timestamps():
ap = stix2.v21.AttackPattern( with pytest.raises(stix2.exceptions.InvalidValueError):
id=ATTACK_PATTERN_ID, stix2.v21.AttackPattern(
created="2016-05-12T08:17:27.000000342Z", id=ATTACK_PATTERN_ID,
modified="2016-05-12T08:17:27.000000287Z", created="2016-05-12T08:17:27.000000342Z",
name="Spear Phishing", modified="2016-05-12T08:17:27.000000287Z",
external_references=[{ name="Spear Phishing",
"source_name": "capec", external_references=[{
"external_id": "CAPEC-163", "source_name": "capec",
}], "external_id": "CAPEC-163",
description="...", }],
) description="...",
)
assert str(ap) == EXPECTED
def test_less_precise_timestamps(): def test_less_precise_timestamps():

View File

@ -303,8 +303,6 @@ def test_reference_property_specific_type():
@pytest.mark.parametrize( @pytest.mark.parametrize(
"value", [ "value", [
'2017-01-01T12:34:56Z', '2017-01-01T12:34:56Z',
'2017-01-01 12:34:56',
'Jan 1 2017 12:34:56',
], ],
) )
def test_timestamp_property_valid(value): def test_timestamp_property_valid(value):
@ -314,7 +312,7 @@ def test_timestamp_property_valid(value):
def test_timestamp_property_invalid(): def test_timestamp_property_invalid():
ts_prop = TimestampProperty() ts_prop = TimestampProperty()
with pytest.raises(ValueError): with pytest.raises(TypeError):
ts_prop.clean(1) ts_prop.clean(1)
with pytest.raises(ValueError): with pytest.raises(ValueError):
ts_prop.clean("someday sometime") ts_prop.clean("someday sometime")

View File

@ -35,8 +35,6 @@ def test_timestamp_formatting(dttm, timestamp):
(dt.datetime(2017, 1, 1, 0, tzinfo=pytz.utc), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), (dt.datetime(2017, 1, 1, 0, tzinfo=pytz.utc), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)),
(dt.date(2017, 1, 1), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), (dt.date(2017, 1, 1), dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)),
('2017-01-01T00:00:00Z', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)), ('2017-01-01T00:00:00Z', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)),
('2017-01-01T02:00:00+2:00', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)),
('2017-01-01T00:00:00', dt.datetime(2017, 1, 1, 0, 0, 0, tzinfo=pytz.utc)),
], ],
) )
def test_parse_datetime(timestamp, dttm): def test_parse_datetime(timestamp, dttm):
@ -45,11 +43,11 @@ def test_parse_datetime(timestamp, dttm):
@pytest.mark.parametrize( @pytest.mark.parametrize(
'timestamp, dttm, precision', [ 'timestamp, dttm, precision', [
('2017-01-01T01:02:03.000001', dt.datetime(2017, 1, 1, 1, 2, 3, 0, tzinfo=pytz.utc), 'millisecond'), ('2017-01-01T01:02:03.000001Z', dt.datetime(2017, 1, 1, 1, 2, 3, 0, tzinfo=pytz.utc), 'millisecond'),
('2017-01-01T01:02:03.001', dt.datetime(2017, 1, 1, 1, 2, 3, 1000, tzinfo=pytz.utc), 'millisecond'), ('2017-01-01T01:02:03.001Z', dt.datetime(2017, 1, 1, 1, 2, 3, 1000, tzinfo=pytz.utc), 'millisecond'),
('2017-01-01T01:02:03.1', dt.datetime(2017, 1, 1, 1, 2, 3, 100000, tzinfo=pytz.utc), 'millisecond'), ('2017-01-01T01:02:03.1Z', dt.datetime(2017, 1, 1, 1, 2, 3, 100000, tzinfo=pytz.utc), 'millisecond'),
('2017-01-01T01:02:03.45', dt.datetime(2017, 1, 1, 1, 2, 3, 450000, tzinfo=pytz.utc), 'millisecond'), ('2017-01-01T01:02:03.45Z', dt.datetime(2017, 1, 1, 1, 2, 3, 450000, tzinfo=pytz.utc), 'millisecond'),
('2017-01-01T01:02:03.45', dt.datetime(2017, 1, 1, 1, 2, 3, tzinfo=pytz.utc), 'second'), ('2017-01-01T01:02:03.45Z', dt.datetime(2017, 1, 1, 1, 2, 3, tzinfo=pytz.utc), 'second'),
], ],
) )
def test_parse_datetime_precision(timestamp, dttm, precision): def test_parse_datetime_precision(timestamp, dttm, precision):

View File

@ -10,7 +10,6 @@ import enum
import json import json
import re import re
from dateutil import parser
import pytz import pytz
import six import six
@ -32,6 +31,9 @@ TYPE_REGEX = re.compile(r'^\-?[a-z0-9]+(-[a-z0-9]+)*\-?$')
TYPE_21_REGEX = re.compile(r'^([a-z][a-z0-9]*)+(-[a-z0-9]+)*\-?$') TYPE_21_REGEX = re.compile(r'^([a-z][a-z0-9]*)+(-[a-z0-9]+)*\-?$')
PREFIX_21_REGEX = re.compile(r'^[a-z].*') PREFIX_21_REGEX = re.compile(r'^[a-z].*')
_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"
_TIMESTAMP_FORMAT_FRAC = "%Y-%m-%dT%H:%M:%S.%fZ"
class Precision(enum.Enum): class Precision(enum.Enum):
""" """
@ -252,8 +254,9 @@ def parse_into_datetime(
ts = dt.datetime.combine(value, dt.time(0, 0, tzinfo=pytz.utc)) ts = dt.datetime.combine(value, dt.time(0, 0, tzinfo=pytz.utc))
else: else:
# value isn't a date or datetime object so assume it's a string # value isn't a date or datetime object so assume it's a string
fmt = _TIMESTAMP_FORMAT_FRAC if "." in value else _TIMESTAMP_FORMAT
try: try:
parsed = parser.parse(value) parsed = dt.datetime.strptime(value, fmt)
except (TypeError, ValueError): except (TypeError, ValueError):
# Unknown format # Unknown format
raise ValueError( raise ValueError(