diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index 08ff3e7..febc048 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -36,5 +36,11 @@ jobs:
         poetry run pytest --cov=pymisp tests/test_*.py
         poetry run mypy tests/testlive_comprehensive.py tests/test_mispevent.py tests/testlive_sync.py pymisp
 
+    - name: Test with nosetests with orjson
+      run: |
+        poetry run pip3 install orjson
+        poetry run pytest --cov=pymisp tests/test_*.py
+        poetry run mypy tests/testlive_comprehensive.py tests/test_mispevent.py tests/testlive_sync.py pymisp
+
     - name: Upload coverage to Codecov
       uses: codecov/codecov-action@v3
diff --git a/pymisp/abstract.py b/pymisp/abstract.py
index 15a123c..8a4be01 100644
--- a/pymisp/abstract.py
+++ b/pymisp/abstract.py
@@ -1,41 +1,33 @@
 #!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
+import logging
 from datetime import date, datetime
-
 from deprecated import deprecated  # type: ignore
 from json import JSONEncoder
 from uuid import UUID
 from abc import ABCMeta
-
-try:
-    from rapidjson import load  # type: ignore
-    from rapidjson import loads  # type: ignore
-    from rapidjson import dumps  # type: ignore
-    HAS_RAPIDJSON = True
-except ImportError:
-    from json import load
-    from json import loads
-    from json import dumps
-    HAS_RAPIDJSON = False
-
-import logging
 from enum import Enum
 from typing import Union, Optional, Any, Dict, List, Set, Mapping
-
-from .exceptions import PyMISPInvalidFormat, PyMISPError
-
-
 from collections.abc import MutableMapping
 from functools import lru_cache
 from pathlib import Path
 
+try:
+    import orjson  # type: ignore
+    from orjson import loads, dumps  # type: ignore
+    HAS_ORJSON = True
+except ImportError:
+    from json import loads, dumps
+    HAS_ORJSON = False
+
+from .exceptions import PyMISPInvalidFormat, PyMISPError
+
 logger = logging.getLogger('pymisp')
 
+
 resources_path = Path(__file__).parent / 'data'
 misp_objects_path = resources_path / 'misp-objects' / 'objects'
-with (resources_path / 'describeTypes.json').open('r') as f:
-    describe_types = load(f)['result']
+with (resources_path / 'describeTypes.json').open('rb') as f:
+    describe_types = loads(f.read())['result']
 
 
 class MISPFileCache(object):
@@ -43,11 +35,11 @@ class MISPFileCache(object):
 
     @staticmethod
     @lru_cache(maxsize=150)
-    def _load_json(path: Path) -> Union[dict, None]:
+    def _load_json(path: Path) -> Optional[dict]:
         if not path.exists():
             return None
-        with path.open('r', encoding='utf-8') as f:
-            data = load(f)
+        with path.open('rb') as f:
+            data = loads(f.read())
         return data
 
 
@@ -249,6 +241,21 @@ class AbstractMISP(MutableMapping, MISPFileCache, metaclass=ABCMeta):
 
     def to_json(self, sort_keys: bool = False, indent: Optional[int] = None) -> str:
         """Dump recursively any class of type MISPAbstract to a json string"""
+        # orjson can only pretty-print with a two-space indent, so any other
+        # width is handed to the standard library to honour `indent`.
+        if HAS_ORJSON and (indent is None or indent == 2):
+            option = 0
+            if sort_keys:
+                option |= orjson.OPT_SORT_KEYS
+            if indent:
+                option |= orjson.OPT_INDENT_2
+
+            return dumps(self, default=pymisp_json_default, option=option).decode("utf-8")
+
+        if HAS_ORJSON:
+            import json
+            return json.dumps(self, default=pymisp_json_default, sort_keys=sort_keys, indent=indent)
+
         return dumps(self, default=pymisp_json_default, sort_keys=sort_keys, indent=indent)
 
     def __getitem__(self, key):
@@ -406,23 +413,13 @@ class MISPTag(AbstractMISP):
         return '<{self.__class__.__name__}(NotInitialized)>'.format(self=self)
 
 
-if HAS_RAPIDJSON:
-    def pymisp_json_default(obj: Union[AbstractMISP, datetime, date, Enum, UUID]) -> Union[Dict, str]:
-        if isinstance(obj, AbstractMISP):
-            return obj.jsonable()
-        elif isinstance(obj, (datetime, date)):
-            return obj.isoformat()
-        elif isinstance(obj, Enum):
-            return obj.value
-        elif isinstance(obj, UUID):
-            return str(obj)
-else:
-    def pymisp_json_default(obj: Union[AbstractMISP, datetime, date, Enum, UUID]) -> Union[Dict, str]:
-        if isinstance(obj, AbstractMISP):
-            return obj.jsonable()
-        elif isinstance(obj, (datetime, date)):
-            return obj.isoformat()
-        elif isinstance(obj, Enum):
-            return obj.value
-        elif isinstance(obj, UUID):
-            return str(obj)
+# UUID, datetime, date and Enum are serialized natively by orjson; those branches serve the stdlib json fallback
+def pymisp_json_default(obj: Union[AbstractMISP, datetime, date, Enum, UUID]) -> Union[Dict, str]:
+    if isinstance(obj, AbstractMISP):
+        return obj.jsonable()
+    elif isinstance(obj, (datetime, date)):
+        return obj.isoformat()
+    elif isinstance(obj, Enum):
+        return obj.value
+    elif isinstance(obj, UUID):
+        return str(obj)
diff --git a/pymisp/api.py b/pymisp/api.py
index baceea7..85f6b00 100644
--- a/pymisp/api.py
+++ b/pymisp/api.py
@@ -1,13 +1,9 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
 from typing import TypeVar, Optional, Tuple, List, Dict, Union, Any, Mapping, Iterable, MutableMapping
 from datetime import date, datetime
 import csv
 from pathlib import Path
 import logging
 from urllib.parse import urljoin
-import json
 import requests
 from requests.auth import AuthBase
 import re
@@ -18,6 +14,14 @@ import copy
 import urllib3  # type: ignore
 from io import BytesIO, StringIO
 
+try:
+    # orjson is an optional dependency that speeds up parsing and encoding JSON
+    from orjson import loads, dumps  # type: ignore
+    HAS_ORJSON = True
+except ImportError:
+    from json import loads, dumps
+    HAS_ORJSON = False
+
 from . import __version__, everything_broken
 from .exceptions import MISPServerError, PyMISPUnexpectedResponse, PyMISPError, NoURL, NoKey
 from .mispevent import MISPEvent, MISPAttribute, MISPSighting, MISPLog, MISPObject, \
@@ -297,7 +301,7 @@ class PyMISP:
         """Get the most recent version from github"""
         r = requests.get('https://raw.githubusercontent.com/MISP/MISP/2.4/VERSION.json')
         if r.status_code == 200:
-            master_version = json.loads(r.text)
+            master_version = loads(r.content)
             return {'version': '{}.{}.{}'.format(master_version['major'], master_version['minor'], master_version['hotfix'])}
         return {'error': 'Impossible to retrieve the version of the master branch.'}
 
@@ -3345,7 +3349,7 @@ class PyMISP:
         """
         query: Dict[str, Any] = {'setting': user_setting}
         if isinstance(value, dict):
-            value = json.dumps(value)
+            value = dumps(value).decode("utf-8") if HAS_ORJSON else dumps(value)
         query['value'] = value
         if user:
             query['user_id'] = get_uuid_or_id_from_abstract_misp(user)
@@ -3682,7 +3686,7 @@ class PyMISP:
         if 400 <= response.status_code < 500:
             # The server returns a json message with the error details
             try:
-                error_message = response.json()
+                error_message = loads(response.content)
             except Exception:
                 raise MISPServerError(f'Error code {response.status_code}:\n{response.text}')
 
@@ -3692,7 +3696,7 @@ class PyMISP:
         # At this point, we had no error.
 
         try:
-            response_json = response.json()
+            response_json = loads(response.content)
             logger.debug(response_json)
             if isinstance(response_json, dict) and response_json.get('response') is not None:
                 # Cleanup.
@@ -3721,7 +3725,7 @@ class PyMISP:
         if url[0] == '/':
             # strip it: it will fail if MISP is in a sub directory
             url = url[1:]
-        # Cake PHP being an idiot, it doesn't accepts %20 (space) in the URL path,
+        # Cake PHP being an idiot, it doesn't accept %20 (space) in the URL path,
         # so we need to make it a + instead and hope for the best
         url = url.replace(' ', '+')
         url = urljoin(self.root_url, url)
@@ -3733,7 +3737,7 @@ class PyMISP:
             if isinstance(data, dict):
                 # Remove None values.
                 data = {k: v for k, v in data.items() if v is not None}
-            d = json.dumps(data, default=pymisp_json_default)
+            d = dumps(data, default=pymisp_json_default)
 
         logger.debug(f'{request_type} - {url}')
         if d is not None:
diff --git a/pymisp/mispevent.py b/pymisp/mispevent.py
index 0575851..f0e6949 100644
--- a/pymisp/mispevent.py
+++ b/pymisp/mispevent.py
@@ -1,8 +1,5 @@
-# -*- coding: utf-8 -*-
-
 from datetime import timezone, datetime, date
 import copy
-import json
 import os
 import base64
 import sys
@@ -17,6 +14,12 @@ from pathlib import Path
 from typing import List, Optional, Union, IO, Dict, Any
 import warnings
 
+try:
+    # orjson is an optional dependency that speeds up JSON handling. NOTE(review): it has no load()/dump() and dumps() returns bytes - confirm every json.* call in this module
+    import orjson as json  # type: ignore
+except ImportError:
+    import json
+
 from .abstract import AbstractMISP, MISPTag
 from .exceptions import (UnknownMISPObjectTemplate, InvalidMISPGalaxy, InvalidMISPObject,
                          PyMISPError, NewEventError, NewAttributeError, NewEventReportError,
@@ -1090,7 +1093,7 @@ class MISPObject(AbstractMISP):
             self._validate()
         return super(MISPObject, self).to_dict(json_format)
 
-    def to_json(self, sort_keys: bool = False, indent: Optional[int] = None, strict: bool = False):
+    def to_json(self, sort_keys: bool = False, indent: Optional[int] = None, strict: bool = False) -> str:
         if strict or self._strict and self._known_template:
             self._validate()
         return super(MISPObject, self).to_json(sort_keys=sort_keys, indent=indent)
@@ -1760,7 +1763,7 @@ class MISPEvent(AbstractMISP):
             event.pop('Object', None)
         self.from_dict(**event)
         if validate:
-            warnings.warn('''The validate parameter is deprecated because PyMISP is more flexible at loading event than the schema''')
+            warnings.warn('The validate parameter is deprecated because PyMISP is more flexible at loading event than the schema')
 
     def __setattr__(self, name, value):
         if name in ['date']: