new: [internal] Add support for orjson

orjson is a much faster library for decoding and encoding JSON
pull/1126/head
Jakub Onderka 2024-01-05 21:18:44 +01:00
parent dd628a7fe1
commit fca0b233cd
4 changed files with 64 additions and 60 deletions

View File

@ -36,5 +36,11 @@ jobs:
poetry run pytest --cov=pymisp tests/test_*.py poetry run pytest --cov=pymisp tests/test_*.py
poetry run mypy tests/testlive_comprehensive.py tests/test_mispevent.py tests/testlive_sync.py pymisp poetry run mypy tests/testlive_comprehensive.py tests/test_mispevent.py tests/testlive_sync.py pymisp
- name: Test with nosetests with orjson
run: |
pip3 install orjson
poetry run pytest --cov=pymisp tests/test_*.py
poetry run mypy tests/testlive_comprehensive.py tests/test_mispevent.py tests/testlive_sync.py pymisp
- name: Upload coverage to Codecov - name: Upload coverage to Codecov
uses: codecov/codecov-action@v3 uses: codecov/codecov-action@v3

View File

@ -1,41 +1,33 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# -*- coding: utf-8 -*- import logging
from datetime import date, datetime from datetime import date, datetime
from deprecated import deprecated # type: ignore from deprecated import deprecated # type: ignore
from json import JSONEncoder from json import JSONEncoder
from uuid import UUID from uuid import UUID
from abc import ABCMeta from abc import ABCMeta
try:
from rapidjson import load # type: ignore
from rapidjson import loads # type: ignore
from rapidjson import dumps # type: ignore
HAS_RAPIDJSON = True
except ImportError:
from json import load
from json import loads
from json import dumps
HAS_RAPIDJSON = False
import logging
from enum import Enum from enum import Enum
from typing import Union, Optional, Any, Dict, List, Set, Mapping from typing import Union, Optional, Any, Dict, List, Set, Mapping
from .exceptions import PyMISPInvalidFormat, PyMISPError
from collections.abc import MutableMapping from collections.abc import MutableMapping
from functools import lru_cache from functools import lru_cache
from pathlib import Path from pathlib import Path
try:
import orjson # type: ignore
from orjson import loads, dumps # type: ignore
HAS_ORJSON = True
except ImportError:
from json import loads, dumps
HAS_ORJSON = False
from .exceptions import PyMISPInvalidFormat, PyMISPError
logger = logging.getLogger('pymisp') logger = logging.getLogger('pymisp')
resources_path = Path(__file__).parent / 'data' resources_path = Path(__file__).parent / 'data'
misp_objects_path = resources_path / 'misp-objects' / 'objects' misp_objects_path = resources_path / 'misp-objects' / 'objects'
with (resources_path / 'describeTypes.json').open('r') as f: with (resources_path / 'describeTypes.json').open('rb') as f:
describe_types = load(f)['result'] describe_types = loads(f.read())['result']
class MISPFileCache(object): class MISPFileCache(object):
@ -43,11 +35,11 @@ class MISPFileCache(object):
@staticmethod @staticmethod
@lru_cache(maxsize=150) @lru_cache(maxsize=150)
def _load_json(path: Path) -> Union[dict, None]: def _load_json(path: Path) -> Optional[dict]:
if not path.exists(): if not path.exists():
return None return None
with path.open('r', encoding='utf-8') as f: with path.open('rb') as f:
data = load(f) data = loads(f.read())
return data return data
@ -249,6 +241,15 @@ class AbstractMISP(MutableMapping, MISPFileCache, metaclass=ABCMeta):
def to_json(self, sort_keys: bool = False, indent: Optional[int] = None) -> str: def to_json(self, sort_keys: bool = False, indent: Optional[int] = None) -> str:
"""Dump recursively any class of type MISPAbstract to a json string""" """Dump recursively any class of type MISPAbstract to a json string"""
if HAS_ORJSON:
option = 0
if sort_keys:
option |= orjson.OPT_SORT_KEYS
if indent:
option |= orjson.OPT_INDENT_2
return dumps(self, default=pymisp_json_default, option=option).decode("utf-8")
return dumps(self, default=pymisp_json_default, sort_keys=sort_keys, indent=indent) return dumps(self, default=pymisp_json_default, sort_keys=sort_keys, indent=indent)
def __getitem__(self, key): def __getitem__(self, key):
@ -406,23 +407,13 @@ class MISPTag(AbstractMISP):
return '<{self.__class__.__name__}(NotInitialized)>'.format(self=self) return '<{self.__class__.__name__}(NotInitialized)>'.format(self=self)
if HAS_RAPIDJSON: # UUID, datetime, date and Enum are serialized by orjson by default
def pymisp_json_default(obj: Union[AbstractMISP, datetime, date, Enum, UUID]) -> Union[Dict, str]: def pymisp_json_default(obj: Union[AbstractMISP, datetime, date, Enum, UUID]) -> Union[Dict, str]:
if isinstance(obj, AbstractMISP): if isinstance(obj, AbstractMISP):
return obj.jsonable() return obj.jsonable()
elif isinstance(obj, (datetime, date)): elif isinstance(obj, (datetime, date)):
return obj.isoformat() return obj.isoformat()
elif isinstance(obj, Enum): elif isinstance(obj, Enum):
return obj.value return obj.value
elif isinstance(obj, UUID): elif isinstance(obj, UUID):
return str(obj) return str(obj)
else:
def pymisp_json_default(obj: Union[AbstractMISP, datetime, date, Enum, UUID]) -> Union[Dict, str]:
if isinstance(obj, AbstractMISP):
return obj.jsonable()
elif isinstance(obj, (datetime, date)):
return obj.isoformat()
elif isinstance(obj, Enum):
return obj.value
elif isinstance(obj, UUID):
return str(obj)

View File

@ -1,13 +1,9 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import TypeVar, Optional, Tuple, List, Dict, Union, Any, Mapping, Iterable, MutableMapping from typing import TypeVar, Optional, Tuple, List, Dict, Union, Any, Mapping, Iterable, MutableMapping
from datetime import date, datetime from datetime import date, datetime
import csv import csv
from pathlib import Path from pathlib import Path
import logging import logging
from urllib.parse import urljoin from urllib.parse import urljoin
import json
import requests import requests
from requests.auth import AuthBase from requests.auth import AuthBase
import re import re
@ -18,6 +14,14 @@ import copy
import urllib3 # type: ignore import urllib3 # type: ignore
from io import BytesIO, StringIO from io import BytesIO, StringIO
try:
# orjson is an optional dependency that speeds up parsing and encoding JSON
from orjson import loads, dumps # type: ignore
HAS_ORJSON = True
except ImportError:
from json import loads, dumps
HAS_ORJSON = False
from . import __version__, everything_broken from . import __version__, everything_broken
from .exceptions import MISPServerError, PyMISPUnexpectedResponse, PyMISPError, NoURL, NoKey from .exceptions import MISPServerError, PyMISPUnexpectedResponse, PyMISPError, NoURL, NoKey
from .mispevent import MISPEvent, MISPAttribute, MISPSighting, MISPLog, MISPObject, \ from .mispevent import MISPEvent, MISPAttribute, MISPSighting, MISPLog, MISPObject, \
@ -297,7 +301,7 @@ class PyMISP:
"""Get the most recent version from github""" """Get the most recent version from github"""
r = requests.get('https://raw.githubusercontent.com/MISP/MISP/2.4/VERSION.json') r = requests.get('https://raw.githubusercontent.com/MISP/MISP/2.4/VERSION.json')
if r.status_code == 200: if r.status_code == 200:
master_version = json.loads(r.text) master_version = loads(r.content)
return {'version': '{}.{}.{}'.format(master_version['major'], master_version['minor'], master_version['hotfix'])} return {'version': '{}.{}.{}'.format(master_version['major'], master_version['minor'], master_version['hotfix'])}
return {'error': 'Impossible to retrieve the version of the master branch.'} return {'error': 'Impossible to retrieve the version of the master branch.'}
@ -3345,7 +3349,7 @@ class PyMISP:
""" """
query: Dict[str, Any] = {'setting': user_setting} query: Dict[str, Any] = {'setting': user_setting}
if isinstance(value, dict): if isinstance(value, dict):
value = json.dumps(value) value = dumps(value).decode("utf-8") if HAS_ORJSON else dumps(value)
query['value'] = value query['value'] = value
if user: if user:
query['user_id'] = get_uuid_or_id_from_abstract_misp(user) query['user_id'] = get_uuid_or_id_from_abstract_misp(user)
@ -3682,7 +3686,7 @@ class PyMISP:
if 400 <= response.status_code < 500: if 400 <= response.status_code < 500:
# The server returns a json message with the error details # The server returns a json message with the error details
try: try:
error_message = response.json() error_message = loads(response.content)
except Exception: except Exception:
raise MISPServerError(f'Error code {response.status_code}:\n{response.text}') raise MISPServerError(f'Error code {response.status_code}:\n{response.text}')
@ -3692,7 +3696,7 @@ class PyMISP:
# At this point, we had no error. # At this point, we had no error.
try: try:
response_json = response.json() response_json = loads(response.content)
logger.debug(response_json) logger.debug(response_json)
if isinstance(response_json, dict) and response_json.get('response') is not None: if isinstance(response_json, dict) and response_json.get('response') is not None:
# Cleanup. # Cleanup.
@ -3721,7 +3725,7 @@ class PyMISP:
if url[0] == '/': if url[0] == '/':
# strip it: it will fail if MISP is in a sub directory # strip it: it will fail if MISP is in a sub directory
url = url[1:] url = url[1:]
# Cake PHP being an idiot, it doesn't accepts %20 (space) in the URL path, # Cake PHP being an idiot, it doesn't accept %20 (space) in the URL path,
# so we need to make it a + instead and hope for the best # so we need to make it a + instead and hope for the best
url = url.replace(' ', '+') url = url.replace(' ', '+')
url = urljoin(self.root_url, url) url = urljoin(self.root_url, url)
@ -3733,7 +3737,7 @@ class PyMISP:
if isinstance(data, dict): if isinstance(data, dict):
# Remove None values. # Remove None values.
data = {k: v for k, v in data.items() if v is not None} data = {k: v for k, v in data.items() if v is not None}
d = json.dumps(data, default=pymisp_json_default) d = dumps(data, default=pymisp_json_default)
logger.debug(f'{request_type} - {url}') logger.debug(f'{request_type} - {url}')
if d is not None: if d is not None:

View File

@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-
from datetime import timezone, datetime, date from datetime import timezone, datetime, date
import copy import copy
import json
import os import os
import base64 import base64
import sys import sys
@ -17,6 +14,12 @@ from pathlib import Path
from typing import List, Optional, Union, IO, Dict, Any from typing import List, Optional, Union, IO, Dict, Any
import warnings import warnings
try:
# orjson is an optional dependency that speeds up parsing and encoding JSON
import orjson as json # type: ignore
except ImportError:
import json
from .abstract import AbstractMISP, MISPTag from .abstract import AbstractMISP, MISPTag
from .exceptions import (UnknownMISPObjectTemplate, InvalidMISPGalaxy, InvalidMISPObject, from .exceptions import (UnknownMISPObjectTemplate, InvalidMISPGalaxy, InvalidMISPObject,
PyMISPError, NewEventError, NewAttributeError, NewEventReportError, PyMISPError, NewEventError, NewAttributeError, NewEventReportError,
@ -1090,7 +1093,7 @@ class MISPObject(AbstractMISP):
self._validate() self._validate()
return super(MISPObject, self).to_dict(json_format) return super(MISPObject, self).to_dict(json_format)
def to_json(self, sort_keys: bool = False, indent: Optional[int] = None, strict: bool = False): def to_json(self, sort_keys: bool = False, indent: Optional[int] = None, strict: bool = False) -> str:
if strict or self._strict and self._known_template: if strict or self._strict and self._known_template:
self._validate() self._validate()
return super(MISPObject, self).to_json(sort_keys=sort_keys, indent=indent) return super(MISPObject, self).to_json(sort_keys=sort_keys, indent=indent)
@ -1760,7 +1763,7 @@ class MISPEvent(AbstractMISP):
event.pop('Object', None) event.pop('Object', None)
self.from_dict(**event) self.from_dict(**event)
if validate: if validate:
warnings.warn('''The validate parameter is deprecated because PyMISP is more flexible at loading event than the schema''') warnings.warn('The validate parameter is deprecated because PyMISP is more flexible at loading event than the schema')
def __setattr__(self, name, value): def __setattr__(self, name, value):
if name in ['date']: if name in ['date']: