new: [internal] Add support for orjson

orjson is a much faster library for decoding and encoding JSON.
pull/1126/head
Jakub Onderka 2024-01-05 21:18:44 +01:00
parent dd628a7fe1
commit fca0b233cd
4 changed files with 64 additions and 60 deletions

View File

@ -36,5 +36,11 @@ jobs:
poetry run pytest --cov=pymisp tests/test_*.py
poetry run mypy tests/testlive_comprehensive.py tests/test_mispevent.py tests/testlive_sync.py pymisp
- name: Test with nosetests with orjson
run: |
pip3 install orjson
poetry run pytest --cov=pymisp tests/test_*.py
poetry run mypy tests/testlive_comprehensive.py tests/test_mispevent.py tests/testlive_sync.py pymisp
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3

View File

@ -1,41 +1,33 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import logging
from datetime import date, datetime
from deprecated import deprecated # type: ignore
from json import JSONEncoder
from uuid import UUID
from abc import ABCMeta
try:
from rapidjson import load # type: ignore
from rapidjson import loads # type: ignore
from rapidjson import dumps # type: ignore
HAS_RAPIDJSON = True
except ImportError:
from json import load
from json import loads
from json import dumps
HAS_RAPIDJSON = False
import logging
from enum import Enum
from typing import Union, Optional, Any, Dict, List, Set, Mapping
from .exceptions import PyMISPInvalidFormat, PyMISPError
from collections.abc import MutableMapping
from functools import lru_cache
from pathlib import Path
try:
import orjson # type: ignore
from orjson import loads, dumps # type: ignore
HAS_ORJSON = True
except ImportError:
from json import loads, dumps
HAS_ORJSON = False
from .exceptions import PyMISPInvalidFormat, PyMISPError
logger = logging.getLogger('pymisp')
resources_path = Path(__file__).parent / 'data'
misp_objects_path = resources_path / 'misp-objects' / 'objects'
with (resources_path / 'describeTypes.json').open('r') as f:
describe_types = load(f)['result']
with (resources_path / 'describeTypes.json').open('rb') as f:
describe_types = loads(f.read())['result']
class MISPFileCache(object):
@ -43,11 +35,11 @@ class MISPFileCache(object):
@staticmethod
@lru_cache(maxsize=150)
def _load_json(path: Path) -> Union[dict, None]:
def _load_json(path: Path) -> Optional[dict]:
if not path.exists():
return None
with path.open('r', encoding='utf-8') as f:
data = load(f)
with path.open('rb') as f:
data = loads(f.read())
return data
@ -249,6 +241,15 @@ class AbstractMISP(MutableMapping, MISPFileCache, metaclass=ABCMeta):
def to_json(self, sort_keys: bool = False, indent: Optional[int] = None) -> str:
"""Dump recursively any class of type MISPAbstract to a json string"""
# Two serialisation paths: orjson when it could be imported (HAS_ORJSON),
# the standard-library json module otherwise. Both paths return a str.
if HAS_ORJSON:
# orjson takes its formatting choices as a bitmask of OPT_* flags
# instead of keyword arguments, so build the mask from the parameters.
option = 0
if sort_keys:
option |= orjson.OPT_SORT_KEYS
if indent:
# Any truthy indent selects OPT_INDENT_2: the requested width is not
# honoured on this path (indent=4 still gives two spaces, and
# indent=0 adds no flag at all).
option |= orjson.OPT_INDENT_2
# The orjson result is bytes; decode it so the declared str return type holds.
return dumps(self, default=pymisp_json_default, option=option).decode("utf-8")
# Fallback: json.dumps accepts sort_keys and indent directly.
return dumps(self, default=pymisp_json_default, sort_keys=sort_keys, indent=indent)
def __getitem__(self, key):
@ -406,17 +407,7 @@ class MISPTag(AbstractMISP):
return '<{self.__class__.__name__}(NotInitialized)>'.format(self=self)
if HAS_RAPIDJSON:
def pymisp_json_default(obj: Union[AbstractMISP, datetime, date, Enum, UUID]) -> Union[Dict, str]:
if isinstance(obj, AbstractMISP):
return obj.jsonable()
elif isinstance(obj, (datetime, date)):
return obj.isoformat()
elif isinstance(obj, Enum):
return obj.value
elif isinstance(obj, UUID):
return str(obj)
else:
# UUID, datetime, date and Enum are serialized by orjson by default
def pymisp_json_default(obj: Union[AbstractMISP, datetime, date, Enum, UUID]) -> Union[Dict, str]:
if isinstance(obj, AbstractMISP):
return obj.jsonable()

View File

@ -1,13 +1,9 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from typing import TypeVar, Optional, Tuple, List, Dict, Union, Any, Mapping, Iterable, MutableMapping
from datetime import date, datetime
import csv
from pathlib import Path
import logging
from urllib.parse import urljoin
import json
import requests
from requests.auth import AuthBase
import re
@ -18,6 +14,14 @@ import copy
import urllib3 # type: ignore
from io import BytesIO, StringIO
try:
# orjson is an optional dependency that speeds up parsing and encoding JSON
from orjson import loads, dumps # type: ignore
HAS_ORJSON = True
except ImportError:
from json import loads, dumps
HAS_ORJSON = False
from . import __version__, everything_broken
from .exceptions import MISPServerError, PyMISPUnexpectedResponse, PyMISPError, NoURL, NoKey
from .mispevent import MISPEvent, MISPAttribute, MISPSighting, MISPLog, MISPObject, \
@ -297,7 +301,7 @@ class PyMISP:
"""Get the most recent version from github"""
r = requests.get('https://raw.githubusercontent.com/MISP/MISP/2.4/VERSION.json')
if r.status_code == 200:
master_version = json.loads(r.text)
master_version = loads(r.content)
return {'version': '{}.{}.{}'.format(master_version['major'], master_version['minor'], master_version['hotfix'])}
return {'error': 'Impossible to retrieve the version of the master branch.'}
@ -3345,7 +3349,7 @@ class PyMISP:
"""
query: Dict[str, Any] = {'setting': user_setting}
if isinstance(value, dict):
value = json.dumps(value)
value = dumps(value).decode("utf-8") if HAS_ORJSON else dumps(value)
query['value'] = value
if user:
query['user_id'] = get_uuid_or_id_from_abstract_misp(user)
@ -3682,7 +3686,7 @@ class PyMISP:
if 400 <= response.status_code < 500:
# The server returns a json message with the error details
try:
error_message = response.json()
error_message = loads(response.content)
except Exception:
raise MISPServerError(f'Error code {response.status_code}:\n{response.text}')
@ -3692,7 +3696,7 @@ class PyMISP:
# At this point, we had no error.
try:
response_json = response.json()
response_json = loads(response.content)
logger.debug(response_json)
if isinstance(response_json, dict) and response_json.get('response') is not None:
# Cleanup.
@ -3721,7 +3725,7 @@ class PyMISP:
if url[0] == '/':
# strip it: it will fail if MISP is in a sub directory
url = url[1:]
# Cake PHP being an idiot, it doesn't accepts %20 (space) in the URL path,
# Cake PHP being an idiot, it doesn't accept %20 (space) in the URL path,
# so we need to make it a + instead and hope for the best
url = url.replace(' ', '+')
url = urljoin(self.root_url, url)
@ -3733,7 +3737,7 @@ class PyMISP:
if isinstance(data, dict):
# Remove None values.
data = {k: v for k, v in data.items() if v is not None}
d = json.dumps(data, default=pymisp_json_default)
d = dumps(data, default=pymisp_json_default)
logger.debug(f'{request_type} - {url}')
if d is not None:

View File

@ -1,8 +1,5 @@
# -*- coding: utf-8 -*-
from datetime import timezone, datetime, date
import copy
import json
import os
import base64
import sys
@ -17,6 +14,12 @@ from pathlib import Path
from typing import List, Optional, Union, IO, Dict, Any
import warnings
try:
# orjson is an optional dependency that speeds up parsing and encoding JSON
import orjson as json # type: ignore
except ImportError:
import json
from .abstract import AbstractMISP, MISPTag
from .exceptions import (UnknownMISPObjectTemplate, InvalidMISPGalaxy, InvalidMISPObject,
PyMISPError, NewEventError, NewAttributeError, NewEventReportError,
@ -1090,7 +1093,7 @@ class MISPObject(AbstractMISP):
self._validate()
return super(MISPObject, self).to_dict(json_format)
def to_json(self, sort_keys: bool = False, indent: Optional[int] = None, strict: bool = False):
def to_json(self, sort_keys: bool = False, indent: Optional[int] = None, strict: bool = False) -> str:
if strict or self._strict and self._known_template:
self._validate()
return super(MISPObject, self).to_json(sort_keys=sort_keys, indent=indent)
@ -1760,7 +1763,7 @@ class MISPEvent(AbstractMISP):
event.pop('Object', None)
self.from_dict(**event)
if validate:
warnings.warn('''The validate parameter is deprecated because PyMISP is more flexible at loading event than the schema''')
warnings.warn('The validate parameter is deprecated because PyMISP is more flexible at loading event than the schema')
def __setattr__(self, name, value):
if name in ['date']: