Cache JSON definitions in an in-memory LFU cache provided by cachetools

- The path of the JSON file is used as the cache key; its modified time is stored with the entry and used to detect stale data
- Global state is hidden away inside a root class so it can be re-used
- Maximum size is 150 considering the number of JSON definitions

During my tests the memory usage of the test suites was halved.
pull/471/head
Marc Hoersken 2019-10-03 19:12:19 +02:00
parent 73c8d8b87d
commit e05c7d9b4f
6 changed files with 56 additions and 41 deletions

14
Pipfile.lock generated
View File

@ -31,6 +31,13 @@
], ],
"version": "==4.8.0" "version": "==4.8.0"
}, },
"cachetools": {
"hashes": [
"sha256:428266a1c0d36dc5aca63a2d7c5942e88c2c898d72139fca0e97fdd2380517ae",
"sha256:8ea2d3ce97850f31e4a08b0e2b5e6c34997d7216a9d2c98e0f3978630d4da69a"
],
"version": "==3.1.1"
},
"certifi": { "certifi": {
"hashes": [ "hashes": [
"sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939", "sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939",
@ -266,6 +273,13 @@
], ],
"version": "==4.8.0" "version": "==4.8.0"
}, },
"cachetools": {
"hashes": [
"sha256:428266a1c0d36dc5aca63a2d7c5942e88c2c898d72139fca0e97fdd2380517ae",
"sha256:8ea2d3ce97850f31e4a08b0e2b5e6c34997d7216a9d2c98e0f3978630d4da69a"
],
"version": "==3.1.1"
},
"certifi": { "certifi": {
"hashes": [ "hashes": [
"sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939", "sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939",

View File

@ -4,9 +4,11 @@
import sys import sys
import datetime import datetime
import json import json
import os
from json import JSONEncoder from json import JSONEncoder
import logging import logging
from enum import Enum from enum import Enum
import cachetools
from .exceptions import PyMISPInvalidFormat from .exceptions import PyMISPInvalidFormat
@ -38,6 +40,12 @@ if sys.version_info < (3, 0):
return timedelta(0) return timedelta(0)
# True only on Python 3.0 through 3.5; the JSON loader uses this flag to
# decode file bytes by hand before parsing.
OLD_PY3 = (3, 0) <= sys.version_info < (3, 6)
class Distribution(Enum): class Distribution(Enum):
your_organisation_only = 0 your_organisation_only = 0
this_community_only = 1 this_community_only = 1
@ -80,7 +88,30 @@ class MISPEncode(JSONEncoder):
return JSONEncoder.default(self, obj) return JSONEncoder.default(self, obj)
class AbstractMISP(MutableMapping): class MISPFileCache(object):
# cache up to 150 JSON structures in class attribute
_file_cache = cachetools.LFUCache(150)
@staticmethod
def _load_json(path):
    """Return the parsed JSON content of *path*, served from cache when fresh.

    Entries live in ``MISPFileCache._file_cache`` keyed by ``path``; each
    entry is a ``(mtime, data)`` tuple. A cached entry is reused only when
    its recorded modification time equals the file's current one;
    otherwise the file is read again and the entry is replaced.

    The returned object is the cached one itself, not a copy.
    """
    # Always go through the root class attribute so every subclass shares
    # one cache.
    cache = MISPFileCache._file_cache
    current_mtime = os.path.getmtime(path)

    try:
        cached_mtime, cached_data = cache[path]
    except KeyError:
        # Not cached yet: fall through and load from disk.
        pass
    else:
        if cached_mtime == current_mtime:
            return cached_data

    with open(path, 'rb') as handle:
        # On old Python 3 the bytes are decoded by hand before parsing.
        if OLD_PY3:
            parsed = json.loads(handle.read().decode())
        else:
            parsed = json.load(handle)

    cache[path] = (current_mtime, parsed)
    return parsed
class AbstractMISP(MutableMapping, MISPFileCache):
def __init__(self, **kwargs): def __init__(self, **kwargs):
"""Abstract class for all the MISP objects""" """Abstract class for all the MISP objects"""

View File

@ -19,7 +19,7 @@ from deprecated import deprecated
from . import __version__, warning_2020 from . import __version__, warning_2020
from .exceptions import PyMISPError, SearchError, NoURL, NoKey, PyMISPEmptyResponse from .exceptions import PyMISPError, SearchError, NoURL, NoKey, PyMISPEmptyResponse
from .mispevent import MISPEvent, MISPAttribute, MISPUser, MISPOrganisation, MISPSighting, MISPFeed, MISPObject, MISPSharingGroup from .mispevent import MISPEvent, MISPAttribute, MISPUser, MISPOrganisation, MISPSighting, MISPFeed, MISPObject, MISPSharingGroup
from .abstract import AbstractMISP, MISPEncode from .abstract import AbstractMISP, MISPEncode, MISPFileCache
logger = logging.getLogger('pymisp') logger = logging.getLogger('pymisp')
@ -37,11 +37,6 @@ try:
except ImportError: except ImportError:
HAVE_REQUESTS = False HAVE_REQUESTS = False
if (3, 0) <= sys.version_info < (3, 6):
OLD_PY3 = True
else:
OLD_PY3 = False
try: try:
from requests_futures.sessions import FuturesSession from requests_futures.sessions import FuturesSession
ASYNC_OK = True ASYNC_OK = True
@ -58,7 +53,7 @@ Response (if any):
{}''' {}'''
class PyMISP(object): # pragma: no cover class PyMISP(MISPFileCache): # pragma: no cover
"""Python API for MISP """Python API for MISP
:param url: URL of the MISP instance you want to connect to :param url: URL of the MISP instance you want to connect to
@ -140,11 +135,7 @@ class PyMISP(object): # pragma: no cover
@deprecated(reason="Use ExpandedPyMISP.describe_types_local", version='2.4.110') @deprecated(reason="Use ExpandedPyMISP.describe_types_local", version='2.4.110')
def get_local_describe_types(self): def get_local_describe_types(self):
with open(os.path.join(self.resources_path, 'describeTypes.json'), 'rb') as f: describe_types = self._load_json(os.path.join(self.resources_path, 'describeTypes.json'))
if OLD_PY3:
describe_types = json.loads(f.read().decode())
else:
describe_types = json.load(f)
return describe_types['result'] return describe_types['result']
@deprecated(reason="Use ExpandedPyMISP.describe_types_remote", version='2.4.110') @deprecated(reason="Use ExpandedPyMISP.describe_types_remote", version='2.4.110')

View File

@ -105,8 +105,7 @@ class ExpandedPyMISP(PyMISP):
@property @property
def describe_types_local(self): def describe_types_local(self):
'''Returns the content of describe types from the package''' '''Returns the content of describe types from the package'''
with (self.resources_path / 'describeTypes.json').open() as f: describe_types = self._load_json(str(self.resources_path / 'describeTypes.json'))
describe_types = json.load(f)
return describe_types['result'] return describe_types['result']
@property @property

View File

@ -109,11 +109,7 @@ class MISPAttribute(AbstractMISP):
super(MISPAttribute, self).__init__() super(MISPAttribute, self).__init__()
if not describe_types: if not describe_types:
ressources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') ressources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
with open(os.path.join(ressources_path, 'describeTypes.json'), 'rb') as f: t = self._load_json(os.path.join(ressources_path, 'describeTypes.json'))
if OLD_PY3:
t = json.loads(f.read().decode())
else:
t = json.load(f)
describe_types = t['result'] describe_types = t['result']
self.__categories = describe_types['categories'] self.__categories = describe_types['categories']
self._types = describe_types['types'] self._types = describe_types['types']
@ -411,26 +407,14 @@ class MISPEvent(AbstractMISP):
super(MISPEvent, self).__init__(**kwargs) super(MISPEvent, self).__init__(**kwargs)
ressources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') ressources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
if strict_validation: if strict_validation:
with open(os.path.join(ressources_path, 'schema.json'), 'rb') as f: self.__json_schema = self._load_json(os.path.join(ressources_path, 'schema.json'))
if OLD_PY3:
self.__json_schema = json.loads(f.read().decode())
else:
self.__json_schema = json.load(f)
else: else:
with open(os.path.join(ressources_path, 'schema-lax.json'), 'rb') as f: self.__json_schema = self._load_json(os.path.join(ressources_path, 'schema-lax.json'))
if OLD_PY3:
self.__json_schema = json.loads(f.read().decode())
else:
self.__json_schema = json.load(f)
if describe_types: if describe_types:
# This variable is used in add_attribute in order to avoid duplicating the structure # This variable is used in add_attribute in order to avoid duplicating the structure
self._describe_types = describe_types self._describe_types = describe_types
else: else:
with open(os.path.join(ressources_path, 'describeTypes.json'), 'rb') as f: t = self._load_json(os.path.join(ressources_path, 'describeTypes.json'))
if OLD_PY3:
t = json.loads(f.read().decode())
else:
t = json.load(f)
self._describe_types = t['result'] self._describe_types = t['result']
self._types = self._describe_types['types'] self._types = self._describe_types['types']
@ -1190,11 +1174,7 @@ class MISPObject(AbstractMISP):
def _load_template_path(self, template_path): def _load_template_path(self, template_path):
if not os.path.exists(template_path): if not os.path.exists(template_path):
return False return False
with open(template_path, 'rb') as f: self._definition = self._load_json(template_path)
if OLD_PY3:
self._definition = json.loads(f.read().decode())
else:
self._definition = json.load(f)
setattr(self, 'meta-category', self._definition['meta-category']) setattr(self, 'meta-category', self._definition['meta-category'])
self.template_uuid = self._definition['uuid'] self.template_uuid = self._definition['uuid']
self.description = self._definition['description'] self.description = self._definition['description']

View File

@ -41,7 +41,7 @@ setup(
], ],
install_requires=['six', 'requests', 'python-dateutil', 'jsonschema', install_requires=['six', 'requests', 'python-dateutil', 'jsonschema',
'python-dateutil', 'enum34;python_version<"3.4"', 'python-dateutil', 'enum34;python_version<"3.4"',
'functools32;python_version<"3.0"', 'deprecated'], 'functools32;python_version<"3.0"', 'deprecated', 'cachetools'],
extras_require={'fileobjects': ['lief>=0.8,<0.10;python_version<"3.5"', 'lief>=0.10.0.dev0;python_version>"3.5"', 'python-magic', 'pydeep'], extras_require={'fileobjects': ['lief>=0.8,<0.10;python_version<"3.5"', 'lief>=0.10.0.dev0;python_version>"3.5"', 'python-magic', 'pydeep'],
'neo': ['py2neo'], 'neo': ['py2neo'],
'openioc': ['beautifulsoup4'], 'openioc': ['beautifulsoup4'],