Cache JSON definitions in an in-memory LFU cache provided by cachetools

- Path of the JSON file is used as the cache key; its modified time is stored alongside the data to detect stale entries
- Global state is hidden away inside a root-class for re-use
- Maximum size is 150 considering the number of JSON definitions

During my tests the memory usage of the test suites was halved.
pull/471/head
Marc Hoersken 2019-10-03 19:12:19 +02:00
parent 73c8d8b87d
commit e05c7d9b4f
6 changed files with 56 additions and 41 deletions

14
Pipfile.lock generated
View File

@ -31,6 +31,13 @@
],
"version": "==4.8.0"
},
"cachetools": {
"hashes": [
"sha256:428266a1c0d36dc5aca63a2d7c5942e88c2c898d72139fca0e97fdd2380517ae",
"sha256:8ea2d3ce97850f31e4a08b0e2b5e6c34997d7216a9d2c98e0f3978630d4da69a"
],
"version": "==3.1.1"
},
"certifi": {
"hashes": [
"sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939",
@ -266,6 +273,13 @@
],
"version": "==4.8.0"
},
"cachetools": {
"hashes": [
"sha256:428266a1c0d36dc5aca63a2d7c5942e88c2c898d72139fca0e97fdd2380517ae",
"sha256:8ea2d3ce97850f31e4a08b0e2b5e6c34997d7216a9d2c98e0f3978630d4da69a"
],
"version": "==3.1.1"
},
"certifi": {
"hashes": [
"sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939",

View File

@ -4,9 +4,11 @@
import sys
import datetime
import json
import os
from json import JSONEncoder
import logging
from enum import Enum
import cachetools
from .exceptions import PyMISPInvalidFormat
@ -38,6 +40,12 @@ if sys.version_info < (3, 0):
return timedelta(0)
if (3, 0) <= sys.version_info < (3, 6):
OLD_PY3 = True
else:
OLD_PY3 = False
class Distribution(Enum):
your_organisation_only = 0
this_community_only = 1
@ -80,7 +88,30 @@ class MISPEncode(JSONEncoder):
return JSONEncoder.default(self, obj)
class AbstractMISP(MutableMapping):
class MISPFileCache(object):
    """Base class providing a process-wide, in-memory cache of parsed JSON files."""

    # Single LFU cache holding at most 150 parsed JSON structures. It lives on
    # this class so that all users of _load_json go through the same store.
    _file_cache = cachetools.LFUCache(150)

    @staticmethod
    def _load_json(path):
        """Return the parsed JSON content of the file at ``path``.

        The cache is keyed by ``path``; each entry stores the file's
        modification time next to the parsed data. A cached entry is reused
        only while the modification time on disk still matches, otherwise the
        file is parsed again and the entry replaced.

        The object returned is the cached object itself, not a copy.

        :param path: path of the JSON file to load
        :return: the deserialized JSON document
        """
        # Deliberately reference the root class attribute instead of going
        # through a subclass, so there is exactly one cache.
        cache = MISPFileCache._file_cache
        current_mtime = os.path.getmtime(path)

        try:
            cached_mtime, cached_data = cache[path]
        except KeyError:
            # Nothing cached for this path yet: fall through and parse it.
            pass
        else:
            if cached_mtime == current_mtime:
                return cached_data

        with open(path, 'rb') as handle:
            if OLD_PY3:
                # On these Python 3 versions the bytes are decoded first
                # and then parsed from the resulting text.
                parsed = json.loads(handle.read().decode())
            else:
                parsed = json.load(handle)

        cache[path] = (current_mtime, parsed)
        return parsed
class AbstractMISP(MutableMapping, MISPFileCache):
def __init__(self, **kwargs):
"""Abstract class for all the MISP objects"""

View File

@ -19,7 +19,7 @@ from deprecated import deprecated
from . import __version__, warning_2020
from .exceptions import PyMISPError, SearchError, NoURL, NoKey, PyMISPEmptyResponse
from .mispevent import MISPEvent, MISPAttribute, MISPUser, MISPOrganisation, MISPSighting, MISPFeed, MISPObject, MISPSharingGroup
from .abstract import AbstractMISP, MISPEncode
from .abstract import AbstractMISP, MISPEncode, MISPFileCache
logger = logging.getLogger('pymisp')
@ -37,11 +37,6 @@ try:
except ImportError:
HAVE_REQUESTS = False
if (3, 0) <= sys.version_info < (3, 6):
OLD_PY3 = True
else:
OLD_PY3 = False
try:
from requests_futures.sessions import FuturesSession
ASYNC_OK = True
@ -58,7 +53,7 @@ Response (if any):
{}'''
class PyMISP(object): # pragma: no cover
class PyMISP(MISPFileCache): # pragma: no cover
"""Python API for MISP
:param url: URL of the MISP instance you want to connect to
@ -140,11 +135,7 @@ class PyMISP(object): # pragma: no cover
@deprecated(reason="Use ExpandedPyMISP.describe_types_local", version='2.4.110')
def get_local_describe_types(self):
with open(os.path.join(self.resources_path, 'describeTypes.json'), 'rb') as f:
if OLD_PY3:
describe_types = json.loads(f.read().decode())
else:
describe_types = json.load(f)
describe_types = self._load_json(os.path.join(self.resources_path, 'describeTypes.json'))
return describe_types['result']
@deprecated(reason="Use ExpandedPyMISP.describe_types_remote", version='2.4.110')

View File

@ -105,8 +105,7 @@ class ExpandedPyMISP(PyMISP):
@property
def describe_types_local(self):
'''Returns the content of describe types from the package'''
with (self.resources_path / 'describeTypes.json').open() as f:
describe_types = json.load(f)
describe_types = self._load_json(str(self.resources_path / 'describeTypes.json'))
return describe_types['result']
@property

View File

@ -109,11 +109,7 @@ class MISPAttribute(AbstractMISP):
super(MISPAttribute, self).__init__()
if not describe_types:
ressources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
with open(os.path.join(ressources_path, 'describeTypes.json'), 'rb') as f:
if OLD_PY3:
t = json.loads(f.read().decode())
else:
t = json.load(f)
t = self._load_json(os.path.join(ressources_path, 'describeTypes.json'))
describe_types = t['result']
self.__categories = describe_types['categories']
self._types = describe_types['types']
@ -411,26 +407,14 @@ class MISPEvent(AbstractMISP):
super(MISPEvent, self).__init__(**kwargs)
ressources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data')
if strict_validation:
with open(os.path.join(ressources_path, 'schema.json'), 'rb') as f:
if OLD_PY3:
self.__json_schema = json.loads(f.read().decode())
else:
self.__json_schema = json.load(f)
self.__json_schema = self._load_json(os.path.join(ressources_path, 'schema.json'))
else:
with open(os.path.join(ressources_path, 'schema-lax.json'), 'rb') as f:
if OLD_PY3:
self.__json_schema = json.loads(f.read().decode())
else:
self.__json_schema = json.load(f)
self.__json_schema = self._load_json(os.path.join(ressources_path, 'schema-lax.json'))
if describe_types:
# This variable is used in add_attribute in order to avoid duplicating the structure
self._describe_types = describe_types
else:
with open(os.path.join(ressources_path, 'describeTypes.json'), 'rb') as f:
if OLD_PY3:
t = json.loads(f.read().decode())
else:
t = json.load(f)
t = self._load_json(os.path.join(ressources_path, 'describeTypes.json'))
self._describe_types = t['result']
self._types = self._describe_types['types']
@ -1190,11 +1174,7 @@ class MISPObject(AbstractMISP):
def _load_template_path(self, template_path):
if not os.path.exists(template_path):
return False
with open(template_path, 'rb') as f:
if OLD_PY3:
self._definition = json.loads(f.read().decode())
else:
self._definition = json.load(f)
self._definition = self._load_json(template_path)
setattr(self, 'meta-category', self._definition['meta-category'])
self.template_uuid = self._definition['uuid']
self.description = self._definition['description']

View File

@ -41,7 +41,7 @@ setup(
],
install_requires=['six', 'requests', 'python-dateutil', 'jsonschema',
'python-dateutil', 'enum34;python_version<"3.4"',
'functools32;python_version<"3.0"', 'deprecated'],
'functools32;python_version<"3.0"', 'deprecated', 'cachetools'],
extras_require={'fileobjects': ['lief>=0.8,<0.10;python_version<"3.5"', 'lief>=0.10.0.dev0;python_version>"3.5"', 'python-magic', 'pydeep'],
'neo': ['py2neo'],
'openioc': ['beautifulsoup4'],