From e05c7d9b4ff723e5d6843d9ae54db9f4a76c5056 Mon Sep 17 00:00:00 2001 From: Marc Hoersken Date: Thu, 3 Oct 2019 19:12:19 +0200 Subject: [PATCH 1/2] Cache JSON definitions in memory LFU cache provided by cachetools - Path and modified time of JSON file are used as the cache key - Global state is hidden away inside a root-class for re-use - Maximum size is 150 considering the number of JSON definitions During my tests the memory usage of the test suites was halved. --- Pipfile.lock | 14 ++++++++++++++ pymisp/abstract.py | 33 ++++++++++++++++++++++++++++++++- pymisp/api.py | 15 +++------------ pymisp/aping.py | 3 +-- pymisp/mispevent.py | 30 +++++------------------------- setup.py | 2 +- 6 files changed, 56 insertions(+), 41 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index 5eb4506..5a70930 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -31,6 +31,13 @@ ], "version": "==4.8.0" }, + "cachetools": { + "hashes": [ + "sha256:428266a1c0d36dc5aca63a2d7c5942e88c2c898d72139fca0e97fdd2380517ae", + "sha256:8ea2d3ce97850f31e4a08b0e2b5e6c34997d7216a9d2c98e0f3978630d4da69a" + ], + "version": "==3.1.1" + }, "certifi": { "hashes": [ "sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939", @@ -266,6 +273,13 @@ ], "version": "==4.8.0" }, + "cachetools": { + "hashes": [ + "sha256:428266a1c0d36dc5aca63a2d7c5942e88c2c898d72139fca0e97fdd2380517ae", + "sha256:8ea2d3ce97850f31e4a08b0e2b5e6c34997d7216a9d2c98e0f3978630d4da69a" + ], + "version": "==3.1.1" + }, "certifi": { "hashes": [ "sha256:046832c04d4e752f37383b628bc601a7ea7211496b4638f6514d0e5b9acc4939", diff --git a/pymisp/abstract.py b/pymisp/abstract.py index c85ef3b..ad273ed 100644 --- a/pymisp/abstract.py +++ b/pymisp/abstract.py @@ -4,9 +4,11 @@ import sys import datetime import json +import os from json import JSONEncoder import logging from enum import Enum +import cachetools from .exceptions import PyMISPInvalidFormat @@ -38,6 +40,12 @@ if sys.version_info < (3, 0): return timedelta(0) +if (3, 0) <= sys.version_info < (3, 6): + OLD_PY3 = True +else: + OLD_PY3 = False + + class Distribution(Enum): your_organisation_only = 0 this_community_only = 1 @@ -80,7 +88,30 @@ class MISPEncode(JSONEncoder): return JSONEncoder.default(self, obj) -class AbstractMISP(MutableMapping): +class MISPFileCache(object): + # cache up to 150 JSON structures in class attribute + _file_cache = cachetools.LFUCache(150) + + @staticmethod + def _load_json(path): + # use hard-coded root class attribute + file_cache = MISPFileCache._file_cache + # use modified time with path as cache key + mtime = os.path.getmtime(path) + if path in file_cache: + ctime, data = file_cache[path] + if ctime == mtime: + return data + with open(path, 'rb') as f: + if OLD_PY3: + data = json.loads(f.read().decode()) + else: + data = json.load(f) + file_cache[path] = (mtime, data) + return data + + +class AbstractMISP(MutableMapping, MISPFileCache): def __init__(self, **kwargs): """Abstract class for all the MISP objects""" diff --git a/pymisp/api.py b/pymisp/api.py index 6f2963e..4134312 100644 --- a/pymisp/api.py +++ b/pymisp/api.py @@ -19,7 +19,7 @@ from deprecated import deprecated from . import __version__, warning_2020 from .exceptions import PyMISPError, SearchError, NoURL, NoKey, PyMISPEmptyResponse from .mispevent import MISPEvent, MISPAttribute, MISPUser, MISPOrganisation, MISPSighting, MISPFeed, MISPObject, MISPSharingGroup -from .abstract import AbstractMISP, MISPEncode +from .abstract import AbstractMISP, MISPEncode, MISPFileCache logger = logging.getLogger('pymisp') @@ -37,11 +37,6 @@ try: except ImportError: HAVE_REQUESTS = False -if (3, 0) <= sys.version_info < (3, 6): - OLD_PY3 = True -else: - OLD_PY3 = False - try: from requests_futures.sessions import FuturesSession ASYNC_OK = True @@ -58,7 +53,7 @@ Response (if any): {}''' -class PyMISP(object): # pragma: no cover +class PyMISP(MISPFileCache): # pragma: no cover """Python API for MISP :param url: URL of the MISP instance you want to connect to @@ -140,11 +135,7 @@ class PyMISP(object): # pragma: no cover @deprecated(reason="Use ExpandedPyMISP.describe_types_local", version='2.4.110') def get_local_describe_types(self): - with open(os.path.join(self.resources_path, 'describeTypes.json'), 'rb') as f: - if OLD_PY3: - describe_types = json.loads(f.read().decode()) - else: - describe_types = json.load(f) + describe_types = self._load_json(os.path.join(self.resources_path, 'describeTypes.json')) return describe_types['result'] @deprecated(reason="Use ExpandedPyMISP.describe_types_remote", version='2.4.110') diff --git a/pymisp/aping.py b/pymisp/aping.py index 99b14d6..3885db3 100644 --- a/pymisp/aping.py +++ b/pymisp/aping.py @@ -105,8 +105,7 @@ class ExpandedPyMISP(PyMISP): @property def describe_types_local(self): '''Returns the content of describe types from the package''' - with (self.resources_path / 'describeTypes.json').open() as f: - describe_types = json.load(f) + describe_types = self._load_json(str(self.resources_path / 'describeTypes.json')) return describe_types['result'] @property diff --git a/pymisp/mispevent.py b/pymisp/mispevent.py index 15a3550..e3507f8 100644 --- a/pymisp/mispevent.py +++ b/pymisp/mispevent.py @@ -109,11 +109,7 @@ class MISPAttribute(AbstractMISP): super(MISPAttribute, self).__init__() if not describe_types: ressources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') - with open(os.path.join(ressources_path, 'describeTypes.json'), 'rb') as f: - if OLD_PY3: - t = json.loads(f.read().decode()) - else: - t = json.load(f) + t = self._load_json(os.path.join(ressources_path, 'describeTypes.json')) describe_types = t['result'] self.__categories = describe_types['categories'] self._types = describe_types['types'] @@ -411,26 +407,14 @@ class MISPEvent(AbstractMISP): super(MISPEvent, self).__init__(**kwargs) ressources_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'data') if strict_validation: - with open(os.path.join(ressources_path, 'schema.json'), 'rb') as f: - if OLD_PY3: - self.__json_schema = json.loads(f.read().decode()) - else: - self.__json_schema = json.load(f) + self.__json_schema = self._load_json(os.path.join(ressources_path, 'schema.json')) else: - with open(os.path.join(ressources_path, 'schema-lax.json'), 'rb') as f: - if OLD_PY3: - self.__json_schema = json.loads(f.read().decode()) - else: - self.__json_schema = json.load(f) + self.__json_schema = self._load_json(os.path.join(ressources_path, 'schema-lax.json')) if describe_types: # This variable is used in add_attribute in order to avoid duplicating the structure self._describe_types = describe_types else: - with open(os.path.join(ressources_path, 'describeTypes.json'), 'rb') as f: - if OLD_PY3: - t = json.loads(f.read().decode()) - else: - t = json.load(f) + t = self._load_json(os.path.join(ressources_path, 'describeTypes.json')) self._describe_types = t['result'] self._types = self._describe_types['types'] @@ -1190,11 +1174,7 @@ class MISPObject(AbstractMISP): def _load_template_path(self, template_path): if not os.path.exists(template_path): return False - with open(template_path, 'rb') as f: - if OLD_PY3: - self._definition = json.loads(f.read().decode()) - else: - self._definition = json.load(f) + self._definition = self._load_json(template_path) setattr(self, 'meta-category', self._definition['meta-category']) self.template_uuid = self._definition['uuid'] self.description = self._definition['description'] diff --git a/setup.py b/setup.py index 83055d9..be3284b 100644 --- a/setup.py +++ b/setup.py @@ -41,7 +41,7 @@ setup( ], install_requires=['six', 'requests', 'python-dateutil', 'jsonschema', 'python-dateutil', 'enum34;python_version<"3.4"', - 'functools32;python_version<"3.0"', 'deprecated'], + 'functools32;python_version<"3.0"', 'deprecated', 'cachetools'], extras_require={'fileobjects': ['lief>=0.8,<0.10;python_version<"3.5"', 'lief>=0.10.0.dev0;python_version>"3.5"', 'python-magic', 'pydeep'], 'neo': ['py2neo'], 'openioc': ['beautifulsoup4'], From 4be029a0f6fb25c8e485cb6dbc497e7b41636c1d Mon Sep 17 00:00:00 2001 From: Marc Hoersken Date: Fri, 4 Oct 2019 08:55:55 +0200 Subject: [PATCH 2/2] Use classmethod instead of staticmethod and avoid hard-coded reference --- pymisp/abstract.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pymisp/abstract.py b/pymisp/abstract.py index ad273ed..2fbc8c5 100644 --- a/pymisp/abstract.py +++ b/pymisp/abstract.py @@ -90,12 +90,12 @@ class MISPEncode(JSONEncoder): class MISPFileCache(object): # cache up to 150 JSON structures in class attribute - _file_cache = cachetools.LFUCache(150) + __file_cache = cachetools.LFUCache(150) - @staticmethod - def _load_json(path): - # use hard-coded root class attribute - file_cache = MISPFileCache._file_cache + @classmethod + def _load_json(cls, path): + # use root class attribute as global cache + file_cache = cls.__file_cache # use modified time with path as cache key mtime = os.path.getmtime(path) if path in file_cache: