From ec42182cb1616d2c7319ebfc1bf9a6a4c0bc4457 Mon Sep 17 00:00:00 2001 From: = Date: Fri, 17 Nov 2017 12:19:06 -0500 Subject: [PATCH] issue #107 , also MemorySource.load_from_file() parses JSON into python-stix2 objects now --- docs/guide/memory.ipynb | 578 ++++++++++++++++++++++++++++++++-------- stix2/sources/memory.py | 12 +- 2 files changed, 479 insertions(+), 111 deletions(-) diff --git a/docs/guide/memory.ipynb b/docs/guide/memory.ipynb index 75c0475..1119a64 100644 --- a/docs/guide/memory.ipynb +++ b/docs/guide/memory.ipynb @@ -62,8 +62,9 @@ "\n", "\n", "### Memory API\n", + "A note on adding STIX content to, and retrieving it from, the Memory suite. As mentioned, under the hood is an internal, in-memory dictionary. STIX content that is to be added can be in the following forms: python-stix2 objects, (Python) dictionaries (of valid STIX objects or Bundles), JSON-encoded strings (of valid STIX objects or Bundles), or a (Python) list of any of the previously listed types. [MemoryStore](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore) actually stores STIX content either as python-stix2 objects or as (Python) dictionaries, reducing and converting any of the aforementioned types to one of those. Additionally, whatever form the STIX object is stored as, is how it will be returned when retrieved. python-stix2 objects, and json-encoded strings (of STIX content) are stored as python-stix2 objects, while (Python) dictionaries (of STIX objects) are stored as (Python) dictionaries.\n", "\n", - "A note on [load_from_file()](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore.load_from_file) and [save_to_file()](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore.save_to_file). These methods both add STIX content to an internal dictionary (maintained by [MemoryStore](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore)). 
STIX content that is to be added can be in the following forms: Python STIX objects, Python dictionaries (of valid STIX objects or Bundles), JSON-encoded strings (of valid STIX objects or Bundles), or a (Python) list of any of the previously listed types. [MemoryStore](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore) actually stores STIX content either as python STIX objects or as python dictionaries, reducing and converting any of the aforementioned types to one of those; and whatever form the STIX object is stored as, is how it will be returned as when queried or retrieved. Python STIX objects, and json-encoded strings (of STIX content) are stored as python STIX objects. Python dictionaries (of STIX objects) are stored as Python dictionaries. This is done, as can be efficiently supported, in order to return STIX content in the form it was added to the [MemoryStore](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore). Also, for [load_from_file()](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore.load_from_file), STIX content is assumed to be in JSON form within the file, individually or in a Bundle. \n", + "A note on [load_from_file()](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore.load_from_file). For [load_from_file()](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore.load_from_file), STIX content is assumed to be in JSON form within the file, as an individual STIX object or in a Bundle. When the JSON is loaded, the STIX objects are parsed into python-stix2 objects before being stored in the in-memory dictionary.\n", "\n", "A note on [save_to_file()](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore.save_to_file). This method dumps all STIX content that is in [MemoryStore](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore) to the specified file. 
The file format will be JSON, and the STIX content will be within a STIX Bundle. Note also that the the output form will be a JSON STIX Bundle regardless of the form that the individual STIX objects are stored (i.e. supplied) to the [MemoryStore](../api/sources/stix2.sources.memory.rst#stix2.sources.memory.MemoryStore). \n", "\n", @@ -74,26 +75,101 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"type\": \"indicator\",\n", - " \"id\": \"indicator--d91ef175-8a82-470a-a610-bbd2ee8a1516\",\n", - " \"created\": \"2017-09-29T19:52:16.930Z\",\n", - " \"modified\": \"2017-09-29T19:52:16.930Z\",\n", - " \"labels\": [\n", - " \"malicious-activity\"\n", - " ],\n", - " \"description\": \"Crusades C2 implant\",\n", - " \"pattern\": \"[file:hashes.'SHA-256' = '54b7e05e39a59428743635242e4a867c932140a999f52a1e54fa7ee6a440c73b']\",\n", - " \"valid_from\": \"2017-09-29T19:52:16.930909Z\"\n", - "}\n" - ] + "data": { + "text/html": [ + "
{\n",
+       "    "type": "indicator",\n",
+       "    "id": "indicator--2f61e4e7-0891-4e09-b79a-66f5e594fec0",\n",
+       "    "created": "2017-11-17T17:01:31.590Z",\n",
+       "    "modified": "2017-11-17T17:01:31.590Z",\n",
+       "    "description": "Crusades C2 implant",\n",
+       "    "pattern": "[file:hashes.'SHA-256' = '54b7e05e39a59428743635242e4a867c932140a999f52a1e54fa7ee6a440c73b']",\n",
+       "    "valid_from": "2017-11-17T17:01:31.590939Z",\n",
+       "    "labels": [\n",
+       "        "malicious-activity"\n",
+       "    ]\n",
+       "}\n",
+       "
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -115,26 +191,101 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"type\": \"indicator\",\n", - " \"id\": \"indicator--79fdaad7-c461-49bb-ad1d-caa5e9c51c90\",\n", - " \"created\": \"2017-09-29T19:52:17.021Z\",\n", - " \"modified\": \"2017-09-29T19:52:17.021Z\",\n", - " \"labels\": [\n", - " \"malicious-activity\"\n", - " ],\n", - " \"description\": \"Crusades stage 2 implant variant\",\n", - " \"pattern\": \"[file:hashes.'SHA-256' = '31a45e777e4d58b97f4c43e38006f8cd6580ddabc4037905b2fad734712b582c']\",\n", - " \"valid_from\": \"2017-09-29T19:52:17.021728Z\"\n", - "}\n" - ] + "data": { + "text/html": [ + "
{\n",
+       "    "type": "indicator",\n",
+       "    "id": "indicator--ddb765ba-ff1e-4285-bf33-1f6d08f583d6",\n",
+       "    "created": "2017-11-17T17:01:31.799Z",\n",
+       "    "modified": "2017-11-17T17:01:31.799Z",\n",
+       "    "description": "Crusades stage 2 implant variant",\n",
+       "    "pattern": "[file:hashes.'SHA-256' = '31a45e777e4d58b97f4c43e38006f8cd6580ddabc4037905b2fad734712b582c']",\n",
+       "    "valid_from": "2017-11-17T17:01:31.799228Z",\n",
+       "    "labels": [\n",
+       "        "malicious-activity"\n",
+       "    ]\n",
+       "}\n",
+       "
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -157,82 +308,209 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, + "execution_count": 5, + "metadata": { + "scrolled": true + }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "-----------------------\n", - "{'name': 'Urban2', 'created': '2017-09-12T13:26:18.023Z', 'labels': ['rootkit'], 'modified': '2017-09-12T13:26:18.023Z', 'type': 'malware', 'id': 'malware--2daa14d6-cbf3-4308-bb8e-226d324a08e4'}\n", - "-----------------------\n", - "{\n", - " \"type\": \"malware\",\n", - " \"id\": \"malware--2b3dd412-18a5-4e81-8742-4977068eb3eb\",\n", - " \"created\": \"2017-09-29T19:52:17.028Z\",\n", - " \"modified\": \"2017-09-29T19:52:17.028Z\",\n", - " \"name\": \"Alexios\",\n", - " \"labels\": [\n", - " \"rootkit\"\n", - " ]\n", - "}\n" - ] + "data": { + "text/html": [ + "
{\n",
+       "    "type": "malware",\n",
+       "    "id": "malware--e8170e70-522f-4ec3-aa22-afb55bfad0b0",\n",
+       "    "created": "2017-11-17T17:01:31.806Z",\n",
+       "    "modified": "2017-11-17T17:01:31.806Z",\n",
+       "    "name": "Alexios",\n",
+       "    "labels": [\n",
+       "        "rootkit"\n",
+       "    ]\n",
+       "}\n",
+       "
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "from stix2 import Filter\n", "\n", - "# add dictionary (of STIX object) to MemoryStore\n", - "# (this dict would assumably come from output of another source,\n", - "# i.e. a loaded json file, NOT manually created as done here for sample purposes)\n", - "\n", - "malware = {\n", - " \"type\": \"malware\",\n", - " \"id\" : \"malware--2daa14d6-cbf3-4308-bb8e-226d324a08e4\",\n", - " \"labels\": [\"rootkit\"],\n", - " \"name\": \"Urban2\",\n", - " \"created\": \"2017-09-12T13:26:18.023Z\",\n", - " \"modified\": \"2017-09-12T13:26:18.023Z\"\n", - "}\n", - "\n", - "mem.add(malware)\n", - "\n", - "results = mem.query([Filter(\"labels\",\"=\", \"rootkit\")])\n", - "for r in results:\n", - " # note that python STIX objects are pretty-printed\n", - " # due to some python dunder method magic, but normal\n", - " # python dictionaries are not by default. Thus the\n", - " # python STIX objects and python STIX dictionaries\n", - " # that match the above query can be easily identified visually\n", - " print(\"-----------------------\")\n", - " print(r)" + "mal = mem.query([Filter(\"labels\",\"=\", \"rootkit\")])[0]\n", + "print(mal)" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\n", - " \"type\": \"report\",\n", - " \"id\": \"report--2add14d6-bbf3-4308-bb8e-226d314a08e4\",\n", - " \"created\": \"2017-05-08T18:34:08.042Z\",\n", - " \"modified\": \"2017-05-08T18:34:08.042Z\",\n", - " \"name\": \"The Crusades: Looking into the relentless infiltration of Israels digital infrastructure.\",\n", - " \"published\": \"2017-05-08T10:24:11.011Z\",\n", - " \"object_refs\": [\n", - " \"malware--2daa14d6-cbf3-4308-bb8e-226d324a08e4\"\n", - " ],\n", - " \"labels\": [\n", - " \"threat-report\"\n", - " ]\n", - "}\n" - ] + "data": { + 
"text/html": [ + "
{\n",
+       "    "type": "report",\n",
+       "    "id": "report--2add14d6-bbf3-4308-bb8e-226d314a08e4",\n",
+       "    "created": "2017-05-08T18:34:08.042Z",\n",
+       "    "modified": "2017-05-08T18:34:08.042Z",\n",
+       "    "name": "The Crusades: Looking into the relentless infiltration of Israels digital infrastructure.",\n",
+       "    "published": "2017-05-08T10:24:11.011Z",\n",
+       "    "object_refs": [\n",
+       "        "malware--2daa14d6-cbf3-4308-bb8e-226d324a08e4"\n",
+       "    ],\n",
+       "    "labels": [\n",
+       "        "threat-report"\n",
+       "    ]\n",
+       "}\n",
+       "
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -257,15 +535,103 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 8, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "{u'name': u'The Crusades: Looking into the relentless infiltration of Israels digital infrastructure.', u'created': u'2017-05-08T18:34:08.042Z', u'labels': [u'threat-report'], u'modified': u'2017-05-08T18:34:08.042Z', u'object_refs': [u'malware--2daa14d6-cbf3-4308-bb8e-226d324a08e4'], u'published': u'2017-05-08T10:24:11.011Z', u'type': u'report', u'id': u'report--2add14d6-bbf3-4308-bb8e-226d314a08e4'}\n" - ] + "data": { + "text/html": [ + "
{\n",
+       "    "type": "report",\n",
+       "    "id": "report--2add14d6-bbf3-4308-bb8e-226d314a08e4",\n",
+       "    "created": "2017-05-08T18:34:08.042Z",\n",
+       "    "modified": "2017-05-08T18:34:08.042Z",\n",
+       "    "name": "The Crusades: Looking into the relentless infiltration of Israels digital infrastructure.",\n",
+       "    "published": "2017-05-08T10:24:11.011Z",\n",
+       "    "object_refs": [\n",
+       "        "malware--2daa14d6-cbf3-4308-bb8e-226d324a08e4"\n",
+       "    ],\n",
+       "    "labels": [\n",
+       "        "threat-report"\n",
+       "    ]\n",
+       "}\n",
+       "
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -280,17 +646,15 @@ "report = mem_2.get(\"report--2add14d6-bbf3-4308-bb8e-226d314a08e4\")\n", "\n", "# for visualpurposes\n", - "# Note: Since STIX content was added to MemoryStore as json,\n", - "# it is maintained as python dictionaries ( as opposed to STIX objects)\n", "print(report)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 2", + "display_name": "cti-python-stix2", "language": "python", - "name": "python2" + "name": "cti-python-stix2" }, "language_info": { "codemirror_mode": { diff --git a/stix2/sources/memory.py b/stix2/sources/memory.py index 308d0d0..32a6756 100644 --- a/stix2/sources/memory.py +++ b/stix2/sources/memory.py @@ -1,9 +1,6 @@ """ Python STIX 2.0 Memory Source/Sink -TODO: - Run through tests again, lot of changes. - TODO: Use deduplicate() calls only when memory corpus is dirty (been added to) can save a lot of time for successive queries @@ -302,7 +299,14 @@ class MemorySource(DataSource): return all_data def load_from_file(self, file_path, allow_custom=False, version=None): + """ Load JSON formatted STIX content from file and add to Memory.""" file_path = os.path.abspath(file_path) - stix_data = json.load(open(file_path, "r")) + + # converting the STIX content to JSON encoded string before calling + # _add() so that the STIX content is added as python-stix2 objects + # to the in-memory dict. Otherwise, if you pass a dict to _add(), + # it gets stored as a dict. + stix_data = json.dumps(json.load(open(file_path, "r"))) + _add(self, stix_data, allow_custom=allow_custom, version=version) load_from_file.__doc__ = MemoryStore.load_from_file.__doc__