2018-10-19 12:35:36 +02:00
|
|
|
#!/usr/bin/env python3
|
2017-07-25 18:04:15 +02:00
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
import json
|
2017-07-25 18:21:23 +02:00
|
|
|
from json import JSONEncoder
|
2017-07-25 18:04:15 +02:00
|
|
|
import os
|
|
|
|
import sys
|
2020-02-20 14:10:36 +01:00
|
|
|
from collections.abc import Mapping
|
2017-07-25 18:04:15 +02:00
|
|
|
from glob import glob
|
2017-07-25 18:57:18 +02:00
|
|
|
import re
|
2021-11-05 10:30:45 +01:00
|
|
|
from typing import List, Dict, Optional, Any, Tuple, Iterator, overload, Union
|
2021-11-05 11:04:28 +01:00
|
|
|
|
|
|
|
if sys.version_info >= (3, 8):
|
|
|
|
from typing import Literal
|
|
|
|
else:
|
|
|
|
from typing_extensions import Literal
|
2017-07-25 18:04:15 +02:00
|
|
|
|
|
|
|
try:
|
2020-02-17 21:38:12 +01:00
|
|
|
import jsonschema # type: ignore
|
2017-07-25 18:04:15 +02:00
|
|
|
HAS_JSONSCHEMA = True
|
|
|
|
except ImportError:
|
|
|
|
HAS_JSONSCHEMA = False
|
|
|
|
|
|
|
|
|
2017-07-25 18:21:23 +02:00
|
|
|
class EncodeGalaxies(JSONEncoder):
    """JSON encoder that knows how to serialize Galaxy objects."""

    def default(self, obj: Any) -> Dict[str, str]:
        """
        Returns the dictionary form of a Galaxy; defers to JSONEncoder otherwise.

        Args:
            obj (Any): The object the json module could not serialize natively.

        Returns:
            Dict[str, str]: The result of obj.to_dict() when obj is a Galaxy.
        """
        if not isinstance(obj, Galaxy):
            # Let the base class handle (i.e. reject) anything we do not know.
            return super().default(obj)
        return obj.to_dict()
|
|
|
|
|
2018-02-23 13:46:48 +01:00
|
|
|
|
2017-11-01 19:20:09 +01:00
|
|
|
class EncodeClusters(JSONEncoder):
    """JSON encoder that knows how to serialize cluster related objects."""

    def default(self, obj: Any) -> Dict[str, str]:
        """
        Returns the dictionary form of a Cluster, ClusterValue or
        ClusterValueMeta; defers to JSONEncoder otherwise.

        Args:
            obj (Any): The object the json module could not serialize natively.

        Returns:
            Dict[str, str]: The result of obj.to_dict() for the supported types.
        """
        supported_types = (Cluster, ClusterValue, ClusterValueMeta)
        if not isinstance(obj, supported_types):
            # Let the base class handle (i.e. reject) anything we do not know.
            return super().default(obj)
        return obj.to_dict()
|
2017-07-25 18:21:23 +02:00
|
|
|
|
|
|
|
|
2017-07-25 20:27:58 +02:00
|
|
|
class PyMISPGalaxiesError(Exception):
    """Base exception of the library; keeps the message on the instance."""

    def __init__(self, message: str):
        """
        Args:
            message (str): Human readable description of the error.
        """
        super().__init__(message)
        # Also exposed as an attribute so callers can read it directly.
        self.message = message
|
|
|
|
|
|
|
|
|
|
|
|
class UnableToRevertMachinetag(PyMISPGalaxiesError):
    """Raised when a machinetag cannot be resolved to a cluster and value."""
|
|
|
|
|
|
|
|
|
2017-07-25 18:04:15 +02:00
|
|
|
class Galaxy():
    """
    Represents a galaxy in the PyMISPGalaxies library.

    Attributes:
        galaxy (Dict[str, str]): The dictionary containing the galaxy data.
        type: The type of the galaxy.
        name: The name of the galaxy.
        icon: The icon of the galaxy.
        description: The description of the galaxy.
        version: The version of the galaxy.
        uuid: The UUID of the galaxy.
        namespace (optional): The namespace of the galaxy, None if absent.
        kill_chain_order (optional): The kill chain order of the galaxy, None if absent.
    """

    @staticmethod
    def _root_dir_galaxies() -> str:
        """Returns the path of the 'galaxies' data folder shipped with the package."""
        package_dir = os.path.abspath(os.path.dirname(sys.modules['pymispgalaxies'].__file__))  # type: ignore [type-var, arg-type]
        return os.path.join(package_dir, 'data', 'misp-galaxy', 'galaxies')

    def __init__(self, galaxy: Union[str, Dict[str, str]]):
        """
        Initializes a Galaxy object from an existing galaxy.

        Args:
            galaxy (str): The name of the existing galaxy to load from the data folder.
            galaxy (Dict[str, str]): The dictionary containing the galaxy data.

        Raises:
            KeyError: If one of the mandatory keys (type, name, icon,
                description, version, uuid) is missing.
        """
        if isinstance(galaxy, str):
            galaxy_file = os.path.join(self._root_dir_galaxies(), f"{galaxy}.json")
            # The galaxy files are JSON documents: always read them as UTF-8
            # instead of relying on the platform default encoding.
            with open(galaxy_file, 'r', encoding='utf-8') as f:
                self.galaxy = json.load(f)
        else:
            self.galaxy = galaxy
        self.type = self.galaxy['type']
        self.name = self.galaxy['name']
        self.icon = self.galaxy['icon']
        self.description = self.galaxy['description']
        self.version = self.galaxy['version']
        self.uuid = self.galaxy['uuid']
        # Read the optional keys with .get(): popping them would silently
        # modify the dictionary passed in by the caller and strip those keys
        # from the document kept in self.galaxy.
        self.namespace = self.galaxy.get('namespace', None)
        self.kill_chain_order = self.galaxy.get('kill_chain_order', None)

    def save(self, name: str) -> None:
        """
        Saves the galaxy to a file <name>.json in the galaxies data folder.

        Args:
            name (str): The name of the file to save the galaxy to.
        """
        galaxy_file = os.path.join(self._root_dir_galaxies(), f"{name}.json")
        # ensure_ascii=False emits raw non-ASCII characters, so the file has
        # to be opened as UTF-8 explicitly.
        with open(galaxy_file, 'w', encoding='utf-8') as f:
            json.dump(self, f, cls=EncodeGalaxies, indent=2, sort_keys=True, ensure_ascii=False)
            f.write('\n')  # needed for the beauty and to be compliant with jq_all_the_things

    def to_json(self) -> str:
        """
        Converts the galaxy object to a JSON string.

        Returns:
            str: The JSON representation of the galaxy object.
        """
        return json.dumps(self, cls=EncodeGalaxies)

    def to_dict(self) -> Dict[str, str]:
        """
        Converts the galaxy object to a dictionary.

        The optional keys (namespace, kill_chain_order) are only present when
        they have a truthy value.

        Returns:
            Dict[str, str]: The dictionary representation of the galaxy object.
        """
        to_return = {'type': self.type, 'name': self.name, 'description': self.description,
                     'version': self.version, 'uuid': self.uuid, 'icon': self.icon}
        if self.namespace:
            to_return['namespace'] = self.namespace
        if self.kill_chain_order:
            to_return['kill_chain_order'] = self.kill_chain_order
        return to_return
|
2017-07-25 18:04:15 +02:00
|
|
|
|
|
|
|
|
2021-05-13 20:43:34 +02:00
|
|
|
class Galaxies(Mapping):  # type: ignore
    """
    A class representing a collection of MISP galaxies.

    Parameters:
    - galaxies: A list of dictionaries representing the galaxies. Each dictionary should contain the name and other properties of a galaxy.
                If left empty, the galaxies are loaded from the data folder.

    Attributes:
    - galaxies: A dictionary containing the galaxies, where the keys are the names of the galaxies and the values are instances of the Galaxy class.
    - root_dir_galaxies: The root directory of the MISP galaxies. Only set when the galaxies are loaded from the data folder.

    Methods:
    - validate_with_schema: Validates the galaxies against the schema.
    - __getitem__: Returns the galaxy with the specified name.
    - __iter__: Returns an iterator over the galaxy names.
    - __len__: Returns the number of galaxies in the collection.
    """

    def __init__(self, galaxies: Optional[List[Dict[str, str]]] = None):
        """
        Initializes a new instance of the Galaxies class.

        Parameters:
        - galaxies: A list of dictionaries representing the galaxies. Each dictionary should contain the name and other properties of a galaxy.
                    If left empty (None or an empty list), the galaxies are loaded from the data folder.
        """
        if not galaxies:
            galaxies = []
            self.root_dir_galaxies = os.path.join(os.path.abspath(os.path.dirname(sys.modules['pymispgalaxies'].__file__)),  # type: ignore
                                                  'data', 'misp-galaxy', 'galaxies')
            # Sorted so that the loading (and therefore iteration) order does
            # not depend on the order the file system lists the files in.
            for galaxy_file in sorted(glob(os.path.join(self.root_dir_galaxies, '*.json'))):
                with open(galaxy_file, 'r', encoding='utf-8') as f:
                    galaxies.append(json.load(f))

        # Keyed by galaxy name: a later galaxy with the same name replaces
        # an earlier one.
        self.galaxies = {}
        for galaxy in galaxies:
            self.galaxies[galaxy['name']] = Galaxy(galaxy)

    def validate_with_schema(self) -> None:
        """
        Validates the galaxies against the schema.

        Raises:
        - ImportError: If the jsonschema module is not installed.
        """
        if not HAS_JSONSCHEMA:
            raise ImportError('jsonschema is required: pip install jsonschema')
        schema = os.path.join(os.path.abspath(os.path.dirname(sys.modules['pymispgalaxies'].__file__)),  # type: ignore
                              'data', 'misp-galaxy', 'schema_galaxies.json')
        with open(schema, 'r', encoding='utf-8') as f:
            loaded_schema = json.load(f)
        for g in self.galaxies.values():
            # Validation runs on the raw galaxy document kept by each Galaxy.
            jsonschema.validate(g.galaxy, loaded_schema)

    def __getitem__(self, name: str) -> Galaxy:
        """
        Returns the galaxy with the specified name.

        Parameters:
        - name: The name of the galaxy.

        Returns:
        - The Galaxy instance with the specified name.

        Raises:
        - KeyError: If the galaxy with the specified name does not exist.
        """
        return self.galaxies[name]

    def __iter__(self) -> Iterator[str]:
        """
        Returns an iterator over the galaxy names.

        Returns:
        - An iterator over the galaxy names.
        """
        return iter(self.galaxies)

    def __len__(self) -> int:
        """
        Returns the number of galaxies in the collection.

        Returns:
        - The number of galaxies in the collection.
        """
        return len(self.galaxies)
|
|
|
|
|
|
|
|
|
|
|
|
class ClusterValueMeta():
    """
    Holds the 'meta' section of a cluster value.

    The well known keys are exposed as attributes of the same name (None when
    absent); every other key is kept in additional_properties.
    """

    # Well known meta keys, in the order they are emitted by to_dict().
    _KNOWN_FIELDS = ('type', 'complexity', 'effectiveness', 'country',
                     'possible_issues', 'colour', 'motive', 'impact', 'refs',
                     'synonyms', 'derivated_from', 'status', 'date',
                     'encryption', 'extensions', 'ransomnotes')

    def __init__(self, m: Dict[str, str]):
        """
        Initializes the metadata from a dictionary.

        Args:
            m (Dict[str, str]): The 'meta' dictionary of a cluster value. It is not modified.
        """
        # Work on a shallow copy: popping from the caller's dictionary would
        # strip the known keys out of the document it belongs to.
        m = dict(m)
        self.type = m.pop('type', None)
        self.complexity = m.pop('complexity', None)
        self.effectiveness = m.pop('effectiveness', None)
        self.country = m.pop('country', None)
        self.possible_issues = m.pop('possible_issues', None)
        self.colour = m.pop('colour', None)
        self.motive = m.pop('motive', None)
        self.impact = m.pop('impact', None)
        self.refs = m.pop('refs', None)
        self.synonyms = m.pop('synonyms', None)
        self.derivated_from = m.pop('derivated_from', None)
        self.status = m.pop('status', None)
        self.date = m.pop('date', None)
        self.encryption = m.pop('encryption', None)
        self.extensions = m.pop('extensions', None)
        self.ransomnotes = m.pop('ransomnotes', None)
        # NOTE: meta can have additional properties. Whatever is left after
        # extracting the known keys above is kept here untouched.
        self.additional_properties = m

    def to_json(self) -> str:
        """
        Converts the metadata to a JSON string.

        Returns:
            str: The JSON representation of the metadata.
        """
        return json.dumps(self, cls=EncodeClusters)

    def to_dict(self) -> Dict[str, str]:
        """
        Converts the metadata to a dictionary.

        Known keys with a falsy value (None, empty list, ...) are left out;
        the additional properties are merged in last.

        Returns:
            Dict[str, str]: The dictionary representation of the metadata.
        """
        to_return = {}
        for field in self._KNOWN_FIELDS:
            field_value = getattr(self, field)
            if field_value:
                to_return[field] = field_value
        if self.additional_properties:
            to_return.update(self.additional_properties)
        return to_return
|
|
|
|
|
|
|
|
|
|
|
|
class ClusterValue():
    """
    Represents a cluster value.

    Attributes:
        uuid (str): The UUID of the cluster value, None if absent.
        value (Any): The value of the cluster.
        description (str): The description of the cluster value, None if absent.
        meta (ClusterValueMeta): The metadata associated with the cluster value, None if absent or empty.
        related (List): The content of the 'related' key, an empty list if absent.
        searchable (List[str]): The de-duplicated search terms for the cluster value
            (value, uuid and synonyms), in no particular order.
    """

    def __init__(self, v: Dict[str, Any]):
        """
        Initializes a ClusterValue object.

        Args:
            v (Dict[str, Any]): A dictionary containing the cluster value information.

        Raises:
            PyMISPGalaxiesError: If the cluster value is invalid (no value).
        """
        # .get() so that a missing 'value' key is reported the same way as an
        # empty one, instead of leaking a bare KeyError.
        if not v.get('value'):
            raise PyMISPGalaxiesError("Invalid cluster (no value): {}".format(v))
        self.uuid = v.get('uuid', None)
        self.value = v['value']
        self.description = v.get('description')
        self.meta = self.__init_meta(v.get('meta'))
        # LATER convert related to a class?
        self.related = v.get('related', [])
        self.searchable = [self.value]
        if self.uuid:
            self.searchable.append(self.uuid)
        if self.meta and self.meta.synonyms:
            self.searchable += self.meta.synonyms
        # Going through a set drops duplicates (and the original ordering).
        self.searchable = list(set(self.searchable))

    def __init_meta(self, m: Optional[Dict[str, str]]) -> Optional["ClusterValueMeta"]:
        """
        Initializes the metadata for the cluster value.

        Args:
            m (Optional[Dict[str, str]]): A dictionary containing the metadata for the cluster value.

        Returns:
            Optional[ClusterValueMeta]: The initialized ClusterValueMeta object or None if no metadata is provided.
        """
        if not m:
            return None
        return ClusterValueMeta(m)

    def to_json(self) -> str:
        """
        Converts the ClusterValue object to a JSON string.

        Returns:
            str: The JSON representation of the ClusterValue object.
        """
        return json.dumps(self, cls=EncodeClusters)

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts the ClusterValue object to a dictionary.

        Optional keys are only present when they have a truthy value. The
        'meta' entry holds the ClusterValueMeta object itself, it is turned
        into a dictionary by the JSON encoder.

        Returns:
            Dict[str, Any]: The dictionary representation of the ClusterValue object.
        """
        to_return = {'value': self.value}
        if self.uuid:
            to_return['uuid'] = self.uuid
        if self.description:
            to_return['description'] = self.description
        if self.meta:
            to_return['meta'] = self.meta
        if self.related:
            to_return['related'] = self.related
        return to_return
|
|
|
|
|
|
|
|
|
2021-05-13 20:43:34 +02:00
|
|
|
class Cluster(Mapping):  # type: ignore
    """
    Represents a cluster in the PyMISPGalaxies library.

    A Cluster is a read-only mapping from the lowercased value of each entry
    to its ClusterValue.

    Attributes:
        cluster (Dict[str, Any]): The dictionary containing the cluster data.
        name: The name of the cluster.
        type: The type of the cluster.
        source: The source of the cluster.
        authors: The authors of the cluster.
        description: The description of the cluster.
        uuid: The UUID of the cluster.
        version: The version of the cluster.
        category: The category of the cluster.
        cluster_values (Dict[str, ClusterValue]): A dictionary containing the cluster values, where the keys are the lowercased values of the cluster and the values are instances of the ClusterValue class.
        duplicates (List[Tuple[str, str]]): A list of tuples representing duplicate values in the cluster, where each tuple contains the name of the cluster and the duplicate value.
    """

    @staticmethod
    def _root_dir_clusters() -> str:
        """Returns the path of the 'clusters' data folder shipped with the package."""
        package_dir = os.path.abspath(os.path.dirname(sys.modules['pymispgalaxies'].__file__))  # type: ignore [type-var, arg-type]
        return os.path.join(package_dir, 'data', 'misp-galaxy', 'clusters')

    def __init__(self, cluster: Union[Dict[str, Any], str], skip_duplicates: bool = False):
        """
        Initializes a Cluster object from an existing cluster.

        Args:
            cluster (str): The name of the existing cluster to load from the data folder.
            cluster (Dict[str, Any]): A dictionary containing the cluster data.
            skip_duplicates (bool, optional): If True, duplicate values are recorded in
                self.duplicates instead of raising. Defaults to False.

        Raises:
            KeyError: If one of the mandatory keys (name, type, source, authors,
                description, uuid, version, category) is missing.
            PyMISPGalaxiesError: If a value is duplicated and skip_duplicates is False.
        """
        if isinstance(cluster, str):
            cluster_file = os.path.join(self._root_dir_clusters(), f"{cluster}.json")
            # The cluster files are JSON documents: always read them as UTF-8
            # instead of relying on the platform default encoding.
            with open(cluster_file, 'r', encoding='utf-8') as f:
                self.cluster = json.load(f)
        else:
            self.cluster = cluster
        self.name = self.cluster['name']
        self.type = self.cluster['type']
        self.source = self.cluster['source']
        self.authors = self.cluster['authors']
        self.description = self.cluster['description']
        self.uuid = self.cluster['uuid']
        self.version = self.cluster['version']
        self.category = self.cluster['category']
        self.cluster_values: Dict[str, Any] = {}
        self.duplicates: List[Tuple[str, str]] = []
        # A cluster without a 'values' key is simply empty. The lookup is not
        # wrapped in a try/except around the whole loop: that would also
        # swallow a KeyError raised while building a value and silently drop
        # every value that follows it.
        for value in self.cluster.get('values', []):
            self.append(ClusterValue(value), skip_duplicates)

    @overload
    def search(self, query: str, return_tags: Literal[False] = False) -> List[ClusterValue]:
        ...

    @overload
    def search(self, query: str, return_tags: Literal[True]) -> List[str]:
        ...

    @overload
    def search(self, query: str, return_tags: bool) -> Union[List[ClusterValue], List[str]]:
        ...

    def search(self, query: str, return_tags: bool = False) -> Union[List[ClusterValue], List[str]]:
        """
        Searches for values in the cluster that match the given query.

        A value matches when the query is a case-insensitive substring of one
        of its searchable terms.

        Args:
            query (str): The query to search for.
            return_tags (bool, optional): Flag indicating whether to return machine tags instead of cluster values. Defaults to False.

        Returns:
            Union[List[ClusterValue], List[str]]: A list of matching cluster values or machine tags.
        """
        needle = query.lower()
        matching = []
        for v in self.values():
            if not any(needle in s.lower() for s in v.searchable):
                continue
            if return_tags:
                matching.append('misp-galaxy:{}="{}"'.format(self.type, v.value))
            else:
                matching.append(v)
        return matching

    def machinetags(self) -> List[str]:
        """
        Returns a list of machine tags for the cluster.

        Returns:
            List[str]: A list of machine tags, one per cluster value.
        """
        return ['misp-galaxy:{}="{}"'.format(self.type, v.value) for v in self.values()]

    def get_by_external_id(self, external_id: str) -> ClusterValue:
        """
        Returns the cluster value with the specified external ID.

        The external ID is read from the 'external_id' additional property of
        the meta of each value; the first match is returned.

        Args:
            external_id (str): The external ID to search for.

        Returns:
            ClusterValue: The cluster value with the specified external ID.

        Raises:
            KeyError: If no value with the specified external ID is found.
        """
        for value in self.cluster_values.values():
            if value.meta and value.meta.additional_properties and value.meta.additional_properties.get('external_id') == external_id:
                return value
        raise KeyError('No value with external_id: {}'.format(external_id))

    def get_kill_chain_tactics(self) -> Dict[str, List[str]]:
        """
        Returns the sorted kill chain tactics associated with the cluster.

        Every entry of the 'kill_chain' additional property of the values is
        expected to look like '<kill chain>:<tactic>'.

        Returns:
            Dict[str, List[str]]: For each kill chain, the sorted list of its distinct tactics.
        """
        items = set()
        for v in self.cluster_values.values():
            if v.meta and v.meta.additional_properties and v.meta.additional_properties.get('kill_chain'):
                items.update(v.meta.additional_properties.get('kill_chain'))
        result: Dict[str, List[str]] = {}
        # Iterating the sorted items yields sorted tactic lists and a
        # deterministic key order.
        for item in sorted(items):
            # Split on the first colon only, so a tactic that itself contains
            # a colon does not break the unpacking.
            key, value = item.split(':', 1)
            result.setdefault(key, []).append(value)
        return result

    def append(self, cv: Union[Dict[str, Any], ClusterValue], skip_duplicates: bool = False) -> None:
        """
        Adds a cluster value to the cluster.

        Args:
            cv (Union[Dict[str, Any], ClusterValue]): The value to add, either as a dictionary or as a ClusterValue.
            skip_duplicates (bool, optional): If True, a value that already exists (case-insensitive)
                is recorded in self.duplicates and replaces the existing one. Defaults to False.

        Raises:
            PyMISPGalaxiesError: If the value already exists and skip_duplicates is False.
        """
        if isinstance(cv, dict):
            cv = ClusterValue(cv)
        if cv.value in self:
            if skip_duplicates:
                self.duplicates.append((self.name, cv.value))
            else:
                raise PyMISPGalaxiesError("Duplicate value ({}) in cluster: {}".format(cv.value, self.name))
        # NOTE: a recorded duplicate still overwrites the entry stored so far.
        self.cluster_values[cv.value.lower()] = cv

    def save(self, name: str) -> None:
        """
        Saves the cluster to a file <name>.json in the clusters data folder.

        Args:
            name (str): The name of the file to save the cluster to.
        """
        cluster_file = os.path.join(self._root_dir_clusters(), f"{name}.json")
        # ensure_ascii=False emits raw non-ASCII characters, so the file has
        # to be opened as UTF-8 explicitly.
        with open(cluster_file, 'w', encoding='utf-8') as f:
            json.dump(self, f, cls=EncodeClusters, indent=2, sort_keys=True, ensure_ascii=False)
            f.write('\n')  # needed for the beauty and to be compliant with jq_all_the_things

    def __str__(self) -> str:
        """
        Returns a string representation of the cluster.

        Returns:
            str: The machine tags of the cluster, one per line.
        """
        return '\n'.join(self.machinetags())

    def __getitem__(self, name: str) -> ClusterValue:
        """
        Returns the cluster value with the specified name (case-insensitive).

        Args:
            name (str): The name of the cluster value.

        Returns:
            ClusterValue: The cluster value with the specified name.

        Raises:
            KeyError: If there is no cluster value with the specified name.
        """
        return self.cluster_values[name.lower()]

    def __len__(self) -> int:
        """
        Returns the number of cluster values in the cluster.

        Returns:
            int: The number of cluster values.
        """
        return len(self.cluster_values)

    def __iter__(self) -> Iterator[str]:
        """
        Returns an iterator over the (lowercased) names of the cluster values.

        Returns:
            Iterator[str]: An iterator over the keys of cluster_values.
        """
        return iter(self.cluster_values)

    def to_json(self) -> str:
        """
        Converts the Cluster object to a JSON string.

        Returns:
            str: The JSON representation of the Cluster object.
        """
        return json.dumps(self, cls=EncodeClusters)

    def to_dict(self) -> Dict[str, Any]:
        """
        Converts the Cluster object to a dictionary.

        The 'values' entry holds the ClusterValue objects themselves, they are
        turned into dictionaries by the JSON encoder.

        Returns:
            Dict[str, Any]: The dictionary representation of the Cluster object.
        """
        return {'name': self.name, 'type': self.type, 'source': self.source,
                'authors': self.authors, 'description': self.description,
                'uuid': self.uuid, 'version': self.version, 'category': self.category,
                'values': list(self.values())}
|
|
|
|
|
|
|
|
|
2021-05-13 20:43:34 +02:00
|
|
|
class Clusters(Mapping):  # type: ignore
    """
    A read-only mapping from cluster type to Cluster.

    Attributes:
        clusters (Dict[str, Cluster]): The clusters, keyed by their type.
        root_dir_clusters (str): The clusters data folder. Only set when the
            clusters are loaded from the data folder.
    """

    def __init__(self, clusters: Optional[List[Dict[str, str]]] = None, skip_duplicates: bool = False):
        """
        Allows to interact with a group of clusters.

        Args:
            clusters (List[Dict[str, str]], optional): A list of dictionaries representing clusters. If left empty (None or an empty list), load the clusters from the data folder.
            skip_duplicates (bool, optional): Passed to each Cluster: if True, duplicate values inside a cluster are recorded instead of raising. Defaults to False.
        """
        if not clusters:
            clusters = []
            self.root_dir_clusters = os.path.join(os.path.abspath(os.path.dirname(sys.modules['pymispgalaxies'].__file__)),  # type: ignore
                                                  'data', 'misp-galaxy', 'clusters')
            # Sorted so that the loading (and therefore iteration) order does
            # not depend on the order the file system lists the files in.
            for cluster_file in sorted(glob(os.path.join(self.root_dir_clusters, '*.json'))):
                with open(cluster_file, 'r', encoding='utf-8') as f:
                    clusters.append(json.load(f))
        # Keyed by cluster type: a later cluster with the same type replaces
        # an earlier one.
        self.clusters = {}
        for cluster in clusters:
            self.clusters[cluster['type']] = Cluster(cluster, skip_duplicates=skip_duplicates)

    def validate_with_schema(self) -> None:
        """
        Validates the clusters against the schema.

        Raises:
            ImportError: If jsonschema is not installed.
        """
        if not HAS_JSONSCHEMA:
            raise ImportError('jsonschema is required: pip install jsonschema')
        schema = os.path.join(os.path.abspath(os.path.dirname(sys.modules['pymispgalaxies'].__file__)),  # type: ignore
                              'data', 'misp-galaxy', 'schema_clusters.json')
        with open(schema, 'r', encoding='utf-8') as f:
            loaded_schema = json.load(f)
        for c in self.values():
            # Validation runs on the raw cluster document kept by each Cluster.
            jsonschema.validate(c.cluster, loaded_schema)

    def all_machinetags(self) -> List[List[str]]:
        """
        Returns the machinetags of all the clusters.

        Returns:
            List[List[str]]: One list of machinetags per cluster (the result is not flattened).
        """
        return [cluster.machinetags() for cluster in self.values()]

    def revert_machinetag(self, machinetag: str) -> Tuple[Cluster, ClusterValue]:
        """
        Reverts a machinetag to its original cluster and value.

        Args:
            machinetag (str): The machinetag to revert, formatted as <namespace>:<cluster type>="<value>".

        Returns:
            Tuple[Cluster, ClusterValue]: A tuple containing the original cluster and value.

        Raises:
            UnableToRevertMachinetag: If the machinetag could not be found.
        """
        try:
            _, cluster_type, cluster_value = re.findall('^([^:]*):([^=]*)="([^"]*)"$', machinetag)[0]
            cluster: Cluster = self[cluster_type]
            value: ClusterValue = cluster[cluster_value]
            return cluster, value
        except Exception as e:
            # Any failure (malformed tag, unknown cluster or value) is reported
            # the same way; the original error is kept as the cause.
            raise UnableToRevertMachinetag('The machinetag {} could not be found.'.format(machinetag)) from e

    def search(self, query: str, return_tags: bool = False) -> List[Tuple[Cluster, Union[List[ClusterValue], List[str]]]]:
        """
        Searches for clusters and values matching the given query.

        Args:
            query (str): The query to search for.
            return_tags (bool, optional): Flag indicating whether to return the matching machine tags instead of the cluster values. Defaults to False.

        Returns:
            List[Tuple[Cluster, Union[List[ClusterValue], List[str]]]]: For each cluster with at least
            one match, a tuple containing the cluster and the list of its matches.
        """
        to_return = []
        for cluster in self.values():
            values = cluster.search(query, return_tags)
            if values:
                to_return.append((cluster, values))
        return to_return

    def __getitem__(self, name: str) -> Cluster:
        """
        Returns the cluster with the specified type.

        Args:
            name (str): The type of the cluster.

        Returns:
            Cluster: The cluster object.

        Raises:
            KeyError: If the cluster with the specified type does not exist.
        """
        return self.clusters[name]

    def __iter__(self) -> Iterator[str]:
        """
        Returns an iterator over the cluster types.

        Returns:
            Iterator[str]: An iterator over the keys of clusters.
        """
        return iter(self.clusters)

    def __len__(self) -> int:
        """
        Returns the number of clusters.

        Returns:
            int: The number of clusters.
        """
        return len(self.clusters)

    def __str__(self) -> str:
        """
        Returns a string representation of the Clusters object.

        Returns:
            str: The string form of each cluster, each followed by a blank line.
        """
        return ''.join('{}\n\n'.format(cluster) for cluster in self.values())
|