From ab5a95ffc63a27639892c6d42ee7fbed631d88b7 Mon Sep 17 00:00:00 2001 From: niclas Date: Tue, 5 Mar 2024 11:01:51 +0100 Subject: [PATCH] chg [tool] code formatting --- tools/mkdocs/generator.py | 86 ++++++++++++++++++++++++-------- tools/mkdocs/modules/cluster.py | 11 ++-- tools/mkdocs/modules/galaxy.py | 17 +++++-- tools/mkdocs/modules/site.py | 55 ++++++++++++++++---- tools/mkdocs/modules/universe.py | 40 +++++++++++---- tools/mkdocs/utils/helper.py | 12 +++-- 6 files changed, 168 insertions(+), 53 deletions(-) diff --git a/tools/mkdocs/generator.py b/tools/mkdocs/generator.py index f765353..34bd3cd 100644 --- a/tools/mkdocs/generator.py +++ b/tools/mkdocs/generator.py @@ -12,25 +12,30 @@ import os import time import sys -sys.setrecursionlimit(10000) +sys.setrecursionlimit(10000) -FILES_TO_IGNORE = [] +FILES_TO_IGNORE = [] CLUSTER_PATH = "../../clusters" SITE_PATH = "./site/docs" GALAXY_PATH = "../../galaxies" + def write_relations_table(cluster): if cluster.relationships: print(f"Writing {cluster.uuid}.md") with open(os.path.join(relation_path, f"{cluster.uuid}.md"), "w") as index: index.write(generate_relations_table(cluster.relationships)) + def get_cluster_relationships(cluster_data): galaxy, cluster = cluster_data - relationships = universe.get_relationships_with_levels(universe.galaxies[galaxy].clusters[cluster]) + relationships = universe.get_relationships_with_levels( + universe.galaxies[galaxy].clusters[cluster] + ) print(f"Processed {galaxy}, {cluster}") return cluster, galaxy, relationships + def get_deprecated_galaxy_files(): deprecated_galaxy_files = [] for f in os.listdir(GALAXY_PATH): @@ -41,6 +46,7 @@ def get_deprecated_galaxy_files(): return deprecated_galaxy_files + if __name__ == "__main__": start_time = time.time() universe = Universe() @@ -56,15 +62,20 @@ if __name__ == "__main__": for galaxy in galaxies_fnames: with open(os.path.join(CLUSTER_PATH, galaxy)) as fr: galaxy_json = json.load(fr) - universe.add_galaxy(galaxy_name=galaxy_json["name"], json_file_name=galaxy, authors=galaxy_json["authors"], description=galaxy_json["description"]) + universe.add_galaxy( + galaxy_name=galaxy_json["name"], + json_file_name=galaxy, + authors=galaxy_json["authors"], + description=galaxy_json["description"], + ) for cluster in galaxy_json["values"]: universe.add_cluster( - galaxy_name=galaxy_json.get("name", None), - uuid=cluster.get("uuid", None), - description=cluster.get("description", None), - value=cluster.get("value", None), - meta=cluster.get("meta", None) - ) + galaxy_name=galaxy_json.get("name", None), + uuid=cluster.get("uuid", None), + description=cluster.get("description", None), + value=cluster.get("value", None), + meta=cluster.get("meta", None), + ) # Define the relationships between clusters for galaxy in galaxies_fnames: @@ -73,7 +84,9 @@ if __name__ == "__main__": for cluster in galaxy_json["values"]: if "related" in cluster: for related in cluster["related"]: - universe.define_relationship(cluster["uuid"], related["dest-uuid"]) + universe.define_relationship( + cluster["uuid"], related["dest-uuid"] + ) tasks = [] for galaxy_name, galaxy in universe.galaxies.items(): @@ -93,26 +106,56 @@ if __name__ == "__main__": # Write output if not os.path.exists(SITE_PATH): os.mkdir(SITE_PATH) - + index = IndexSite(SITE_PATH) - index.add_content("# MISP Galaxy\n\nThe MISP galaxy offers a streamlined approach for representing large entities, known as clusters, which can be linked to MISP events or attributes. Each cluster consists of one or more elements, represented as key-value pairs. MISP galaxy comes with a default knowledge base, encompassing areas like Threat Actors, Tools, Ransomware, and ATT&CK matrices. However, users have the flexibility to modify, update, replace, or share these elements according to their needs.\n\nClusters and vocabularies within MISP galaxy can be utilized in their original form or as a foundational knowledge base. The distribution settings for each cluster can be adjusted, allowing for either restricted or wide dissemination.\n\nAdditionally, MISP galaxies enable the representation of existing standards like the MITRE ATT&CK™ framework, as well as custom matrices.\n\nThe aim is to provide a core set of clusters for organizations embarking on analysis, which can be further tailored to include localized, private information or additional, shareable data.\n\nClusters serve as an open and freely accessible knowledge base, which can be utilized and expanded within [MISP](https://www.misp-project.org/) or other threat intelligence platforms.\n\n![Overview of the integration of MISP galaxy in the MISP Threat Intelligence Sharing Platform](https://raw.githubusercontent.com/MISP/misp-galaxy/aa41337fd78946a60aef3783f58f337d2342430a/doc/images/galaxy.png)\n\n## Publicly available clusters\n") + index.add_content( + "# MISP Galaxy\n\nThe MISP galaxy offers a streamlined approach for representing large entities, known as clusters, which can be linked to MISP events or attributes. Each cluster consists of one or more elements, represented as key-value pairs. MISP galaxy comes with a default knowledge base, encompassing areas like Threat Actors, Tools, Ransomware, and ATT&CK matrices. However, users have the flexibility to modify, update, replace, or share these elements according to their needs.\n\nClusters and vocabularies within MISP galaxy can be utilized in their original form or as a foundational knowledge base. The distribution settings for each cluster can be adjusted, allowing for either restricted or wide dissemination.\n\nAdditionally, MISP galaxies enable the representation of existing standards like the MITRE ATT&CK™ framework, as well as custom matrices.\n\nThe aim is to provide a core set of clusters for organizations embarking on analysis, which can be further tailored to include localized, private information or additional, shareable data.\n\nClusters serve as an open and freely accessible knowledge base, which can be utilized and expanded within [MISP](https://www.misp-project.org/) or other threat intelligence platforms.\n\n![Overview of the integration of MISP galaxy in the MISP Threat Intelligence Sharing Platform](https://raw.githubusercontent.com/MISP/misp-galaxy/aa41337fd78946a60aef3783f58f337d2342430a/doc/images/galaxy.png)\n\n## Publicly available clusters\n" + ) index.add_toc(universe.galaxies.values()) - index.add_content("## Statistics\n\nYou can find some statistics about MISP galaxies [here](./statistics.md).\n\n") - index.add_content("# Contributing\n\nIn the dynamic realm of threat intelligence, a variety of models and approaches exist to systematically organize, categorize, and delineate threat actors, hazards, or activity groups. We embrace innovative methodologies for articulating threat intelligence. The galaxy model is particularly versatile, enabling you to leverage and integrate methodologies that you trust and are already utilizing within your organization or community.\n\nWe encourage collaboration and contributions to the [MISP Galaxy JSON files](https://github.com/MISP/misp-galaxy/). Feel free to fork the project, enhance existing elements or clusters, or introduce new ones. Your insights are valuable - share them with us through a pull-request.\n") + index.add_content( + "## Statistics\n\nYou can find some statistics about MISP galaxies [here](./statistics.md).\n\n" + ) + index.add_content( + "# Contributing\n\nIn the dynamic realm of threat intelligence, a variety of models and approaches exist to systematically organize, categorize, and delineate threat actors, hazards, or activity groups. We embrace innovative methodologies for articulating threat intelligence. The galaxy model is particularly versatile, enabling you to leverage and integrate methodologies that you trust and are already utilizing within your organization or community.\n\nWe encourage collaboration and contributions to the [MISP Galaxy JSON files](https://github.com/MISP/misp-galaxy/). Feel free to fork the project, enhance existing elements or clusters, or introduce new ones. Your insights are valuable - share them with us through a pull-request.\n" + ) index.write_entry() statistics = StatisticsSite(SITE_PATH) - statistics.add_cluster_statistics(len([cluster for galaxy in universe.galaxies.values() for cluster in galaxy.clusters.values()]), len(universe.private_clusters)) + statistics.add_content("# MISP Galaxy Statistics\n\n") + statistics.add_cluster_statistics( + len( + [ + cluster + for galaxy in universe.galaxies.values() + for cluster in galaxy.clusters.values() + ] + ), + len(universe.private_clusters), + ) statistics.add_galaxy_statistics(universe.galaxies.values()) - statistics.add_relation_statistics([cluster for galaxy in universe.galaxies.values() for cluster in galaxy.clusters.values()]) - statistics.add_synonym_statistics([cluster for galaxy in universe.galaxies.values() for cluster in galaxy.clusters.values()]) + statistics.add_relation_statistics( + [ + cluster + for galaxy in universe.galaxies.values() + for cluster in galaxy.clusters.values() + ] + ) + statistics.add_synonym_statistics( + [ + cluster + for galaxy in universe.galaxies.values() + for cluster in galaxy.clusters.values() + ] + ) statistics.write_entry() for galaxy in universe.galaxies.values(): galaxy.write_entry(SITE_PATH) for galaxy in universe.galaxies.values(): - galaxy_path = os.path.join(SITE_PATH, f"{galaxy.json_file_name}".replace(".json", "")) + galaxy_path = os.path.join( + SITE_PATH, f"{galaxy.json_file_name}".replace(".json", "") + ) if not os.path.exists(galaxy_path): os.mkdir(galaxy_path) relation_path = os.path.join(galaxy_path, "relations") @@ -121,8 +164,9 @@ if __name__ == "__main__": with open(os.path.join(relation_path, ".pages"), "w") as index: index.write(f"hide: true\n") - with ThreadPoolExecutor(max_workers=(multiprocessing.cpu_count() * 4)) as executor: + with ThreadPoolExecutor( + max_workers=(multiprocessing.cpu_count() * 4) + ) as executor: executor.map(write_relations_table, galaxy.clusters.values()) - print(f"Finished in {time.time() - start_time} seconds") diff --git a/tools/mkdocs/modules/cluster.py b/tools/mkdocs/modules/cluster.py index 7919679..739429b 100644 --- a/tools/mkdocs/modules/cluster.py +++ b/tools/mkdocs/modules/cluster.py @@ -1,5 +1,6 @@ import validators + class Cluster: def __init__(self, uuid, galaxy, description=None, value=None, meta=None): self.uuid = uuid @@ -8,9 +9,9 @@ class Cluster: self.meta = meta self.galaxy = galaxy # Reference to the Galaxy object this cluster belongs to - self.outbound_relationships = set() - self.inbound_relationships = set() - self.relationships = set() + self.outbound_relationships = set() + self.inbound_relationships = set() + self.relationships = set() def add_outbound_relationship(self, cluster): self.outbound_relationships.add(cluster) @@ -32,7 +33,7 @@ class Cluster: if self.relationships: entry += self._create_related_entry() return entry - + def _create_title_entry(self): entry = "" entry += f"## {self.value}\n" @@ -106,4 +107,4 @@ class Cluster: entry += f'??? info "Related clusters"\n' entry += f"\n" entry += f" To see the related clusters, click [here](./relations/{self.uuid}.md).\n" - return entry \ No newline at end of file + return entry diff --git a/tools/mkdocs/modules/galaxy.py b/tools/mkdocs/modules/galaxy.py index 997ac9d..2de8ae4 100644 --- a/tools/mkdocs/modules/galaxy.py +++ b/tools/mkdocs/modules/galaxy.py @@ -2,8 +2,15 @@ from modules.cluster import Cluster from typing import List import os + class Galaxy: - def __init__(self, galaxy_name: str, json_file_name: str, authors: List[str], description: str): + def __init__( + self, + galaxy_name: str, + json_file_name: str, + authors: List[str], + description: str, + ): self.galaxy_name = galaxy_name self.json_file_name = json_file_name self.authors = authors @@ -13,7 +20,9 @@ class Galaxy: def add_cluster(self, uuid, description, value, meta): if uuid not in self.clusters: - self.clusters[uuid] = Cluster(uuid=uuid, galaxy=self, description=description, value=value, meta=meta) + self.clusters[uuid] = Cluster( + uuid=uuid, galaxy=self, description=description, value=value, meta=meta + ) def write_entry(self, path): galaxy_path = os.path.join(path, f"{self.json_file_name}".replace(".json", "")) @@ -30,7 +39,7 @@ class Galaxy: entry += self._create_authors_entry() entry += self._create_clusters_entry() return entry - + def _create_metadata_entry(self): entry = "" entry += "---\n" @@ -66,4 +75,4 @@ class Galaxy: entry = "" for cluster in self.clusters.values(): entry += cluster.generate_entry() - return entry \ No newline at end of file + return entry diff --git a/tools/mkdocs/modules/site.py b/tools/mkdocs/modules/site.py index 2938874..5306b21 100644 --- a/tools/mkdocs/modules/site.py +++ b/tools/mkdocs/modules/site.py @@ -2,6 +2,7 @@ import os from utils.helper import create_bar_chart, get_top_x, create_pie_chart + class Site: def __init__(self, path, name) -> None: self.path = path @@ -17,6 +18,7 @@ class Site: with open(os.path.join(self.path, self.name), "w") as index: index.write(self.content) + class IndexSite(Site): def __init__(self, path) -> None: super().__init__(path=path, name="index.md") @@ -27,6 +29,7 @@ class IndexSite(Site): self.add_content(f"- [{galaxy.galaxy_name}](./{galaxy_folder}/index.md)\n") self.add_content("\n") + class StatisticsSite(Site): def __init__(self, path) -> None: super().__init__(path=path, name="statistics.md") @@ -37,15 +40,28 @@ class StatisticsSite(Site): flop_20 = get_top_x(galaxy_cluster_count, 20, False) self.add_content(f"# Galaxy statistics\n") self.add_content(f"## Galaxies with the most clusters\n\n") - self.add_content(create_bar_chart(x_axis="Galaxy", y_axis="Count", values=top_20, galaxy=True)) + self.add_content( + create_bar_chart( + x_axis="Galaxy", y_axis="Count", values=top_20, galaxy=True + ) + ) self.add_content(f"## Galaxies with the least clusters\n\n") - self.add_content(create_bar_chart(x_axis="Galaxy", y_axis="Count", values=flop_20, galaxy=True)) + self.add_content( + create_bar_chart( + x_axis="Galaxy", y_axis="Count", values=flop_20, galaxy=True + ) + ) def add_cluster_statistics(self, public_clusters, private_clusters): - values = {"Public clusters": public_clusters, "Private clusters": private_clusters} + values = { + "Public clusters": public_clusters, + "Private clusters": private_clusters, + } self.add_content(f"# Cluster statistics\n") self.add_content(f"## Number of clusters\n") - self.add_content(f"Here you can find the total number of clusters including public and private clusters.The number of public clusters has been calculated based on the number of unique Clusters in the MISP galaxy JSON files. The number of private clusters could only be approximated based on the number of relations to non-existing clusters. Therefore the number of private clusters is not accurate and only an approximation.\n\n") + self.add_content( + f"Here you can find the total number of clusters including public and private clusters.The number of public clusters has been calculated based on the number of unique Clusters in the MISP galaxy JSON files. The number of private clusters could only be approximated based on the number of relations to non-existing clusters. Therefore the number of private clusters is not accurate and only an approximation.\n\n" + ) self.add_content(create_pie_chart(sector="Type", unit="Count", values=values)) def add_relation_statistics(self, clusters): @@ -62,14 +78,31 @@ class StatisticsSite(Site): top_20 = get_top_x(cluster_relations, 20) flop_20 = get_top_x(cluster_relations, 20, False) self.add_content(f"# Relation statistics\n") - self.add_content(f"Here you can find the total number of relations including public and private relations. The number includes relations between public clusters and relations between public and private clusters. Therefore relatons between private clusters are not included in the statistics.\n\n") + self.add_content( + f"Here you can find the total number of relations including public and private relations. The number includes relations between public clusters and relations between public and private clusters. Therefore relatons between private clusters are not included in the statistics.\n\n" + ) self.add_content(f"## Number of relations\n\n") - self.add_content(create_pie_chart(sector="Type", unit="Count", values={"Public relations": public_relations, "Private relations": private_relations})) - self.add_content(f"**Average number of relations per cluster**: {int(sum(cluster_relations.values()) / len(cluster_relations))}\n") + self.add_content( + create_pie_chart( + sector="Type", + unit="Count", + values={ + "Public relations": public_relations, + "Private relations": private_relations, + }, + ) + ) + self.add_content( + f"**Average number of relations per cluster**: {int(sum(cluster_relations.values()) / len(cluster_relations))}\n" + ) self.add_content(f"## Cluster with the most relations\n\n") - self.add_content(create_bar_chart(x_axis="Cluster", y_axis="Count", values=top_20)) + self.add_content( + create_bar_chart(x_axis="Cluster", y_axis="Count", values=top_20) + ) self.add_content(f"## Cluster with the least relations\n\n") - self.add_content(create_bar_chart(x_axis="Cluster", y_axis="Count", values=flop_20)) + self.add_content( + create_bar_chart(x_axis="Cluster", y_axis="Count", values=flop_20) + ) def add_synonym_statistics(self, clusters): synonyms = {} @@ -79,4 +112,6 @@ class StatisticsSite(Site): top_20 = get_top_x(synonyms, 20) self.add_content(f"# Synonym statistics\n") self.add_content(f"## Cluster with the most synonyms\n\n") - self.add_content(create_bar_chart(x_axis="Cluster", y_axis="Count", values=top_20)) + self.add_content( + create_bar_chart(x_axis="Cluster", y_axis="Count", values=top_20) + ) diff --git a/tools/mkdocs/modules/universe.py b/tools/mkdocs/modules/universe.py index a22f48c..a470395 100644 --- a/tools/mkdocs/modules/universe.py +++ b/tools/mkdocs/modules/universe.py @@ -12,11 +12,18 @@ class Universe: def add_galaxy(self, galaxy_name, json_file_name, authors, description): if galaxy_name not in self.galaxies: - self.galaxies[galaxy_name] = Galaxy(galaxy_name=galaxy_name, json_file_name=json_file_name, authors=authors, description=description) + self.galaxies[galaxy_name] = Galaxy( + galaxy_name=galaxy_name, + json_file_name=json_file_name, + authors=authors, + description=description, + ) def add_cluster(self, galaxy_name, uuid, description, value, meta): if galaxy_name in self.galaxies: - self.galaxies[galaxy_name].add_cluster(uuid=uuid, description=description, value=value, meta=meta) + self.galaxies[galaxy_name].add_cluster( + uuid=uuid, description=description, value=value, meta=meta + ) def define_relationship(self, cluster_a_id, cluster_b_id): cluster_a = None @@ -41,17 +48,25 @@ class Universe: else: if cluster_a: # private_cluster = self.add_cluster(uuid=cluster_b_id, galaxy_name="Unknown", description=None, value="Private Cluster", meta=None) - private_cluster = Cluster(uuid=cluster_b_id, galaxy=None, description=None, value="Private Cluster", meta=None) + private_cluster = Cluster( + uuid=cluster_b_id, + galaxy=None, + description=None, + value="Private Cluster", + meta=None, + ) self.private_clusters[cluster_b_id] = private_cluster cluster_a.add_outbound_relationship(private_cluster) else: raise ValueError(f"Cluster {cluster_a} not found in any galaxy") - + def get_relationships_with_levels(self, start_cluster): def bfs_with_undirected_relationships(start_cluster): visited = set() # Tracks whether a cluster has been visited - relationships = defaultdict(lambda: float('inf')) # Tracks the lowest level for each cluster pair + relationships = defaultdict( + lambda: float("inf") + ) # Tracks the lowest level for each cluster pair queue = deque([(start_cluster, 0)]) # Queue of (cluster, level) @@ -62,22 +77,27 @@ class Universe: # Process all relationships regardless of direction if self.add_inbound_relationship: - neighbors = current_cluster.outbound_relationships.union(current_cluster.inbound_relationships) + neighbors = current_cluster.outbound_relationships.union( + current_cluster.inbound_relationships + ) else: neighbors = current_cluster.outbound_relationships for neighbor in neighbors: link = frozenset([current_cluster, neighbor]) if level + 1 < relationships[link]: relationships[link] = level + 1 - if neighbor not in visited and neighbor.value != "Private Cluster": + if ( + neighbor not in visited + and neighbor.value != "Private Cluster" + ): queue.append((neighbor, level + 1)) - + # Convert the defaultdict to a list of tuples, ignoring direction processed_relationships = [] for link, lvl in relationships.items(): # Extract clusters from the frozenset; direction is irrelevant clusters = list(link) - + # Arbitrarily choose the first cluster as 'source' for consistency if clusters[0].value == "Private Cluster": processed_relationships.append((clusters[1], clusters[0], lvl)) @@ -86,4 +106,4 @@ class Universe: return processed_relationships - return bfs_with_undirected_relationships(start_cluster) \ No newline at end of file + return bfs_with_undirected_relationships(start_cluster) diff --git a/tools/mkdocs/utils/helper.py b/tools/mkdocs/utils/helper.py index c043fe7..73c257b 100644 --- a/tools/mkdocs/utils/helper.py +++ b/tools/mkdocs/utils/helper.py @@ -1,5 +1,6 @@ import operator + def get_top_x(dict, x, big_to_small=True): sorted_dict = sorted( dict.items(), key=operator.itemgetter(1), reverse=big_to_small @@ -7,6 +8,7 @@ def get_top_x(dict, x, big_to_small=True): top_x = {key: value for key, value in sorted_dict} return top_x + def name_to_section(name): placeholder = "__TMP__" return ( @@ -18,6 +20,7 @@ def name_to_section(name): .replace(placeholder, "-") ) # Replace the placeholder with "-" + def create_bar_chart(x_axis, y_axis, values, log=False, galaxy=False): if not log: chart = f"| No. | {x_axis} | {y_axis} {{ .bar-chart }}|\n" @@ -32,6 +35,7 @@ def create_bar_chart(x_axis, y_axis, values, log=False, galaxy=False): chart += "\n" return chart + def create_pie_chart(sector, unit, values): chart = f"| No. | {sector} | {unit} {{ .pie-chart }}|\n" chart += f"|----|--------|-------|\n" @@ -40,11 +44,11 @@ def create_pie_chart(sector, unit, values): chart += "\n" return chart + def cluster_transform_to_link(cluster, uuid=False): placeholder = "__TMP__" section = ( - cluster - .value.lower() + cluster.value.lower() .replace(" - ", placeholder) # Replace " - " first .replace(" ", "-") .replace("/", "") @@ -57,10 +61,12 @@ def cluster_transform_to_link(cluster, uuid=False): else: return f"[{cluster.value}](../../{galaxy_folder}/index.md#{section})" + def galaxy_transform_to_link(galaxy): galaxy_folder = galaxy.json_file_name.replace(".json", "") return f"[{galaxy.galaxy_name}](../../{galaxy_folder}/index.md)" + def generate_relations_table(relationships): markdown = "|Cluster A | Galaxy A | Cluster B | Galaxy B | Level { .graph } |\n" markdown += "| --- | --- | --- | --- | --- |\n" @@ -71,4 +77,4 @@ def generate_relations_table(relationships): markdown += f"{cluster_transform_to_link(from_cluster, uuid=True)} | {galaxy_transform_to_link(from_galaxy)} | {cluster_transform_to_link(to_cluster, uuid=True)} | {galaxy_transform_to_link(to_galaxy)} | {level}\n" else: markdown += f"{cluster_transform_to_link(from_cluster, uuid=True)} | {galaxy_transform_to_link(from_galaxy)} | {to_cluster.value} | Unknown | {level}\n" - return markdown \ No newline at end of file + return markdown