chg: [tools] clean up the Python script that generates the asciidoctor files

2024-03-04 10:06:48 +01:00 · 2024-03-04 10:06:48 +01:00 · 5fc23d4795
parent a62a3bdad7
commit 5fc23d4795
1 changed file with 207 additions and 83 deletions
--- a/tools/machinetag.py
+++ b/tools/machinetag.py
@ -32,25 +32,35 @@ import argparse
 import os
 import sys

-skip_list = ['death-possibilities', 'poison-taxonomy', 'doping-substances']
+skip_list = ["death-possibilities", "poison-taxonomy", "doping-substances"]
 taxonomies = []

 # Get our current directory from file location
 thisDir = os.path.dirname(__file__)

-argParser = argparse.ArgumentParser(description='Dump Machine Tags (Triple Tags) from MISP taxonomies', epilog='Available taxonomies are {0}'.format(taxonomies))
-argParser.add_argument('-e', action='store_true', help='Include expanded tags')
-argParser.add_argument('-a', action='store_true', help='Generate asciidoctor document from MISP taxonomies')
-argParser.add_argument('-v', action='store_true', help='Include descriptions')
-argParser.add_argument('-n', default=False, help='Show only the specified namespace')
-argParser.add_argument('--disable-skip-list', default=False, action='store_true', help='disable default skip list')
+argParser = argparse.ArgumentParser(
+    description="Dump Machine Tags (Triple Tags) from MISP taxonomies",
+    epilog="Available taxonomies are {0}".format(taxonomies),
+)
+argParser.add_argument("-e", action="store_true", help="Include expanded tags")
+argParser.add_argument(
+    "-a", action="store_true", help="Generate asciidoctor document from MISP taxonomies"
+)
+argParser.add_argument("-v", action="store_true", help="Include descriptions")
+argParser.add_argument("-n", default=False, help="Show only the specified namespace")
+argParser.add_argument(
+    "--disable-skip-list",
+    default=False,
+    action="store_true",
+    help="disable default skip list",
+)
 args = argParser.parse_args()

 if args.disable_skip_list:
-    skip_list = ''
+    skip_list = ""

-for folder in os.listdir(os.path.join(thisDir, '../')):
-    if os.path.isfile(os.path.join(thisDir, '../', folder, 'machinetag.json')):
+for folder in os.listdir(os.path.join(thisDir, "../")):
+    if os.path.isfile(os.path.join(thisDir, "../", folder, "machinetag.json")):
        if folder in skip_list:
            continue
        taxonomies.append(folder)
@ -58,23 +68,35 @@ for folder in os.listdir(os.path.join(thisDir, '../')):
 taxonomies.sort()


-doc = ''
+doc = ""
 if args.a:
    dedication = "\n[dedication]\n== Funding and Support\nThe MISP project is financially and resource supported by https://www.circl.lu/[CIRCL Computer Incident Response Center Luxembourg ].\n\nimage:{images-misp}logo.png[CIRCL logo]\n\nA CEF (Connecting Europe Facility) funding under CEF-TC-2016-3 - Cyber Security has been granted from 1st September 2017 until 31th August 2019 as ***Improving MISP as building blocks for next-generation information sharing***.\n\nimage:{images-misp}en_cef.png[CEF funding]\n\nIf you are interested to co-fund projects around MISP, feel free to get in touch with us.\n\n"
    doc = doc + ":toc: right\n"
    doc = doc + ":toclevels: 1\n"
    doc = doc + ":icons: font\n"
-    doc = doc + ":images-cdn: https://raw.githubusercontent.com/MISP/MISP/2.4/INSTALL/logos/\n"
+    doc = (
+        doc
+        + ":images-cdn: https://raw.githubusercontent.com/MISP/MISP/2.4/INSTALL/logos/\n"
+    )
    doc = doc + ":images-misp: https://www.misp-project.org/assets/images/\n"
    doc = doc + "= MISP taxonomies and classification as machine tags\n\n"
    doc = doc + "= Introduction\n"
    doc = doc + "\nimage::{images-cdn}misp-logo.png[MISP logo]\n"
-    doc = doc + "The MISP threat sharing platform is a free and open source software helping information sharing of threat intelligence including cyber security indicators, financial fraud or counter-terrorism information. The MISP project includes multiple sub-projects to support the operational requirements of analysts and improve the overall quality of information shared.\n\n"
+    doc = (
+        doc
+        + "The MISP threat sharing platform is a free and open source software helping information sharing of threat intelligence including cyber security indicators, financial fraud or counter-terrorism information. The MISP project includes multiple sub-projects to support the operational requirements of analysts and improve the overall quality of information shared.\n\n"
+    )
    doc = doc + ""
-    doc = "{} {} {} {}".format(doc, "\nTaxonomies that can be used in MISP (2.4) and other information sharing tool and expressed in Machine Tags (Triple Tags).",
-                               "A machine tag is composed of a namespace (MUST), a predicate (MUST) and an (OPTIONAL) value.",
-                               "Machine tags are often called triple tag due to their format.\n")
-    doc = doc + "The following document is generated from the machine-readable JSON describing the https://github.com/MISP/misp-taxonomies[MISP taxonomies]."
+    doc = "{} {} {} {}".format(
+        doc,
+        "\nTaxonomies that can be used in MISP (2.4) and other information sharing tool and expressed in Machine Tags (Triple Tags).",
+        "A machine tag is composed of a namespace (MUST), a predicate (MUST) and an (OPTIONAL) value.",
+        "Machine tags are often called triple tag due to their format.\n",
+    )
+    doc = (
+        doc
+        + "The following document is generated from the machine-readable JSON describing the https://github.com/MISP/misp-taxonomies[MISP taxonomies]."
+    )
    doc = doc + "\n\n"
    doc = doc + "<<<\n"
    doc = doc + dedication
@ -87,31 +109,37 @@ if args.n:
    taxonomies.append(args.n)


-def asciidoc(content=False, adoc=doc, t='title', toplevel=False):
+def asciidoc(content=False, adoc=doc, t="title", toplevel=False):
    if not args.a:
        return False
    adoc = adoc + "\n"
-    if t == 'title':
-        content = '==== ' + content
-    elif t == 'predicate':
-        content = '=== ' + content
-    elif t == 'namespace':
-        content = '== ' + content + '\n'
-        content = "{}\n{}{} {}{}{} {}".format(content, 'NOTE: ', namespace, 'namespace available in JSON format at https://github.com/MISP/misp-taxonomies/blob/main/',
-                                                namespace, '/machinetag.json[*this location*]. The JSON format can be freely reused in your application',
-                                                'or automatically enabled in https://www.github.com/MISP/MISP[MISP] taxonomy.')
-    elif t == 'description' and toplevel is True:
+    if t == "title":
+        content = "==== " + content
+    elif t == "predicate":
+        content = "=== " + content
+    elif t == "namespace":
+        content = "== " + content + "\n"
+        content = "{}\n{}{} {}{}{} {}".format(
+            content,
+            "NOTE: ",
+            namespace,
+            "namespace available in JSON format at https://github.com/MISP/misp-taxonomies/blob/main/",
+            namespace,
+            "/machinetag.json[*this location*]. The JSON format can be freely reused in your application",
+            "or automatically enabled in https://www.github.com/MISP/MISP[MISP] taxonomy.",
+        )
+    elif t == "description" and toplevel is True:
        content = "\n{} \n".format(content)
-    elif t == 'description' and toplevel is False:
+    elif t == "description" and toplevel is False:
        try:
            (n, value) = content.split(":", 1)
            content = "\n{} \n".format(value)
        except:
            content = "\n{} \n".format(content)
-    elif t == 'numerical_value':
+    elif t == "numerical_value":
        (n, value) = content.split(":", 1)
-        content = "\nAssociated numerical value=\"{}\" \n".format(value)
-    elif t == 'exclusive':
+        content = '\nAssociated numerical value="{}" \n'.format(value)
+    elif t == "exclusive":
        (n, value) = content.split(":", 1)
        if n:
            content = "\nIMPORTANT: Exclusive flag set which means the values or predicate below must be set exclusively.\n"
@ -124,9 +152,9 @@ def machineTag(namespace=False, predicate=False, value=None):
    if namespace is False or predicate is False:
        return None
    if value is None:
-        return (u'{0}:{1}'.format(namespace, predicate))
+        return "{0}:{1}".format(namespace, predicate)
    else:
-        return (u'{0}:{1}=\"{2}\"'.format(namespace, predicate, value))
+        return '{0}:{1}="{2}"'.format(namespace, predicate, value)


 for taxonomy in taxonomies:
@ -136,70 +164,166 @@ for taxonomy in taxonomies:
    filename = os.path.join(thisDir, "../", taxonomy, "machinetag.json")
    with open(filename) as fp:
        t = json.load(fp)
-    namespace = t['namespace']
-    if t.get('expanded'):
-        expanded_namespace = t['expanded']
+    namespace = t["namespace"]
+    if t.get("expanded"):
+        expanded_namespace = t["expanded"]
    else:
        expanded_namespace = namespace
    if args.a:
-        doc = asciidoc(content=t['namespace'], adoc=doc, t='namespace')
-        doc = asciidoc(content=t['description'], adoc=doc, t='description', toplevel = True)
-        if t.get('exclusive'):
-                doc = asciidoc(content=machineTag(namespace=namespace, predicate=t['exclusive']), adoc=doc, t='exclusive')
+        doc = asciidoc(content=t["namespace"], adoc=doc, t="namespace")
+        doc = asciidoc(
+            content=t["description"], adoc=doc, t="description", toplevel=True
+        )
+        if t.get("exclusive"):
+            doc = asciidoc(
+                content=machineTag(namespace=namespace, predicate=t["exclusive"]),
+                adoc=doc,
+                t="exclusive",
+            )
    if args.v:
-        print('{0}'.format(t['description']))
-    for predicate in t['predicates']:
+        print("{0}".format(t["description"]))
+    for predicate in t["predicates"]:
        if args.a:
-            doc = asciidoc(content=predicate['value'], adoc=doc, t='predicate')
-            if predicate.get('description'):
-                doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['description']), adoc=doc, t='description')
-            if predicate.get('exclusive'):
-                doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['exclusive']), adoc=doc, t='exclusive')
+            doc = asciidoc(content=predicate["value"], adoc=doc, t="predicate")
+            if predicate.get("description"):
+                doc = asciidoc(
+                    content=machineTag(
+                        namespace=namespace, predicate=predicate["description"]
+                    ),
+                    adoc=doc,
+                    t="description",
+                )
+            if predicate.get("exclusive"):
+                doc = asciidoc(
+                    content=machineTag(
+                        namespace=namespace, predicate=predicate["exclusive"]
+                    ),
+                    adoc=doc,
+                    t="exclusive",
+                )

-        if t.get('values') is None:
+        if t.get("values") is None:
            if args.a:
-                doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['value']), adoc=doc)
-                doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['expanded']), adoc=doc, t='description')
-                if predicate.get('description'):
-                    doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['description']), adoc=doc, t='description')
-                if predicate.get('numerical_value'):
-                    doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['numerical_value']), adoc=doc, t='description')
-                if predicate.get('exclusive'):
-                    doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['exclusive']), adoc=doc, t='exclusive')
+                doc = asciidoc(
+                    content=machineTag(
+                        namespace=namespace, predicate=predicate["value"]
+                    ),
+                    adoc=doc,
+                )
+                doc = asciidoc(
+                    content=machineTag(
+                        namespace=namespace, predicate=predicate["expanded"]
+                    ),
+                    adoc=doc,
+                    t="description",
+                )
+                if predicate.get("description"):
+                    doc = asciidoc(
+                        content=machineTag(
+                            namespace=namespace, predicate=predicate["description"]
+                        ),
+                        adoc=doc,
+                        t="description",
+                    )
+                if predicate.get("numerical_value"):
+                    doc = asciidoc(
+                        content=machineTag(
+                            namespace=namespace, predicate=predicate["numerical_value"]
+                        ),
+                        adoc=doc,
+                        t="description",
+                    )
+                if predicate.get("exclusive"):
+                    doc = asciidoc(
+                        content=machineTag(
+                            namespace=namespace, predicate=predicate["exclusive"]
+                        ),
+                        adoc=doc,
+                        t="exclusive",
+                    )
            else:
-                print(machineTag(namespace=namespace, predicate=predicate['value']))
+                print(machineTag(namespace=namespace, predicate=predicate["value"]))
            if args.e:
-                print("--> " + machineTag(namespace=expanded_namespace, predicate=predicate['expanded']))
-                if predicate.get('description'):
-                    print("--> " + predicate['description'])
+                print(
+                    "--> "
+                    + machineTag(
+                        namespace=expanded_namespace, predicate=predicate["expanded"]
+                    )
+                )
+                if predicate.get("description"):
+                    print("--> " + predicate["description"])
        else:
-            for e in t['values']:
-                if e['predicate'] == predicate['value']:
-                    if 'expanded' in predicate:
-                        expanded = predicate['expanded']
-                    for v in e['entry']:
-                        if args.a and 'expanded' in v:
-                            doc = asciidoc(content=machineTag(namespace=namespace, predicate=e['predicate'], value=v['value']), adoc=doc)
-                            doc = asciidoc(content=machineTag(namespace=namespace, predicate=v['expanded']), adoc=doc, t='description')
-                            if 'description' in v:
-                                doc = asciidoc(content=machineTag(namespace=namespace, predicate=v['description']), adoc=doc, t='description')
-                            if v.get('numerical_value'):
-                                 doc = asciidoc(content=machineTag(namespace=namespace, predicate=v['numerical_value']), adoc=doc, t='numerical_value')
+            for e in t["values"]:
+                if e["predicate"] == predicate["value"]:
+                    if "expanded" in predicate:
+                        expanded = predicate["expanded"]
+                    for v in e["entry"]:
+                        if args.a and "expanded" in v:
+                            doc = asciidoc(
+                                content=machineTag(
+                                    namespace=namespace,
+                                    predicate=e["predicate"],
+                                    value=v["value"],
+                                ),
+                                adoc=doc,
+                            )
+                            doc = asciidoc(
+                                content=machineTag(
+                                    namespace=namespace, predicate=v["expanded"]
+                                ),
+                                adoc=doc,
+                                t="description",
+                            )
+                            if "description" in v:
+                                doc = asciidoc(
+                                    content=machineTag(
+                                        namespace=namespace, predicate=v["description"]
+                                    ),
+                                    adoc=doc,
+                                    t="description",
+                                )
+                            if v.get("numerical_value"):
+                                doc = asciidoc(
+                                    content=machineTag(
+                                        namespace=namespace,
+                                        predicate=v["numerical_value"],
+                                    ),
+                                    adoc=doc,
+                                    t="numerical_value",
+                                )
                        else:
-                            print(machineTag(namespace=namespace, predicate=e['predicate'], value=v['value']))
+                            print(
+                                machineTag(
+                                    namespace=namespace,
+                                    predicate=e["predicate"],
+                                    value=v["value"],
+                                )
+                            )
                        if args.e:
-                            if'expanded' in v:
-                                print("--> " + machineTag(namespace=namespace, predicate=expanded, value=v['expanded']))
+                            if "expanded" in v:
+                                print(
+                                    "--> "
+                                    + machineTag(
+                                        namespace=namespace,
+                                        predicate=expanded,
+                                        value=v["expanded"],
+                                    )
+                                )

-with open('../mapping/mapping.json') as mapping:
+with open("../mapping/mapping.json") as mapping:
    m = json.load(mapping)
-    output = '\n= Mapping of taxonomies\n'
-    output = '{}{}'.format(output, 'Analysts relying on taxonomies don\'t always know the appropriate namespace to use but know which value to use for classification. The MISP mapping taxonomy allows to map a single classification into a series of machine-tag synonyms.\n')
+    output = "\n= Mapping of taxonomies\n"
+    output = "{}{}".format(
+        output,
+        "Analysts relying on taxonomies don't always know the appropriate namespace to use but know which value to use for classification. The MISP mapping taxonomy allows to map a single classification into a series of machine-tag synonyms.\n",
+    )
    for value in sorted(m.keys()):
-        output = '{}{} **{}**{}{}\n'.format(output,'\n.Mapping table - ',value,'\n|===\n|',value)
-        for mapped in m[value]['values']:
-            output = '{}|{}\n'.format(output,mapped)
-        output = '{}|===\n'.format(output)
+        output = "{}{} **{}**{}{}\n".format(
+            output, "\n.Mapping table - ", value, "\n|===\n|", value
+        )
+        for mapped in m[value]["values"]:
+            output = "{}|{}\n".format(output, mapped)
+        output = "{}|===\n".format(output)
    doc = doc + output

 if args.a: