chg: [tools] clean-up python script to generate the asciidoctor files

master^2
Alexandre Dulaunoy 2024-03-04 10:06:48 +01:00
parent a62a3bdad7
commit 5fc23d4795
No known key found for this signature in database
GPG Key ID: 09E2CD4944E6CBCD
1 changed files with 207 additions and 83 deletions

View File

@ -32,25 +32,35 @@ import argparse
import os import os
import sys import sys
skip_list = ['death-possibilities', 'poison-taxonomy', 'doping-substances'] skip_list = ["death-possibilities", "poison-taxonomy", "doping-substances"]
taxonomies = [] taxonomies = []
# Get our current directory from file location # Get our current directory from file location
thisDir = os.path.dirname(__file__) thisDir = os.path.dirname(__file__)
argParser = argparse.ArgumentParser(description='Dump Machine Tags (Triple Tags) from MISP taxonomies', epilog='Available taxonomies are {0}'.format(taxonomies)) argParser = argparse.ArgumentParser(
argParser.add_argument('-e', action='store_true', help='Include expanded tags') description="Dump Machine Tags (Triple Tags) from MISP taxonomies",
argParser.add_argument('-a', action='store_true', help='Generate asciidoctor document from MISP taxonomies') epilog="Available taxonomies are {0}".format(taxonomies),
argParser.add_argument('-v', action='store_true', help='Include descriptions') )
argParser.add_argument('-n', default=False, help='Show only the specified namespace') argParser.add_argument("-e", action="store_true", help="Include expanded tags")
argParser.add_argument('--disable-skip-list', default=False, action='store_true', help='disable default skip list') argParser.add_argument(
"-a", action="store_true", help="Generate asciidoctor document from MISP taxonomies"
)
argParser.add_argument("-v", action="store_true", help="Include descriptions")
argParser.add_argument("-n", default=False, help="Show only the specified namespace")
argParser.add_argument(
"--disable-skip-list",
default=False,
action="store_true",
help="disable default skip list",
)
args = argParser.parse_args() args = argParser.parse_args()
if args.disable_skip_list: if args.disable_skip_list:
skip_list = '' skip_list = ""
for folder in os.listdir(os.path.join(thisDir, '../')): for folder in os.listdir(os.path.join(thisDir, "../")):
if os.path.isfile(os.path.join(thisDir, '../', folder, 'machinetag.json')): if os.path.isfile(os.path.join(thisDir, "../", folder, "machinetag.json")):
if folder in skip_list: if folder in skip_list:
continue continue
taxonomies.append(folder) taxonomies.append(folder)
@ -58,23 +68,35 @@ for folder in os.listdir(os.path.join(thisDir, '../')):
taxonomies.sort() taxonomies.sort()
doc = '' doc = ""
if args.a: if args.a:
dedication = "\n[dedication]\n== Funding and Support\nThe MISP project is financially and resource supported by https://www.circl.lu/[CIRCL Computer Incident Response Center Luxembourg ].\n\nimage:{images-misp}logo.png[CIRCL logo]\n\nA CEF (Connecting Europe Facility) funding under CEF-TC-2016-3 - Cyber Security has been granted from 1st September 2017 until 31th August 2019 as ***Improving MISP as building blocks for next-generation information sharing***.\n\nimage:{images-misp}en_cef.png[CEF funding]\n\nIf you are interested to co-fund projects around MISP, feel free to get in touch with us.\n\n" dedication = "\n[dedication]\n== Funding and Support\nThe MISP project is financially and resource supported by https://www.circl.lu/[CIRCL Computer Incident Response Center Luxembourg ].\n\nimage:{images-misp}logo.png[CIRCL logo]\n\nA CEF (Connecting Europe Facility) funding under CEF-TC-2016-3 - Cyber Security has been granted from 1st September 2017 until 31th August 2019 as ***Improving MISP as building blocks for next-generation information sharing***.\n\nimage:{images-misp}en_cef.png[CEF funding]\n\nIf you are interested to co-fund projects around MISP, feel free to get in touch with us.\n\n"
doc = doc + ":toc: right\n" doc = doc + ":toc: right\n"
doc = doc + ":toclevels: 1\n" doc = doc + ":toclevels: 1\n"
doc = doc + ":icons: font\n" doc = doc + ":icons: font\n"
doc = doc + ":images-cdn: https://raw.githubusercontent.com/MISP/MISP/2.4/INSTALL/logos/\n" doc = (
doc
+ ":images-cdn: https://raw.githubusercontent.com/MISP/MISP/2.4/INSTALL/logos/\n"
)
doc = doc + ":images-misp: https://www.misp-project.org/assets/images/\n" doc = doc + ":images-misp: https://www.misp-project.org/assets/images/\n"
doc = doc + "= MISP taxonomies and classification as machine tags\n\n" doc = doc + "= MISP taxonomies and classification as machine tags\n\n"
doc = doc + "= Introduction\n" doc = doc + "= Introduction\n"
doc = doc + "\nimage::{images-cdn}misp-logo.png[MISP logo]\n" doc = doc + "\nimage::{images-cdn}misp-logo.png[MISP logo]\n"
doc = doc + "The MISP threat sharing platform is a free and open source software helping information sharing of threat intelligence including cyber security indicators, financial fraud or counter-terrorism information. The MISP project includes multiple sub-projects to support the operational requirements of analysts and improve the overall quality of information shared.\n\n" doc = (
doc
+ "The MISP threat sharing platform is a free and open source software helping information sharing of threat intelligence including cyber security indicators, financial fraud or counter-terrorism information. The MISP project includes multiple sub-projects to support the operational requirements of analysts and improve the overall quality of information shared.\n\n"
)
doc = doc + "" doc = doc + ""
doc = "{} {} {} {}".format(doc, "\nTaxonomies that can be used in MISP (2.4) and other information sharing tool and expressed in Machine Tags (Triple Tags).", doc = "{} {} {} {}".format(
"A machine tag is composed of a namespace (MUST), a predicate (MUST) and an (OPTIONAL) value.", doc,
"Machine tags are often called triple tag due to their format.\n") "\nTaxonomies that can be used in MISP (2.4) and other information sharing tool and expressed in Machine Tags (Triple Tags).",
doc = doc + "The following document is generated from the machine-readable JSON describing the https://github.com/MISP/misp-taxonomies[MISP taxonomies]." "A machine tag is composed of a namespace (MUST), a predicate (MUST) and an (OPTIONAL) value.",
"Machine tags are often called triple tag due to their format.\n",
)
doc = (
doc
+ "The following document is generated from the machine-readable JSON describing the https://github.com/MISP/misp-taxonomies[MISP taxonomies]."
)
doc = doc + "\n\n" doc = doc + "\n\n"
doc = doc + "<<<\n" doc = doc + "<<<\n"
doc = doc + dedication doc = doc + dedication
@ -87,31 +109,37 @@ if args.n:
taxonomies.append(args.n) taxonomies.append(args.n)
def asciidoc(content=False, adoc=doc, t='title', toplevel=False): def asciidoc(content=False, adoc=doc, t="title", toplevel=False):
if not args.a: if not args.a:
return False return False
adoc = adoc + "\n" adoc = adoc + "\n"
if t == 'title': if t == "title":
content = '==== ' + content content = "==== " + content
elif t == 'predicate': elif t == "predicate":
content = '=== ' + content content = "=== " + content
elif t == 'namespace': elif t == "namespace":
content = '== ' + content + '\n' content = "== " + content + "\n"
content = "{}\n{}{} {}{}{} {}".format(content, 'NOTE: ', namespace, 'namespace available in JSON format at https://github.com/MISP/misp-taxonomies/blob/main/', content = "{}\n{}{} {}{}{} {}".format(
namespace, '/machinetag.json[*this location*]. The JSON format can be freely reused in your application', content,
'or automatically enabled in https://www.github.com/MISP/MISP[MISP] taxonomy.') "NOTE: ",
elif t == 'description' and toplevel is True: namespace,
"namespace available in JSON format at https://github.com/MISP/misp-taxonomies/blob/main/",
namespace,
"/machinetag.json[*this location*]. The JSON format can be freely reused in your application",
"or automatically enabled in https://www.github.com/MISP/MISP[MISP] taxonomy.",
)
elif t == "description" and toplevel is True:
content = "\n{} \n".format(content) content = "\n{} \n".format(content)
elif t == 'description' and toplevel is False: elif t == "description" and toplevel is False:
try: try:
(n, value) = content.split(":", 1) (n, value) = content.split(":", 1)
content = "\n{} \n".format(value) content = "\n{} \n".format(value)
except: except:
content = "\n{} \n".format(content) content = "\n{} \n".format(content)
elif t == 'numerical_value': elif t == "numerical_value":
(n, value) = content.split(":", 1) (n, value) = content.split(":", 1)
content = "\nAssociated numerical value=\"{}\" \n".format(value) content = '\nAssociated numerical value="{}" \n'.format(value)
elif t == 'exclusive': elif t == "exclusive":
(n, value) = content.split(":", 1) (n, value) = content.split(":", 1)
if n: if n:
content = "\nIMPORTANT: Exclusive flag set which means the values or predicate below must be set exclusively.\n" content = "\nIMPORTANT: Exclusive flag set which means the values or predicate below must be set exclusively.\n"
@ -124,9 +152,9 @@ def machineTag(namespace=False, predicate=False, value=None):
if namespace is False or predicate is False: if namespace is False or predicate is False:
return None return None
if value is None: if value is None:
return (u'{0}:{1}'.format(namespace, predicate)) return "{0}:{1}".format(namespace, predicate)
else: else:
return (u'{0}:{1}=\"{2}\"'.format(namespace, predicate, value)) return '{0}:{1}="{2}"'.format(namespace, predicate, value)
for taxonomy in taxonomies: for taxonomy in taxonomies:
@ -136,70 +164,166 @@ for taxonomy in taxonomies:
filename = os.path.join(thisDir, "../", taxonomy, "machinetag.json") filename = os.path.join(thisDir, "../", taxonomy, "machinetag.json")
with open(filename) as fp: with open(filename) as fp:
t = json.load(fp) t = json.load(fp)
namespace = t['namespace'] namespace = t["namespace"]
if t.get('expanded'): if t.get("expanded"):
expanded_namespace = t['expanded'] expanded_namespace = t["expanded"]
else: else:
expanded_namespace = namespace expanded_namespace = namespace
if args.a: if args.a:
doc = asciidoc(content=t['namespace'], adoc=doc, t='namespace') doc = asciidoc(content=t["namespace"], adoc=doc, t="namespace")
doc = asciidoc(content=t['description'], adoc=doc, t='description', toplevel = True) doc = asciidoc(
if t.get('exclusive'): content=t["description"], adoc=doc, t="description", toplevel=True
doc = asciidoc(content=machineTag(namespace=namespace, predicate=t['exclusive']), adoc=doc, t='exclusive') )
if t.get("exclusive"):
doc = asciidoc(
content=machineTag(namespace=namespace, predicate=t["exclusive"]),
adoc=doc,
t="exclusive",
)
if args.v: if args.v:
print('{0}'.format(t['description'])) print("{0}".format(t["description"]))
for predicate in t['predicates']: for predicate in t["predicates"]:
if args.a: if args.a:
doc = asciidoc(content=predicate['value'], adoc=doc, t='predicate') doc = asciidoc(content=predicate["value"], adoc=doc, t="predicate")
if predicate.get('description'): if predicate.get("description"):
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['description']), adoc=doc, t='description') doc = asciidoc(
if predicate.get('exclusive'): content=machineTag(
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['exclusive']), adoc=doc, t='exclusive') namespace=namespace, predicate=predicate["description"]
),
adoc=doc,
t="description",
)
if predicate.get("exclusive"):
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=predicate["exclusive"]
),
adoc=doc,
t="exclusive",
)
if t.get('values') is None: if t.get("values") is None:
if args.a: if args.a:
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['value']), adoc=doc) doc = asciidoc(
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['expanded']), adoc=doc, t='description') content=machineTag(
if predicate.get('description'): namespace=namespace, predicate=predicate["value"]
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['description']), adoc=doc, t='description') ),
if predicate.get('numerical_value'): adoc=doc,
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['numerical_value']), adoc=doc, t='description') )
if predicate.get('exclusive'): doc = asciidoc(
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['exclusive']), adoc=doc, t='exclusive') content=machineTag(
namespace=namespace, predicate=predicate["expanded"]
),
adoc=doc,
t="description",
)
if predicate.get("description"):
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=predicate["description"]
),
adoc=doc,
t="description",
)
if predicate.get("numerical_value"):
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=predicate["numerical_value"]
),
adoc=doc,
t="description",
)
if predicate.get("exclusive"):
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=predicate["exclusive"]
),
adoc=doc,
t="exclusive",
)
else: else:
print(machineTag(namespace=namespace, predicate=predicate['value'])) print(machineTag(namespace=namespace, predicate=predicate["value"]))
if args.e: if args.e:
print("--> " + machineTag(namespace=expanded_namespace, predicate=predicate['expanded'])) print(
if predicate.get('description'): "--> "
print("--> " + predicate['description']) + machineTag(
namespace=expanded_namespace, predicate=predicate["expanded"]
)
)
if predicate.get("description"):
print("--> " + predicate["description"])
else: else:
for e in t['values']: for e in t["values"]:
if e['predicate'] == predicate['value']: if e["predicate"] == predicate["value"]:
if 'expanded' in predicate: if "expanded" in predicate:
expanded = predicate['expanded'] expanded = predicate["expanded"]
for v in e['entry']: for v in e["entry"]:
if args.a and 'expanded' in v: if args.a and "expanded" in v:
doc = asciidoc(content=machineTag(namespace=namespace, predicate=e['predicate'], value=v['value']), adoc=doc) doc = asciidoc(
doc = asciidoc(content=machineTag(namespace=namespace, predicate=v['expanded']), adoc=doc, t='description') content=machineTag(
if 'description' in v: namespace=namespace,
doc = asciidoc(content=machineTag(namespace=namespace, predicate=v['description']), adoc=doc, t='description') predicate=e["predicate"],
if v.get('numerical_value'): value=v["value"],
doc = asciidoc(content=machineTag(namespace=namespace, predicate=v['numerical_value']), adoc=doc, t='numerical_value') ),
adoc=doc,
)
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=v["expanded"]
),
adoc=doc,
t="description",
)
if "description" in v:
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=v["description"]
),
adoc=doc,
t="description",
)
if v.get("numerical_value"):
doc = asciidoc(
content=machineTag(
namespace=namespace,
predicate=v["numerical_value"],
),
adoc=doc,
t="numerical_value",
)
else: else:
print(machineTag(namespace=namespace, predicate=e['predicate'], value=v['value'])) print(
machineTag(
namespace=namespace,
predicate=e["predicate"],
value=v["value"],
)
)
if args.e: if args.e:
if'expanded' in v: if "expanded" in v:
print("--> " + machineTag(namespace=namespace, predicate=expanded, value=v['expanded'])) print(
"--> "
+ machineTag(
namespace=namespace,
predicate=expanded,
value=v["expanded"],
)
)
with open('../mapping/mapping.json') as mapping: with open("../mapping/mapping.json") as mapping:
m = json.load(mapping) m = json.load(mapping)
output = '\n= Mapping of taxonomies\n' output = "\n= Mapping of taxonomies\n"
output = '{}{}'.format(output, 'Analysts relying on taxonomies don\'t always know the appropriate namespace to use but know which value to use for classification. The MISP mapping taxonomy allows to map a single classification into a series of machine-tag synonyms.\n') output = "{}{}".format(
output,
"Analysts relying on taxonomies don't always know the appropriate namespace to use but know which value to use for classification. The MISP mapping taxonomy allows to map a single classification into a series of machine-tag synonyms.\n",
)
for value in sorted(m.keys()): for value in sorted(m.keys()):
output = '{}{} **{}**{}{}\n'.format(output,'\n.Mapping table - ',value,'\n|===\n|',value) output = "{}{} **{}**{}{}\n".format(
for mapped in m[value]['values']: output, "\n.Mapping table - ", value, "\n|===\n|", value
output = '{}|{}\n'.format(output,mapped) )
output = '{}|===\n'.format(output) for mapped in m[value]["values"]:
output = "{}|{}\n".format(output, mapped)
output = "{}|===\n".format(output)
doc = doc + output doc = doc + output
if args.a: if args.a: