chg: [tools] clean-up python script to generate the asciidoctor files

master^2
Alexandre Dulaunoy 2024-03-04 10:06:48 +01:00
parent a62a3bdad7
commit 5fc23d4795
No known key found for this signature in database
GPG Key ID: 09E2CD4944E6CBCD
1 changed files with 207 additions and 83 deletions

View File

@ -32,25 +32,35 @@ import argparse
import os import os
import sys import sys
skip_list = ['death-possibilities', 'poison-taxonomy', 'doping-substances'] skip_list = ["death-possibilities", "poison-taxonomy", "doping-substances"]
taxonomies = [] taxonomies = []
# Get our current directory from file location # Get our current directory from file location
thisDir = os.path.dirname(__file__) thisDir = os.path.dirname(__file__)
argParser = argparse.ArgumentParser(description='Dump Machine Tags (Triple Tags) from MISP taxonomies', epilog='Available taxonomies are {0}'.format(taxonomies)) argParser = argparse.ArgumentParser(
argParser.add_argument('-e', action='store_true', help='Include expanded tags') description="Dump Machine Tags (Triple Tags) from MISP taxonomies",
argParser.add_argument('-a', action='store_true', help='Generate asciidoctor document from MISP taxonomies') epilog="Available taxonomies are {0}".format(taxonomies),
argParser.add_argument('-v', action='store_true', help='Include descriptions') )
argParser.add_argument('-n', default=False, help='Show only the specified namespace') argParser.add_argument("-e", action="store_true", help="Include expanded tags")
argParser.add_argument('--disable-skip-list', default=False, action='store_true', help='disable default skip list') argParser.add_argument(
"-a", action="store_true", help="Generate asciidoctor document from MISP taxonomies"
)
argParser.add_argument("-v", action="store_true", help="Include descriptions")
argParser.add_argument("-n", default=False, help="Show only the specified namespace")
argParser.add_argument(
"--disable-skip-list",
default=False,
action="store_true",
help="disable default skip list",
)
args = argParser.parse_args() args = argParser.parse_args()
if args.disable_skip_list: if args.disable_skip_list:
skip_list = '' skip_list = ""
for folder in os.listdir(os.path.join(thisDir, '../')): for folder in os.listdir(os.path.join(thisDir, "../")):
if os.path.isfile(os.path.join(thisDir, '../', folder, 'machinetag.json')): if os.path.isfile(os.path.join(thisDir, "../", folder, "machinetag.json")):
if folder in skip_list: if folder in skip_list:
continue continue
taxonomies.append(folder) taxonomies.append(folder)
@ -58,23 +68,35 @@ for folder in os.listdir(os.path.join(thisDir, '../')):
taxonomies.sort() taxonomies.sort()
doc = '' doc = ""
if args.a: if args.a:
dedication = "\n[dedication]\n== Funding and Support\nThe MISP project is financially and resource supported by https://www.circl.lu/[CIRCL Computer Incident Response Center Luxembourg ].\n\nimage:{images-misp}logo.png[CIRCL logo]\n\nA CEF (Connecting Europe Facility) funding under CEF-TC-2016-3 - Cyber Security has been granted from 1st September 2017 until 31th August 2019 as ***Improving MISP as building blocks for next-generation information sharing***.\n\nimage:{images-misp}en_cef.png[CEF funding]\n\nIf you are interested to co-fund projects around MISP, feel free to get in touch with us.\n\n" dedication = "\n[dedication]\n== Funding and Support\nThe MISP project is financially and resource supported by https://www.circl.lu/[CIRCL Computer Incident Response Center Luxembourg ].\n\nimage:{images-misp}logo.png[CIRCL logo]\n\nA CEF (Connecting Europe Facility) funding under CEF-TC-2016-3 - Cyber Security has been granted from 1st September 2017 until 31th August 2019 as ***Improving MISP as building blocks for next-generation information sharing***.\n\nimage:{images-misp}en_cef.png[CEF funding]\n\nIf you are interested to co-fund projects around MISP, feel free to get in touch with us.\n\n"
doc = doc + ":toc: right\n" doc = doc + ":toc: right\n"
doc = doc + ":toclevels: 1\n" doc = doc + ":toclevels: 1\n"
doc = doc + ":icons: font\n" doc = doc + ":icons: font\n"
doc = doc + ":images-cdn: https://raw.githubusercontent.com/MISP/MISP/2.4/INSTALL/logos/\n" doc = (
doc
+ ":images-cdn: https://raw.githubusercontent.com/MISP/MISP/2.4/INSTALL/logos/\n"
)
doc = doc + ":images-misp: https://www.misp-project.org/assets/images/\n" doc = doc + ":images-misp: https://www.misp-project.org/assets/images/\n"
doc = doc + "= MISP taxonomies and classification as machine tags\n\n" doc = doc + "= MISP taxonomies and classification as machine tags\n\n"
doc = doc + "= Introduction\n" doc = doc + "= Introduction\n"
doc = doc + "\nimage::{images-cdn}misp-logo.png[MISP logo]\n" doc = doc + "\nimage::{images-cdn}misp-logo.png[MISP logo]\n"
doc = doc + "The MISP threat sharing platform is a free and open source software helping information sharing of threat intelligence including cyber security indicators, financial fraud or counter-terrorism information. The MISP project includes multiple sub-projects to support the operational requirements of analysts and improve the overall quality of information shared.\n\n" doc = (
doc
+ "The MISP threat sharing platform is a free and open source software helping information sharing of threat intelligence including cyber security indicators, financial fraud or counter-terrorism information. The MISP project includes multiple sub-projects to support the operational requirements of analysts and improve the overall quality of information shared.\n\n"
)
doc = doc + "" doc = doc + ""
doc = "{} {} {} {}".format(doc, "\nTaxonomies that can be used in MISP (2.4) and other information sharing tool and expressed in Machine Tags (Triple Tags).", doc = "{} {} {} {}".format(
doc,
"\nTaxonomies that can be used in MISP (2.4) and other information sharing tool and expressed in Machine Tags (Triple Tags).",
"A machine tag is composed of a namespace (MUST), a predicate (MUST) and an (OPTIONAL) value.", "A machine tag is composed of a namespace (MUST), a predicate (MUST) and an (OPTIONAL) value.",
"Machine tags are often called triple tag due to their format.\n") "Machine tags are often called triple tag due to their format.\n",
doc = doc + "The following document is generated from the machine-readable JSON describing the https://github.com/MISP/misp-taxonomies[MISP taxonomies]." )
doc = (
doc
+ "The following document is generated from the machine-readable JSON describing the https://github.com/MISP/misp-taxonomies[MISP taxonomies]."
)
doc = doc + "\n\n" doc = doc + "\n\n"
doc = doc + "<<<\n" doc = doc + "<<<\n"
doc = doc + dedication doc = doc + dedication
@ -87,31 +109,37 @@ if args.n:
taxonomies.append(args.n) taxonomies.append(args.n)
def asciidoc(content=False, adoc=doc, t='title', toplevel=False): def asciidoc(content=False, adoc=doc, t="title", toplevel=False):
if not args.a: if not args.a:
return False return False
adoc = adoc + "\n" adoc = adoc + "\n"
if t == 'title': if t == "title":
content = '==== ' + content content = "==== " + content
elif t == 'predicate': elif t == "predicate":
content = '=== ' + content content = "=== " + content
elif t == 'namespace': elif t == "namespace":
content = '== ' + content + '\n' content = "== " + content + "\n"
content = "{}\n{}{} {}{}{} {}".format(content, 'NOTE: ', namespace, 'namespace available in JSON format at https://github.com/MISP/misp-taxonomies/blob/main/', content = "{}\n{}{} {}{}{} {}".format(
namespace, '/machinetag.json[*this location*]. The JSON format can be freely reused in your application', content,
'or automatically enabled in https://www.github.com/MISP/MISP[MISP] taxonomy.') "NOTE: ",
elif t == 'description' and toplevel is True: namespace,
"namespace available in JSON format at https://github.com/MISP/misp-taxonomies/blob/main/",
namespace,
"/machinetag.json[*this location*]. The JSON format can be freely reused in your application",
"or automatically enabled in https://www.github.com/MISP/MISP[MISP] taxonomy.",
)
elif t == "description" and toplevel is True:
content = "\n{} \n".format(content) content = "\n{} \n".format(content)
elif t == 'description' and toplevel is False: elif t == "description" and toplevel is False:
try: try:
(n, value) = content.split(":", 1) (n, value) = content.split(":", 1)
content = "\n{} \n".format(value) content = "\n{} \n".format(value)
except: except:
content = "\n{} \n".format(content) content = "\n{} \n".format(content)
elif t == 'numerical_value': elif t == "numerical_value":
(n, value) = content.split(":", 1) (n, value) = content.split(":", 1)
content = "\nAssociated numerical value=\"{}\" \n".format(value) content = '\nAssociated numerical value="{}" \n'.format(value)
elif t == 'exclusive': elif t == "exclusive":
(n, value) = content.split(":", 1) (n, value) = content.split(":", 1)
if n: if n:
content = "\nIMPORTANT: Exclusive flag set which means the values or predicate below must be set exclusively.\n" content = "\nIMPORTANT: Exclusive flag set which means the values or predicate below must be set exclusively.\n"
@ -124,9 +152,9 @@ def machineTag(namespace=False, predicate=False, value=None):
if namespace is False or predicate is False: if namespace is False or predicate is False:
return None return None
if value is None: if value is None:
return (u'{0}:{1}'.format(namespace, predicate)) return "{0}:{1}".format(namespace, predicate)
else: else:
return (u'{0}:{1}=\"{2}\"'.format(namespace, predicate, value)) return '{0}:{1}="{2}"'.format(namespace, predicate, value)
for taxonomy in taxonomies: for taxonomy in taxonomies:
@ -136,70 +164,166 @@ for taxonomy in taxonomies:
filename = os.path.join(thisDir, "../", taxonomy, "machinetag.json") filename = os.path.join(thisDir, "../", taxonomy, "machinetag.json")
with open(filename) as fp: with open(filename) as fp:
t = json.load(fp) t = json.load(fp)
namespace = t['namespace'] namespace = t["namespace"]
if t.get('expanded'): if t.get("expanded"):
expanded_namespace = t['expanded'] expanded_namespace = t["expanded"]
else: else:
expanded_namespace = namespace expanded_namespace = namespace
if args.a: if args.a:
doc = asciidoc(content=t['namespace'], adoc=doc, t='namespace') doc = asciidoc(content=t["namespace"], adoc=doc, t="namespace")
doc = asciidoc(content=t['description'], adoc=doc, t='description', toplevel = True) doc = asciidoc(
if t.get('exclusive'): content=t["description"], adoc=doc, t="description", toplevel=True
doc = asciidoc(content=machineTag(namespace=namespace, predicate=t['exclusive']), adoc=doc, t='exclusive') )
if t.get("exclusive"):
doc = asciidoc(
content=machineTag(namespace=namespace, predicate=t["exclusive"]),
adoc=doc,
t="exclusive",
)
if args.v: if args.v:
print('{0}'.format(t['description'])) print("{0}".format(t["description"]))
for predicate in t['predicates']: for predicate in t["predicates"]:
if args.a: if args.a:
doc = asciidoc(content=predicate['value'], adoc=doc, t='predicate') doc = asciidoc(content=predicate["value"], adoc=doc, t="predicate")
if predicate.get('description'): if predicate.get("description"):
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['description']), adoc=doc, t='description') doc = asciidoc(
if predicate.get('exclusive'): content=machineTag(
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['exclusive']), adoc=doc, t='exclusive') namespace=namespace, predicate=predicate["description"]
),
adoc=doc,
t="description",
)
if predicate.get("exclusive"):
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=predicate["exclusive"]
),
adoc=doc,
t="exclusive",
)
if t.get('values') is None: if t.get("values") is None:
if args.a: if args.a:
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['value']), adoc=doc) doc = asciidoc(
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['expanded']), adoc=doc, t='description') content=machineTag(
if predicate.get('description'): namespace=namespace, predicate=predicate["value"]
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['description']), adoc=doc, t='description') ),
if predicate.get('numerical_value'): adoc=doc,
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['numerical_value']), adoc=doc, t='description') )
if predicate.get('exclusive'): doc = asciidoc(
doc = asciidoc(content=machineTag(namespace=namespace, predicate=predicate['exclusive']), adoc=doc, t='exclusive') content=machineTag(
namespace=namespace, predicate=predicate["expanded"]
),
adoc=doc,
t="description",
)
if predicate.get("description"):
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=predicate["description"]
),
adoc=doc,
t="description",
)
if predicate.get("numerical_value"):
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=predicate["numerical_value"]
),
adoc=doc,
t="description",
)
if predicate.get("exclusive"):
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=predicate["exclusive"]
),
adoc=doc,
t="exclusive",
)
else: else:
print(machineTag(namespace=namespace, predicate=predicate['value'])) print(machineTag(namespace=namespace, predicate=predicate["value"]))
if args.e: if args.e:
print("--> " + machineTag(namespace=expanded_namespace, predicate=predicate['expanded'])) print(
if predicate.get('description'): "--> "
print("--> " + predicate['description']) + machineTag(
namespace=expanded_namespace, predicate=predicate["expanded"]
)
)
if predicate.get("description"):
print("--> " + predicate["description"])
else: else:
for e in t['values']: for e in t["values"]:
if e['predicate'] == predicate['value']: if e["predicate"] == predicate["value"]:
if 'expanded' in predicate: if "expanded" in predicate:
expanded = predicate['expanded'] expanded = predicate["expanded"]
for v in e['entry']: for v in e["entry"]:
if args.a and 'expanded' in v: if args.a and "expanded" in v:
doc = asciidoc(content=machineTag(namespace=namespace, predicate=e['predicate'], value=v['value']), adoc=doc) doc = asciidoc(
doc = asciidoc(content=machineTag(namespace=namespace, predicate=v['expanded']), adoc=doc, t='description') content=machineTag(
if 'description' in v: namespace=namespace,
doc = asciidoc(content=machineTag(namespace=namespace, predicate=v['description']), adoc=doc, t='description') predicate=e["predicate"],
if v.get('numerical_value'): value=v["value"],
doc = asciidoc(content=machineTag(namespace=namespace, predicate=v['numerical_value']), adoc=doc, t='numerical_value') ),
adoc=doc,
)
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=v["expanded"]
),
adoc=doc,
t="description",
)
if "description" in v:
doc = asciidoc(
content=machineTag(
namespace=namespace, predicate=v["description"]
),
adoc=doc,
t="description",
)
if v.get("numerical_value"):
doc = asciidoc(
content=machineTag(
namespace=namespace,
predicate=v["numerical_value"],
),
adoc=doc,
t="numerical_value",
)
else: else:
print(machineTag(namespace=namespace, predicate=e['predicate'], value=v['value'])) print(
machineTag(
namespace=namespace,
predicate=e["predicate"],
value=v["value"],
)
)
if args.e: if args.e:
if'expanded' in v: if "expanded" in v:
print("--> " + machineTag(namespace=namespace, predicate=expanded, value=v['expanded'])) print(
"--> "
+ machineTag(
namespace=namespace,
predicate=expanded,
value=v["expanded"],
)
)
with open('../mapping/mapping.json') as mapping: with open("../mapping/mapping.json") as mapping:
m = json.load(mapping) m = json.load(mapping)
output = '\n= Mapping of taxonomies\n' output = "\n= Mapping of taxonomies\n"
output = '{}{}'.format(output, 'Analysts relying on taxonomies don\'t always know the appropriate namespace to use but know which value to use for classification. The MISP mapping taxonomy allows to map a single classification into a series of machine-tag synonyms.\n') output = "{}{}".format(
output,
"Analysts relying on taxonomies don't always know the appropriate namespace to use but know which value to use for classification. The MISP mapping taxonomy allows to map a single classification into a series of machine-tag synonyms.\n",
)
for value in sorted(m.keys()): for value in sorted(m.keys()):
output = '{}{} **{}**{}{}\n'.format(output,'\n.Mapping table - ',value,'\n|===\n|',value) output = "{}{} **{}**{}{}\n".format(
for mapped in m[value]['values']: output, "\n.Mapping table - ", value, "\n|===\n|", value
output = '{}|{}\n'.format(output,mapped) )
output = '{}|===\n'.format(output) for mapped in m[value]["values"]:
output = "{}|{}\n".format(output, mapped)
output = "{}|===\n".format(output)
doc = doc + output doc = doc + output
if args.a: if args.a: