commit
8be1cf5cab
|
@ -742,6 +742,11 @@
|
||||||
"description": "Workflow support language is a common language to support intelligence analysts to perform their analysis on data and information.",
|
"description": "Workflow support language is a common language to support intelligence analysts to perform their analysis on data and information.",
|
||||||
"name": "workflow",
|
"name": "workflow",
|
||||||
"version": 11
|
"version": 11
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"description": "This taxonomy aims to list doping substances",
|
||||||
|
"name": "doping-substances",
|
||||||
|
"version": 2
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"url": "https://raw.githubusercontent.com/MISP/misp-taxonomies/main/",
|
"url": "https://raw.githubusercontent.com/MISP/misp-taxonomies/main/",
|
||||||
|
|
Binary file not shown.
After Width: | Height: | Size: 10 KiB |
|
@ -0,0 +1,44 @@
|
||||||
|
# MISP_DopingSubstanceTaxonomy
|
||||||
|
|
||||||
|
This project aims to gather information about all doping substances prohibited in sport.
|
||||||
|
|
||||||
|
We collected all of the information on the [WADA website](https://www.wada-ama.org/en/prohibited-list).
|
||||||
|
|
||||||
|
To do that, we created a Python script that scrapes this website and generates a JSON file (the taxonomy).
|
||||||
|
|
||||||
|
This taxonomy can be added to MISP to help sports organizations fight against the use of doping substances.
|
||||||
|
|
||||||
|
## MISP
|
||||||
|
|
||||||
|
![logo](Misp-logo.png)
|
||||||
|
|
||||||
|
What is MISP?
|
||||||
|
|
||||||
|
>A threat intelligence platform for sharing, storing and correlating
|
||||||
|
Indicators of Compromise of targeted attacks, threat intelligence,
|
||||||
|
financial fraud information, vulnerability information or even
|
||||||
|
counter-terrorism information. Discover how MISP is used today in
|
||||||
|
multiple organisations. Not only to store, share, collaborate on cyber
|
||||||
|
security indicators, malware analysis, but also to use the IoCs and
|
||||||
|
information to detect and prevent attacks, frauds or threats against ICT
|
||||||
|
infrastructures, organisations or people.
|
||||||
|
|
||||||
|
## JSON Generation
|
||||||
|
|
||||||
|
In order to build the JSON file, we created a Python script which scrapes the prohibited list published by WADA (World Anti-Doping Agency).
|
||||||
|
|
||||||
|
Thanks to BeautifulSoup, a library that helps a lot when it comes to scraping HTML documents, the script is able to retrieve the full list of doping substances.
|
||||||
|
|
||||||
|
The file is created with PyTaxonomies, a MISP library that helps create a valid JSON file according to the [MISP Platform](https://www.misp-project.org/taxonomies.html#_misp_taxonomies).
|
||||||
|
|
||||||
|
Finally, the script generates all predicates (doping categories) and the entries associated (the doping substances themselves).
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
|
||||||
|
If you want to try it out yourself, you need to have BeautifulSoup, PyTaxonomies and Requests installed.
|
||||||
|
|
||||||
|
## Authors
|
||||||
|
|
||||||
|
DELUS Thibaut : https://github.com/WooZyhh
|
||||||
|
|
||||||
|
JACOB Lucas : https://github.com/Chaamoxs
|
|
@ -0,0 +1,63 @@
|
||||||
|
import json
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from pathlib import Path
|
||||||
|
from pytaxonomies import Entry, Predicate, Taxonomy
|
||||||
|
|
||||||
|
# Page of the WADA website that the generator downloads and parses.
CONTENT_URL = "https://www.wada-ama.org/en/prohibited-list"

# Metadata copied onto the generated taxonomy.
TAXONOMY_NAME = "doping-substances"
TAXONOMY_EXPANDED = "Doping substances"
TAXONOMY_DESCRIPTION = "This taxonomy aims to list doping substances"

# Panel titles that are skipped when predicates and entries are built.
ignore = ("NON-APPROVED SUBSTANCES",)
|
||||||
|
|
||||||
|
|
||||||
|
def list_predicates(articles):
    """Create a predicate for every category panel that is not ignored.

    Args:
        articles: Iterable of parsed ``<article>`` elements, one per
            category panel of the scraped page.

    Returns:
        A dict mapping each panel title to a ``Predicate`` whose name is
        that title and whose description is the text of the element that
        follows the first ``<p>`` inside the panel's ``layout-wysiwyg``
        block.
    """
    by_title = {}
    for panel in articles:
        heading = panel.find('p', attrs={'class': 'h3 panel-title'})
        name = heading.text
        if name not in ignore:
            body = panel.find('div', attrs={'class': 'layout-wysiwyg'})
            # The description is not the first paragraph itself but the
            # sibling element that comes right after it.
            summary = body.find('p').find_next_sibling().text

            category = Predicate()
            category.predicate = name
            category.description = summary
            by_title[name] = category
    return by_title
|
||||||
|
|
||||||
|
|
||||||
|
def generate_taxonomy(timeout=30):
    """Download the prohibited list and build the taxonomy from it.

    The page at ``CONTENT_URL`` is fetched and parsed; every
    ``<article class="panel hide-reader">`` whose title is not in ``ignore``
    becomes a predicate, and every ``<li>`` inside that article becomes one
    of the predicate's entries.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The populated ``Taxonomy`` (name, expanded name, description,
        predicates and their entries). The version is left for the caller
        to set.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    new_taxonomy = Taxonomy()
    new_taxonomy.name = TAXONOMY_NAME
    new_taxonomy.expanded = TAXONOMY_EXPANDED
    new_taxonomy.description = TAXONOMY_DESCRIPTION

    response = requests.get(CONTENT_URL, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page, which would
    # silently produce an empty taxonomy.
    response.raise_for_status()

    soup = BeautifulSoup(response.text, 'html.parser')
    articles = soup.find_all('article', attrs={'class': 'panel hide-reader'})

    new_taxonomy.predicates = list_predicates(articles)

    for article in articles:
        title = article.find('p', attrs={'class': 'h3 panel-title'}).text
        if title in ignore:
            continue

        # One entry per list item, keyed by the entry's own value.
        products_list = {}
        for product in article.find_all('li'):
            entry = Entry()
            entry.value = product.text
            products_list[entry.value] = entry
        new_taxonomy.predicates[title].entries = products_list

    return new_taxonomy
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # The generated taxonomy is written next to this script.
    output_path = Path(__file__).resolve().parent / 'machinetag.json'

    taxonomy = generate_taxonomy()
    taxonomy.version = 2

    # ensure_ascii=False keeps non-ASCII characters readable in the file.
    with output_path.open('wt', encoding='utf-8') as f:
        json.dump(taxonomy.to_dict(), f, indent=2, ensure_ascii=False)
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue