mirror of https://github.com/MISP/misp-galaxy
chg [intel] mistakes on wikipedia got fixed
parent
64803fb28c
commit
7885a8fd00
|
@ -458,6 +458,16 @@
|
|||
"uuid": "46b43a4e-f9db-5a9f-a65f-c0d444315d26",
|
||||
"value": "Financial Intelligence Unit (Bahamas)"
|
||||
},
|
||||
{
|
||||
"description": "National Crime Intelligence Agency (NCIA)",
|
||||
"meta": {
|
||||
"country": "BS",
|
||||
"country_name": "Bahamas"
|
||||
},
|
||||
"related": [],
|
||||
"uuid": "afc0c983-dd11-50bc-8ab8-6f9879bbddf2",
|
||||
"value": "National Crime Intelligence Agency (NCIA)"
|
||||
},
|
||||
{
|
||||
"description": "NSA – National Security Agency",
|
||||
"meta": {
|
||||
|
@ -2353,6 +2363,29 @@
|
|||
"uuid": "82947bb1-4702-5c23-8d8a-aed56968e6df",
|
||||
"value": "Intelligence Protection Organization of Army of the Guardians of the Islamic Revolution"
|
||||
},
|
||||
{
|
||||
"description": "Intelligence org of FARAJA",
|
||||
"meta": {
|
||||
"country": "IR",
|
||||
"country_name": "Iran"
|
||||
},
|
||||
"related": [],
|
||||
"uuid": "0f5e5eed-104d-56d8-a136-50da25ff1211",
|
||||
"value": "Intelligence org of FARAJA"
|
||||
},
|
||||
{
|
||||
"description": "Intelligence org of the Islamic Republic of Iran[12]",
|
||||
"meta": {
|
||||
"country": "IR",
|
||||
"country_name": "Iran",
|
||||
"refs": [
|
||||
"https://en.wikipedia.org#cite_note-12"
|
||||
]
|
||||
},
|
||||
"related": [],
|
||||
"uuid": "fe4ae08b-ee63-5b38-a58c-fd2b3288c826",
|
||||
"value": "Intelligence org of the Islamic Republic of Iran[12]"
|
||||
},
|
||||
{
|
||||
"description": "General Security Directorate - (GSD) - (Internal security agency)",
|
||||
"meta": {
|
||||
|
|
|
@ -44,10 +44,6 @@ def get_notes_on_lower_level(content):
|
|||
if li.find('ul'):
|
||||
notes.extend(get_notes_on_lower_level(li.find('ul')))
|
||||
else:
|
||||
|
||||
if li.text in ["Islamic Republic of Iran Army:", "Islamic Revolutionary Guard Corps:", "FARAJA", "Judicial system of the Islamic Republic of Iran", "Intelligence [12]", "Intelligence org"]: # These are not intelligence agencies but Iran's entry is broken
|
||||
continue
|
||||
|
||||
a_tag = li.find('a')
|
||||
|
||||
title = li.text
|
||||
|
@ -71,16 +67,7 @@ def get_notes_on_lower_level(content):
|
|||
def get_agencies_from_country(heading, current_country):
|
||||
agencies = []
|
||||
contents = []
|
||||
if current_country != "Gambia": # Gambia has a mistake on the wikipedia page
|
||||
contents.append(heading.find_next('ul'))
|
||||
else:
|
||||
soup = BeautifulSoup(str(heading), 'html.parser')
|
||||
ul_tag = soup.new_tag('ul')
|
||||
li_tag = soup.new_tag('li')
|
||||
a_tag = heading.find_next('p').find('a')
|
||||
li_tag.append(a_tag)
|
||||
ul_tag.append(li_tag)
|
||||
contents.append(ul_tag)
|
||||
contents.append(heading.find_next('ul'))
|
||||
|
||||
current_content = contents[0]
|
||||
while True:
|
||||
|
@ -89,10 +76,6 @@ def get_agencies_from_country(heading, current_country):
|
|||
if next_sibling is None or next_sibling.name == 'h2':
|
||||
break
|
||||
|
||||
if current_country == "Bahamas" and next_sibling.name == 'h2': # Bahamas has a mistake on the wikipedia page
|
||||
current_country = None
|
||||
continue
|
||||
|
||||
if next_sibling.name == 'ul':
|
||||
contents.append(next_sibling)
|
||||
|
||||
|
|
Loading…
Reference in New Issue