chg [intel] drop workarounds for mistakes on Wikipedia that have since been fixed

pull/946/head
niclas 2024-03-13 10:10:35 +01:00
parent 64803fb28c
commit 7885a8fd00
2 changed files with 35 additions and 19 deletions

View File

@ -458,6 +458,16 @@
"uuid": "46b43a4e-f9db-5a9f-a65f-c0d444315d26",
"value": "Financial Intelligence Unit (Bahamas)"
},
{
"description": "National Crime Intelligence Agency (NCIA)",
"meta": {
"country": "BS",
"country_name": "Bahamas"
},
"related": [],
"uuid": "afc0c983-dd11-50bc-8ab8-6f9879bbddf2",
"value": "National Crime Intelligence Agency (NCIA)"
},
{
"description": "NSA National Security Agency",
"meta": {
@ -2353,6 +2363,29 @@
"uuid": "82947bb1-4702-5c23-8d8a-aed56968e6df",
"value": "Intelligence Protection Organization of Army of the Guardians of the Islamic Revolution"
},
{
"description": "Intelligence org of FARAJA",
"meta": {
"country": "IR",
"country_name": "Iran"
},
"related": [],
"uuid": "0f5e5eed-104d-56d8-a136-50da25ff1211",
"value": "Intelligence org of FARAJA"
},
{
"description": "Intelligence org of the Islamic Republic of Iran[12]",
"meta": {
"country": "IR",
"country_name": "Iran",
"refs": [
"https://en.wikipedia.org#cite_note-12"
]
},
"related": [],
"uuid": "fe4ae08b-ee63-5b38-a58c-fd2b3288c826",
"value": "Intelligence org of the Islamic Republic of Iran[12]"
},
{
"description": "General Security Directorate - (GSD) - (Internal security agency)",
"meta": {

View File

@ -44,10 +44,6 @@ def get_notes_on_lower_level(content):
if li.find('ul'):
notes.extend(get_notes_on_lower_level(li.find('ul')))
else:
if li.text in ["Islamic Republic of Iran Army:", "Islamic Revolutionary Guard Corps:", "FARAJA", "Judicial system of the Islamic Republic of Iran", "Intelligence [12]", "Intelligence org"]: # These are not intelligence agencies but Iran's entry is broken
continue
a_tag = li.find('a')
title = li.text
@ -71,17 +67,8 @@ def get_notes_on_lower_level(content):
def get_agencies_from_country(heading, current_country):
agencies = []
contents = []
if current_country != "Gambia": # Gambia has a mistake on the wikipedia page
contents.append(heading.find_next('ul'))
else:
soup = BeautifulSoup(str(heading), 'html.parser')
ul_tag = soup.new_tag('ul')
li_tag = soup.new_tag('li')
a_tag = heading.find_next('p').find('a')
li_tag.append(a_tag)
ul_tag.append(li_tag)
contents.append(ul_tag)
contents.append(heading.find_next('ul'))
current_content = contents[0]
while True:
next_sibling = current_content.find_next_sibling()
@ -89,10 +76,6 @@ def get_agencies_from_country(heading, current_country):
if next_sibling is None or next_sibling.name == 'h2':
break
if current_country == "Bahamas" and next_sibling.name == 'h2': # Bahamas has a mistake on the wikipedia page
current_country = None
continue
if next_sibling.name == 'ul':
contents.append(next_sibling)