chg [intel] mistakes on wikipedia got fixed

pull/946/head
niclas 2024-03-13 10:10:35 +01:00
parent 64803fb28c
commit 7885a8fd00
2 changed files with 35 additions and 19 deletions

View File

@ -458,6 +458,16 @@
"uuid": "46b43a4e-f9db-5a9f-a65f-c0d444315d26", "uuid": "46b43a4e-f9db-5a9f-a65f-c0d444315d26",
"value": "Financial Intelligence Unit (Bahamas)" "value": "Financial Intelligence Unit (Bahamas)"
}, },
{
"description": "National Crime Intelligence Agency (NCIA)",
"meta": {
"country": "BS",
"country_name": "Bahamas"
},
"related": [],
"uuid": "afc0c983-dd11-50bc-8ab8-6f9879bbddf2",
"value": "National Crime Intelligence Agency (NCIA)"
},
{ {
"description": "NSA National Security Agency", "description": "NSA National Security Agency",
"meta": { "meta": {
@ -2353,6 +2363,29 @@
"uuid": "82947bb1-4702-5c23-8d8a-aed56968e6df", "uuid": "82947bb1-4702-5c23-8d8a-aed56968e6df",
"value": "Intelligence Protection Organization of Army of the Guardians of the Islamic Revolution" "value": "Intelligence Protection Organization of Army of the Guardians of the Islamic Revolution"
}, },
{
"description": "Intelligence org of FARAJA",
"meta": {
"country": "IR",
"country_name": "Iran"
},
"related": [],
"uuid": "0f5e5eed-104d-56d8-a136-50da25ff1211",
"value": "Intelligence org of FARAJA"
},
{
"description": "Intelligence org of the Islamic Republic of Iran[12]",
"meta": {
"country": "IR",
"country_name": "Iran",
"refs": [
"https://en.wikipedia.org#cite_note-12"
]
},
"related": [],
"uuid": "fe4ae08b-ee63-5b38-a58c-fd2b3288c826",
"value": "Intelligence org of the Islamic Republic of Iran[12]"
},
{ {
"description": "General Security Directorate - (GSD) - (Internal security agency)", "description": "General Security Directorate - (GSD) - (Internal security agency)",
"meta": { "meta": {

View File

@ -44,10 +44,6 @@ def get_notes_on_lower_level(content):
if li.find('ul'): if li.find('ul'):
notes.extend(get_notes_on_lower_level(li.find('ul'))) notes.extend(get_notes_on_lower_level(li.find('ul')))
else: else:
if li.text in ["Islamic Republic of Iran Army:", "Islamic Revolutionary Guard Corps:", "FARAJA", "Judicial system of the Islamic Republic of Iran", "Intelligence [12]", "Intelligence org"]: # These are not intelligence agencies but Iran's entry is broken
continue
a_tag = li.find('a') a_tag = li.find('a')
title = li.text title = li.text
@ -71,17 +67,8 @@ def get_notes_on_lower_level(content):
def get_agencies_from_country(heading, current_country): def get_agencies_from_country(heading, current_country):
agencies = [] agencies = []
contents = [] contents = []
if current_country != "Gambia": # Gambia has a mistake on the wikipedia page contents.append(heading.find_next('ul'))
contents.append(heading.find_next('ul'))
else:
soup = BeautifulSoup(str(heading), 'html.parser')
ul_tag = soup.new_tag('ul')
li_tag = soup.new_tag('li')
a_tag = heading.find_next('p').find('a')
li_tag.append(a_tag)
ul_tag.append(li_tag)
contents.append(ul_tag)
current_content = contents[0] current_content = contents[0]
while True: while True:
next_sibling = current_content.find_next_sibling() next_sibling = current_content.find_next_sibling()
@ -89,10 +76,6 @@ def get_agencies_from_country(heading, current_country):
if next_sibling is None or next_sibling.name == 'h2': if next_sibling is None or next_sibling.name == 'h2':
break break
if current_country == "Bahamas" and next_sibling.name == 'h2': # Bahamas has a mistake on the wikipedia page
current_country = None
continue
if next_sibling.name == 'ul': if next_sibling.name == 'ul':
contents.append(next_sibling) contents.append(next_sibling)