Add [agencies] refs

pull/946/head
niclas 2024-03-12 11:22:30 +01:00
parent 0d26334448
commit bb28408b14
5 changed files with 4751 additions and 33 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,9 @@
{
"description": "List of intelligence agencies",
"icon": "ninja",
"name": "intelligence-agencies",
"namespace": "intelligence-agency",
"type": "intelligence-agency",
"uuid": "3ef969e7-96cd-4048-aa83-191ac457d0db",
"version": 1
}

View File

View File

@ -3,19 +3,19 @@ from modules.intel import IntelAgency, Meta, Galaxy, Cluster
import os
import uuid
import json
import re
from bs4 import BeautifulSoup
# Output directories, relative to the directory the script is run from.
CLUSTER_PATH = '../../clusters'
GALAXY_PATH = '../../galaxies'
# Base name shared by the galaxy and cluster JSON files.
GALAXY_NAME = 'intelligence-agencies'
# Fixed identifier so repeated runs emit the same UUID. The earlier
# `str(uuid.uuid4())` assignment was immediately overwritten by this
# literal, so it has been dropped as a dead store.
UUID = "3ef969e7-96cd-4048-aa83-191ac457d0db"
# Prefix prepended to the href values scraped from list items.
WIKIPEDIA_URL = "https://en.wikipedia.org"
def get_UUIDs():
if GALAXY_NAME in os.listdir(CLUSTER_PATH):
if f"{GALAXY_NAME}.json" in os.listdir(CLUSTER_PATH):
uuids = {}
with open(os.path.join(CLUSTER_PATH, GALAXY_NAME)) as fr:
with open(os.path.join(CLUSTER_PATH, f"{GALAXY_NAME}.json")) as fr:
galaxy_json = json.load(fr)
for cluster in galaxy_json["values"]:
uuids[cluster["value"]] = cluster["uuid"]
@ -28,18 +28,29 @@ def get_notes_on_lower_level(content):
if li.find('ul'):
notes.extend(get_notes_on_lower_level(li.find('ul')))
else:
notes.append(li.text)
a_tag = li.find('a')
title = li.text
link_href = None
description = li.text
if a_tag:
title = a_tag.get('title', description)
if a_tag.has_attr('href'):
link_href = f'{WIKIPEDIA_URL}{a_tag["href"]}'
notes.append((title, link_href, description, None))
return notes
def get_agencies_from_country(heading, current_country, uuids):
    """Build the IntelAgency entries listed under one country heading.

    Args:
        heading: Parsed heading element; the first ``ul`` found after it
            via ``find_next`` is treated as the country's agency list.
        current_country: Country name stored in each entry's ``Meta``.
        uuids: Mapping of agency name to a previously assigned UUID, or a
            falsy value when there is none to reuse.

    Returns:
        A list with one ``IntelAgency`` per item returned by
        ``get_notes_on_lower_level``.
    """
    content = heading.find_next('ul')
    agencies = []
    # Each note is a (title, link, description, synonyms) tuple; the
    # synonyms slot is not used here.
    for name, link, description, _synonyms in get_notes_on_lower_level(content):
        # Reuse the UUID recorded for this name so identifiers stay stable
        # between runs; otherwise mint a new one.
        if uuids and name in uuids:
            agency_uuid = uuids[name]
        else:
            agency_uuid = str(uuid.uuid4())
        agencies.append(
            IntelAgency(
                value=name,
                uuid=agency_uuid,
                # `link` is None when the list item had no usable anchor.
                meta=Meta(country=current_country, refs=[link]),
                description=description,
            )
        )
    return agencies
def extract_info(content, uuids):
@ -93,6 +104,5 @@ if __name__ == '__main__':
)
for agency in agencies:
cluster.add_value(agency)
print(cluster.values)
print(cluster.uuid)
cluster.save_to_file(os.path.join(CLUSTER_PATH, f'{GALAXY_NAME}.json'))

View File

@ -1,9 +1,30 @@
from dataclasses import dataclass, field, asdict
from dataclasses import dataclass, field, asdict, is_dataclass
import json
@dataclass
class Meta:
    """Metadata attached to a single cluster value.

    Every field has a default, so ``Meta()`` is a valid empty instance.
    """

    # Country the entry belongs to; empty string when not set.
    country: str = ""
    # Reference links for the entry. default_factory gives each instance
    # its own list instead of one shared mutable default.
    refs: list = field(default_factory=list)
    # Presumably alternative names for the entry (inferred from the field
    # name); also a fresh list per instance.
    synonyms: list = field(default_factory=list)
def custom_asdict(obj):
    """Convert a dataclass instance to a plain dict, pruning empty lists.

    Differences from ``dataclasses.asdict``:

    * a field named ``meta`` is converted recursively and, when the result
      is a dict, its ``refs`` and ``synonyms`` keys are dropped if they are
      empty or contain only ``None``;
    * a list or tuple field whose items are all dataclasses is converted
      item by item (an empty list or tuple is kept as ``[]``);
    * a non-empty list field containing only ``None`` is omitted.

    Args:
        obj: Any object. Only dataclass instances are converted.

    Returns:
        A dict keyed by field name for dataclass instances; ``obj`` itself
        for anything else, including dataclass types.
    """
    # Function-scope import: the module header imports is_dataclass but
    # not fields.
    from dataclasses import fields

    # is_dataclass() is also true for dataclass *types*, which carry no
    # per-instance field values, so those are passed through unchanged.
    if not is_dataclass(obj) or isinstance(obj, type):
        return obj

    result = {}
    # fields() skips ClassVar/InitVar pseudo-fields, which
    # __dataclass_fields__ would include.
    for field_def in fields(obj):
        field_name = field_def.name
        value = getattr(obj, field_name)
        if field_name == 'meta':
            value = custom_asdict(value)
            # Only a dict can be filtered; a non-dict meta (e.g. None)
            # is kept as-is.
            if isinstance(value, dict):
                value = {
                    k: v
                    for k, v in value.items()
                    if not (
                        k in ('refs', 'synonyms')
                        and (not v or all(e is None for e in v))
                    )
                }
        elif isinstance(value, (list, tuple)) and all(is_dataclass(i) for i in value):
            value = [custom_asdict(i) for i in value]
        elif isinstance(value, list) and all(e is None for e in value):
            # Non-empty list of only None: leave the key out entirely.
            continue
        result[field_name] = value
    return result
@dataclass
class IntelAgency:
@ -34,31 +55,20 @@ class Galaxy:
file.write(json.dumps(asdict(self), indent=4))
@dataclass
class Cluster:
    """A cluster document: descriptive header fields plus its values.

    The dataclass-generated ``__init__`` takes the header fields in the
    order declared below; ``values`` is optional and defaults to a fresh
    empty list for each instance.
    """

    authors: str
    category: str
    description: str
    name: str
    source: str
    type: str
    uuid: str
    version: int
    # default_factory avoids sharing one list between instances.
    values: list = field(default_factory=list)

    def add_value(self, value: "IntelAgency"):
        """Append one entry to ``values``."""
        self.values.append(value)

    def save_to_file(self, path: str):
        """Serialize the cluster with ``custom_asdict`` and write it as JSON.

        Args:
            path: Destination file; it is opened in write mode, so existing
                content is replaced.
        """
        # ensure_ascii=False emits non-ASCII characters verbatim, so the
        # file encoding is pinned instead of using the platform default.
        with open(path, "w", encoding="utf-8") as file:
            file.write(json.dumps(custom_asdict(self), indent=4, ensure_ascii=False))