fix: fixes CaSe InSenSiTiVe duplicates

pull/995/head
Christophe Vandeplas 2024-06-18 16:58:38 +02:00
parent ca3cd1d0fb
commit 6f4b3b1387
No known key found for this signature in database
GPG Key ID: BDC48619FFDC5A5B
10 changed files with 8873 additions and 5773 deletions

View File

@ -3569,16 +3569,6 @@
"uuid": "9aa0a1b7-c9ff-422c-9ef1-431459e1e1b9",
"value": "PMC Bronze 44 Magnum Ammo - 500 Rounds of 180 Grain JHP Ammunition"
},
{
"meta": {
"caliber": "X",
"description": "Tac 7.62x51mm Ammo",
"manufacturer": "PMC",
"name": "PMC X"
},
"uuid": "0dd1db3c-8d5d-4296-b780-ae5ac7a92fed",
"value": "PMC X - Tac 7.62x51mm Ammo"
},
{
"meta": {
"caliber": "Bronze 308 Win Ammo",
@ -4110,5 +4100,5 @@
"value": "NobelSport High Brass Field 12 Gauge Ammo - 250 Rounds of 1"
}
],
"version": 1
"version": 2
}

View File

@ -3660,25 +3660,6 @@
"uuid": "6cb47609-b03e-43d9-a4c7-8342f1011f3b",
"value": "ANGRYREBEL"
},
{
"description": "",
"meta": {
"refs": [
"https://malpedia.caad.fkie.fraunhofer.de/details/elf.avoslocker",
"https://blogs.blackberry.com/en/2022/04/threat-thursday-avoslocker-prompts-advisory-from-fbi-and-fincen",
"https://blog.qualys.com/vulnerabilities-threat-research/2022/03/06/avoslocker-ransomware-behavior-examined-on-windows-linux",
"https://blog.cyble.com/2022/01/17/avoslocker-ransomware-linux-version-targets-vmware-esxi-servers/",
"https://blog.lexfo.fr/Avoslocker.html",
"https://www.ic3.gov/Media/News/2022/220318.pdf",
"https://blogs.vmware.com/security/2022/09/esxi-targeting-ransomware-the-threats-that-are-after-your-virtual-machines-part-1.html",
"https://blogs.vmware.com/security/2022/02/avoslocker-modern-linux-ransomware-threats.html"
],
"synonyms": [],
"type": []
},
"uuid": "465b6a74-87ca-4459-b4be-3f8b272f4485",
"value": "Avoslocker"
},
{
"description": "AVrecon is a Linux-based Remote Access Trojan (RAT) targeting small-office/home-office (SOHO) routers and other ARM-embedded devices. The malware is distributed via exploitation of unpatched vulnerabilities or common misconfiguration of the targeted devices. Once deployed, AVrecon will collect some information about the infected device, open a session to a pre-configured C&C server, and spawn a remote shell for command execution. It might also download additional arbitrary files and run them. The malware has recently been used in campaigns aimed at ad-fraud activities, password spraying and data exfiltration.",
"meta": {
@ -15170,7 +15151,10 @@
"https://news.sophos.com/en-us/2021/12/22/avos-locker-remotely-accesses-boxes-even-running-in-safe-mode/",
"https://news.sophos.com/en-us/2022/03/17/the-ransomware-threat-intelligence-center/",
"https://unit42.paloaltonetworks.com/emerging-ransomware-groups/",
"https://blog.qualys.com/vulnerabilities-threat-research/2022/03/06/avoslocker-ransomware-behavior-examined-on-windows-linux"
"https://blog.qualys.com/vulnerabilities-threat-research/2022/03/06/avoslocker-ransomware-behavior-examined-on-windows-linux",
"https://blog.lexfo.fr/Avoslocker.html",
"https://blogs.vmware.com/security/2022/09/esxi-targeting-ransomware-the-threats-that-are-after-your-virtual-machines-part-1.html",
"https://blogs.vmware.com/security/2022/02/avoslocker-modern-linux-ransomware-threats.html"
],
"synonyms": [],
"type": []
@ -56457,5 +56441,5 @@
"value": "Zyklon"
}
],
"version": 19803
"version": 19804
}

View File

@ -24250,11 +24250,6 @@
"uuid": "90c6daf8-8212-4ea8-9b59-af49b290b3b9",
"value": "TurkStatik"
},
{
"description": "ransomware",
"uuid": "93277946-177a-4f92-833d-30db9d432656",
"value": "Tyrant"
},
{
"description": "ransomware",
"uuid": "0407e98d-cd3e-42e1-8daf-3c51d2e4906a",
@ -29200,5 +29195,5 @@
"value": "apos"
}
],
"version": 122
"version": 123
}

View File

@ -1770,7 +1770,7 @@
"date": "1998"
},
"uuid": "2a47361d-584b-493f-80a4-37c74c30cf1b",
"value": "Vortex"
"value": "VorteX"
},
{
"meta": {
@ -2140,13 +2140,6 @@
"uuid": "c42394f8-5f35-4797-9393-8289ab8ad3ad",
"value": "SharpEye"
},
{
"meta": {
"date": "2010"
},
"uuid": "58e2e2ee-5c25-4a13-abfc-2a6c85d978fa",
"value": "VorteX"
},
{
"meta": {
"date": "2010",
@ -3648,5 +3641,5 @@
"value": "COATHANGER"
}
],
"version": 45
"version": 46
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -19,10 +19,10 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import json
import requests
import uuid
from pymispgalaxies import Cluster, Galaxy
'''
From https://www.culturecollections.org.uk/search/?searchScope=Product&pageNumber=1&filter.collectionGroup=0&filter.collection=0&filter.sorting=DateCreated
@ -42,6 +42,7 @@ cell culture characteristics. Passage numbers where given act only as a guide an
the passage number stated will be the passage number received by the customer.
'''
def download_items():
data = {'items': [],
'collections': {},
@ -75,11 +76,13 @@ def save_items(d):
json.dump(d, f, indent=2, sort_keys=True)
return True
def load_saved_items():
with open('items.json', 'r') as f:
d = json.load(f)
return d
data = download_items()
# save_items(data)
# data = load_saved_items()
@ -110,12 +113,19 @@ for item in data['items']:
clusters_dict[cluster['value']] = cluster
# transform dict to list
clusters = []
cluster = Cluster('ukhsa-culture-collections', skip_duplicates=True)
cluster.cluster_values = {}
for item in clusters_dict.values():
clusters.append(item)
cluster.append(item, skip_duplicates=True)
cluster.save('ukhsa-culture-collections')
for cluster, duplicate in cluster.duplicates:
print(f"WARNING: Skipped duplicate: {duplicate} in cluster {cluster}")
json_galaxy = {
try:
galaxy = Galaxy('ukhsa-culture-collections')
except KeyError:
galaxy = Galaxy({
'icon': "virus",
'name': "UKHSA Culture Collections",
'description': "UK Health Security Agency Culture Collections represent deposits of cultures that consist of expertly preserved, authenticated cell lines and microbial strains of known provenance.",
@ -123,20 +133,7 @@ json_galaxy = {
'type': "ukhsa-culture-collections",
'uuid': "bbe11c06-1d6a-477e-88f1-cdda2d71de56",
'version': 1
}
with open(os.path.join('..', 'clusters', 'ukhsa-culture-collections.json'), 'r') as f:
json_cluster = json.load(f)
json_cluster['values'] = clusters
json_cluster['version'] += 1
# save the Galaxy and Cluster file
with open(os.path.join('..', 'galaxies', 'ukhsa-culture-collections.json'), 'w') as f:
json.dump(json_galaxy, f, indent=2, sort_keys=True, ensure_ascii=False)
f.write('\n') # only needed for the beauty and to be compliant with jq_all_the_things
with open(os.path.join('..', 'clusters', 'ukhsa-culture-collections.json'), 'w') as f:
json.dump(json_cluster, f, indent=2, sort_keys=True, ensure_ascii=False)
f.write('\n') # only needed for the beauty and to be compliant with jq_all_the_things
})
galaxy.save('ukhsa-culture-collections')
print("All done, please don't forget to ./jq_all_the_things.sh, commit, and then ./validate_all.sh.")

View File

@ -132,7 +132,8 @@ class Cluster:
def save_to_file(self, path):
with open(path, "w") as file:
file.write(json.dumps(self.__dict__(), indent=4))
file.write(json.dumps(self.__dict__(), indent=2))
file.write('\n')
def __str__(self) -> str:
return f"Cluster: {self.name} - {self.type} - {self.uuid}"
@ -367,6 +368,7 @@ class SoftwareCluster(Cluster):
uuid=associated_software.get("associated_software_id"),
value=associated_software.get("name") + " - Associated Software",
)
self.values.append(value.return_value())
related.append(
{
@ -384,6 +386,12 @@ class SoftwareCluster(Cluster):
uuid=entry.get("id"),
value=entry.get("name"),
)
# duplicates, manually handled
if value.uuid == '6af0eac2-c35f-4569-ae09-47f1ca846961':
value.value = f"{value.value} - Duplicate"
if value.uuid == '39d81c48-8f7c-54cb-8fac-485598e31a55':
value.value = f"{value.value} - Duplicate"
self.values.append(value.return_value())
@ -585,6 +593,17 @@ class ReferencesCluster(Cluster):
uuid=entry.get("id"),
value=entry.get("name"),
)
# handle duplicates manually
if value.uuid == 'eea178f4-80bd-49d1-84b1-f80671e9a3e4':
value.value = f"{value.value} - Duplicate"
if value.uuid == '9bb5c330-56bd-47e7-8414-729d8e6cb3b3':
value.value = f"{value.value} - Duplicate"
if value.uuid == '8b4bdce9-da19-443f-88d2-11466e126c09':
value.value = f"{value.value} - Duplicate"
if value.uuid == 'b4727044-51bb-43b3-afdb-515bb4bb0f7e':
value.value = f"{value.value} - Duplicate"
self.values.append(value.return_value())

View File

@ -14,4 +14,5 @@ class Galaxy:
def save_to_file(self, path: str):
with open(path, "w") as file:
file.write(json.dumps(asdict(self), indent=4))
file.write(json.dumps(asdict(self), indent=2))
file.write('\n')