chg: [favicon object] add favicon object

pull/604/head
Terrtia 2023-06-12 16:51:45 +02:00
parent 3380f5462b
commit 94961f2eba
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
5 changed files with 195 additions and 23 deletions

View File

@ -15,7 +15,7 @@ config_loader = ConfigLoader()
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
config_loader = None
AIL_OBJECTS = sorted({'cve', 'cryptocurrency', 'decoded', 'domain', 'item', 'pgp', 'screenshot', 'title', 'username'})
AIL_OBJECTS = sorted({'cve', 'cryptocurrency', 'decoded', 'domain', 'favicon', 'item', 'pgp', 'screenshot', 'title', 'username'})
def get_ail_uuid():
ail_uuid = r_serv_db.get('ail:uuid')

View File

@ -44,8 +44,9 @@ CORRELATION_TYPES_BY_OBJ = {
"cryptocurrency": ["domain", "item"],
"cve": ["domain", "item"],
"decoded": ["domain", "item"],
"domain": ["cve", "cryptocurrency", "decoded", "item", "pgp", "title", "screenshot", "username"],
"item": ["cve", "cryptocurrency", "decoded", "domain", "pgp", "screenshot", "title", "username"],
"domain": ["cve", "cryptocurrency", "decoded", "favicon", "item", "pgp", "title", "screenshot", "username"],
"favicon": ["domain", "item"], # TODO Decoded
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
"pgp": ["domain", "item"],
"screenshot": ["domain", "item"],
"title": ["domain", "item"],

View File

@ -141,9 +141,11 @@ def get_favicon_from_html(html, domain, url):
return favicon_urls
def extract_favicon_from_html(html, url):
favicon_urls = set()
favicons = set()
favicons_urls = set()
soup = BeautifulSoup(html, 'html.parser')
set_icons = set()
all_icons = set()
# If there are multiple <link rel="icon">s, the browser uses their media,
# type, and sizes attributes to select the most appropriate icon.
# If several icons are equally appropriate, the last one is used.
@ -159,27 +161,65 @@ def extract_favicon_from_html(html, url):
# - <meta name="msapplication-TileColor" content="#aaaaaa"> <meta name="theme-color" content="#ffffff">
# - <meta name="msapplication-config" content="/icons/browserconfig.xml">
# desktop browser 'shortcut icon' (older browser), 'icon'
for favicon_tag in ['icon', 'shortcut icon']:
if soup.head:
for icon in soup.head.find_all('link', attrs={'rel': lambda x : x and x.lower() == favicon_tag, 'href': True}):
set_icons.add(icon)
# # TODO: handle base64 favicon
for tag in set_icons:
# Root Favicon
f = get_faup()
f.decode(url)
url_decoded = f.get()
root_domain = f"{url_decoded['scheme']}://{url_decoded['domain']}"
default_icon = f'{root_domain}/favicon.ico'
favicons_urls.add(default_icon)
# print(default_icon)
# shortcut
for shortcut in soup.find_all('link', rel='shortcut icon'):
all_icons.add(shortcut)
# icons
for icon in soup.find_all('link', rel='icon'):
all_icons.add(icon)
for mask_icon in soup.find_all('link', rel='mask-icon'):
all_icons.add(mask_icon)
for apple_touche_icon in soup.find_all('link', rel='apple-touch-icon'):
all_icons.add(apple_touche_icon)
for msapplication in soup.find_all('meta', attrs={'name': 'msapplication-TileImage'}): # msapplication-TileColor
all_icons.add(msapplication)
# msapplication-TileImage
# print(all_icons)
for tag in all_icons:
icon_url = tag.get('href')
if icon_url:
if icon_url.startswith('//'):
icon_url = icon_url.replace('//', '/')
if icon_url.startswith('data:'):
# # TODO: handle base64 favicon
pass
data = icon_url.split(',', 1)
if len(data) > 1:
data = ''.join(data[1].split())
favicon = base64.b64decode(data)
if favicon:
favicons.add(favicon)
else:
icon_url = urljoin(url, icon_url)
icon_url = urlparse(icon_url, scheme=urlparse(url).scheme).geturl()
favicon_urls.add(icon_url)
return favicon_urls
favicon_url = urljoin(url, icon_url)
favicons_urls.add(favicon_url)
elif tag.get('name') == 'msapplication-TileImage':
icon_url = tag.get('content')
if icon_url:
if icon_url.startswith('data:'):
data = icon_url.split(',', 1)
if len(data) > 1:
data = ''.join(data[1].split())
favicon = base64.b64decode(data)
if favicon:
favicons.add(favicon)
else:
favicon_url = urljoin(url, icon_url)
favicons_urls.add(favicon_url)
print(favicon_url)
# print(favicons_urls)
return favicons_urls, favicons
# mmh3.hash(favicon)
# # # - - # # #
@ -1755,7 +1795,9 @@ def test_ail_crawlers():
load_blacklist()
# if __name__ == '__main__':
# item = Item('crawled/2023/03/06/foo.bec50a87b5-0c21-4ed4-9cb2-2d717a7a6507')
# item_id = 'crawled/2023/02/20/data.gz'
# item = Item(item_id)
# content = item.get_content()
# r = extract_author_from_html(content)
# temp_url = ''
# r = extract_favicon_from_html(content, temp_url)
# print(r)

126
bin/lib/objects/Favicons.py Executable file
View File

@ -0,0 +1,126 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
import mmh3
import os
import sys
from flask import url_for
from pymisp import MISPObject
sys.path.append(os.environ['AIL_BIN'])
##################################
# Import Project packages
##################################
from lib.ConfigLoader import ConfigLoader
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
config_loader = ConfigLoader()
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
config_loader = None
class Favicon(AbstractDaterangeObject):
    """
    AIL Favicon Object.

    The object is registered under the 'favicon' type; its body is kept in the
    object's 'content' field.
    """

    def __init__(self, id):
        super(Favicon, self).__init__('favicon', id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        """Delete the object (not implemented yet, currently a no-op)."""
        # # TODO:
        pass

    def get_content(self, r_type='str'):
        """
        Return the stored favicon content.

        :param r_type: 'str' to get the 'content' field as stored,
                       'bytes' to get it encoded as bytes.
        :return: the content, or None for an unsupported r_type.
        """
        if r_type == 'str':
            return self._get_field('content')
        if r_type == 'bytes':
            # create() stores the content as str, so encode it back for callers
            # (e.g. get_misp_object) that need the raw bytes.
            content = self._get_field('content')
            if isinstance(content, str):
                content = content.encode()
            return content
        return None

    def get_link(self, flask_context=False):
        """
        Return the URL of the correlation page of this object.

        :param flask_context: if True build the URL with flask url_for,
                              otherwise build an absolute URL from the configured base URL.
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        """Return the icon description (style, glyph, color, radius) used to draw this object."""
        return {'style': 'fas', 'icon': '\uf20a', 'color': '#1E88E5', 'radius': 5}  # f0c8 f45c

    def get_misp_object(self):
        """
        Build the MISP 'favicon' object of this favicon.

        The object id is exported as 'favicon-mmh3' and the content as 'favicon';
        every tag of the object is copied on each exported attribute.
        """
        obj_attrs = []
        obj = MISPObject('favicon')
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        obj_attrs.append(obj.add_attribute('favicon-mmh3', value=self.id))
        obj_attrs.append(obj.add_attribute('favicon', value=self.get_content(r_type='bytes')))

        # Fetch the tags once instead of once per attribute.
        tags = self.get_tags()
        for obj_attr in obj_attrs:
            # NOTE(review): PyMISP add_attribute appears to return None when the value
            # is empty - skip those instead of failing on add_tag. Confirm against PyMISP.
            if obj_attr is None:
                continue
            for tag in tags:
                obj_attr.add_tag(tag)
        return obj

    def get_meta(self, options=None):
        """
        Return the metadata dict of this favicon.

        :param options: optional set of extra fields to include;
                        'content' adds the favicon content.
        """
        # None instead of a mutable default: a shared set() would leak between calls.
        if options is None:
            options = set()
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['tags'] = self.get_tags(r_list=True)
        if 'content' in options:
            meta['content'] = self.get_content()
        return meta

    # def get_links(self):
    #     # TODO GET ALL URLS FROM CORRELATED ITEMS

    def add(self, date, item_id):  # TODO correlation base 64 -> calc md5
        """Register this favicon as seen at `date` in the item `item_id`."""
        self._add(date, item_id)

    def create(self, content, _first_seen=None, _last_seen=None):
        """
        Store the favicon content and create the object.

        :param content: favicon content, str or bytes (bytes are decoded to str).
        :param _first_seen: currently unused.
        :param _last_seen: currently unused.
        """
        if not isinstance(content, str):
            # NOTE(review): bytes.decode() is strict UTF-8 and raises UnicodeDecodeError
            # on arbitrary binary data - confirm what encoding favicon bodies have here.
            content = content.decode()
        self._set_field('content', content)
        self._create()
def create_favicon(content, url=None):  # TODO URL ????
    """
    Get or create the Favicon object matching a favicon body.

    :param content: favicon content, str or bytes (str is encoded to bytes before hashing).
    :param url: currently unused.
    :return: the Favicon object (already existing or newly created).
    """
    if isinstance(content, str):
        content = content.encode()
    # NOTE(review): mmh3.hash_bytes presumably returns raw bytes, not a str/int id -
    # confirm this is the id format expected by the object store and the MISP export.
    favicon_id = mmh3.hash_bytes(content)
    favicon = Favicon(favicon_id)
    if not favicon.exists():
        favicon.create(content)
    # Hand the object back so callers can correlate it (previously nothing was returned).
    return favicon
# TODO ADD SEARCH FUNCTION
class Favicons(AbstractDaterangeObjects):
    """
    Favicons Objects
    """

    def __init__(self):
        super().__init__('favicon')

    def get_metas(self, obj_ids, options=None):
        """
        Return the metadata of several favicons.

        :param obj_ids: ids of the favicons to describe.
        :param options: optional set of extra fields forwarded for each Favicon.
        """
        # None instead of a mutable default: a shared set() would leak between calls.
        if options is None:
            options = set()
        return self._get_metas(Favicon, obj_ids, options=options)

    def sanitize_name_to_search(self, name_to_search):
        """Return the search string unchanged (no sanitization implemented yet)."""
        return name_to_search  # TODO
# if __name__ == '__main__':
# name_to_search = '98'
# print(search_cves_by_name(name_to_search))

View File

@ -18,6 +18,7 @@ from lib.objects import CryptoCurrencies
from lib.objects.Cves import Cve
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
from lib.objects.Domains import Domain
from lib.objects.Favicons import Favicon
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
from lib.objects import Pgps
from lib.objects.Screenshots import Screenshot
@ -54,6 +55,8 @@ def get_object(obj_type, subtype, id):
return Decoded(id)
elif obj_type == 'cve':
return Cve(id)
elif obj_type == 'favicon':
return Favicon(id)
elif obj_type == 'screenshot':
return Screenshot(id)
elif obj_type == 'cryptocurrency':
@ -163,7 +166,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
obj = get_object(obj_type, subtype, id)
meta = obj.get_meta()
meta['icon'] = obj.get_svg_icon()
if subtype or obj_type == 'cve' or obj_type == 'title':
if subtype or obj_type == 'cve' or obj_type == 'title' or obj_type == 'favicon':
meta['sparkline'] = obj.get_sparkline()
if obj_type == 'cve':
meta['cve_search'] = obj.get_cve_search()