mirror of https://github.com/CIRCL/AIL-framework
chg: [favicon object] add favicon object
parent
3380f5462b
commit
94961f2eba
|
@ -15,7 +15,7 @@ config_loader = ConfigLoader()
|
|||
r_serv_db = config_loader.get_db_conn("Kvrocks_DB")
|
||||
config_loader = None
|
||||
|
||||
AIL_OBJECTS = sorted({'cve', 'cryptocurrency', 'decoded', 'domain', 'item', 'pgp', 'screenshot', 'title', 'username'})
|
||||
AIL_OBJECTS = sorted({'cve', 'cryptocurrency', 'decoded', 'domain', 'favicon', 'item', 'pgp', 'screenshot', 'title', 'username'})
|
||||
|
||||
def get_ail_uuid():
|
||||
ail_uuid = r_serv_db.get('ail:uuid')
|
||||
|
|
|
@ -44,8 +44,9 @@ CORRELATION_TYPES_BY_OBJ = {
|
|||
"cryptocurrency": ["domain", "item"],
|
||||
"cve": ["domain", "item"],
|
||||
"decoded": ["domain", "item"],
|
||||
"domain": ["cve", "cryptocurrency", "decoded", "item", "pgp", "title", "screenshot", "username"],
|
||||
"item": ["cve", "cryptocurrency", "decoded", "domain", "pgp", "screenshot", "title", "username"],
|
||||
"domain": ["cve", "cryptocurrency", "decoded", "favicon", "item", "pgp", "title", "screenshot", "username"],
|
||||
"favicon": ["domain", "item"], # TODO Decoded
|
||||
"item": ["cve", "cryptocurrency", "decoded", "domain", "favicon", "pgp", "screenshot", "title", "username"],
|
||||
"pgp": ["domain", "item"],
|
||||
"screenshot": ["domain", "item"],
|
||||
"title": ["domain", "item"],
|
||||
|
|
|
@ -141,9 +141,11 @@ def get_favicon_from_html(html, domain, url):
|
|||
return favicon_urls
|
||||
|
||||
def extract_favicon_from_html(html, url):
|
||||
favicon_urls = set()
|
||||
favicons = set()
|
||||
favicons_urls = set()
|
||||
|
||||
soup = BeautifulSoup(html, 'html.parser')
|
||||
set_icons = set()
|
||||
all_icons = set()
|
||||
# If there are multiple <link rel="icon">s, the browser uses their media,
|
||||
# type, and sizes attributes to select the most appropriate icon.
|
||||
# If several icons are equally appropriate, the last one is used.
|
||||
|
@ -159,27 +161,65 @@ def extract_favicon_from_html(html, url):
|
|||
# - <meta name="msapplication-TileColor" content="#aaaaaa"> <meta name="theme-color" content="#ffffff">
|
||||
# - <meta name="msapplication-config" content="/icons/browserconfig.xml">
|
||||
|
||||
# desktop browser 'shortcut icon' (older browser), 'icon'
|
||||
for favicon_tag in ['icon', 'shortcut icon']:
|
||||
if soup.head:
|
||||
for icon in soup.head.find_all('link', attrs={'rel': lambda x : x and x.lower() == favicon_tag, 'href': True}):
|
||||
set_icons.add(icon)
|
||||
|
||||
# # TODO: handle base64 favicon
|
||||
for tag in set_icons:
|
||||
# Root Favicon
|
||||
f = get_faup()
|
||||
f.decode(url)
|
||||
url_decoded = f.get()
|
||||
root_domain = f"{url_decoded['scheme']}://{url_decoded['domain']}"
|
||||
default_icon = f'{root_domain}/favicon.ico'
|
||||
favicons_urls.add(default_icon)
|
||||
# print(default_icon)
|
||||
|
||||
# shortcut
|
||||
for shortcut in soup.find_all('link', rel='shortcut icon'):
|
||||
all_icons.add(shortcut)
|
||||
# icons
|
||||
for icon in soup.find_all('link', rel='icon'):
|
||||
all_icons.add(icon)
|
||||
|
||||
for mask_icon in soup.find_all('link', rel='mask-icon'):
|
||||
all_icons.add(mask_icon)
|
||||
for apple_touche_icon in soup.find_all('link', rel='apple-touch-icon'):
|
||||
all_icons.add(apple_touche_icon)
|
||||
for msapplication in soup.find_all('meta', attrs={'name': 'msapplication-TileImage'}): # msapplication-TileColor
|
||||
all_icons.add(msapplication)
|
||||
|
||||
# msapplication-TileImage
|
||||
|
||||
# print(all_icons)
|
||||
for tag in all_icons:
|
||||
icon_url = tag.get('href')
|
||||
if icon_url:
|
||||
if icon_url.startswith('//'):
|
||||
icon_url = icon_url.replace('//', '/')
|
||||
if icon_url.startswith('data:'):
|
||||
# # TODO: handle base64 favicon
|
||||
pass
|
||||
data = icon_url.split(',', 1)
|
||||
if len(data) > 1:
|
||||
data = ''.join(data[1].split())
|
||||
favicon = base64.b64decode(data)
|
||||
if favicon:
|
||||
favicons.add(favicon)
|
||||
else:
|
||||
icon_url = urljoin(url, icon_url)
|
||||
icon_url = urlparse(icon_url, scheme=urlparse(url).scheme).geturl()
|
||||
favicon_urls.add(icon_url)
|
||||
return favicon_urls
|
||||
favicon_url = urljoin(url, icon_url)
|
||||
favicons_urls.add(favicon_url)
|
||||
elif tag.get('name') == 'msapplication-TileImage':
|
||||
icon_url = tag.get('content')
|
||||
if icon_url:
|
||||
if icon_url.startswith('data:'):
|
||||
data = icon_url.split(',', 1)
|
||||
if len(data) > 1:
|
||||
data = ''.join(data[1].split())
|
||||
favicon = base64.b64decode(data)
|
||||
if favicon:
|
||||
favicons.add(favicon)
|
||||
else:
|
||||
favicon_url = urljoin(url, icon_url)
|
||||
favicons_urls.add(favicon_url)
|
||||
print(favicon_url)
|
||||
|
||||
# print(favicons_urls)
|
||||
return favicons_urls, favicons
|
||||
|
||||
# mmh3.hash(favicon)
|
||||
|
||||
# # # - - # # #
|
||||
|
||||
|
@ -1755,7 +1795,9 @@ def test_ail_crawlers():
|
|||
load_blacklist()
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# item = Item('crawled/2023/03/06/foo.bec50a87b5-0c21-4ed4-9cb2-2d717a7a6507')
|
||||
# item_id = 'crawled/2023/02/20/data.gz'
|
||||
# item = Item(item_id)
|
||||
# content = item.get_content()
|
||||
# r = extract_author_from_html(content)
|
||||
# temp_url = ''
|
||||
# r = extract_favicon_from_html(content, temp_url)
|
||||
# print(r)
|
||||
|
|
|
@ -0,0 +1,126 @@
|
|||
#!/usr/bin/env python3
|
||||
# -*-coding:UTF-8 -*
|
||||
|
||||
import mmh3
|
||||
import os
|
||||
import sys
|
||||
|
||||
from flask import url_for
|
||||
|
||||
from pymisp import MISPObject
|
||||
|
||||
sys.path.append(os.environ['AIL_BIN'])
|
||||
##################################
|
||||
# Import Project packages
|
||||
##################################
|
||||
from lib.ConfigLoader import ConfigLoader
|
||||
from lib.objects.abstract_daterange_object import AbstractDaterangeObject, AbstractDaterangeObjects
|
||||
|
||||
config_loader = ConfigLoader()
|
||||
r_objects = config_loader.get_db_conn("Kvrocks_Objects")
|
||||
baseurl = config_loader.get_config_str("Notifications", "ail_domain")
|
||||
config_loader = None
|
||||
|
||||
|
||||
class Favicon(AbstractDaterangeObject):
    """
    AIL Favicon Object.

    The object id is the favicon hash handed to the constructor and the
    favicon data is kept in the object's ``content`` field.
    """

    def __init__(self, id):
        super(Favicon, self).__init__('favicon', id)

    # def get_ail_2_ail_payload(self):
    #     payload = {'raw': self.get_gzip_content(b64=True),
    #                 'compress': 'gzip'}
    #     return payload

    # # WARNING: UNCLEAN DELETE /!\ TEST ONLY /!\
    def delete(self):
        # # TODO:
        pass

    def get_content(self, r_type='str'):
        """
        Return the stored favicon content.

        :param r_type: 'str' returns the stored field as-is, 'bytes' returns
                       it encoded to bytes. Any other value returns None.
        :return: the content in the requested representation, or None when
                 the field is missing or r_type is not supported.
        """
        if r_type not in ('str', 'bytes'):
            return None
        content = self._get_field('content')
        # create() stores the content via bytes.decode(), so encode() is the
        # matching inverse for callers asking for bytes (e.g. the MISP export).
        if r_type == 'bytes' and isinstance(content, str):
            return content.encode()
        return content

    def get_link(self, flask_context=False):
        """
        Return the URL of the correlation page of this favicon.

        :param flask_context: when True build the URL with flask ``url_for``,
                              otherwise prefix it with the configured ``baseurl``.
        """
        if flask_context:
            url = url_for('correlation.show_correlation', type=self.type, id=self.id)
        else:
            url = f'{baseurl}/correlation/show?type={self.type}&id={self.id}'
        return url

    # TODO # CHANGE COLOR
    def get_svg_icon(self):
        """Return the icon description (style, glyph, color, radius) of this object."""
        return {'style': 'fas', 'icon': '\uf20a', 'color': '#1E88E5', 'radius': 5}  # f0c8 f45c

    def get_misp_object(self):
        """
        Build the MISP 'favicon' object of this favicon.

        first_seen / last_seen are set when available (a warning is logged
        otherwise) and every tag of this object is copied on each exported
        attribute.

        :return: the populated MISPObject.
        """
        obj_attrs = []
        obj = MISPObject('favicon')
        first_seen = self.get_first_seen()
        last_seen = self.get_last_seen()
        if first_seen:
            obj.first_seen = first_seen
        if last_seen:
            obj.last_seen = last_seen
        if not first_seen or not last_seen:
            self.logger.warning(
                f'Export error, None seen {self.type}:{self.subtype}:{self.id}, first={first_seen}, last={last_seen}')

        obj_attrs.append(obj.add_attribute('favicon-mmh3', value=self.id))
        obj_attrs.append(obj.add_attribute('favicon', value=self.get_content(r_type='bytes')))
        # The tags do not change between attributes: fetch them once.
        tags = self.get_tags()
        for obj_attr in obj_attrs:
            # pymisp skips attributes without a value and hands back None;
            # there is nothing to tag in that case.
            if obj_attr is None:
                continue
            for tag in tags:
                obj_attr.add_tag(tag)
        return obj

    def get_meta(self, options=None):
        """
        Return the metadata dict of this favicon.

        :param options: optional set of extra fields; 'content' adds the
                        favicon content to the result.
        :return: dict holding at least 'id' and 'tags'.
        """
        # None instead of a shared mutable default set.
        if options is None:
            options = set()
        meta = self._get_meta(options=options)
        meta['id'] = self.id
        meta['tags'] = self.get_tags(r_list=True)
        if 'content' in options:
            meta['content'] = self.get_content()
        return meta

    # def get_links(self):
    #     # TODO GET ALL URLS FROM CORRELATED ITEMS

    def add(self, date, item_id):  # TODO correlation base 64 -> calc md5
        """Register that this favicon was seen at ``date`` in item ``item_id``."""
        self._add(date, item_id)

    def create(self, content, _first_seen=None, _last_seen=None):
        """
        Store the favicon content and create the object.

        :param content: favicon content, str or bytes (bytes are decoded).
        :param _first_seen: currently unused.
        :param _last_seen: currently unused.
        """
        # NOTE(review): decode() uses strict UTF-8, so raw image bytes (ICO/PNG)
        # can raise UnicodeDecodeError here - confirm the intended storage
        # format (e.g. base64) with the callers.
        if not isinstance(content, str):
            content = content.decode()
        self._set_field('content', content)
        self._create()
|
||||
|
||||
|
||||
def create_favicon(content, url=None):  # TODO URL ????
    """
    Create the Favicon object matching ``content`` if it does not exist yet.

    The object id is the signed 32-bit MurmurHash3 of the base64-encoded
    content (the convention used by Shodan), rendered as a string so it can
    be used in keys, URLs and the 'favicon-mmh3' MISP attribute.
    ``mmh3.hash_bytes`` was previously used here, which yields a raw 16-byte
    digest that is not a printable identifier.

    :param content: favicon content, str or bytes.
    :param url: currently unused.
    :return: the Favicon object (created on first sight).
    """
    # Function-scope import: base64 is only needed to derive the id.
    import base64

    if isinstance(content, str):
        content = content.encode()
    # encodebytes() inserts a newline every 76 characters, which is part of
    # the hashed input in the Shodan convention.
    favicon_id = str(mmh3.hash(base64.encodebytes(content)))
    favicon = Favicon(favicon_id)
    if not favicon.exists():
        favicon.create(content)
    return favicon
|
||||
|
||||
|
||||
# TODO ADD SEARCH FUNCTION
|
||||
|
||||
class Favicons(AbstractDaterangeObjects):
    """
    Favicons Objects

    Collection-level helper for objects of type 'favicon'.
    """

    def __init__(self):
        super().__init__('favicon')

    def get_metas(self, obj_ids, options=None):
        """
        Return the metadata of several favicons.

        :param obj_ids: ids of the favicons to describe.
        :param options: optional set of extra fields forwarded to the
                        per-object metadata lookup.
        """
        # None instead of a shared mutable default set.
        if options is None:
            options = set()
        return self._get_metas(Favicon, obj_ids, options=options)

    def sanitize_name_to_search(self, name_to_search):
        """Return the search term unchanged (no sanitization implemented yet)."""
        return name_to_search  # TODO
|
||||
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# name_to_search = '98'
|
||||
# print(search_cves_by_name(name_to_search))
|
|
@ -18,6 +18,7 @@ from lib.objects import CryptoCurrencies
|
|||
from lib.objects.Cves import Cve
|
||||
from lib.objects.Decodeds import Decoded, get_all_decodeds_objects, get_nb_decodeds_objects
|
||||
from lib.objects.Domains import Domain
|
||||
from lib.objects.Favicons import Favicon
|
||||
from lib.objects.Items import Item, get_all_items_objects, get_nb_items_objects
|
||||
from lib.objects import Pgps
|
||||
from lib.objects.Screenshots import Screenshot
|
||||
|
@ -54,6 +55,8 @@ def get_object(obj_type, subtype, id):
|
|||
return Decoded(id)
|
||||
elif obj_type == 'cve':
|
||||
return Cve(id)
|
||||
elif obj_type == 'favicon':
|
||||
return Favicon(id)
|
||||
elif obj_type == 'screenshot':
|
||||
return Screenshot(id)
|
||||
elif obj_type == 'cryptocurrency':
|
||||
|
@ -163,7 +166,7 @@ def get_object_card_meta(obj_type, subtype, id, related_btc=False):
|
|||
obj = get_object(obj_type, subtype, id)
|
||||
meta = obj.get_meta()
|
||||
meta['icon'] = obj.get_svg_icon()
|
||||
if subtype or obj_type == 'cve' or obj_type == 'title':
|
||||
if subtype or obj_type == 'cve' or obj_type == 'title' or obj_type == 'favicon':
|
||||
meta['sparkline'] = obj.get_sparkline()
|
||||
if obj_type == 'cve':
|
||||
meta['cve_search'] = obj.get_cve_search()
|
||||
|
|
Loading…
Reference in New Issue