#!/usr/bin/env python3
#
# Copyright (C) 2014 Sascha Rommelfangen, Raphael Vinot
# Copyright (C) 2014 CIRCL Computer Incident Response Center Luxembourg (SMILE gie)
#

from datetime import date, timedelta
import json
import redis
from urllib.parse import quote
from .helpers import get_socket_path
import ipaddress
from pyfaup.faup import Faup
import socket
import dns.resolver
import re
import sys
import logging
from pypdns import PyPDNS
from pyipasnhistory import IPASNHistory
from pybgpranking import BGPRanking
from pypssl import PyPSSL
from pyeupi import PyEUPI
import requests
from bs4 import BeautifulSoup

try:
    import sphinxapi
    sphinx = True
except Exception:
    sphinx = False

r_cache = None


def _cache_init():
    global r_cache
    if r_cache is None:
        r_cache = redis.Redis(unix_socket_path=get_socket_path('cache'), db=1,
                              decode_responses=True)


def _cache_set(key, value, field=None):
    _cache_init()
    if field is None:
        # redis-py expects setex(name, time, value)
        r_cache.setex(key, 3600, json.dumps(value))
    else:
        r_cache.hset(key, field, json.dumps(value))
        r_cache.expire(key, 3600)


def _cache_get(key, field=None):
    _cache_init()
    if field is None:
        value_json = r_cache.get(key)
    else:
        value_json = r_cache.hget(key, field)
    if value_json is not None:
        return json.loads(value_json)
    return None


def to_bool(s):
    """Converts the given string to a boolean."""
    return s.lower() in ('1', 'true', 'yes', 'on')


def get_submissions(url, day=None):
    _cache_init()
    if day is None:
        day = date.today().isoformat()
    else:
        day = day.isoformat()
    key = day + '_submissions'
    return r_cache.zscore(key, url)


def get_mail_sent(url, day=None):
    _cache_init()
    if day is None:
        day = date.today().isoformat()
    else:
        day = day.isoformat()
    key = day + '_mails'
    return r_cache.sismember(key, url)


def set_mail_sent(url, day=None):
    _cache_init()
    if day is None:
        day = date.today().isoformat()
    else:
        day = day.isoformat()
    key = day + '_mails'
    return r_cache.sadd(key, url)


def is_valid_url(url):
    cached = _cache_get(url, 'valid')
    key = date.today().isoformat() + '_submissions'
    r_cache.zincrby(key, 1, url)
    if cached is not None:
        return cached
    fex = Faup()
    if url.startswith('hxxp'):
        url = 'http' + url[4:]
    elif not url.startswith('http'):
        url = 'http://' + url
    logging.debug("Checking validity of URL: " + url)
    fex.decode(url)
    scheme = fex.get_scheme()
    host = fex.get_host()
    if scheme is None or host is None:
        reason = "Not a valid http/https URL/URI"
        _cache_set(url, (False, url, reason), 'valid')
        return False, url, reason
    _cache_set(url, (True, url, None), 'valid')
    return True, url, None


def is_ip(host):
    try:
        ipaddress.ip_address(host)
        return True
    except ValueError:
        return False


def try_resolve(fex, url):
    fex.decode(url)
    host = fex.get_host().lower()
    if is_ip(host):
        return True, None
    try:
        ipaddr = dns.resolver.query(host, 'A')
    except Exception:
        reason = "DNS server problem. Check resolver settings."
        return False, reason
    if not ipaddr:
        reason = "Host " + host + " does not exist."
        return False, reason
    return True, None
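
# Usage sketch for the validation helpers above (hypothetical values; assumes
# the redis cache socket is up, since is_valid_url counts the submission):
#
#   ok, normalized, reason = is_valid_url('hxxp://example.com/login')
#   # -> (True, 'http://example.com/login', None); the 'hxxp' defanging
#   #    is undone before validation
#   fex = Faup()
#   resolvable, reason = try_resolve(fex, normalized)
#   # -> (True, None) when the host has an A record, (False, <reason>) otherwise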
def get_urls(url, depth=1):
    if depth > 5:
        print('Too many redirects.')
        return

    fex = Faup()

    def meta_redirect(content):
        c = content.lower()
        soup = BeautifulSoup(c, "html.parser")
        for result in soup.find_all(attrs={'http-equiv': 'refresh'}):
            if result:
                out = result["content"].split(";")
                if len(out) == 2:
                    wait, text = out
                    try:
                        a, url = text.split('=', 1)
                        return url.strip()
                    except Exception:
                        print(text)
        return None

    resolve, reason = try_resolve(fex, url)
    if not resolve:
        # FIXME: inform that the domain does not resolve
        yield url
        return

    logging.debug("Making HTTP connection to " + url)
    headers = {'User-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:8.0) Gecko/20100101 Firefox/8.0'}
    try:
        response = requests.get(url, allow_redirects=True, headers=headers,
                                timeout=15, verify=False)
    except Exception:
        # That one can fail (DNS for example)
        # FIXME: inform that the get failed
        yield url
        return

    if response.history:
        for h in response.history:
            # Yield the URLs in the order we find them
            yield h.url
    yield response.url

    meta_redir_url = meta_redirect(response.content)
    if meta_redir_url is not None:
        depth += 1
        if not meta_redir_url.startswith('http'):
            fex.decode(url)
            base = '{}://{}'.format(fex.get_scheme(), fex.get_host())
            port = fex.get_port()
            if port is not None:
                base += ':{}'.format(port)
            if not meta_redir_url.startswith('/'):
                # relative redirect. resource_path has the initial '/'
                if fex.get_resource_path() is not None:
                    base += fex.get_resource_path()
            if not base.endswith('/'):
                base += '/'
            meta_redir_url = base + meta_redir_url
        yield from get_urls(meta_redir_url, depth)


def url_list(url):
    cached = _cache_get(url, 'list')
    if cached is not None:
        return cached
    list_urls = []
    for u in get_urls(url):
        if u is None or u in list_urls:
            continue
        list_urls.append(u)
    _cache_set(url, list_urls, 'list')
    return list_urls


def dns_resolve(url):
    cached = _cache_get(url, 'dns')
    if cached is not None:
        return cached
    fex = Faup()
    fex.decode(url)
    host = fex.get_host().lower()
    ipv4 = None
    ipv6 = None
    if is_ip(host):
        if ':' in host:
            try:
                socket.inet_pton(socket.AF_INET6, host)
                ipv6 = [host]
            except Exception:
                pass
        else:
            try:
                socket.inet_aton(host)
                ipv4 = [host]
            except Exception:
                pass
    else:
        try:
            ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
        except Exception:
            logging.debug("No IPv4 address assigned to: " + host)
        try:
            ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
        except Exception:
            logging.debug("No IPv6 address assigned to: " + host)
    _cache_set(url, (ipv4, ipv6), 'dns')
    return ipv4, ipv6


def phish_query(url, key, query):
    cached = _cache_get(query, 'phishtank')
    if cached is not None:
        return cached
    postfields = {'url': quote(query), 'format': 'json', 'app_key': key}
    response = requests.post(url, data=postfields)
    res = response.json()
    if res["meta"]["status"] == "success":
        if res["results"]["in_database"]:
            _cache_set(query, res["results"]["phish_detail_page"], 'phishtank')
            return res["results"]["phish_detail_page"]
        else:
            # no information
            pass
    elif res["meta"]["status"] == 'error':
        # Inform the user?
        # errormsg = res["errortext"]
        pass
    return None
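
# Usage sketch for the redirect-chain helpers (hypothetical URL; assumes
# outbound network access and a live cache):
#
#   chain = url_list('http://example.com/short')
#   # e.g. ['http://example.com/short', 'https://example.com/landing']
#   for u in chain:
#       ipv4, ipv6 = dns_resolve(u)
#
# phish_query() additionally needs the PhishTank endpoint and an application
# key (both placeholders here):
#
#   detail_page = phish_query('https://checkurl.phishtank.com/checkurl/',
#                             'YOUR_APP_KEY', chain[-1])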
def sphinxsearch(server, port, url, query):
    # WARNING: too dangerous to have on the public interface
    return ''
    """
    if not sphinx:
        return None
    cached = _cache_get(query, 'sphinx')
    if cached is not None:
        return cached
    client = sphinxapi.SphinxClient()
    client.SetServer(server, port)
    client.SetMatchMode(2)
    client.SetConnectTimeout(5.0)
    result = []
    res = client.Query(query)
    if res.get("matches") is not None:
        for ticket in res["matches"]:
            ticket_id = ticket["id"]
            ticket_link = url + str(ticket_id)
            result.append(ticket_link)
        _cache_set(query, result, 'sphinx')
    return result
    """


def vt_query_url(url, url_up, key, query, upload=True):
    cached = _cache_get(query, 'vt')
    if cached is not None:
        return cached
    parameters = {"resource": query, "apikey": key}
    if upload:
        parameters['scan'] = 1
    response = requests.post(url, data=parameters)
    if response.text is None or len(response.text) == 0:
        return None
    res = response.json()
    msg = res.get("verbose_msg")
    link = res.get("permalink")
    positives = res.get("positives")
    total = res.get("total")
    if positives is not None:
        _cache_set(query, (msg, link, positives, total), 'vt')
    return msg, link, positives, total


def gsb_query(url, query):
    cached = _cache_get(query, 'gsb')
    if cached is not None:
        return cached
    param = '1\n' + query
    response = requests.post(url, data=param)
    status = response.status_code
    if status == 200:
        _cache_set(query, response.text, 'gsb')
        return response.text


'''
def urlquery_query(url, key, query):
    return None
    cached = _cache_get(query, 'urlquery')
    if cached is not None:
        return cached
    try:
        urlquery.url = url
        urlquery.key = key
        response = urlquery.search(query)
    except Exception:
        return None
    if response['_response_']['status'] == 'ok':
        if response.get('reports') is not None:
            total_alert_count = 0
            for r in response['reports']:
                total_alert_count += r['urlquery_alert_count']
                total_alert_count += r['ids_alert_count']
                total_alert_count += r['blacklist_alert_count']
            _cache_set(query, total_alert_count, 'urlquery')
            return total_alert_count
    else:
        return None
'''


def process_emails(emails, ignorelist, replacelist):
    to_return = list(set(emails))
    # Iterate over a snapshot: the list is mutated inside the loop body
    for mail in list(to_return):
        for ignorelist_entry in ignorelist:
            if re.search(ignorelist_entry, mail, re.I):
                if mail in to_return:
                    to_return.remove(mail)
        for k, v in list(replacelist.items()):
            if re.search(k, mail, re.I):
                if k in to_return:
                    to_return.remove(k)
                to_return += v
    return to_return


def whois(server, port, domain, ignorelist, replacelist):
    cached = _cache_get(domain, 'whois')
    if cached is not None:
        return cached
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(15)
    try:
        s.connect((server, port))
    except Exception:
        print("Connection problems - check WHOIS server")
        print("WHOIS request while problem occurred:", domain)
        print("WHOIS server: {}:{}".format(server, port))
        sys.exit(0)
    if domain.startswith('http'):
        fex = Faup()
        fex.decode(domain)
        d = fex.get_domain().lower()
    else:
        d = domain
    s.send(("{}\r\n".format(d)).encode())
    response = b''
    while True:
        d = s.recv(4096)
        response += d
        if d == b'':
            break
    s.close()
    match = re.findall(r'[\w\.-]+@[\w\.-]+', response.decode())
    emails = process_emails(match, ignorelist, replacelist)
    if len(emails) == 0:
        return None
    list_mail = list(set(emails))
    _cache_set(domain, list_mail, 'whois')
    return list_mail


def pdnscircl(url, user, passwd, q):
    cached = _cache_get(q, 'pdns')
    if cached is not None:
        return cached
    pdns = PyPDNS(url, basic_auth=(user, passwd))
    response = pdns.query(q)
    all_uniq = []
    for e in reversed(response):
        host = e['rrname'].lower()
        if host in all_uniq:
            continue
        else:
            all_uniq.append(host)
    response = (len(all_uniq), all_uniq[:5])
    _cache_set(q, response, 'pdns')
    return response
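
# Usage sketch for the WHOIS contact extraction (hypothetical server and
# patterns): ignorelist entries are regexes whose matches are dropped;
# replacelist maps an address (regex) to the addresses that replace it.
#
#   ignorelist = [r'abuse@example-registrar\.tld']
#   replacelist = {'hidden@privacy-proxy.example': ['abuse@registrar.example']}
#   mails = whois('whois.example.net', 43, 'example.com',
#                 ignorelist, replacelist)
#   # -> deduplicated list of e-mail addresses found in the WHOIS record,
#   #    or None when nothing usable remains after filtering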
def psslcircl(url, user, passwd, q):
    cached = _cache_get(q, 'pssl')
    if cached is not None:
        return cached
    pssl = PyPSSL(url, basic_auth=(user, passwd))
    response = pssl.query(q)
    if response.get(q) is not None:
        certinfo = response.get(q)
        entries = {}
        for sha1 in certinfo['certificates']:
            entries[sha1] = []
            if certinfo['subjects'].get(sha1):
                for value in certinfo['subjects'][sha1]['values']:
                    entries[sha1].append(value)
        _cache_set(q, entries, 'pssl')
        return entries
    return None


def eupi(url, key, q):
    cached = _cache_get(q, 'eupi')
    if cached is not None:
        return cached
    eu = PyEUPI(key, url)
    response = eu.search_url(url=q)
    if response.get('results'):
        r = response.get('results')[0]['tag_label']
        _cache_set(q, r, 'eupi')
        return r
    eu.post_submission(q)
    return None


def bgpranking(ip):
    cached = _cache_get(ip, 'ipasn')
    if cached is not None:
        asn = cached['asn']
        prefix = cached['prefix']
    else:
        ipasn = IPASNHistory()
        response = ipasn.query(ip)
        if 'response' not in response:
            asn = None
            prefix = None
        else:
            entry = response['response'][list(response['response'].keys())[0]]
            _cache_set(ip, entry, 'ipasn')
            asn = entry['asn']
            prefix = entry['prefix']
    if not asn or not prefix:
        # asn, prefix, asn_descr, rank, position, known_asns
        return None, None, None, None, None, None

    cached = _cache_get(asn, 'bgp')
    if cached is not None:
        return cached
    bgpranking = BGPRanking()
    response = bgpranking.query(asn, date=(date.today() - timedelta(1)).isoformat())
    if 'response' not in response:
        return None, None, None, None, None, None
    to_return = (asn, prefix,
                 response['response']['asn_description'],
                 response['response']['ranking']['rank'],
                 response['response']['ranking']['position'],
                 response['response']['ranking']['total_known_asns'])
    _cache_set(asn, to_return, 'bgp')
    return to_return


def _deserialize_cached(entry):
    to_return = {}
    h = r_cache.hgetall(entry)
    for key, value in list(h.items()):
        to_return[key] = json.loads(value)
    return to_return


def get_url_data(url):
    data = _deserialize_cached(url)
    if data.get('dns') is not None:
        ipv4, ipv6 = data['dns']
        ip_data = {}
        if ipv4 is not None:
            for ip in ipv4:
                ip_data[ip] = _deserialize_cached(ip)
        if ipv6 is not None:
            for ip in ipv6:
                ip_data[ip] = _deserialize_cached(ip)
        if len(ip_data) > 0:
            data.update(ip_data)
    return {url: data}


def cached(url):
    _cache_init()
    url_data = get_url_data(url)
    to_return = [url_data]
    if url_data[url].get('list') is not None:
        url_redirs = url_data[url]['list']
        for u in url_redirs:
            if u == url:
                continue
            to_return.append(get_url_data(u))
    return to_return
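

# Minimal end-to-end sketch, not part of the library proper. It assumes the
# module is run inside its package (the relative import of .helpers requires
# `python -m`), the redis cache socket is up, and outbound network access is
# available. The URL is a placeholder.
if __name__ == '__main__':
    test_url = 'http://www.example.com'
    valid, normalized, reason = is_valid_url(test_url)
    if not valid:
        print('Invalid URL:', reason)
    else:
        for u in url_list(normalized):
            print('Redirect chain entry:', u)
            print('  resolves to:', dns_resolve(u))
        # Dump everything collected in the cache for this URL and its redirects
        print(json.dumps(cached(normalized), indent=2))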