From cb5f6e23b7441034a0dff90d56af050972e58c04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Wed, 16 Jan 2019 15:09:18 +0100 Subject: [PATCH] chg: Trigger cache reload from API, improve digest. --- client/pyurlabuse/api.py | 38 +++++++++++++++++++++++++++++++++----- urlabuse/urlabuse.py | 14 ++++++++------ 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/client/pyurlabuse/api.py b/client/pyurlabuse/api.py index 73f6a83..31f4951 100644 --- a/client/pyurlabuse/api.py +++ b/client/pyurlabuse/api.py @@ -79,11 +79,41 @@ class PyURLAbuse(object): query = {'query': q} return self._async('psslcircl', query) + def _update_cache(self, cached): + for result in cached['result']: + for url, items in result.items(): + self.resolve(url) + self.phishtank(url) + self.virustotal(url) + self.googlesafebrowsing(url) + self.urlquery(url) + self.ticket(url) + self.whoismail(url) + if 'dns' not in items: + continue + for entry in items['dns']: + if entry is None: + continue + for ip in entry: + self.phishtank(ip) + self.bgpr(ip) + self.urlquery(ip) + self.pdnscircl(ip) + self.sslcircl(ip) + self.whoismail(ip) + def run_query(self, q, with_digest=False): cached = self.get_cache(q, with_digest) if len(cached['result']) > 0: - cached['info'] = 'Used cached content' - return cached + has_cached_content = True + self._update_cache(cached) + for r in cached['result']: + for url, content in r.items(): + if not content: + has_cached_content = False + if has_cached_content: + cached['info'] = 'Used cached content' + return cached job_id = self.urls(q) all_urls = None while True: @@ -121,7 +151,6 @@ class PyURLAbuse(object): self.urlquery(ip) self.pdnscircl(ip) self.sslcircl(ip) - self.ticket(ip) self.whoismail(ip) if v6 is not None: for ip in v6: @@ -129,11 +158,10 @@ class PyURLAbuse(object): self.bgpr(ip) self.urlquery(ip) self.pdnscircl(ip) - self.ticket(ip) self.whoismail(ip) waiting = True time.sleep(.5) - time.sleep(3) + time.sleep(1) cached = 
self.get_cache(q, with_digest) cached['info'] = 'New query, all the details may not be available.' return cached diff --git a/urlabuse/urlabuse.py b/urlabuse/urlabuse.py index 02bd5fb..1e7cebe 100644 --- a/urlabuse/urlabuse.py +++ b/urlabuse/urlabuse.py @@ -287,7 +287,7 @@ class Query(): def vt_query_url(self, url, url_up, key, query, upload=True): cached = self._cache_get(query, 'vt') - if cached is not None: + if cached is not None and cached[2] is not None: return cached parameters = {"resource": query, "apikey": key} if upload: @@ -300,8 +300,7 @@ class Query(): link = res.get("permalink") positives = res.get("positives") total = res.get("total") - if positives is not None: - self._cache_set(query, (msg, link, positives, total), 'vt') + self._cache_set(query, (msg, link, positives, total), 'vt') return msg, link, positives, total def gsb_query(self, url, query): @@ -519,7 +518,7 @@ class Query(): to_return += '\t' + ip + '\n' data = all_info[ip] if data.get('bgpranking'): - to_return += '\t\tis announced by {} ({}). Position {}/{}.'.format( + to_return += '\t\tis announced by {} ({}). Position {}/{}.\n'.format( data['bgpranking'][2], data['bgpranking'][0], data['bgpranking'][4], data['bgpranking'][5]) all_asns.add('{} ({})'.format(data['bgpranking'][2], data['bgpranking'][0])) @@ -539,8 +538,11 @@ class Query(): if 'whois' in info: all_mails.update(info['whois']) if 'vt' in info and len(info['vt']) == 4: - to_return += '\t{} out of {} positive detections in VT - {}\n'.format( - info['vt'][2], info['vt'][3], info['vt'][1]) + if info['vt'][2] is not None: + to_return += '\t{} out of {} positive detections in VT - {}\n'.format( + info['vt'][2], info['vt'][3], info['vt'][1]) + else: + to_return += '\t{} - {}\n'.format(info['vt'][0], info['vt'][1]) if 'gsb' in info: to_return += '\tKnown as malicious on Google Safe Browsing: {}\n'.format(info['gsb']) if 'phishtank' in info: