new: Major refactoring to use more recent techniques. Python3.6+

pull/14/head
Raphaël Vinot 2018-12-17 16:21:31 +01:00
parent 150a908aac
commit 73fff6db45
39 changed files with 1733 additions and 128 deletions

2
.gitignore vendored
View File

@ -19,3 +19,5 @@ ui-bootstrap-tpls.min.js
build
dist
*egg-info
*.rdb

9
.gitmodules vendored
View File

@ -1,9 +0,0 @@
[submodule "uwhoisd"]
path = uwhoisd
url = https://github.com/Rafiot/uwhoisd.git
[submodule "faup"]
path = faup
url = https://github.com/stricaud/faup.git
[submodule "redis"]
path = redis
url = https://github.com/antirez/redis.git

67
bin/run_backend.py Executable file
View File

@ -0,0 +1,67 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from urlabuse.helpers import get_homedir, check_running
from subprocess import Popen
import time
from pathlib import Path
import argparse
def launch_cache(storage_directory: Path=None):
    """Start the cache redis instance, unless one is already answering.

    `storage_directory` defaults to the URLABUSE home directory; the
    redis startup script is expected in its `cache` subdirectory.
    """
    root = storage_directory or get_homedir()
    if check_running('cache'):
        return
    Popen(["./run_redis.sh"], cwd=root / 'cache')
def shutdown_cache(storage_directory: Path=None):
    """Ask the cache redis instance to shut down via its helper script."""
    root = storage_directory or get_homedir()
    Popen(["./shutdown_redis.sh"], cwd=root / 'cache')
def launch_all():
    # Start every backend service. Only the cache exists today; add new
    # launchers here when more backends are introduced.
    launch_cache()
def check_all(stop=False):
    """Block until every backend is up (default) or fully down (stop=True).

    Polls check_running() once per second and prints the services we are
    still waiting on each round.
    """
    statuses = {'cache': False}
    while True:
        for service in statuses:
            try:
                statuses[service] = check_running(service)
            except Exception:
                statuses[service] = False
        states = list(statuses.values())
        if stop and not any(states):
            break
        if not stop and all(states):
            break
        for service, is_up in statuses.items():
            # Waiting either for a service to come up (start) or go down (stop).
            if (stop and is_up) or (not stop and not is_up):
                print(f"Waiting on {service}")
        time.sleep(1)
def stop_all():
    # Shut every backend down; only the cache exists for now.
    shutdown_cache()
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Manage backend DBs.')
    parser.add_argument("--start", action='store_true', default=False, help="Start all")
    parser.add_argument("--stop", action='store_true', default=False, help="Stop all")
    parser.add_argument("--status", action='store_true', default=True, help="Show status")
    args = parser.parse_args()

    if args.start:
        launch_all()
    if args.stop:
        stop_all()
    # --status defaults to True, so a plain invocation reports status —
    # but not while we are in the middle of stopping everything.
    if args.status and not args.stop:
        check_all()

24
bin/run_workers.py Executable file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
from multiprocessing import Pool
from rq import Worker, Queue, Connection
from redis import Redis
from urlabuse.helpers import get_socket_path
def worker(process_id: int):
    """Run one blocking rq worker on the 'default' queue.

    `process_id` exists only so Pool.map can fan out multiple workers;
    the worker itself does not use it.
    """
    listen = ['default']
    cache_socket = get_socket_path('cache')
    with Connection(Redis(unix_socket_path=cache_socket)):
        # Renamed from `worker`: the local previously shadowed this function.
        rq_worker = Worker([Queue(name) for name in listen])
        rq_worker.work()
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Launch a certain amount of workers.')
    parser.add_argument('-n', '--number', default=10, type=int, help='Amount of workers to launch.')
    args = parser.parse_args()
    # One pool slot per worker; each slot runs a blocking rq worker loop.
    with Pool(args.number) as pool:
        pool.map(worker, range(args.number))

24
bin/stats.py Normal file
View File

@ -0,0 +1,24 @@
#!/usr/bin/env python3
from datetime import date, timedelta
import redis
from urlabuse.helpers import get_socket_path
import argparse
def perdelta(start, end, delta):
    """Yield values from `start` (inclusive) up to `end` (exclusive), stepping by `delta`."""
    point = start
    while point < end:
        yield point
        point = point + delta
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Show on last 30 days.')
    args = parser.parse_args()
    # Bug fix: the socket path was previously passed positionally, i.e. as
    # the `host` argument; a unix socket must go in `unix_socket_path`.
    r = redis.Redis(unix_socket_path=get_socket_path('cache'))
    today = date.today()
    # One CSV line per day: date,number-of-submissions.
    for result in perdelta(today - timedelta(days=30), today, timedelta(days=1)):
        val = r.zcard('{}_submissions'.format(result))
        print('{},{}'.format(result, val))

1378
cache/cache.conf vendored Normal file

File diff suppressed because it is too large Load Diff

6
cache/run_redis.sh vendored Executable file
View File

@ -0,0 +1,6 @@
#!/bin/bash
# Launch the cache redis-server using the local cache.conf.
# Expects the redis checkout to be built two directories up.
set -e
set -x
../../redis/src/redis-server ./cache.conf

6
cache/shutdown_redis.sh vendored Executable file
View File

@ -0,0 +1,6 @@
#!/bin/bash
# Ask the cache redis instance to shut down via its unix socket.
# NOTE(review): set -e is left disabled — presumably because the shutdown
# command can return non-zero once the server exits; confirm before enabling.
# set -e
set -x
../../redis/src/redis-cli -s ./cache.sock shutdown

1
faup

@ -1 +0,0 @@
Subproject commit 9a1440d23290670b4c67d4b15cee19f534adadc7

View File

@ -1,12 +0,0 @@
#!/bin/bash
# Create (if needed) and activate a virtualenv, then install the
# project requirements into it.
set -e
set -x
if [ ! -d virtenv ]; then
virtualenv virtenv
fi
. ./virtenv/bin/activate
pip install --upgrade -r requirements.txt

1
redis

@ -1 +0,0 @@
Subproject commit 83b862a30ee90ee5f85eefcc63ff5241b501f073

View File

@ -1,9 +1,5 @@
flask
flask-bootstrap
flask-mail
flask-wtf
rq
redis
redis>=3
pypssl
pypdns
pyeupi

View File

@ -1,9 +0,0 @@
#!/bin/bash
# Legacy launcher: start redis-server from a hard-coded install location.
set -e
set -x
# REDIS_HOME must be edited to point at the local redis build.
REDIS_HOME='/change/me/'
${REDIS_HOME}/redis-server ./redis.conf

28
setup.py Normal file
View File

@ -0,0 +1,28 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from setuptools import setup

# Packaging metadata for the urlabuse library; the backend/worker
# management scripts are installed as executables via `scripts`.
setup(
    name='urlabuse',
    version='0.1',
    author='Raphaël Vinot',
    author_email='raphael.vinot@circl.lu',
    maintainer='Raphaël Vinot',
    url='https://github.com/CIRCL/url-abuse/',
    description='URL Abuse interface',
    packages=['urlabuse'],
    scripts=['bin/run_backend.py', 'bin/run_workers.py'],
    classifiers=[
        'License :: OSI Approved :: GNU Affero General Public License v3 or later (AGPLv3+)',
        'Development Status :: 3 - Alpha',
        'Environment :: Console',
        'Operating System :: POSIX :: Linux',
        'Intended Audience :: Science/Research',
        'Intended Audience :: Telecommunications Industry',
        'Intended Audience :: Information Technology',
        'Programming Language :: Python :: 3',
        'Topic :: Security',
        'Topic :: Internet',
    ]
)

View File

@ -1,17 +0,0 @@
#!/usr/bin/env python
from datetime import date, timedelta
import redis


def perdelta(start, end, delta):
    """Yield values from `start` (inclusive) to `end` (exclusive), stepping by `delta`."""
    curr = start
    while curr < end:
        yield curr
        curr += delta


r = redis.Redis('localhost', 6334, db=1)
# Fix: `date(2015, 03, 01)` is a SyntaxError in Python 3 — integer
# literals may not have leading zeros.
for result in perdelta(date(2015, 3, 1), date(2015, 12, 12), timedelta(days=1)):
    val = r.zcard('{}_submissions'.format(result))
    print('{},{}'.format(result, val))

0
urlabuse/__init__.py Normal file
View File

14
urlabuse/exceptions.py Normal file
View File

@ -0,0 +1,14 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
class URLAbuseException(Exception):
    """Base class for all URL Abuse specific errors."""
    pass


class CreateDirectoryException(URLAbuseException):
    """Raised when a path that should be a directory exists as something else."""
    pass


class MissingEnv(URLAbuseException):
    """Raised when a required environment variable (e.g. URLABUSE_HOME, VIRTUAL_ENV) is not set."""
    pass

97
urlabuse/helpers.py Normal file
View File

@ -0,0 +1,97 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
from pathlib import Path
from .exceptions import CreateDirectoryException, MissingEnv
from redis import Redis
from redis.exceptions import ConnectionError
from datetime import datetime, timedelta
import time
import asyncio
def get_storage_path() -> Path:
    """Return the virtualenv root, used as the storage base directory.

    Raises:
        MissingEnv: if the code is not running inside a virtual environment.
    """
    if not os.environ.get('VIRTUAL_ENV'):
        # Typo fix in the error message: "envoronment" -> "environment".
        raise MissingEnv("VIRTUAL_ENV is missing. This project really wants to run from a virtual environment.")
    return Path(os.environ['VIRTUAL_ENV'])
def get_homedir() -> Path:
    """Return the project home directory from the URLABUSE_HOME variable.

    Raises:
        MissingEnv: with a guessed value (the repository root relative to
        this file) when the variable is not set.  Typo fix in the message:
        "clonned" -> "cloned".
    """
    if not os.environ.get('URLABUSE_HOME'):
        guessed_home = Path(__file__).resolve().parent.parent
        raise MissingEnv(f"URLABUSE_HOME is missing. \
Run the following command (assuming you run the code from the cloned repository):\
export URLABUSE_HOME='{guessed_home}'")
    return Path(os.environ['URLABUSE_HOME'])
def safe_create_dir(to_create: Path) -> None:
    """Create `to_create` (and any missing parents), idempotently.

    Raises:
        CreateDirectoryException: if the path exists but is not a directory.
    """
    if to_create.exists() and not to_create.is_dir():
        raise CreateDirectoryException(f'The path {to_create} already exists and is not a directory')
    to_create.mkdir(parents=True, exist_ok=True)
def set_running(name: str) -> None:
    """Flag the service `name` as running in the cache DB's 'running' hash."""
    cache = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
    cache.hset('running', name, 1)
def unset_running(name: str) -> None:
    """Remove the service `name` from the cache DB's 'running' hash."""
    cache = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
    cache.hdel('running', name)
def is_running() -> dict:
    """Return the full 'running' hash: service name -> flag value."""
    cache = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
    return cache.hgetall('running')
def get_socket_path(name: str) -> str:
    """Return the absolute unix-socket path for the backend `name`."""
    relative_paths = {
        'cache': Path('cache', 'cache.sock'),
    }
    return str(get_homedir() / relative_paths[name])
def check_running(name: str) -> bool:
    """Return True if the redis instance behind `name`'s socket answers PING.

    Fixes: removed a leftover debug print of the socket path, and made the
    function always return a bool (it previously fell through to None when
    ping() returned a falsy value).
    """
    socket_path = get_socket_path(name)
    try:
        r = Redis(unix_socket_path=socket_path)
        return bool(r.ping())
    except ConnectionError:
        return False
def shutdown_requested() -> bool:
    """Return True when a 'shutdown' key is set in the cache DB.

    Also returns True when the DB is unreachable, so sleep loops terminate.
    Fix: with redis>=3 (pinned in requirements) `exists()` returns an int,
    not a bool — wrap it to honour the annotated return type.
    """
    try:
        r = Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
        return bool(r.exists('shutdown'))
    except ConnectionRefusedError:
        return True
    except ConnectionError:
        return True
async def long_sleep_async(sleep_in_sec: int, shutdown_check: int=10) -> bool:
    """Sleep `sleep_in_sec` seconds, waking every `shutdown_check` seconds.

    Returns False as soon as a shutdown is requested, True after a full sleep.
    """
    interval = min(shutdown_check, sleep_in_sec)
    deadline = datetime.now() + timedelta(seconds=sleep_in_sec)
    while datetime.now() < deadline:
        await asyncio.sleep(interval)
        if shutdown_requested():
            return False
    return True
def long_sleep(sleep_in_sec: int, shutdown_check: int=10) -> bool:
    """Blocking twin of long_sleep_async: sleep with periodic shutdown checks.

    Returns False as soon as a shutdown is requested, True after a full sleep.
    """
    interval = min(shutdown_check, sleep_in_sec)
    deadline = datetime.now() + timedelta(seconds=sleep_in_sec)
    while datetime.now() < deadline:
        time.sleep(interval)
        if shutdown_requested():
            return False
    return True

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
#
#
# Copyright (C) 2014 Sascha Rommelfangen, Raphael Vinot
@ -8,10 +8,9 @@
from datetime import date
import json
import redis
try:
from urllib.parse import quote
except ImportError:
from urllib import quote
from urllib.parse import quote
from .helpers import get_socket_path
from pyfaup.faup import Faup
import socket
@ -20,8 +19,15 @@ import re
import sys
import logging
from pypdns import PyPDNS
# import bgpranking_web
# import urlquery
try:
import bgpranking_web
except Exception:
pass
try:
import urlquery
except Exception:
pass
from pypssl import PyPSSL
from pyeupi import PyEUPI
import requests
@ -30,17 +36,17 @@ from bs4 import BeautifulSoup
try:
# import sphinxapi
sphinx = True
except:
except Exception:
sphinx = False
enable_cache = True
r_cache = None
def _cache_init(host='localhost', port=6334, db=1):
def _cache_init():
global r_cache
if enable_cache and r_cache is None:
r_cache = redis.Redis(host, port, db=db, decode_responses=True)
r_cache = redis.Redis(unix_socket_path=get_socket_path('cache'), db=1, decode_responses=True)
def _cache_set(key, value, field=None):
@ -108,7 +114,7 @@ def set_mail_sent(url, day=None):
def is_valid_url(url):
cached = _cache_get(url, 'valid')
key = date.today().isoformat() + '_submissions'
r_cache.zincrby(key, url)
r_cache.zincrby(key, 1, url)
if cached is not None:
return cached
fex = Faup()
@ -137,13 +143,13 @@ def is_ip(host):
try:
socket.inet_pton(socket.AF_INET6, host)
return True
except:
except Exception:
pass
else:
try:
socket.inet_aton(host)
return True
except:
except Exception:
pass
return False
@ -181,7 +187,7 @@ def get_urls(url, depth=1):
try:
a, url = text.split('=', 1)
return url.strip()
except:
except Exception:
print(text)
return None
@ -197,7 +203,7 @@ def get_urls(url, depth=1):
try:
response = requests.get(url, allow_redirects=True, headers=headers,
timeout=15, verify=False)
except:
except Exception:
# That one can fail (DNS for example)
# FIXME: inform that the get failed
yield url
@ -248,7 +254,7 @@ def dns_resolve(url):
return cached
fex = Faup()
fex.decode(url)
host = fex.get_host().decode().lower()
host = fex.get_host().lower()
ipv4 = None
ipv6 = None
if is_ip(host):
@ -256,22 +262,22 @@ def dns_resolve(url):
try:
socket.inet_pton(socket.AF_INET6, host)
ipv6 = [host]
except:
except Exception:
pass
else:
try:
socket.inet_aton(host)
ipv4 = [host]
except:
except Exception:
pass
else:
try:
ipv4 = [str(ip) for ip in dns.resolver.query(host, 'A')]
except:
except Exception:
logging.debug("No IPv4 address assigned to: " + host)
try:
ipv6 = [str(ip) for ip in dns.resolver.query(host, 'AAAA')]
except:
except Exception:
logging.debug("No IPv6 address assigned to: " + host)
_cache_set(url, (ipv4, ipv6), 'dns')
return ipv4, ipv6
@ -365,7 +371,7 @@ def urlquery_query(url, key, query):
urlquery.url = url
urlquery.key = key
response = urlquery.search(query)
except:
except Exception:
return None
if response['_response_']['status'] == 'ok':
if response.get('reports') is not None:

@ -1 +0,0 @@
Subproject commit eefb13ffa6b129efb97d794b2df6cd249ec4bff4

0
website/__init__.py Normal file
View File

4
website/requirements.txt Normal file
View File

@ -0,0 +1,4 @@
flask
flask-bootstrap
flask-mail
flask-wtf

View File

@ -1,10 +1,11 @@
import json
import os
from pathlib import Path
from flask import Flask, render_template, request, Response, redirect, url_for
from flask_mail import Mail, Message
from flask_bootstrap import Bootstrap
from flask_wtf import Form
from flask_wtf import FlaskForm
from wtforms import StringField, SubmitField
from wtforms.widgets import TextInput
from wtforms.validators import Required
@ -15,19 +16,18 @@ from logging import Formatter
from rq import Queue
from rq.job import Job
from worker import conn
from redis import Redis
try:
import configparser
except ImportError:
import ConfigParser as configparser
# from pyfaup.faup import Faup
from urlabuse.helpers import get_socket_path
import configparser
from .proxied import ReverseProxied
from url_abuse_async import is_valid_url, url_list, dns_resolve, phish_query, psslcircl, \
from urlabuse.urlabuse import is_valid_url, url_list, dns_resolve, phish_query, psslcircl, \
vt_query_url, gsb_query, urlquery_query, sphinxsearch, whois, pdnscircl, bgpranking, \
cached, get_mail_sent, set_mail_sent, get_submissions, eupi
config_path = 'config.ini'
config_dir = Path('config')
class AngularTextInput(TextInput):
@ -37,7 +37,7 @@ class AngularTextInput(TextInput):
return super(AngularTextInput, self).__call__(field, **kwargs)
class URLForm(Form):
class URLForm(FlaskForm):
url = StringField('URL Field',
description='Enter the URL you want to lookup here.',
validators=[Required()], widget=AngularTextInput())
@ -58,9 +58,9 @@ def prepare_auth():
return None
to_return = {}
with open('users.key', 'r') as f:
for l in f:
l = l.strip()
user, password = l.split('=')
for line in f:
line = line.strip()
user, password = line.split('=')
to_return[user] = password
return to_return
@ -73,7 +73,7 @@ def create_app(configfile=None):
app.logger.addHandler(handler)
app.logger.setLevel(logging.INFO)
Bootstrap(app)
q = Queue(connection=conn)
q = Queue(connection=Redis(unix_socket_path=get_socket_path('cache')))
# Mail Config
app.config['MAIL_SERVER'] = 'localhost'
@ -82,7 +82,7 @@ def create_app(configfile=None):
app.config['SECRET_KEY'] = 'devkey'
app.config['BOOTSTRAP_SERVE_LOCAL'] = True
app.config['configfile'] = config_path
app.config['configfile'] = config_dir / 'config.ini'
parser = configparser.SafeConfigParser()
parser.read(app.config['configfile'])
@ -145,7 +145,7 @@ def create_app(configfile=None):
def check_valid(job_key):
if job_key is None:
return json.dumps(None), 200
job = Job.fetch(job_key, connection=conn)
job = Job.fetch(job_key, connection=Redis(unix_socket_path=get_socket_path('cache')))
if job.is_finished:
return json.dumps(job.result), 200
else:
@ -176,36 +176,49 @@ def create_app(configfile=None):
u = q.enqueue_call(func=dns_resolve, args=(url,), result_ttl=500)
return u.get_id()
def read_auth(name):
key = config_dir / f'{name}.key'
if not key.exists():
return None
with open(key) as f:
to_return = []
for line in f.readlines():
to_return.append(line.strip())
return to_return
@app.route('/phishtank', methods=['POST'])
def phishtank():
data = json.loads(request.data.decode())
if not os.path.exists('phishtank.key'):
auth = read_auth('phishtank')
if not auth:
return None
key = auth[0]
data = json.loads(request.data.decode())
url = parser.get("PHISHTANK", "url")
key = open('phishtank.key', 'r').readline().strip()
query = data["query"]
u = q.enqueue_call(func=phish_query, args=(url, key, query,), result_ttl=500)
return u.get_id()
@app.route('/virustotal_report', methods=['POST'])
def vt():
data = json.loads(request.data.decode())
if not os.path.exists('virustotal.key'):
auth = read_auth('virustotal')
if not auth:
return None
key = auth[0]
data = json.loads(request.data.decode())
url = parser.get("VIRUSTOTAL", "url_report")
url_up = parser.get("VIRUSTOTAL", "url_upload")
key = open('virustotal.key', 'r').readline().strip()
query = data["query"]
u = q.enqueue_call(func=vt_query_url, args=(url, url_up, key, query,), result_ttl=500)
return u.get_id()
@app.route('/googlesafebrowsing', methods=['POST'])
def gsb():
data = json.loads(request.data.decode())
if not os.path.exists('googlesafebrowsing.key'):
auth = read_auth('googlesafebrowsing')
if not auth:
return None
key = auth[0]
data = json.loads(request.data.decode())
url = parser.get("GOOGLESAFEBROWSING", "url")
key = open('googlesafebrowsing.key', 'r').readline().strip()
url = url.format(key)
query = data["query"]
u = q.enqueue_call(func=gsb_query, args=(url, query,), result_ttl=500)
@ -213,11 +226,12 @@ def create_app(configfile=None):
@app.route('/urlquery', methods=['POST'])
def urlquery():
data = json.loads(request.data.decode())
if not os.path.exists('urlquery.key'):
auth = read_auth('urlquery')
if not auth:
return None
key = auth[0]
data = json.loads(request.data.decode())
url = parser.get("URLQUERY", "url")
key = open('urlquery.key', 'r').readline().strip()
query = data["query"]
u = q.enqueue_call(func=urlquery_query, args=(url, key, query,), result_ttl=500)
return u.get_id()
@ -249,19 +263,23 @@ def create_app(configfile=None):
@app.route('/eupi', methods=['POST'])
def eu():
data = json.loads(request.data.decode())
if not os.path.exists('eupi.key'):
auth = read_auth('eupi')
if not auth:
return None
key = auth[0]
data = json.loads(request.data.decode())
url = parser.get("EUPI", "url")
key = open('eupi.key', 'r').readline().strip()
query = data["query"]
u = q.enqueue_call(func=eupi, args=(url, key, query,), result_ttl=500)
return u.get_id()
@app.route('/pdnscircl', methods=['POST'])
def dnscircl():
auth = read_auth('pdnscircl')
if not auth:
return None
user, password = auth
url = parser.get("PDNS_CIRCL", "url")
user, password = open('pdnscircl.key', 'r').readlines()
data = json.loads(request.data.decode())
query = data["query"]
u = q.enqueue_call(func=pdnscircl, args=(url, user.strip(), password.strip(),
@ -277,8 +295,12 @@ def create_app(configfile=None):
@app.route('/psslcircl', methods=['POST'])
def sslcircl():
auth = read_auth('psslcircl')
if not auth:
return None
user, password = auth
url = parser.get("PDNS_CIRCL", "url")
url = parser.get("PSSL_CIRCL", "url")
user, password = open('psslcircl.key', 'r').readlines()
data = json.loads(request.data.decode())
query = data["query"]
u = q.enqueue_call(func=psslcircl, args=(url, user.strip(), password.strip(),

View File

Before

Width:  |  Height:  |  Size: 673 B

After

Width:  |  Height:  |  Size: 673 B

View File

@ -1,19 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import redis
from rq import Worker, Queue, Connection

# Queues this worker listens on.
listen = ['default']
# NOTE: `conn` is part of this module's interface — the web app imports it
# (`from worker import conn`); do not rename it.
redis_url = os.getenv('REDISTOGO_URL', 'redis://localhost:6334')
conn = redis.from_url(redis_url)
if __name__ == '__main__':
    with Connection(conn):
        worker = Worker(list(map(Queue, listen)))
        worker.work()