From a6693535de55dae8c80e73acd8408d47a5c862e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Fri, 5 Jul 2019 16:27:23 +0200 Subject: [PATCH] fix: Allow to disable scraping private IPs. --- lookyloo/lookyloo.py | 17 ++++++++++++++++- website/web/__init__.py | 10 +++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 8770176..73af8b9 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -13,6 +13,10 @@ import tempfile import pathlib import time +import ipaddress +import socket +from urllib.parse import urlsplit + from io import BytesIO import base64 from uuid import uuid4 @@ -29,11 +33,12 @@ import logging class Lookyloo(): - def __init__(self, splash_url: str='http://127.0.0.1:8050', loglevel: int=logging.DEBUG): + def __init__(self, splash_url: str='http://127.0.0.1:8050', loglevel: int=logging.DEBUG, only_global_lookups=False): self.__init_logger(loglevel) self.redis = Redis(unix_socket_path=get_socket_path('cache'), decode_responses=True) self.scrape_dir = get_homedir() / 'scraped' self.splash_url = splash_url + self.only_global_lookups = only_global_lookups if not self.scrape_dir.exists(): self.scrape_dir.mkdir(parents=True, exist_ok=True) @@ -159,6 +164,16 @@ class Lookyloo(): os: str=None, browser: str=None): if not url.startswith('http'): url = f'http://{url}' + if self.only_global_lookups: + splitted_url = urlsplit(url) + if splitted_url.netloc: + if ':' in splitted_url.netloc: + ip = socket.gethostbyname(splitted_url.netloc.split(':')[0]) + else: + ip = socket.gethostbyname(splitted_url.netloc) + if not ipaddress.ip_address(ip).is_global: + return False + items = crawl(self.splash_url, url, depth, user_agent=user_agent, log_enabled=True, log_level='INFO') if not items: # broken diff --git a/website/web/__init__.py b/website/web/__init__.py index e210a52..305cd59 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -6,6 +6,7 @@ import pickle from zipfile import ZipFile, ZIP_DEFLATED from io import BytesIO import os +import logging from flask import Flask, render_template, request, session, send_file, redirect, url_for, Response from flask_bootstrap import Bootstrap @@ -31,7 +32,14 @@ app.config['BOOTSTRAP_SERVE_LOCAL'] = True app.config['SESSION_COOKIE_NAME'] = 'lookyloo' app.debug = False -lookyloo = Lookyloo() +# API entry point for splash +splash_url = 'http://127.0.0.1:8050' +# Splash log level +loglevel = logging.DEBUG +# Set it to True if your instance is publicly available so users aren't able to scan your internal network +only_global_lookups = False + +lookyloo = Lookyloo(splash_url=splash_url, loglevel=loglevel, only_global_lookups=only_global_lookups) # keep