From 680735b3d3dbf2d558b8b98c230d7201f72780b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Vinot?= Date: Tue, 19 Mar 2024 19:21:41 +0100 Subject: [PATCH] new: optionally allow website to violate your privacy (more) --- lookyloo/lookyloo.py | 1 + poetry.lock | 65 +++++++++++++++--------------- pyproject.toml | 8 ++-- tools/remove_capture.py | 2 +- website/web/__init__.py | 1 + website/web/genericapi.py | 6 ++- website/web/templates/capture.html | 10 +++++ 7 files changed, 55 insertions(+), 38 deletions(-) diff --git a/lookyloo/lookyloo.py b/lookyloo/lookyloo.py index 8c9977b..162373d 100644 --- a/lookyloo/lookyloo.py +++ b/lookyloo/lookyloo.py @@ -667,6 +667,7 @@ class Lookyloo(): color_scheme=query.get('color_scheme', None), rendered_hostname_only=query.get('rendered_hostname_only', True), with_favicon=query.get('with_favicon', True), + allow_tracking=query.get('allow_tracking', True), # force=query.get('force', False), # recapture_interval=query.get('recapture_interval', 300), priority=priority diff --git a/poetry.lock b/poetry.lock index 2687e80..311807f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -926,13 +926,13 @@ files = [ [[package]] name = "fsspec" -version = "2024.3.0" +version = "2024.3.1" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2024.3.0-py3-none-any.whl", hash = "sha256:779001bd0122c9c4975cf03827d5e86c3afb914a3ae27040f15d341ab506a693"}, - {file = "fsspec-2024.3.0.tar.gz", hash = "sha256:f13a130c0ed07e15c4e1aeb0472a823e9c426b0b5792a1f40d902b0a71972d43"}, + {file = "fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512"}, + {file = "fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9"}, ] [package.extras] @@ -1237,13 +1237,13 @@ testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs [[package]] name = "importlib-resources" -version = "6.3.1" +version = "6.3.2" description = "Read resources from Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "importlib_resources-6.3.1-py3-none-any.whl", hash = "sha256:4811639ca7fa830abdb8e9ca0a104dc6ad13de691d9fe0d3173a71304f068159"}, - {file = "importlib_resources-6.3.1.tar.gz", hash = "sha256:29a3d16556e330c3c8fb8202118c5ff41241cc34cbfb25989bbad226d99b7995"}, + {file = "importlib_resources-6.3.2-py3-none-any.whl", hash = "sha256:f41f4098b16cd140a97d256137cfd943d958219007990b2afb00439fc623f580"}, + {file = "importlib_resources-6.3.2.tar.gz", hash = "sha256:963eb79649252b0160c1afcfe5a1d3fe3ad66edd0a8b114beacffb70c0674223"}, ] [package.dependencies] @@ -1463,20 +1463,20 @@ referencing = ">=0.31.0" [[package]] name = "lacuscore" -version = "1.8.9" +version = "1.8.10" description = "Core of Lacus, usable as a module" optional = false -python-versions = ">=3.8,<4.0" +python-versions = "<4.0,>=3.8" files = [ - {file = "lacuscore-1.8.9-py3-none-any.whl", hash = "sha256:8088a3ad164d8b6707889a8ee5ff8fded82a3ec193720f434a72cb98d998aba8"}, - {file = "lacuscore-1.8.9.tar.gz", hash = "sha256:26c9a392caf57091bf0a9c866f8b617d3f65190592b9c8fe912206922cd8e530"}, + {file = "lacuscore-1.8.10-py3-none-any.whl", hash = "sha256:95a62f127565bdc3c61ff893c930fe7ed8b34604e95b023e82fa10f3233ac49d"}, + {file = "lacuscore-1.8.10.tar.gz", hash = "sha256:5408333a851fd01d06f05bf28854a1f530b96a513c2040b6c70f39f6a80e3059"}, ] [package.dependencies] async-timeout = {version = ">=4.0.3,<5.0.0", markers = "python_version < \"3.11\""} defang = ">=0.5.3,<0.6.0" dnspython = ">=2.6.1,<3.0.0" -playwrightcapture = {version = ">=1.23.10,<2.0.0", extras = ["recaptcha"]} +playwrightcapture = {version = ">=1.23.11,<2.0.0", extras = ["recaptcha"]} redis = {version = ">=5.0.3,<6.0.0", extras = ["hiredis"]} requests = ">=2.31.0,<3.0.0" ua-parser = ">=0.18.0,<0.19.0" @@ -2286,13 +2286,13 @@ test = ["pytest"] [[package]] name = "playwrightcapture" -version = "1.23.10" +version = "1.23.11" description = "A simple library to capture websites using playwright" optional = false -python-versions = ">=3.8,<4.0" +python-versions = "<4.0,>=3.8" files = [ - {file = "playwrightcapture-1.23.10-py3-none-any.whl", hash = "sha256:14f02be5266b0be76b64e3ef484b05d8eec05d6d46ca8e2d1aed9f4067baa959"}, - {file = "playwrightcapture-1.23.10.tar.gz", hash = "sha256:8c30130e96fafdf5db20987b255b7fec2fcf9b11dd66fd253958496fa8718a35"}, + {file = "playwrightcapture-1.23.11-py3-none-any.whl", hash = "sha256:5bcf79460d86671fe38152bc6e456fe33d7b7b9ed473b73ad560faed9888660e"}, + {file = "playwrightcapture-1.23.11.tar.gz", hash = "sha256:959bc7b7fbb912b0fe12c9c3cd0a32b206188559defa60bc8140ac11fdf6cdb5"}, ] [package.dependencies] @@ -2304,7 +2304,7 @@ puremagic = ">=1.21,<2.0" pydub = {version = ">=0.25.1,<0.26.0", optional = true, markers = "extra == \"recaptcha\""} pytz = {version = ">=2024.1,<2025.0", markers = "python_version < \"3.9\""} requests = {version = ">=2.31.0,<3.0.0", extras = ["socks"], optional = true, markers = "extra == \"recaptcha\""} -setuptools = ">=69.1.1,<70.0.0" +setuptools = ">=69.2.0,<70.0.0" SpeechRecognition = {version = ">=3.10.1,<4.0.0", optional = true, markers = "extra == \"recaptcha\""} tzdata = ">=2024.1,<2025.0" w3lib = ">=2.1.2,<3.0.0" @@ -2520,13 +2520,13 @@ docs = ["Sphinx (>=5.3.0,<6.0.0)"] [[package]] name = "pylacus" -version = "1.8.1" +version = "1.8.2" description = "Python CLI and module for lacus" optional = false -python-versions = ">=3.8,<4.0" +python-versions = "<4.0,>=3.8" files = [ - {file = "pylacus-1.8.1-py3-none-any.whl", hash = "sha256:15e85357c7f0af0cf25ced4fd8690d8f6dac64a02da87a4d22a6059dc8291a06"}, - {file = "pylacus-1.8.1.tar.gz", hash = "sha256:e10929608fad539196413e87f0b2b16c6beb5e04f0a1051cd0cb3feb255682fc"}, + {file = "pylacus-1.8.2-py3-none-any.whl", hash = "sha256:7451dd6892e252d4caf00406cb828c2e7290226d9ca345516ab9c3e36610fe06"}, + {file = "pylacus-1.8.2.tar.gz", hash = "sha256:fc96020cc833e665484441de3c65e84e68f40bd8c75485d4dc671806e7da7974"}, ] [package.dependencies] @@ -2602,13 +2602,13 @@ virustotal = ["validators (>=0.22.0,<0.23.0)"] [[package]] name = "pypandora" -version = "1.8.0" +version = "1.8.1" description = "Python CLI and module for pandora" optional = false python-versions = ">=3.8,<4.0" files = [ - {file = "pypandora-1.8.0-py3-none-any.whl", hash = "sha256:42956eab2f789efcaba3631a436e2325295d9dcbca1c4e5ff32dd3607f640781"}, - {file = "pypandora-1.8.0.tar.gz", hash = "sha256:e8d110588b9a1cec00b362140b10a60a5d152406f1e4ac7eb89c9f0d42826d84"}, + {file = "pypandora-1.8.1-py3-none-any.whl", hash = "sha256:79f703726070672071d0d5bc40751f3daff7959cbb4497324969d06375f46aa7"}, + {file = "pypandora-1.8.1.tar.gz", hash = "sha256:8ffa3822fc521823c76828fd220fed569c02e164ebd66b751fd8657da6316618"}, ] [package.dependencies] @@ -3077,19 +3077,19 @@ files = [ [[package]] name = "s3fs" -version = "2024.3.0" +version = "2024.3.1" description = "Convenient Filesystem interface over S3" optional = false python-versions = ">= 3.8" files = [ - {file = "s3fs-2024.3.0-py3-none-any.whl", hash = "sha256:def23c00eb89e3a49bbe7211eb617204c99959cadf9a796faa512bafa861c115"}, - {file = "s3fs-2024.3.0.tar.gz", hash = "sha256:a663235c8adb7e6bbc45a75292c9395b0fbd580bb394adc4ced9e8ad4bd864da"}, + {file = "s3fs-2024.3.1-py3-none-any.whl", hash = "sha256:f4566a5446c473740d272ec08e0b4aae8db1aa05f662c42ff0aa2c89bb5060ea"}, + {file = "s3fs-2024.3.1.tar.gz", hash = "sha256:1b8bc8dbd65e7b60f5487378f6eeffe1de59aa72caa9efca6dad6ab877405487"}, ] [package.dependencies] aiobotocore = ">=2.5.4,<3.0.0" aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" -fsspec = "2024.3.0" +fsspec = "2024.3.1" [package.extras] awscli = ["aiobotocore[awscli] (>=2.5.4,<3.0.0)"] @@ -3172,13 +3172,13 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "tldextract" -version = "5.1.1" +version = "5.1.2" description = "Accurately separates a URL's subdomain, domain, and public suffix, using the Public Suffix List (PSL). By default, this includes the public ICANN TLDs and their exceptions. You can optionally support the Public Suffix List's private domains as well." optional = false python-versions = ">=3.8" files = [ - {file = "tldextract-5.1.1-py3-none-any.whl", hash = "sha256:b9c4510a8766d377033b6bace7e9f1f17a891383ced3c5d50c150f181e9e1cc2"}, - {file = "tldextract-5.1.1.tar.gz", hash = "sha256:9b6dbf803cb5636397f0203d48541c0da8ba53babaf0e8a6feda2d88746813d4"}, + {file = "tldextract-5.1.2-py3-none-any.whl", hash = "sha256:4dfc4c277b6b97fa053899fcdb892d2dc27295851ab5fac4e07797b6a21b2e46"}, + {file = "tldextract-5.1.2.tar.gz", hash = "sha256:c9e17f756f05afb5abac04fe8f766e7e70f9fe387adb1859f0f52408ee060200"}, ] [package.dependencies] @@ -3188,7 +3188,8 @@ requests = ">=2.1.0" requests-file = ">=1.4" [package.extras] -testing = ["black", "mypy", "pytest", "pytest-gitignore", "pytest-mock", "responses", "ruff", "tox", "types-filelock", "types-requests"] +release = ["build", "twine"] +testing = ["black", "mypy", "pytest", "pytest-gitignore", "pytest-mock", "responses", "ruff", "syrupy", "tox", "types-filelock", "types-requests"] [[package]] name = "tomli" @@ -3735,4 +3736,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<3.13" -content-hash = "c6d8bf2b0b4bf1d6bb93eb235ecf658e42166942983eb3b1369c32e65269823d" +content-hash = "04f4fb6039a06bfcdd9fa386612dc5f7f47033f03335ee8541a46cdbc77f9a31" diff --git a/pyproject.toml b/pyproject.toml index b5f7e6d..902523b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,9 +66,9 @@ har2tree = "^1.23.3" passivetotal = "^2.5.9" werkzeug = "^3.0.1" filetype = "^1.2.0" -pypandora = "^1.8.0" -lacuscore = "^1.8.9" -pylacus = "^1.8.1" +pypandora = "^1.8.1" +lacuscore = "^1.8.10" +pylacus = "^1.8.2" pyipasnhistory = "^2.1.2" publicsuffixlist = "^0.10.0.20240205" pyfaup = "^1.2" @@ -76,7 +76,7 @@ chardet = "^5.2.0" pysecuritytxt = "^1.3.0" pylookyloomonitoring = "^1.1.3" pytz = {"version" = "^2024.1", python = "<3.9"} -s3fs = "^2024.3.0" +s3fs = "^2024.3.1" urllib3 = [ {version = "<2", python = "<3.10"}, {version = "^2.0.7", python = ">=3.10"} diff --git a/tools/remove_capture.py b/tools/remove_capture.py index b45c440..49ac984 100644 --- a/tools/remove_capture.py +++ b/tools/remove_capture.py @@ -4,7 +4,7 @@ import argparse import shutil from lookyloo import Lookyloo -from lookyloo.helpers import get_homedir +from lookyloo.default import get_homedir removed_captures_dir = get_homedir() / 'removed_captures' diff --git a/website/web/__init__.py b/website/web/__init__.py index 10b989c..f746943 100644 --- a/website/web/__init__.py +++ b/website/web/__init__.py @@ -1530,6 +1530,7 @@ def capture_web() -> str | Response | WerkzeugResponse: capture_query['browser'] = request.form['browser'] capture_query['listing'] = True if request.form.get('listing') else False + capture_query['allow_tracking'] = True if request.form.get('allow_tracking') else False if request.form.get('referer'): capture_query['referer'] = request.form['referer'] diff --git a/website/web/genericapi.py b/website/web/genericapi.py index e7a5deb..c3ec35d 100644 --- a/website/web/genericapi.py +++ b/website/web/genericapi.py @@ -385,6 +385,7 @@ submit_fields_post = api.model('SubmitFieldsPost', { 'document': fields.String(description="A base64 encoded document, it can be anything a browser can display.", example=''), 'document_name': fields.String(description="The name of the document.", example=''), 'listing': fields.Integer(description="Display the capture on the index", min=0, max=1, example=1), + 'allow_tracking': fields.Integer(description="Attempt to let the website violate your privacy", min=0, max=1, example=0), 'user_agent': fields.String(description="User agent to use for the capture", example=''), 'browser_name': fields.String(description="Use this browser. Must be chromium, firefox or webkit.", example=''), 'device_name': fields.String(description="Use the pre-configured settings for this device. Get a list from /json/devices.", example=''), @@ -401,6 +402,7 @@ class SubmitCapture(Resource): # type: ignore[misc] @api.param('url', 'The URL to capture', required=True) # type: ignore[misc] @api.param('listing', 'Display the capture on the index', default=1) # type: ignore[misc] + @api.param('allow_tracking', 'Attempt to let the website violate your privacy', default=1) # type: ignore[misc] @api.param('user_agent', 'User agent to use for the capture') # type: ignore[misc] @api.param('browser_name', 'Use this browser. Must be chromium, firefox or webkit.') # type: ignore[misc] @api.param('device_name', 'Use the pre-configured settings for this device') # type: ignore[misc] @@ -418,7 +420,9 @@ class SubmitCapture(Resource): # type: ignore[misc] to_query: CaptureSettings = { 'url': request.args['url'], - 'listing': False if 'listing' in request.args and request.args['listing'] in [0, '0'] else True} + 'listing': False if 'listing' in request.args and request.args['listing'] in [0, '0'] else True, + 'allow_tracking': False if 'allow_tracking' in request.args and request.args['allow_tracking'] in [0, '0'] else True + } if request.args.get('user_agent'): to_query['user_agent'] = request.args['user_agent'] if request.args.get('browser_name'): diff --git a/website/web/templates/capture.html b/website/web/templates/capture.html index 31ba52b..ecf9f36 100644 --- a/website/web/templates/capture.html +++ b/website/web/templates/capture.html @@ -223,6 +223,16 @@
+
+ +
+
+ +
We'll attempt to click on the button allowing the website captured to violate your privacy.
+
+
+
+