new: Support for disabling JS during capture

Related: https://github.com/ail-project/lacus/issues/30
pull/955/head
Raphaël Vinot 2024-10-10 17:23:13 +02:00
parent 774a99deb1
commit 3a6e2004be
6 changed files with 53 additions and 36 deletions

View File

@ -715,6 +715,7 @@ class Lookyloo():
rendered_hostname_only=query.rendered_hostname_only,
with_favicon=query.with_favicon,
allow_tracking=query.allow_tracking,
java_script_enabled=query.java_script_enabled,
# force=query.force,
# recapture_interval=query.recapture_interval,
priority=priority

65
poetry.lock generated
View File

@ -2,24 +2,24 @@
[[package]]
name = "aiobotocore"
version = "2.15.1"
version = "2.15.2"
description = "Async client for aws services using botocore and aiohttp"
optional = false
python-versions = ">=3.8"
files = [
{file = "aiobotocore-2.15.1-py3-none-any.whl", hash = "sha256:0f738cde74108553b753b24655094289a3c7ea0f0f179ed1c0f8cea488999a35"},
{file = "aiobotocore-2.15.1.tar.gz", hash = "sha256:1f9f16eec2a3da32df162b5db12da779ec6d6c6311715c936cad511f436efa74"},
{file = "aiobotocore-2.15.2-py3-none-any.whl", hash = "sha256:d4d3128b4b558e2b4c369bfa963b022d7e87303adb82eec623cec8aa77ae578a"},
{file = "aiobotocore-2.15.2.tar.gz", hash = "sha256:9ac1cfcaccccc80602968174aa032bf978abe36bd4e55e6781d6500909af1375"},
]
[package.dependencies]
aiohttp = ">=3.9.2,<4.0.0"
aioitertools = ">=0.5.1,<1.0.0"
botocore = ">=1.35.16,<1.35.24"
botocore = ">=1.35.16,<1.35.37"
wrapt = ">=1.10.10,<2.0.0"
[package.extras]
awscli = ["awscli (>=1.34.16,<1.34.24)"]
boto3 = ["boto3 (>=1.35.16,<1.35.24)"]
awscli = ["awscli (>=1.34.16,<1.35.3)"]
boto3 = ["boto3 (>=1.35.16,<1.35.37)"]
[[package]]
name = "aiodns"
@ -385,13 +385,13 @@ WTForms = "*"
[[package]]
name = "botocore"
version = "1.35.23"
version = "1.35.36"
description = "Low-level, data-driven core of boto 3."
optional = false
python-versions = ">=3.8"
files = [
{file = "botocore-1.35.23-py3-none-any.whl", hash = "sha256:cab9ec4e0367b9f33f0bc02c5a29f587b0119ecffd6d125bacee085dcbc8817d"},
{file = "botocore-1.35.23.tar.gz", hash = "sha256:25b17a9ccba6ad32bb5bf7ba4f52656aa03c1cb29f6b4e438050ee4ad1967a3b"},
{file = "botocore-1.35.36-py3-none-any.whl", hash = "sha256:64241c778bf2dc863d93abab159e14024d97a926a5715056ef6411418cb9ead3"},
{file = "botocore-1.35.36.tar.gz", hash = "sha256:354ec1b766f0029b5d6ff0c45d1a0f9e5007b7d2f3ec89bcdd755b208c5bc797"},
]
[package.dependencies]
@ -403,7 +403,7 @@ urllib3 = [
]
[package.extras]
crt = ["awscrt (==0.21.5)"]
crt = ["awscrt (==0.22.0)"]
[[package]]
name = "brotli"
@ -1747,23 +1747,26 @@ referencing = ">=0.31.0"
[[package]]
name = "lacuscore"
version = "1.11.0"
version = "1.11.1"
description = "Core of Lacus, usable as a module"
optional = false
python-versions = "<4.0,>=3.8"
files = [
{file = "lacuscore-1.11.0-py3-none-any.whl", hash = "sha256:1950e127f772e19e9cead68a789533d1508c4a345b105e74b830267a7110eb6f"},
{file = "lacuscore-1.11.0.tar.gz", hash = "sha256:e9dbc065462be95843a2f697be14875ed3cc7601653d5fb2c700e5935c8f52ee"},
{file = "lacuscore-1.11.1-py3-none-any.whl", hash = "sha256:580fe74fb14172a87b168deb31c1dc85fc5b4b95e0a8289ce3052fdf8f7b0062"},
{file = "lacuscore-1.11.1.tar.gz", hash = "sha256:40917895db0cb4a46fcfd381ad374a21a6d31bbd85ba0b4e85c09467f0a2ff81"},
]
[package.dependencies]
async-timeout = {version = ">=4.0.3,<5.0.0", markers = "python_version < \"3.11\""}
defang = ">=0.5.3,<0.6.0"
dnspython = ">=2.6.1,<3.0.0"
dnspython = [
{version = "<2.7", markers = "python_version < \"3.9\""},
{version = ">=2.7,<3.0", markers = "python_version >= \"3.9\""},
]
eval-type-backport = {version = ">=0.2.0,<0.3.0", markers = "python_version < \"3.10\""}
playwrightcapture = {version = ">=1.26.0,<2.0.0", extras = ["recaptcha"]}
pydantic = ">=2.9.1,<3.0.0"
redis = {version = ">=5.0.8,<6.0.0", extras = ["hiredis"]}
playwrightcapture = {version = ">=1.26.1,<2.0.0", extras = ["recaptcha"]}
pydantic = ">=2.9.2,<3.0.0"
redis = {version = ">=5.1.1,<6.0.0", extras = ["hiredis"]}
requests = ">=2.32.3,<3.0.0"
ua-parser = ">=0.18.0,<0.19.0"
@ -2753,29 +2756,29 @@ test = ["pytest"]
[[package]]
name = "playwrightcapture"
version = "1.26.0"
version = "1.26.1"
description = "A simple library to capture websites using playwright"
optional = false
python-versions = "<4.0,>=3.8"
files = [
{file = "playwrightcapture-1.26.0-py3-none-any.whl", hash = "sha256:2694fe7d1fe6a5347de6395e9edb8a45c8e7c41adc30a1651355cba7ff1388e5"},
{file = "playwrightcapture-1.26.0.tar.gz", hash = "sha256:f149edbddab605d9c2659487c8a1d8dad388dc0f4919790b7ff99605524ee093"},
{file = "playwrightcapture-1.26.1-py3-none-any.whl", hash = "sha256:66cb8efa97b99f5aff25c2046a28a2dc4496c3b316d6ac4d3482868ede1e8114"},
{file = "playwrightcapture-1.26.1.tar.gz", hash = "sha256:20ba35fa2308f581d5ef8db92db9aef4520d9598a16050a8701808d2dd05907a"},
]
[package.dependencies]
aiohttp = {version = ">=3.10.5,<4.0.0", extras = ["speedups"]}
aiohttp = {version = ">=3.10.9,<4.0.0", extras = ["speedups"]}
aiohttp-socks = ">=0.9,<0.10"
async-timeout = {version = ">=4.0.3,<5.0.0", markers = "python_version < \"3.11\""}
beautifulsoup4 = {version = ">=4.12.3,<5.0.0", extras = ["charset-normalizer", "lxml"]}
dateparser = ">=1.2.0,<2.0.0"
playwright = ">=1.47.0,<2.0.0"
playwright-stealth = ">=1.0.6,<2.0.0"
puremagic = ">=1.27,<2.0"
puremagic = ">=1.28,<2.0"
pydub = {version = ">=0.25.1,<0.26.0", optional = true, markers = "extra == \"recaptcha\""}
pytz = {version = ">=2024.2,<2025.0", markers = "python_version < \"3.9\""}
setuptools = ">=75.1.0,<76.0.0"
SpeechRecognition = {version = ">=3.10.4,<4.0.0", optional = true, markers = "extra == \"recaptcha\""}
tzdata = ">=2024.1,<2025.0"
tzdata = ">=2024.2,<2025.0"
w3lib = ">=2.2.1,<3.0.0"
[package.extras]
@ -2944,13 +2947,13 @@ files = [
[[package]]
name = "publicsuffixlist"
version = "1.0.2.20241009"
version = "1.0.2.20241010"
description = "publicsuffixlist implement"
optional = false
python-versions = ">=3.5"
files = [
{file = "publicsuffixlist-1.0.2.20241009-py2.py3-none-any.whl", hash = "sha256:47115d3851f0b0f4a208ffcba3b6c9c72fc53be44a822fa0101268aa113552f9"},
{file = "publicsuffixlist-1.0.2.20241009.tar.gz", hash = "sha256:f2b5f3b93aa0f41a0513b05c53f339c6da2e542d08c9e2f3379a27de4f4e1585"},
{file = "publicsuffixlist-1.0.2.20241010-py2.py3-none-any.whl", hash = "sha256:f3c5ba727344e1956cc39d7c13c1aa13d5e16a374aa8ce100aa411d485ee6d6d"},
{file = "publicsuffixlist-1.0.2.20241010.tar.gz", hash = "sha256:a726d826bc3b6704995ce0ca144d87768060e18e1df39e6e57f2c645671c0df9"},
]
[package.extras]
@ -3314,20 +3317,20 @@ docs = ["Sphinx (>=5.3.0,<6.0.0)"]
[[package]]
name = "pylacus"
version = "1.10.0"
version = "1.11.1"
description = "Python CLI and module for lacus"
optional = false
python-versions = "<4.0,>=3.8"
files = [
{file = "pylacus-1.10.0-py3-none-any.whl", hash = "sha256:aba4b74d2f1246f36755c06b068912a2865bb4826b07c7ec1dc64f31126343db"},
{file = "pylacus-1.10.0.tar.gz", hash = "sha256:ec3c405e074b47c45587f2fe36afafc9951a93116c4dc6092706b473ca844f1a"},
{file = "pylacus-1.11.1-py3-none-any.whl", hash = "sha256:136dfe01fd83fb4c3a68414db1eb87ac90d77bf833ee297e7567221c350a9108"},
{file = "pylacus-1.11.1.tar.gz", hash = "sha256:4abfef990b492c027cc71173d171a76a2308539b55458e254b1f1368ae151162"},
]
[package.dependencies]
requests = ">=2.32.3,<3.0.0"
[package.extras]
docs = ["Sphinx (<7.2)", "Sphinx (>=7.2,<8.0)"]
docs = ["Sphinx (<7.2)", "Sphinx (>=7.2,<8.0)", "Sphinx (>=8,<9)"]
[[package]]
name = "pylookyloo"
@ -4683,4 +4686,4 @@ type = ["pytest-mypy"]
[metadata]
lock-version = "2.0"
python-versions = ">=3.8.1,<3.13"
content-hash = "e527f01777a61208ff33981d7d317d5c3c2292fd2690e662abca2c6ceb12e07d"
content-hash = "c84695c8ec738d2984f09f6b99f6f3b69ca40b78f0ccba59e373acb44096ae41"

View File

@ -71,10 +71,10 @@ passivetotal = "^2.5.9"
werkzeug = "^3.0.4"
filetype = "^1.2.0"
pypandora = "^1.9.0"
lacuscore = "^1.11.0"
pylacus = "^1.10.0"
lacuscore = "^1.11.1"
pylacus = "^1.11.1"
pyipasnhistory = "^2.1.2"
publicsuffixlist = "^1.0.2.20241009"
publicsuffixlist = "^1.0.2.20241010"
pyfaup = "^1.2"
chardet = "^5.2.0"
pysecuritytxt = "^1.3.2"

View File

@ -1632,7 +1632,7 @@ def capture_web() -> str | Response | WerkzeugResponse:
capture_query['listing'] = True if request.form.get('listing') else False
capture_query['allow_tracking'] = True if request.form.get('allow_tracking') else False
capture_query['java_script_enabled'] = True if request.form.get('java_script_enabled') else False
if request.form.get('referer'):
capture_query['referer'] = request.form['referer']

View File

@ -536,6 +536,7 @@ submit_fields_post = api.model('SubmitFieldsPost', {
'document_name': fields.String(description="The name of the document.", example=''),
'listing': fields.Integer(description="Display the capture on the index", min=0, max=1, example=1),
'allow_tracking': fields.Integer(description="Attempt to let the website violate your privacy", min=0, max=1, example=0),
'java_script_enabled': fields.Integer(description="Enable/Disable running JavaScript when rendering the page", min=0, max=1, example=1),
'user_agent': fields.String(description="User agent to use for the capture", example=''),
'browser_name': fields.String(description="Use this browser. Must be chromium, firefox or webkit.", example=''),
'device_name': fields.String(description="Use the pre-configured settings for this device. Get a list from /json/devices.", example=''),
@ -553,6 +554,7 @@ class SubmitCapture(Resource): # type: ignore[misc]
@api.param('url', 'The URL to capture', required=True) # type: ignore[misc]
@api.param('listing', 'Display the capture on the index', default=1) # type: ignore[misc]
@api.param('allow_tracking', 'Attempt to let the website violate your privacy', default=1) # type: ignore[misc]
@api.param('java_script_enabled', 'Enable/Disable running JavaScript when rendering the page', default=1) # type: ignore[misc]
@api.param('user_agent', 'User agent to use for the capture') # type: ignore[misc]
@api.param('browser_name', 'Use this browser. Must be chromium, firefox or webkit.') # type: ignore[misc]
@api.param('device_name', 'Use the pre-configured settings for this device') # type: ignore[misc]
@ -571,7 +573,8 @@ class SubmitCapture(Resource): # type: ignore[misc]
to_query: dict[str, Any] = {
'url': request.args['url'],
'listing': False if 'listing' in request.args and request.args['listing'] in [0, '0'] else True,
'allow_tracking': False if 'allow_tracking' in request.args and request.args['allow_tracking'] in [0, '0'] else True
'allow_tracking': False if 'allow_tracking' in request.args and request.args['allow_tracking'] in [0, '0'] else True,
'java_script_enabled': False if 'java_script_enabled' in request.args and request.args['java_script_enabled'] in [0, '0'] else True
}
if request.args.get('user_agent'):
to_query['user_agent'] = request.args['user_agent']

View File

@ -288,6 +288,16 @@
</div>
</div>
<div class="row mb-3">
<label for="java_script_enabled" class="col-sm-2 col-form-check-label">Enable JavaScript:</label>
<div class="col-sm-10">
<div class="form-check">
<input class="form-check-input" type="checkbox" id="java_script_enabled" name="java_script_enabled" aria-describedby="java_script_enabled_help"
{% if predefined_settings.get('java_script_enabled', true) is true %}checked="checked"{% endif %}>
<div id="java_script_enabled_help" class="form-text">If disabled, the browser will not run any JavaScript when rendering the page.</div>
</div>
</div>
</div>
<!-- Referer -->
<div class="row mb-3">