From 0c689b9d014324aba5b8751dacec4c0fc20b2038 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Wed, 11 Jan 2023 21:59:13 +0100 Subject: [PATCH] fix: allow verification when page size exceeds 1MB (using HTML5 parser) (#22879) * fix: allow verification when page size exceeds 1MB Truncates the page after 1MB instead Closes #15316 * switch to HTML5 parser, fix rubocop errors * undo rubocop fixes Co-authored-by: Chris Zubak-Skees --- app/lib/request.rb | 14 ++++++++---- app/services/verify_link_service.rb | 2 +- spec/lib/request_spec.rb | 5 +++++ spec/services/verify_link_service_spec.rb | 27 +++++++++++++++++++++++ 4 files changed, 43 insertions(+), 5 deletions(-) diff --git a/app/lib/request.rb b/app/lib/request.rb index b2819c8edc..0508169dcb 100644 --- a/app/lib/request.rb +++ b/app/lib/request.rb @@ -154,9 +154,7 @@ class Request end module ClientLimit - def body_with_limit(limit = 1.megabyte) - raise Mastodon::LengthValidationError if content_length.present? && content_length > limit - + def truncated_body(limit = 1.megabyte) if charset.nil? encoding = Encoding::BINARY else @@ -173,11 +171,19 @@ class Request contents << chunk chunk.clear - raise Mastodon::LengthValidationError if contents.bytesize > limit + break if contents.bytesize > limit end contents end + + def body_with_limit(limit = 1.megabyte) + raise Mastodon::LengthValidationError if content_length.present? && content_length > limit + + contents = truncated_body(limit) + raise Mastodon::LengthValidationError if contents.bytesize > limit + contents + end end if ::HTTP::Response.methods.include?(:body_with_limit) && !Rails.env.production? diff --git a/app/services/verify_link_service.rb b/app/services/verify_link_service.rb index 7496fe2d51..d049b52d16 100644 --- a/app/services/verify_link_service.rb +++ b/app/services/verify_link_service.rb @@ -26,7 +26,7 @@ class VerifyLinkService < BaseService def link_back_present? return false if @body.blank? - links = Nokogiri::HTML(@body).xpath('//a[contains(concat(" ", normalize-space(@rel), " "), " me ")]|//link[contains(concat(" ", normalize-space(@rel), " "), " me ")]') + links = Nokogiri::HTML5(@body).xpath('//a[contains(concat(" ", normalize-space(@rel), " "), " me ")]|//link[contains(concat(" ", normalize-space(@rel), " "), " me ")]') if links.any? { |link| link['href']&.downcase == @link_back.downcase } true diff --git a/spec/lib/request_spec.rb b/spec/lib/request_spec.rb index 5eccf32014..8539944e28 100644 --- a/spec/lib/request_spec.rb +++ b/spec/lib/request_spec.rb @@ -120,6 +120,11 @@ describe Request do expect { subject.perform { |response| response.body_with_limit } }.to raise_error Mastodon::LengthValidationError end + it 'truncates large monolithic body' do + stub_request(:any, 'http://example.com').to_return(body: SecureRandom.random_bytes(2.megabytes), headers: { 'Content-Length' => 2.megabytes }) + expect(subject.perform { |response| response.truncated_body.bytesize }).to be < 2.megabytes + end + it 'uses binary encoding if Content-Type does not tell encoding' do stub_request(:any, 'http://example.com').to_return(body: '', headers: { 'Content-Type' => 'text/html' }) expect(subject.perform { |response| response.body_with_limit.encoding }).to eq Encoding::BINARY diff --git a/spec/services/verify_link_service_spec.rb b/spec/services/verify_link_service_spec.rb index 52ba454cce..391560f1ce 100644 --- a/spec/services/verify_link_service_spec.rb +++ b/spec/services/verify_link_service_spec.rb @@ -73,6 +73,33 @@ RSpec.describe VerifyLinkService, type: :service do end end + context 'when a document is truncated but the link back is valid' do + let(:html) do + " + + + + +