Fix limit in fetch_replies_service to not always limit by 5 (which always caused us to only do one page). Rename some variables to make purpose clearer. Return the array of all fetched uris instead of just the number we got

Signed-off-by: sneakers-the-rat <sneakers-the-rat@protonmail.com>
sneakers-the-rat 2024-10-12 21:26:12 -07:00 committed by Jonny Saunders
parent fe216c418d
commit 887bccd622
3 changed files with 19 additions and 15 deletions

View File

@ -10,7 +10,7 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
@allow_synchronous_requests = allow_synchronous_requests @allow_synchronous_requests = allow_synchronous_requests
@filter_by_host = false @filter_by_host = false
@items = collection_items(collection_or_uri) @items = collection_items(collection_or_uri, fetch_all: true)
@items = filtered_replies @items = filtered_replies
return if @items.nil? return if @items.nil?
@ -25,6 +25,8 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
return if @items.nil? return if @items.nil?
# find all statuses that we *shouldn't* update the replies for, and use that as a filter # find all statuses that we *shouldn't* update the replies for, and use that as a filter
# Typically we assume this is smaller than the replies we *should* fetch,
# so we minimize the number of uris we should load here.
uris = @items.map { |item| value_or_id(item) } uris = @items.map { |item| value_or_id(item) }
dont_update = Status.where(uri: uris).shouldnt_fetch_replies.pluck(:uri) dont_update = Status.where(uri: uris).shouldnt_fetch_replies.pluck(:uri)

View File

@ -5,6 +5,8 @@ class ActivityPub::FetchRepliesService < BaseService
# Limit of fetched replies # Limit of fetched replies
MAX_REPLIES = 5 MAX_REPLIES = 5
# Limit when fetching all (to prevent infinite fetch attack)
FETCH_ALL_MAX_REPLIES = 500
def call(parent_status, collection_or_uri, allow_synchronous_requests: true, request_id: nil, filter_by_host: true) def call(parent_status, collection_or_uri, allow_synchronous_requests: true, request_id: nil, filter_by_host: true)
@account = parent_status.account @account = parent_status.account
@ -21,7 +23,7 @@ class ActivityPub::FetchRepliesService < BaseService
private private
def collection_items(collection_or_uri) def collection_items(collection_or_uri, fetch_all: false)
collection = fetch_collection(collection_or_uri) collection = fetch_collection(collection_or_uri)
return unless collection.is_a?(Hash) return unless collection.is_a?(Hash)
@ -39,8 +41,8 @@ class ActivityPub::FetchRepliesService < BaseService
all_items.concat(as_array(items)) all_items.concat(as_array(items))
# Quit early if we are not fetching all replies # Quit early if we are not fetching all replies or we've reached the absolute max
break if all_items.size >= MAX_REPLIES break if (!fetch_all && all_items.size >= MAX_REPLIES) || (all_items.size >= FETCH_ALL_MAX_REPLIES)
collection = collection['next'].present? ? fetch_collection(collection['next']) : nil collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
end end

View File

@ -11,7 +11,7 @@ class ActivityPub::FetchAllRepliesWorker
sidekiq_options queue: 'pull', retry: 3 sidekiq_options queue: 'pull', retry: 3
# Global max replies to fetch per request # Global max replies to fetch per request (all replies, recursively)
MAX_REPLIES = 1000 MAX_REPLIES = 1000
def perform(parent_status_id, current_account_id = nil, options = {}) def perform(parent_status_id, current_account_id = nil, options = {})
@ -20,21 +20,21 @@ class ActivityPub::FetchAllRepliesWorker
@current_account = @current_account_id.nil? ? nil : Account.find(@current_account_id) @current_account = @current_account_id.nil? ? nil : Account.find(@current_account_id)
Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: Fetching all replies for status: #{@parent_status}" } Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: Fetching all replies for status: #{@parent_status}" }
all_replies = get_replies(@parent_status.uri, options) uris_to_fetch = get_replies(@parent_status.uri, options)
got_replies = all_replies.length fetched_uris = uris_to_fetch
until all_replies.empty? || got_replies >= MAX_REPLIES until uris_to_fetch.empty? || fetched_uris.length >= MAX_REPLIES
next_reply = all_replies.pop next_reply = uris_to_fetch.pop
next if next_reply.nil? next if next_reply.nil?
new_replies = get_replies(next_reply, options) new_reply_uris = get_replies(next_reply, options)
next if new_replies.nil? next if new_reply_uris.nil?
got_replies += new_replies.length uris_to_fetch.concat(new_reply_uris)
all_replies.concat(new_replies) fetched_uris.concat(new_reply_uris)
end end
Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{got_replies} replies" } Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{fetched_uris.length} replies" }
got_replies fetched_uris
end end
private private