Fix limit in fetch_replies_service to not always limit by 5 (which always caused us to only do one page). Rename some variables to make purpose clearer. Return the array of all fetched uris instead of just the number we got

Signed-off-by: sneakers-the-rat <sneakers-the-rat@protonmail.com>
sneakers-the-rat 2024-10-12 21:26:12 -07:00 committed by Jonny Saunders
parent fe216c418d
commit 887bccd622
3 changed files with 19 additions and 15 deletions

View File

@ -10,7 +10,7 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
@allow_synchronous_requests = allow_synchronous_requests
@filter_by_host = false
@items = collection_items(collection_or_uri)
@items = collection_items(collection_or_uri, fetch_all: true)
@items = filtered_replies
return if @items.nil?
@ -25,6 +25,8 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
return if @items.nil?
# find all statuses that we *shouldn't* update the replies for, and use that as a filter
# Typically we assume this is smaller than the replies we *should* fetch,
# so we minimize the number of uris we should load here.
uris = @items.map { |item| value_or_id(item) }
dont_update = Status.where(uri: uris).shouldnt_fetch_replies.pluck(:uri)

View File

@ -5,6 +5,8 @@ class ActivityPub::FetchRepliesService < BaseService
# Limit of fetched replies
MAX_REPLIES = 5
# Limit when fetching all (to prevent infinite fetch attack)
FETCH_ALL_MAX_REPLIES = 500
def call(parent_status, collection_or_uri, allow_synchronous_requests: true, request_id: nil, filter_by_host: true)
@account = parent_status.account
@ -21,7 +23,7 @@ class ActivityPub::FetchRepliesService < BaseService
private
def collection_items(collection_or_uri)
def collection_items(collection_or_uri, fetch_all: false)
collection = fetch_collection(collection_or_uri)
return unless collection.is_a?(Hash)
@ -39,8 +41,8 @@ class ActivityPub::FetchRepliesService < BaseService
all_items.concat(as_array(items))
# Quit early if we are not fetching all replies
break if all_items.size >= MAX_REPLIES
# Quit early if we are not fetching all replies or we've reached the absolute max
break if (!fetch_all && all_items.size >= MAX_REPLIES) || (all_items.size >= FETCH_ALL_MAX_REPLIES)
collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
end

View File

@ -11,7 +11,7 @@ class ActivityPub::FetchAllRepliesWorker
sidekiq_options queue: 'pull', retry: 3
# Global max replies to fetch per request
# Global max replies to fetch per request (all replies, recursively)
MAX_REPLIES = 1000
def perform(parent_status_id, current_account_id = nil, options = {})
@ -20,21 +20,21 @@ class ActivityPub::FetchAllRepliesWorker
@current_account = @current_account_id.nil? ? nil : Account.find(@current_account_id)
Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: Fetching all replies for status: #{@parent_status}" }
all_replies = get_replies(@parent_status.uri, options)
got_replies = all_replies.length
until all_replies.empty? || got_replies >= MAX_REPLIES
next_reply = all_replies.pop
uris_to_fetch = get_replies(@parent_status.uri, options)
fetched_uris = uris_to_fetch
until uris_to_fetch.empty? || fetched_uris.length >= MAX_REPLIES
next_reply = uris_to_fetch.pop
next if next_reply.nil?
new_replies = get_replies(next_reply, options)
next if new_replies.nil?
new_reply_uris = get_replies(next_reply, options)
next if new_reply_uris.nil?
got_replies += new_replies.length
all_replies.concat(new_replies)
uris_to_fetch.concat(new_reply_uris)
fetched_uris.concat(new_reply_uris)
end
Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{got_replies} replies" }
got_replies
Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{fetched_uris.length} replies" }
fetched_uris
end
private