mirror of https://github.com/tootsuite/mastodon
add maximum page limit
parent
d6e7e88afb
commit
0c443e87e6
|
@ -104,3 +104,6 @@ FETCH_REPLIES_MAX_GLOBAL=1000
|
|||
|
||||
# Max number of replies to fetch - for a single post
|
||||
FETCH_REPLIES_MAX_SINGLE=500
|
||||
|
||||
# Max number of replies Collection pages to fetch - total
|
||||
FETCH_REPLIES_MAX_PAGES=500
|
||||
|
|
|
@ -6,18 +6,18 @@ class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
|
|||
# Limit of replies to fetch per status
|
||||
MAX_REPLIES = (ENV['FETCH_REPLIES_MAX_SINGLE'] || 500).to_i
|
||||
|
||||
def call(collection_or_uri, allow_synchronous_requests: true, request_id: nil)
|
||||
def call(collection_or_uri, max_pages = nil, allow_synchronous_requests: true, request_id: nil)
|
||||
@allow_synchronous_requests = allow_synchronous_requests
|
||||
@filter_by_host = false
|
||||
@collection_or_uri = collection_or_uri
|
||||
|
||||
@items = collection_items(collection_or_uri)
|
||||
@items, n_pages = collection_items(collection_or_uri, max_pages)
|
||||
@items = filtered_replies
|
||||
return if @items.nil?
|
||||
|
||||
FetchReplyWorker.push_bulk(@items) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }
|
||||
|
||||
@items
|
||||
[@items, n_pages]
|
||||
end
|
||||
|
||||
private
|
||||
|
|
|
@ -11,7 +11,7 @@ class ActivityPub::FetchRepliesService < BaseService
|
|||
@allow_synchronous_requests = allow_synchronous_requests
|
||||
@filter_by_host = filter_by_host
|
||||
|
||||
@items = collection_items(collection_or_uri)
|
||||
@items, = collection_items(collection_or_uri)
|
||||
return if @items.nil?
|
||||
|
||||
FetchReplyWorker.push_bulk(filtered_replies) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }
|
||||
|
@ -21,7 +21,7 @@ class ActivityPub::FetchRepliesService < BaseService
|
|||
|
||||
private
|
||||
|
||||
def collection_items(collection_or_uri)
|
||||
def collection_items(collection_or_uri, max_pages = nil)
|
||||
collection = fetch_collection(collection_or_uri)
|
||||
return unless collection.is_a?(Hash)
|
||||
|
||||
|
@ -29,6 +29,7 @@ class ActivityPub::FetchRepliesService < BaseService
|
|||
return unless collection.is_a?(Hash)
|
||||
|
||||
all_items = []
|
||||
n_pages = 1
|
||||
while collection.is_a?(Hash)
|
||||
items = case collection['type']
|
||||
when 'Collection', 'CollectionPage'
|
||||
|
@ -39,12 +40,14 @@ class ActivityPub::FetchRepliesService < BaseService
|
|||
|
||||
all_items.concat(as_array(items))
|
||||
|
||||
break if all_items.size > MAX_REPLIES
|
||||
break if all_items.size >= MAX_REPLIES
|
||||
break if !max_pages.nil? && n_pages >= max_pages
|
||||
|
||||
collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
|
||||
n_pages += 1
|
||||
end
|
||||
|
||||
all_items
|
||||
[all_items, n_pages]
|
||||
end
|
||||
|
||||
def fetch_collection(collection_or_uri)
|
||||
|
|
|
@ -13,28 +13,30 @@ class ActivityPub::FetchAllRepliesWorker
|
|||
|
||||
# Global max replies to fetch per request (all replies, recursively)
|
||||
MAX_REPLIES = (ENV['FETCH_REPLIES_MAX_GLOBAL'] || 1000).to_i
|
||||
MAX_PAGES = (ENV['FETCH_REPLIES_MAX_PAGES'] || 500).to_i
|
||||
|
||||
def perform(parent_status_id, options = {})
|
||||
@parent_status = Status.find(parent_status_id)
|
||||
Rails.logger.debug { "FetchAllRepliesWorker - #{@parent_status.uri}: Fetching all replies for status: #{@parent_status}" }
|
||||
|
||||
uris_to_fetch = get_replies(@parent_status.uri, options)
|
||||
uris_to_fetch, n_pages = get_replies(@parent_status.uri, MAX_PAGES, options)
|
||||
return if uris_to_fetch.nil?
|
||||
|
||||
@parent_status.touch(:fetched_replies_at)
|
||||
fetched_uris = uris_to_fetch.clone.to_set
|
||||
|
||||
until uris_to_fetch.empty? || fetched_uris.length >= MAX_REPLIES
|
||||
until uris_to_fetch.empty? || fetched_uris.length >= MAX_REPLIES || n_pages >= MAX_PAGES
|
||||
next_reply = uris_to_fetch.pop
|
||||
next if next_reply.nil?
|
||||
|
||||
new_reply_uris = get_replies(next_reply, options)
|
||||
new_reply_uris, new_n_pages = get_replies(next_reply, MAX_PAGES - n_pages, options)
|
||||
next if new_reply_uris.nil?
|
||||
|
||||
new_reply_uris = new_reply_uris.reject { |uri| fetched_uris.include?(uri) }
|
||||
|
||||
uris_to_fetch.concat(new_reply_uris)
|
||||
fetched_uris = fetched_uris.merge(new_reply_uris)
|
||||
n_pages += new_n_pages
|
||||
end
|
||||
|
||||
Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{fetched_uris.length} replies" }
|
||||
|
@ -43,11 +45,11 @@ class ActivityPub::FetchAllRepliesWorker
|
|||
|
||||
private
|
||||
|
||||
def get_replies(status_uri, options = {})
|
||||
def get_replies(status_uri, max_pages, options = {})
|
||||
replies_collection_or_uri = get_replies_uri(status_uri)
|
||||
return if replies_collection_or_uri.nil?
|
||||
|
||||
ActivityPub::FetchAllRepliesService.new.call(replies_collection_or_uri, **options.deep_symbolize_keys)
|
||||
ActivityPub::FetchAllRepliesService.new.call(replies_collection_or_uri, max_pages, **options.deep_symbolize_keys)
|
||||
end
|
||||
|
||||
def get_replies_uri(parent_status_uri)
|
||||
|
|
|
@ -122,20 +122,18 @@ RSpec.describe ActivityPub::FetchAllRepliesWorker do
|
|||
|
||||
stub_request(:get, item).to_return(status: 200, body: Oj.dump(empty_object), headers: { 'Content-Type': 'application/activity+json' })
|
||||
end
|
||||
|
||||
stub_request(:get, top_note_uri).to_return(status: 200, body: Oj.dump(top_object), headers: { 'Content-Type': 'application/activity+json' })
|
||||
stub_request(:get, reply_note_uri).to_return(status: 200, body: Oj.dump(reply_object), headers: { 'Content-Type': 'application/activity+json' })
|
||||
end
|
||||
|
||||
shared_examples 'fetches all replies' do
|
||||
before do
|
||||
stub_request(:get, top_note_uri).to_return(status: 200, body: Oj.dump(top_object), headers: { 'Content-Type': 'application/activity+json' })
|
||||
stub_request(:get, reply_note_uri).to_return(status: 200, body: Oj.dump(reply_object), headers: { 'Content-Type': 'application/activity+json' })
|
||||
end
|
||||
|
||||
it 'fetches statuses recursively' do
|
||||
got_uris = subject.perform(status.id)
|
||||
expect(got_uris).to match_array(all_items)
|
||||
end
|
||||
|
||||
it 'respects the maxium limits set by not recursing after the max is reached' do
|
||||
it 'respects the maximum limits set by not recursing after the max is reached' do
|
||||
stub_const('ActivityPub::FetchAllRepliesWorker::MAX_REPLIES', 5)
|
||||
got_uris = subject.perform(status.id)
|
||||
expect(got_uris).to match_array(top_items + top_items_paged)
|
||||
|
@ -249,6 +247,12 @@ RSpec.describe ActivityPub::FetchAllRepliesWorker do
|
|||
end
|
||||
|
||||
it_behaves_like 'fetches all replies'
|
||||
|
||||
it 'limits by max pages' do
|
||||
stub_const('ActivityPub::FetchAllRepliesWorker::MAX_PAGES', 3)
|
||||
got_uris = subject.perform(status.id)
|
||||
expect(got_uris).to match_array(top_items + top_items_paged + nested_items)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue