PeerTube/server/lib/activitypub/crawl.ts

import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT, WEBSERVER } from '../../initializers/constants'
import { doRequest } from '../../helpers/requests'
import { logger } from '../../helpers/logger'
import * as Bluebird from 'bluebird'
import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub'
import { URL } from 'url'

/** Result of an asynchronous callback: either a native promise or a Bluebird one. */
type AsyncCallbackResult = Promise<any> | Bluebird<any>

/** Callback that receives the items of one crawled collection page. */
type HandlerFunction<T> = (items: T[]) => AsyncCallbackResult

/** Callback that receives the date at which the crawl started. */
type CleanerFunction = (startedDate: Date) => AsyncCallbackResult

/**
 * Crawl an ActivityPub ordered collection page by page and give the items of each page to `handler`.
 *
 * The collection at `uri` is fetched first, then its `first` link and every following `next` link
 * are visited. A link is either a URL string (fetched with `doRequest`) or an already embedded
 * page object (used as is).
 *
 * Crawling stops when:
 *  - a page has no `next` link,
 *  - `ACTIVITY_PUB.FETCH_PAGE_LIMIT` pages have been visited,
 *  - or the next link points to our own host (`WEBSERVER.HOST`).
 *
 * Errors coming from `doRequest`, `handler`, `cleaner` or from parsing an invalid link URL are not
 * caught here and reject the returned promise.
 *
 * @param uri URL of the collection to crawl
 * @param handler awaited with the `orderedItems` of every visited page that has such an array
 * @param cleaner optional callback awaited once the crawl is over, with the date captured just
 *   before the first request was sent
 */
async function crawlCollectionPage <T> (uri: string, handler: HandlerFunction<T>, cleaner?: CleanerFunction) {
  logger.info('Crawling ActivityPub data on %s.', uri)

  // The same options object is reused for every page: only `uri` is updated
  const options = {
    method: 'GET',
    uri,
    json: true,
    activityPub: true,
    timeout: JOB_REQUEST_TIMEOUT
  }

  // Captured before any request so the cleaner can compare against the beginning of the crawl
  const startDate = new Date()

  const response = await doRequest<ActivityPubOrderedCollection<T>>(options)
  const firstBody = response.body

  const limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT
  let i = 0
  let nextLink = firstBody.first
  while (nextLink && i < limit) {
    let body: any

    if (typeof nextLink === 'string') {
      // Don't crawl ourselves
      const remoteHost = new URL(nextLink).host
      if (remoteHost === WEBSERVER.HOST) {
        // We cannot get the following link without fetching this page, so stop here.
        // A `continue` would re-evaluate the loop condition with the same `nextLink` and `i`,
        // spinning forever without ever yielding to the event loop.
        logger.info('Stopping ActivityPub crawl of %s: next page %s is on our own host.', uri, nextLink)
        break
      }

      options.uri = nextLink

      const res = await doRequest<ActivityPubOrderedCollection<T>>(options)
      body = res.body
    } else {
      // nextLink is already the object we want
      body = nextLink
    }

    // Move forward before handling the items so the page counter always progresses
    nextLink = body.next
    i++

    if (Array.isArray(body.orderedItems)) {
      const items = body.orderedItems
      logger.info('Processing %i ActivityPub items for %s.', items.length, options.uri)

      await handler(items)
    }
  }

  if (cleaner) await cleaner(startDate)
}

// Public API of this module
export {
  crawlCollectionPage
}
Fix user notifications tests 2019-04-25 14:23:15 +02:00			`import { ACTIVITY_PUB, JOB_REQUEST_TIMEOUT, WEBSERVER } from '../../initializers/constants'`
Refractor and optimize AP collections Only display urls in general object, and paginate video comments, shares, likes and dislikes 2018-05-25 16:21:16 +02:00			`import { doRequest } from '../../helpers/requests'`
			`import { logger } from '../../helpers/logger'`
Set bitrate limits for transcoding (fixes #638) (#1135) * Set bitrate limits for transcoding (fixes #638) * added optimization script and test, changed stuff * fix test, improve docs * re-add optimize-old-videos script * added documentation * Don't optimize videos without valid UUID, or redundancy videos * move getUUIDFromFilename * fix tests? * update torrent and file size, some more fixes/improvements * use higher bitrate for high fps video, adjust bitrates * add test video * don't throw error if resolution is undefined * generate test fixture on the fly * use random noise video for bitrate test, add promise * shorten test video to avoid timeout * use existing function to optimize video * various fixes * increase test timeout * limit test fixture size, add link * test fixes * add await * more test fixes, add -b:v parameter * replace ffmpeg wiki link * fix ffmpeg params * fix unit test * add test fixture to .gitgnore * add video transcoding fps model * add missing file 2018-10-08 16:26:04 +02:00			`import * as Bluebird from 'bluebird'`
Check activities host 2018-11-14 15:01:28 +01:00			`import { ActivityPubOrderedCollection } from '../../../shared/models/activitypub'`
Move to eslint 2020-01-31 16:56:52 +01:00			`import { URL } from 'url'`
Refractor and optimize AP collections Only display urls in general object, and paginate video comments, shares, likes and dislikes 2018-05-25 16:21:16 +02:00
Cleanup invalid rates/comments/shares 2019-03-19 16:23:02 +01:00			`type HandlerFunction<T> = (items: T[]) => (Promise<any> \| Bluebird<any>)`
			`type CleanerFunction = (startedDate: Date) => (Promise<any> \| Bluebird<any>)`

			`async function crawlCollectionPage <T> (uri: string, handler: HandlerFunction<T>, cleaner?: CleanerFunction) {`
Refractor and optimize AP collections Only display urls in general object, and paginate video comments, shares, likes and dislikes 2018-05-25 16:21:16 +02:00			`logger.info('Crawling ActivityPub data on %s.', uri)`

			`const options = {`
			`method: 'GET',`
			`uri,`
			`json: true,`
			`activityPub: true,`
			`timeout: JOB_REQUEST_TIMEOUT`
			`}`

Cleanup invalid rates/comments/shares 2019-03-19 16:23:02 +01:00			`const startDate = new Date()`

Check activities host 2018-11-14 15:01:28 +01:00			`const response = await doRequest<ActivityPubOrderedCollection<T>>(options)`
Refractor and optimize AP collections Only display urls in general object, and paginate video comments, shares, likes and dislikes 2018-05-25 16:21:16 +02:00			`const firstBody = response.body`

Move to eslint 2020-01-31 16:56:52 +01:00			`const limit = ACTIVITY_PUB.FETCH_PAGE_LIMIT`
Refractor and optimize AP collections Only display urls in general object, and paginate video comments, shares, likes and dislikes 2018-05-25 16:21:16 +02:00			`let i = 0`
			`let nextLink = firstBody.first`
			`while (nextLink && i < limit) {`
More robust federation In particular when fetching pleroma outbox 2019-05-31 15:14:40 +02:00			`let body: any`
Fix user notifications tests 2019-04-25 14:23:15 +02:00
More robust federation In particular when fetching pleroma outbox 2019-05-31 15:14:40 +02:00			`if (typeof nextLink === 'string') {`
			`// Don't crawl ourselves`
Move to eslint 2020-01-31 16:56:52 +01:00			`const remoteHost = new URL(nextLink).host`
More robust federation In particular when fetching pleroma outbox 2019-05-31 15:14:40 +02:00			`if (remoteHost === WEBSERVER.HOST) continue`

			`options.uri = nextLink`

			`const res = await doRequest<ActivityPubOrderedCollection<T>>(options)`
			`body = res.body`
			`} else {`
			`// nextLink is already the object we want`
			`body = nextLink`
			`}`
Refractor and optimize AP collections Only display urls in general object, and paginate video comments, shares, likes and dislikes 2018-05-25 16:21:16 +02:00
			`nextLink = body.next`
			`i++`

			`if (Array.isArray(body.orderedItems)) {`
			`const items = body.orderedItems`
Add concept of video state, and add ability to wait transcoding before publishing a video 2018-06-12 20:04:58 +02:00			`logger.info('Processing %i ActivityPub items for %s.', items.length, options.uri)`
Refractor and optimize AP collections Only display urls in general object, and paginate video comments, shares, likes and dislikes 2018-05-25 16:21:16 +02:00
			`await handler(items)`
			`}`
			`}`
Cleanup invalid rates/comments/shares 2019-03-19 16:23:02 +01:00
			`if (cleaner) await cleaner(startDate)`
Refractor and optimize AP collections Only display urls in general object, and paginate video comments, shares, likes and dislikes 2018-05-25 16:21:16 +02:00			`}`

			`export {`
			`crawlCollectionPage`
			`}`