Sitemap: Hide empty accounts/channels and add video tags (#6633)

* feat(sitemap): remove empty accounts/channels

closes #6607

* feat(sitemap): add more video tags

https://developers.google.com/search/docs/crawling-indexing/sitemaps/video-sitemaps

closes #6606

* Chunk videos SQL query

* Optimize SQL query

---------

Co-authored-by: Chocobozzz <me@florianbigard.com>
pull/6598/head
kontrollanten 2024-10-23 07:17:02 +02:00 committed by GitHub
parent ef1732e5b9
commit 41c70a6b35
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 138 additions and 51 deletions

View File

@ -6,7 +6,8 @@ export const VideoInclude = {
FILES: 1 << 3,
CAPTIONS: 1 << 4,
SOURCE: 1 << 5,
AUTOMATIC_TAGS: 1 << 6
AUTOMATIC_TAGS: 1 << 6,
TAGS: 1 << 7
} as const
export type VideoIncludeType = typeof VideoInclude[keyof typeof VideoInclude]

View File

@ -417,14 +417,17 @@ export class VideosCommand extends AbstractCommand {
mode?: 'legacy' | 'resumable' // default legacy
waitTorrentGeneration?: boolean // default true
completedExpectedStatus?: HttpStatusCodeType
videoChannelId?: number
} = {}) {
const { mode = 'legacy', waitTorrentGeneration = true } = options
const { mode = 'legacy', videoChannelId, waitTorrentGeneration = true } = options
let defaultChannelId = 1
try {
const { videoChannels } = await this.server.users.getMyInfo({ token: options.token })
defaultChannelId = videoChannels[0].id
} catch (e) { /* empty */ }
if (!videoChannelId) {
try {
const { videoChannels } = await this.server.users.getMyInfo({ token: options.token })
defaultChannelId = videoChannels[0].id
} catch (e) { /* empty */ }
}
// Override default attributes
const attributes = {
@ -432,7 +435,7 @@ export class VideosCommand extends AbstractCommand {
category: 5,
licence: 4,
language: 'zh',
channelId: defaultChannelId,
channelId: videoChannelId || defaultChannelId,
nsfw: true,
waitTranscoding: false,
description: 'my super description',

View File

@ -193,15 +193,33 @@ describe('Test misc endpoints', function () {
it('Should add videos, channel and accounts and get sitemap', async function () {
this.timeout(35000)
await server.videos.upload({ attributes: { name: 'video 1', nsfw: false } })
await server.videos.upload({ attributes: { name: 'video 2', nsfw: false } })
await server.videos.upload({ attributes: { name: 'video 3', privacy: VideoPrivacy.PRIVATE } })
const { token: user1Token } = await server.users.generate('user1')
const { token: user2Token } = await server.users.generate('user2')
const { token: user3Token } = await server.users.generate('user3')
await server.channels.create({ attributes: { name: 'channel1', displayName: 'channel 1' } })
await server.channels.create({ attributes: { name: 'channel2', displayName: 'channel 2' } })
const { id: channel1Id } = await server.channels.create({
attributes: { name: 'channel1', displayName: 'channel 1' },
token: user1Token
})
const { id: channel2Id } = await server.channels.create({
attributes: { name: 'channel2', displayName: 'channel 2' },
token: user2Token
})
const { id: channel3Id } = await server.channels.create({
attributes: { name: 'channel3', displayName: 'channel 3' },
token: user3Token
})
await server.users.create({ username: 'user1', password: 'password' })
await server.users.create({ username: 'user2', password: 'password' })
const { id: video1Id } = await server.videos.upload({ attributes: { name: 'video 1', nsfw: false }, videoChannelId: channel1Id })
await server.videos.upload({ attributes: { name: 'video 2', nsfw: false }, videoChannelId: channel2Id })
await server.videos.upload({ attributes: { name: 'video 3', privacy: VideoPrivacy.PRIVATE }, videoChannelId: channel3Id })
await server.videos.update({
id: video1Id,
attributes: {
tags: [ 'fish', 'chips' ]
}
})
const res = await makeGetRequest({
url: server.url,
@ -216,11 +234,25 @@ describe('Test misc endpoints', function () {
expect(res.text).to.contain('<video:title>video 2</video:title>')
expect(res.text).to.not.contain('<video:title>video 3</video:title>')
expect(res.text).to.match(/<video:thumbnail_loc>.*\.jpg<\/video:thumbnail_loc>/)
expect(res.text).to.match(/<video:content_loc>.*\.webm<\/video:content_loc>/)
expect(res.text).to.match(/<video:player_loc>.*\/videos\/embed\/.*<\/video:player_loc>/)
expect(res.text).to.match(/<video:duration>.*<\/video:duration>/)
expect(res.text).to.match(/<video:rating>0<\/video:rating>/)
expect(res.text).to.match(/<video:view_count>0<\/video:view_count>/)
expect(res.text).to.match(/<video:publication_date>.*<\/video:publication_date>/)
expect(res.text).to.match(/<video:tag>fish<\/video:tag>/)
expect(res.text).to.match(/<video:tag>chips<\/video:tag>/)
expect(res.text).to.match(/<video:uploader.*>channel 1<\/video:uploader>/)
expect(res.text).to.match(/<video:live>NO<\/video:live>/)
expect(res.text).to.contain('<url><loc>' + server.url + '/c/channel1/videos</loc></url>')
expect(res.text).to.contain('<url><loc>' + server.url + '/c/channel2/videos</loc></url>')
expect(res.text).to.not.contain('<url><loc>' + server.url + '/c/channel3/videos</loc></url>')
expect(res.text).to.contain('<url><loc>' + server.url + '/a/user1/video-channels</loc></url>')
expect(res.text).to.contain('<url><loc>' + server.url + '/a/user2/video-channels</loc></url>')
expect(res.text).to.not.contain('<url><loc>' + server.url + '/a/user3/video-channels</loc></url>')
})
it('Should not fail with big title/description videos', async function () {

View File

@ -5,11 +5,11 @@ import { logger } from '@server/helpers/logger.js'
import { getServerActor } from '@server/models/application/application.js'
import { buildNSFWFilter } from '../helpers/express-utils.js'
import { ROUTE_CACHE_LIFETIME, WEBSERVER } from '../initializers/constants.js'
import { apiRateLimiter, asyncMiddleware } from '../middlewares/index.js'
import { cacheRoute } from '../middlewares/cache/cache.js'
import { apiRateLimiter, asyncMiddleware, cacheRoute } from '../middlewares/index.js'
import { AccountModel } from '../models/account/account.js'
import { VideoModel } from '../models/video/video.js'
import { VideoChannelModel } from '../models/video/video-channel.js'
import { VideoFileStream, VideoInclude } from '@peertube/peertube-models'
const sitemapRouter = express.Router()
@ -73,32 +73,64 @@ async function getSitemapAccountUrls () {
async function getSitemapLocalVideoUrls () {
const serverActor = await getServerActor()
const { data } = await VideoModel.listForApi({
start: 0,
count: undefined,
sort: 'createdAt',
displayOnlyForFollower: {
actorId: serverActor.id,
orLocalVideos: true
},
isLocal: true,
nsfw: buildNSFWFilter(),
countVideos: false
})
let acc: { url: string, video: any[] }[] = []
return data.map(v => ({
url: WEBSERVER.URL + v.getWatchStaticPath(),
video: [
{
// Sitemap title should be < 100 characters
title: truncate(v.name, { length: 100, omission: '...' }),
// Sitemap description should be < 2000 characters
description: truncate(v.description || v.name, { length: 2000, omission: '...' }),
player_loc: WEBSERVER.URL + v.getEmbedStaticPath(),
thumbnail_loc: WEBSERVER.URL + v.getMiniatureStaticPath()
}
]
}))
const chunkSize = 200
let hasData = true
let i = 0
while (hasData && i < 1000) {
const { data } = await VideoModel.listForApi({
start: chunkSize * i,
count: chunkSize,
sort: 'createdAt',
displayOnlyForFollower: {
actorId: serverActor.id,
orLocalVideos: true
},
isLocal: true,
nsfw: buildNSFWFilter(),
countVideos: false,
include: VideoInclude.FILES | VideoInclude.TAGS
})
hasData = data.length !== 0
i++
acc = acc.concat(
data.map(v => {
const contentLoc = v.getHLSPlaylist()?.getMasterPlaylistUrl(v) ||
v.getMaxQualityFile(VideoFileStream.VIDEO)?.getFileUrl(v) ||
v.getMaxQualityFile(VideoFileStream.AUDIO)?.getFileUrl(v)
return {
url: WEBSERVER.URL + v.getWatchStaticPath(),
video: [
{
// Sitemap title should be < 100 characters
'title': truncate(v.name, { length: 100, omission: '...' }),
// Sitemap description should be < 2000 characters
'description': truncate(v.description || v.name, { length: 2000, omission: '...' }),
'player_loc': WEBSERVER.URL + v.getEmbedStaticPath(),
'thumbnail_loc': WEBSERVER.URL + v.getMiniatureStaticPath(),
'content_loc': contentLoc,
'duration': v.duration,
'view_count': v.views,
'publication_date': v.publishedAt.toISOString(),
'uploader': v.VideoChannel.getDisplayName(),
'uploader:info': v.VideoChannel.getClientUrl(),
'live': v.isLive ? 'YES' : 'NO',
'family_friendly': v.nsfw ? 'NO' : 'YES',
'rating': (v.likes * 5) / (v.likes + v.dislikes) || 0, // Rating is between 0.0 and 5.0
'tag': v.Tags.map(t => t.name)
}
]
}
})
)
}
return acc
}
function getSitemapBasicUrls () {

View File

@ -1,6 +1,6 @@
import { Account, AccountSummary } from '@peertube/peertube-models'
import { Account, AccountSummary, VideoPrivacy } from '@peertube/peertube-models'
import { ModelCache } from '@server/models/shared/model-cache.js'
import { FindOptions, IncludeOptions, Includeable, Op, Transaction, WhereOptions } from 'sequelize'
import { FindOptions, IncludeOptions, Includeable, Op, Transaction, WhereOptions, literal } from 'sequelize'
import {
AfterDestroy,
AllowNull,
@ -422,7 +422,7 @@ export class AccountModel extends SequelizeModel<AccountModel> {
}
static listLocalsForSitemap (sort: string): Promise<MAccountHost[]> {
const query = {
return AccountModel.unscoped().findAll({
attributes: [ ],
offset: 0,
order: getSort(sort),
@ -433,13 +433,19 @@ export class AccountModel extends SequelizeModel<AccountModel> {
where: {
serverId: null
}
},
{
attributes: [ 'id' ],
model: VideoChannelModel.unscoped(),
required: true,
where: {
[Op.and]: [
literal(`EXISTS (SELECT 1 FROM "video" WHERE "privacy" = ${VideoPrivacy.PUBLIC} AND "channelId" = "VideoChannels"."id")`)
]
}
}
]
}
return AccountModel
.unscoped()
.findAll(query)
})
}
toFormattedJSON (this: MAccountFormattable): Account {

View File

@ -121,6 +121,10 @@ export class VideoModelBuilder {
if (include & VideoInclude.AUTOMATIC_TAGS) {
this.addAutoTag(row, videoModel)
}
if (include & VideoInclude.TAGS) {
this.addTag(row, videoModel)
}
}
}

View File

@ -109,6 +109,10 @@ export class VideosModelListQueryBuilder extends AbstractVideoQueryBuilder {
this.includeAutomaticTags(serverActor.Account.id)
}
if (options.include & VideoInclude.TAGS) {
this.includeTags()
}
const select = this.buildSelect()
this.query = `${select} FROM (${this.innerQuery}) AS "tmp" ${this.joins} ${this.innerSort}`

View File

@ -1,5 +1,5 @@
import { forceNumber, pick } from '@peertube/peertube-core-utils'
import { ActivityPubActor, VideoChannel, VideoChannelSummary } from '@peertube/peertube-models'
import { ActivityPubActor, VideoChannel, VideoChannelSummary, VideoPrivacy } from '@peertube/peertube-models'
import { CONFIG } from '@server/initializers/config.js'
import { InternalEventEmitter } from '@server/lib/internal-event-emitter.js'
import { MAccountHost } from '@server/types/models/index.js'
@ -523,7 +523,12 @@ export class VideoChannelModel extends SequelizeModel<VideoChannelModel> {
serverId: null
}
}
]
],
where: {
[Op.and]: [
literal(`EXISTS (SELECT 1 FROM "video" WHERE "privacy" = ${VideoPrivacy.PUBLIC} AND "channelId" = "VideoChannelModel"."id")`)
]
}
}
return VideoChannelModel