PeerTube/server/lib/hls.ts

282 lines
10 KiB
TypeScript
Raw Normal View History

Add support for saving video files to object storage (#4290) * Add support for saving video files to object storage * Add support for custom url generation on s3 stored files Uses two config keys to support url generation that doesn't directly go to (compatible s3). Can be used to generate urls to any cache server or CDN. * Upload files to s3 concurrently and delete originals afterwards * Only publish after move to object storage is complete * Use base url instead of url template * Fix mistyped config field * Add rudenmentary way to download before transcode * Implement Chocobozzz suggestions https://github.com/Chocobozzz/PeerTube/pull/4290#issuecomment-891670478 The remarks in question: Try to use objectStorage prefix instead of s3 prefix for your function/variables/config names Prefer to use a tree for the config: s3.streaming_playlists_bucket -> object_storage.streaming_playlists.bucket Use uppercase for config: S3.STREAMING_PLAYLISTS_BUCKETINFO.bucket -> OBJECT_STORAGE.STREAMING_PLAYLISTS.BUCKET (maybe BUCKET_NAME instead of BUCKET) I suggest to rename moveJobsRunning to pendingMovingJobs (or better, create a dedicated videoJobInfo table with a pendingMove & videoId columns so we could also use this table to track pending transcoding jobs) https://github.com/Chocobozzz/PeerTube/pull/4290/files#diff-3e26d41ca4bda1de8e1747af70ca2af642abcc1e9e0bfb94239ff2165acfbde5R19 uses a string instead of an integer I think we should store the origin object storage URL in fileUrl, without base_url injection. 
Instead, inject the base_url at "runtime" so admins can easily change this configuration without running a script to update DB URLs * Import correct function * Support multipart upload * Remove import of node 15.0 module stream/promises * Extend maximum upload job length Using the same value as for redundancy downloading seems logical * Use dynamic part size for really large uploads Also adds very small part size for local testing * Fix decreasePendingMove query * Resolve various PR comments * Move to object storage after optimize * Make upload size configurable and increase default * Prune webtorrent files that are stored in object storage * Move files after transcoding jobs * Fix federation * Add video path manager * Support move to external storage job in client * Fix live object storage tests Co-authored-by: Chocobozzz <me@florianbigard.com>
2021-08-17 08:26:20 +02:00
import { close, ensureDir, move, open, outputJSON, read, readFile, remove, stat, writeFile } from 'fs-extra'
2022-08-17 15:36:03 +02:00
import { flatten } from 'lodash'
import PQueue from 'p-queue'
2020-11-20 17:16:55 +01:00
import { basename, dirname, join } from 'path'
import { MStreamingPlaylist, MStreamingPlaylistFilesVideo, MVideo } from '@server/types/models'
import { uniqify, uuidRegex } from '@shared/core-utils'
2021-12-17 13:58:07 +01:00
import { sha256 } from '@shared/extra-utils'
import { getVideoStreamDimensionsInfo } from '@shared/ffmpeg'
import { VideoStorage } from '@shared/models'
import { getAudioStreamCodec, getVideoStreamCodec } from '../helpers/ffmpeg'
2019-01-29 08:37:25 +01:00
import { logger } from '../helpers/logger'
import { doRequest, doRequestAndSaveToFile } from '../helpers/requests'
import { generateRandomString } from '../helpers/utils'
2019-04-11 11:33:44 +02:00
import { CONFIG } from '../initializers/config'
2021-11-29 15:46:52 +01:00
import { P2P_MEDIA_LOADER_PEER_VERSION, REQUEST_TIMEOUTS } from '../initializers/constants'
import { sequelizeTypescript } from '../initializers/database'
2020-11-20 17:16:55 +01:00
import { VideoFileModel } from '../models/video/video-file'
import { VideoStreamingPlaylistModel } from '../models/video/video-streaming-playlist'
import { storeHLSFileFromFilename } from './object-storage'
import { generateHLSMasterPlaylistFilename, generateHlsSha256SegmentsFilename, getHlsResolutionPlaylistFilename } from './paths'
Add support for saving video files to object storage (#4290) * Add support for saving video files to object storage * Add support for custom url generation on s3 stored files Uses two config keys to support url generation that doesn't directly go to (compatible s3). Can be used to generate urls to any cache server or CDN. * Upload files to s3 concurrently and delete originals afterwards * Only publish after move to object storage is complete * Use base url instead of url template * Fix mistyped config field * Add rudenmentary way to download before transcode * Implement Chocobozzz suggestions https://github.com/Chocobozzz/PeerTube/pull/4290#issuecomment-891670478 The remarks in question: Try to use objectStorage prefix instead of s3 prefix for your function/variables/config names Prefer to use a tree for the config: s3.streaming_playlists_bucket -> object_storage.streaming_playlists.bucket Use uppercase for config: S3.STREAMING_PLAYLISTS_BUCKETINFO.bucket -> OBJECT_STORAGE.STREAMING_PLAYLISTS.BUCKET (maybe BUCKET_NAME instead of BUCKET) I suggest to rename moveJobsRunning to pendingMovingJobs (or better, create a dedicated videoJobInfo table with a pendingMove & videoId columns so we could also use this table to track pending transcoding jobs) https://github.com/Chocobozzz/PeerTube/pull/4290/files#diff-3e26d41ca4bda1de8e1747af70ca2af642abcc1e9e0bfb94239ff2165acfbde5R19 uses a string instead of an integer I think we should store the origin object storage URL in fileUrl, without base_url injection. 
Instead, inject the base_url at "runtime" so admins can easily change this configuration without running a script to update DB URLs * Import correct function * Support multipart upload * Remove import of node 15.0 module stream/promises * Extend maximum upload job length Using the same value as for redundancy downloading seems logical * Use dynamic part size for really large uploads Also adds very small part size for local testing * Fix decreasePendingMove query * Resolve various PR comments * Move to object storage after optimize * Make upload size configurable and increase default * Prune webtorrent files that are stored in object storage * Move files after transcoding jobs * Fix federation * Add video path manager * Support move to external storage job in client * Fix live object storage tests Co-authored-by: Chocobozzz <me@florianbigard.com>
2021-08-17 08:26:20 +02:00
import { VideoPathManager } from './video-path-manager'
2019-04-08 11:13:49 +02:00
/**
 * Refreshes the P2P media loader info hashes of every streaming playlist returned by
 * `VideoStreamingPlaylistModel.listByIncorrectPeerVersion()`, and stamps each of them
 * with the current `P2P_MEDIA_LOADER_PEER_VERSION`.
 */
async function updateStreamingPlaylistsInfohashesIfNeeded () {
  const outdatedPlaylists = await VideoStreamingPlaylistModel.listByIncorrectPeerVersion()

  // One transaction per playlist rather than a single big one: there could be many videos to update
  for (const outdated of outdatedPlaylists) {
    await sequelizeTypescript.transaction(async transaction => {
      const files = await VideoFileModel.listByStreamingPlaylist(outdated.id, transaction)

      outdated.assignP2PMediaLoaderInfoHashes(outdated.Video, files)
      outdated.p2pMediaLoaderPeerVersion = P2P_MEDIA_LOADER_PEER_VERSION

      await outdated.save({ transaction })
    })
  }
}
2019-01-29 08:37:25 +01:00
/**
 * Regenerates the master playlist and the segments sha256 file of `playlist`, then reloads the
 * playlist from the database, recomputes its P2P media loader info hashes, saves it and attaches
 * it to `video`.
 *
 * Errors are logged (info level) and not rethrown.
 */
async function updatePlaylistAfterFileChange (video: MVideo, playlist: MStreamingPlaylist) {
  try {
    // The playlists returned by these two steps are not needed: a fresh copy is loaded right after
    await updateMasterHLSPlaylist(video, playlist)
    await updateSha256VODSegments(video, playlist)

    // Refresh playlist, operations can take some time
    const refreshed = await VideoStreamingPlaylistModel.loadWithVideoAndFiles(playlist.id)

    refreshed.assignP2PMediaLoaderInfoHashes(video, refreshed.VideoFiles)
    await refreshed.save()

    video.setHLSPlaylist(refreshed)
  } catch (err) {
    logger.info('Cannot update playlist after file change. Maybe due to concurrent transcoding', { err })
  }
}
2019-01-29 08:37:25 +01:00
// ---------------------------------------------------------------------------
2019-01-29 08:37:25 +01:00
// Avoid concurrency issues when updating streaming playlist files:
// the queue is created with `concurrency: 1`, so the tasks added to it are run one at a time
const playlistFilesQueue = new PQueue({ concurrency: 1 })
2019-01-29 08:37:25 +01:00
/**
 * Rebuilds the HLS master playlist of a streaming playlist from its video files.
 *
 * The work is scheduled on `playlistFilesQueue`. For each video file of the playlist, a
 * `#EXT-X-STREAM-INF` entry (bandwidth, resolution, optional frame rate, codecs) followed by the
 * resolution playlist filename is appended. The previous master playlist file (if any) is removed,
 * a new one is written under a freshly generated filename and, when the playlist lives in object
 * storage, the file is uploaded and the local copy deleted.
 *
 * @param video - video owning the playlist
 * @param playlistArg - playlist to update; it is reloaded with its video and files using its id
 * @returns the result of saving the reloaded playlist
 */
function updateMasterHLSPlaylist (video: MVideo, playlistArg: MStreamingPlaylist): Promise<MStreamingPlaylistFilesVideo> {
  return playlistFilesQueue.add(async () => {
    const playlist = await VideoStreamingPlaylistModel.loadWithVideoAndFiles(playlistArg.id)

    const lines: string[] = [ '#EXTM3U', '#EXT-X-VERSION:3' ]

    for (const file of playlist.VideoFiles) {
      const resolutionPlaylistFilename = getHlsResolutionPlaylistFilename(file.filename)

      await VideoPathManager.Instance.makeAvailableVideoFile(file.withVideoOrPlaylist(playlist), async videoFilePath => {
        const dimensions = await getVideoStreamDimensionsInfo(videoFilePath)

        // Attributes of the stream, joined with commas once complete
        const attributes = [
          'BANDWIDTH=' + video.getBandwidthBits(file),
          `RESOLUTION=${dimensions?.width || 0}x${dimensions?.height || 0}`
        ]

        if (file.fps) attributes.push('FRAME-RATE=' + file.fps)

        const [ videoCodec, audioCodec ] = await Promise.all([
          getVideoStreamCodec(videoFilePath),
          getAudioStreamCodec(videoFilePath)
        ])

        // Skip the codecs that could not be determined
        const codecList = [ videoCodec, audioCodec ].filter(c => !!c).join(',')
        attributes.push(`CODECS="${codecList}"`)

        lines.push('#EXT-X-STREAM-INF:' + attributes.join(','), resolutionPlaylistFilename)
      })
    }

    const previousFilename = playlist.playlistFilename
    if (previousFilename) {
      await video.removeStreamingPlaylistFile(playlist, previousFilename)
    }

    playlist.playlistFilename = generateHLSMasterPlaylistFilename(video.isLive)

    const masterPlaylistPath = VideoPathManager.Instance.getFSHLSOutputPath(video, playlist.playlistFilename)
    await writeFile(masterPlaylistPath, `${lines.join('\n')}\n`)

    if (playlist.storage !== VideoStorage.OBJECT_STORAGE) return playlist.save()

    // Object storage: upload the file we just wrote, then drop the local copy
    playlist.playlistUrl = await storeHLSFileFromFilename(playlist, playlist.playlistFilename)
    await remove(masterPlaylistPath)

    return playlist.save()
  })
}
// ---------------------------------------------------------------------------
2022-08-04 09:22:21 +02:00
/**
 * Rebuilds the JSON file holding the sha256 of every byte range of the playlist video files.
 *
 * The work is scheduled on `playlistFilesQueue`. For each video file, the byte ranges declared by the
 * `#EXT-X-BYTERANGE` tags of its resolution playlist are read from the video file and hashed. The
 * resulting object has the shape `{ [videoFilename]: { '<firstByte>-<lastByte>': sha256 } }`.
 * The previous sha256 file (if any) is removed, a new one is written under a freshly generated
 * filename and, when the playlist lives in object storage, the file is uploaded and the local copy
 * deleted.
 *
 * @param video - video owning the playlist
 * @param playlistArg - playlist to update; it is reloaded with its video and files using its id
 * @returns the result of saving the reloaded playlist
 */
function updateSha256VODSegments (video: MVideo, playlistArg: MStreamingPlaylist): Promise<MStreamingPlaylistFilesVideo> {
  return playlistFilesQueue.add(async () => {
    const json: { [filename: string]: { [range: string]: string } } = {}

    const playlist = await VideoStreamingPlaylistModel.loadWithVideoAndFiles(playlistArg.id)

    // For all the resolutions available for this video
    for (const file of playlist.VideoFiles) {
      const rangeHashes: { [range: string]: string } = {}
      const fileWithPlaylist = file.withVideoOrPlaylist(playlist)

      await VideoPathManager.Instance.makeAvailableVideoFile(fileWithPlaylist, videoPath => {

        return VideoPathManager.Instance.makeAvailableResolutionPlaylistFile(fileWithPlaylist, async resolutionPlaylistPath => {
          const playlistContent = await readFile(resolutionPlaylistPath)
          const ranges = getRangesFromPlaylist(playlistContent.toString())

          const fd = await open(videoPath, 'r')

          try {
            for (const range of ranges) {
              const buf = Buffer.alloc(range.length)
              await read(fd, buf, 0, range.length, range.offset)

              // Key is the inclusive byte range: first byte - last byte
              rangeHashes[`${range.offset}-${range.offset + range.length - 1}`] = sha256(buf)
            }
          } finally {
            // Always release the descriptor, even if reading or hashing a range failed
            await close(fd)
          }

          const videoFilename = file.filename
          json[videoFilename] = rangeHashes
        })
      })
    }

    if (playlist.segmentsSha256Filename) {
      await video.removeStreamingPlaylistFile(playlist, playlist.segmentsSha256Filename)
    }
    playlist.segmentsSha256Filename = generateHlsSha256SegmentsFilename(video.isLive)

    const outputPath = VideoPathManager.Instance.getFSHLSOutputPath(video, playlist.segmentsSha256Filename)
    await outputJSON(outputPath, json)

    if (playlist.storage === VideoStorage.OBJECT_STORAGE) {
      // Upload the file we just wrote, then drop the local copy
      playlist.segmentsSha256Url = await storeHLSFileFromFilename(playlist, playlist.segmentsSha256Filename)
      await remove(outputPath)
    }

    return playlist.save()
  })
}
// ---------------------------------------------------------------------------
/**
 * Reads the whole file at `segmentPath` and returns the result of `sha256()` applied to its content.
 *
 * @param segmentPath - path of the segment file to hash
 */
async function buildSha256Segment (segmentPath: string) {
  const segmentContent = await readFile(segmentPath)

  return sha256(segmentContent)
}
/**
 * Downloads the files referenced by the sub playlists of a remote HLS master playlist.
 *
 * The master playlist is fetched, then every `.m3u8`/`.mp4` URL it lists is fetched as a sub playlist.
 * The unique `.m3u8`/`.mp4` URLs found in those sub playlists are downloaded one after the other into
 * a temporary directory (under `CONFIG.STORAGE.TMP_DIR`), which is finally moved to `destinationDir`
 * (overwriting it).
 *
 * @param playlistUrl - URL of the master playlist
 * @param destinationDir - directory that receives the downloaded files on success
 * @param timeout - delay in milliseconds after which the returned promise rejects with 'HLS download timeout.'
 * @param bodyKBLimit - body size limit (kB) for the whole import; the size of each downloaded file is
 * subtracted from it before the next download
 * @returns a promise resolved once the files have been moved to `destinationDir`, rejected on timeout
 * or on any download/filesystem error (the temporary directory is then deleted)
 */
function downloadPlaylistSegments (playlistUrl: string, destinationDir: string, timeout: number, bodyKBLimit: number) {
  let timer: ReturnType<typeof setTimeout> | undefined
  let timedOut = false
  let remainingBodyKBLimit = bodyKBLimit

  logger.info('Importing HLS playlist %s', playlistUrl)

  // The executor is async: its whole body is wrapped in a try/catch so that any failure rejects
  // the promise instead of leaving it pending forever
  // eslint-disable-next-line no-async-promise-executor
  return new Promise<void>(async (res, rej) => {
    let tmpDirectory: string | undefined

    try {
      const directory = join(CONFIG.STORAGE.TMP_DIR, await generateRandomString(10))
      await ensureDir(directory)
      tmpDirectory = directory

      timer = setTimeout(() => {
        timedOut = true
        deleteTmpDirectory(directory)

        return rej(new Error('HLS download timeout.'))
      }, timeout)

      // Fetch master playlist
      const subPlaylistUrls = await fetchUniqUrls(playlistUrl)

      const subRequests = subPlaylistUrls.map(u => fetchUniqUrls(u))
      const fileUrls = uniqify(flatten(await Promise.all(subRequests)))

      logger.debug('Will download %d HLS files.', fileUrls.length, { fileUrls })

      for (const fileUrl of fileUrls) {
        // The promise is already rejected: do not waste bandwidth on the remaining files
        if (timedOut) break

        const destPath = join(directory, basename(fileUrl))

        await doRequestAndSaveToFile(fileUrl, destPath, { bodyKBLimit: remainingBodyKBLimit, timeout: REQUEST_TIMEOUTS.REDUNDANCY })

        const { size } = await stat(destPath)
        remainingBodyKBLimit -= (size / 1000)

        logger.debug('Downloaded HLS playlist file %s with %d kB remained limit.', fileUrl, Math.floor(remainingBodyKBLimit))
      }

      if (timedOut) {
        // Never publish files once the caller has been told the download failed,
        // and clean whatever may have been written after the timeout fired
        deleteTmpDirectory(directory)
        return
      }

      clearTimeout(timer)

      await move(directory, destinationDir, { overwrite: true })

      return res()
    } catch (err) {
      // Do not let the timer fire later for an import that already failed
      clearTimeout(timer)

      if (tmpDirectory) deleteTmpDirectory(tmpDirectory)

      return rej(err)
    }
  })

  // Best effort removal of the temporary directory: failures are only logged
  function deleteTmpDirectory (directory: string) {
    remove(directory)
      .catch(err => logger.error('Cannot delete path on HLS download error.', { err }))
  }

  // Returns the unique `.m3u8`/`.mp4` URLs listed in the playlist at `playlistUrl`.
  // Relative entries are resolved against the directory of `playlistUrl`.
  async function fetchUniqUrls (playlistUrl: string) {
    const { body } = await doRequest(playlistUrl)

    if (!body) return []

    const urls = body.split('\n')
      // Playlist lines may be terminated by CRLF: drop the trailing '\r' before testing the extension
      .map(line => line.trim())
      .filter(line => line.endsWith('.m3u8') || line.endsWith('.mp4'))
      .map(url => {
        if (url.startsWith('http://') || url.startsWith('https://')) return url

        return `${dirname(playlistUrl)}/${url}`
      })

    return uniqify(urls)
  }
}
// ---------------------------------------------------------------------------
/**
 * Rewrites the playlist file at `playlistPath`, replacing every `<uuid>-<digits>-fragmented.mp4`
 * filename it contains by `newVideoFilename`.
 *
 * @param playlistPath - path of the playlist file, read and written as UTF-8
 * @param newVideoFilename - filename inserted verbatim in place of each match
 */
async function renameVideoFileInPlaylist (playlistPath: string, newVideoFilename: string) {
  const content = await readFile(playlistPath, 'utf8')

  // The dot is escaped so it only matches a literal '.', not any character
  const fragmentedFilenameRegex = new RegExp(`${uuidRegex}-\\d+-fragmented\\.mp4`, 'g')

  // A replacer function is used so `$` sequences in the new filename are not
  // interpreted as replacement patterns ($&, $1...)
  const newContent = content.replace(fragmentedFilenameRegex, () => newVideoFilename)

  await writeFile(playlistPath, newContent, 'utf8')
}
// ---------------------------------------------------------------------------
/**
 * Appends `?<queryString>` after every `.m3u8`, `.ts` or `.mp4` occurrence found in `content`.
 *
 * @param content - playlist content
 * @param queryString - query string to inject, without the leading `?`; inserted verbatim
 * @returns the content with the query string injected
 */
function injectQueryToPlaylistUrls (content: string, queryString: string) {
  // A replacer function is used so `$` sequences in the query string are not
  // interpreted as replacement patterns ($&, $1...)
  return content.replace(/\.(m3u8|ts|mp4)/g, (_match, extension: string) => `.${extension}?${queryString}`)
}
// ---------------------------------------------------------------------------
2019-01-29 08:37:25 +01:00
export {
updateMasterHLSPlaylist,
updateSha256VODSegments,
buildSha256Segment,
2019-04-08 11:13:49 +02:00
downloadPlaylistSegments,
updateStreamingPlaylistsInfohashesIfNeeded,
updatePlaylistAfterFileChange,
injectQueryToPlaylistUrls,
renameVideoFileInPlaylist
2019-01-29 08:37:25 +01:00
}
// ---------------------------------------------------------------------------
2020-10-26 16:44:23 +01:00
/**
 * Extracts the byte ranges declared by the `#EXT-X-BYTERANGE:<length>@<offset>` lines of a playlist.
 *
 * Lines may be terminated by LF or CRLF. Tags without an explicit `@<offset>` are ignored.
 *
 * @param playlistContent - content of the playlist
 * @returns the ranges, in playlist order
 */
function getRangesFromPlaylist (playlistContent: string) {
  const ranges: { offset: number, length: number }[] = []

  // Accept both line terminators, otherwise a trailing '\r' would prevent the anchored regex from matching
  const lines = playlistContent.split(/\r?\n/)
  const regex = /^#EXT-X-BYTERANGE:(\d+)@(\d+)$/

  for (const line of lines) {
    const captured = regex.exec(line)

    if (captured) {
      ranges.push({ length: parseInt(captured[1], 10), offset: parseInt(captured[2], 10) })
    }
  }

  return ranges
}