// Source: PeerTube/server/tools/peertube-import-videos.ts
// (TypeScript; listed by the repository viewer as 416 lines, 12 KiB)

import { registerTSPaths } from '../helpers/register-ts-paths'
registerTSPaths()
2021-06-25 17:48:27 +02:00
import { program } from 'commander'
import { accessSync, constants } from 'fs'
import { remove } from 'fs-extra'
import { truncate } from 'lodash'
import { join } from 'path'
import * as prompt from 'prompt'
import { promisify } from 'util'
2021-07-09 15:03:44 +02:00
import { YoutubeDL } from '@server/helpers/youtube-dl'
import { getVideoCategories, uploadVideo } from '../../shared/extra-utils/index'
import { sha256 } from '../helpers/core-utils'
import { doRequestAndSaveToFile } from '../helpers/requests'
import { CONSTRAINTS_FIELDS } from '../initializers/constants'
2021-07-09 15:03:44 +02:00
import {
buildCommonVideoOptions,
buildServer,
buildVideoAttributesFromCommander,
getAccessTokenOrDie,
getLogger,
getServerCredentials
} from './cli'
// Process options handed to every youtube-dl call made by this script
// (getInfo and exec). The output buffer size is left unbounded.
const processOptions = { maxBuffer: Number.POSITIVE_INFINITY }
2018-02-09 16:47:06 +01:00
// ---------------------------------------------------------------------------
// Command-line definition.
// The common video options come from buildCommonVideoOptions(); the options
// below are specific to this import tool. Everything left in `command.args`
// after parsing is forwarded to youtube-dl by the functions further down.
// ---------------------------------------------------------------------------
let command = program
  .name('import-videos')

command = buildCommonVideoOptions(command)

command
  .option('-u, --url <url>', 'Server url')
  .option('-U, --username <username>', 'Username')
  .option('-p, --password <token>', 'Password')
  .option('--target-url <targetUrl>', 'Video target URL')
  .option('--since <since>', 'Publication date (inclusive) since which the videos can be imported (YYYY-MM-DD)', parseDate)
  .option('--until <until>', 'Publication date (inclusive) until which the videos can be imported (YYYY-MM-DD)', parseDate)
  .option('--first <first>', 'Process first n elements of returned playlist')
  .option('--last <last>', 'Process last n elements of returned playlist')
  .option('--wait-interval <waitInterval>', 'Duration between two video imports (in seconds)', convertIntoMs)
  .option('-T, --tmpdir <tmpdir>', 'Working directory', __dirname)
  .usage("[global options] [ -- youtube-dl options]")
  .parse(process.argv)

// Parsed options, shared by the functions of this file
const options = command.opts()

const log = getLogger(options.verbose)

// Entry point: resolve the credentials, validate the arguments, then run the import
getServerCredentials(command)
  .then(({ url, username, password }) => {
    if (!options.targetUrl) {
      exitError('--target-url field is required.')
    }

    // The working directory must be readable and writable: videos and
    // thumbnails are stored there before being uploaded
    try {
      accessSync(options.tmpdir, constants.R_OK | constants.W_OK)
    } catch (e) {
      exitError('--tmpdir %s: directory does not exist or is not accessible', options.tmpdir)
    }

    url = normalizeTargetUrl(url)
    options.targetUrl = normalizeTargetUrl(options.targetUrl)

    run(url, username, password)
      .catch(err => exitError(err))
  })
  .catch(err => console.error(err))
2018-02-09 16:47:06 +01:00
2021-07-09 15:03:44 +02:00
/**
 * Imports every video found at `options.targetUrl` into the PeerTube server.
 *
 * Asks for the password interactively when none was given, lists the target
 * with youtube-dl, applies the `--first` / `--last` restriction, then
 * processes the entries one by one (waiting `--wait-interval` between two
 * entries when set). A failure on one entry is logged and does not stop the
 * others. The process exits with code 0 once the loop is over.
 *
 * @param url normalized URL of the PeerTube server
 * @param username PeerTube account used for the upload
 * @param password password of that account (prompted for when empty)
 */
async function run (url: string, username: string, password: string) {
  if (!password) password = await promptPassword()

  const youtubeDLBinary = await YoutubeDL.safeGetYoutubeDL()

  let info = await getYoutubeDLInfo(youtubeDLBinary, options.targetUrl, command.args)

  if (!Array.isArray(info)) info = [ info ]

  // Try to fix youtube channels upload
  const uploadsObject = info.find(i => !i.ie_key && !i.duration && i.title === 'Uploads')

  if (uploadsObject) {
    console.log('Fixing URL to %s.', uploadsObject.url)

    info = await getYoutubeDLInfo(youtubeDLBinary, uploadsObject.url, command.args)
  }

  // The second lookup may have returned a single object again
  let infoArray: any[] = [].concat(info)

  // --first takes precedence over --last when both are given
  if (options.first) {
    infoArray = infoArray.slice(0, options.first)
  } else if (options.last) {
    infoArray = infoArray.slice(-options.last)
  }

  // Normalize utf8 fields
  infoArray = infoArray.map(i => normalizeObject(i))

  log.info('Will download and upload %d videos.\n', infoArray.length)

  for (const [ index, entry ] of infoArray.entries()) {
    try {
      // No pause before the very first video
      if (index > 0 && options.waitInterval) {
        log.info("Wait for %d seconds before continuing.", options.waitInterval / 1000)
        await new Promise(res => setTimeout(res, options.waitInterval))
      }

      await processVideo({
        cwd: options.tmpdir,
        url,
        username,
        password,
        youtubeInfo: entry
      })
    } catch (err) {
      console.error('Cannot process video.', { info: entry, url, err })
    }
  }

  log.info('Video/s for user %s imported: %s', username, options.targetUrl)
  process.exit(0)
}
/**
 * Downloads one video with youtube-dl and uploads it on PeerTube.
 *
 * The video is skipped when:
 *  - its publication date is before `--since` or after `--until`
 *  - the local search of the server returns a video with exactly the same name
 *
 * Errors raised by the download or the upload are logged and not rethrown.
 *
 * @param parameters.cwd directory in which the video file is downloaded
 * @param parameters.url URL of the PeerTube server
 * @param parameters.username PeerTube account used for the upload
 * @param parameters.password password of that account
 * @param parameters.youtubeInfo playlist entry returned by youtube-dl
 */
async function processVideo (parameters: {
  cwd: string
  url: string
  username: string
  password: string
  youtubeInfo: any
}) {
  const { youtubeInfo, cwd, url, username, password } = parameters
  const youtubeDL = new YoutubeDL('', [])

  // The playlist entry is "flat": fetch the complete information of the video
  log.debug('Fetching object.', youtubeInfo)
  const videoInfo = await fetchObject(youtubeInfo)
  log.debug('Fetched object.', videoInfo)

  const originallyPublishedAt = youtubeDL.buildOriginallyPublishedAt(videoInfo)

  // Videos without a known publication date are never filtered out by date
  if (options.since && originallyPublishedAt && originallyPublishedAt.getTime() < options.since.getTime()) {
    log.info('Video "%s" has been published before "%s", don\'t upload it.\n', videoInfo.title, formatDate(options.since))
    return
  }

  if (options.until && originallyPublishedAt && originallyPublishedAt.getTime() > options.until.getTime()) {
    log.info('Video "%s" has been published after "%s", don\'t upload it.\n', videoInfo.title, formatDate(options.until))
    return
  }

  const server = buildServer(url)
  const { data } = await server.searchCommand.advancedVideoSearch({
    search: {
      search: videoInfo.title,
      sort: '-match',
      searchTarget: 'local'
    }
  })

  log.info('############################################################\n')

  // Only an exact name match counts as "already imported"
  if (data.find(v => v.name === videoInfo.title)) {
    log.info('Video "%s" already exists, don\'t reupload it.\n', videoInfo.title)
    return
  }

  // The file name is derived from the video URL
  const path = join(cwd, sha256(videoInfo.url) + '.mp4')

  log.info('Downloading video "%s"...', videoInfo.title)

  // Options given after "--" on the command line are forwarded to youtube-dl
  const youtubeDLOptions = [ '-f', youtubeDL.getYoutubeDLVideoFormat(), ...command.args, '-o', path ]

  try {
    const youtubeDLBinary = await YoutubeDL.safeGetYoutubeDL()
    const youtubeDLExec = promisify(youtubeDLBinary.exec).bind(youtubeDLBinary)
    const output = await youtubeDLExec(videoInfo.url, youtubeDLOptions, processOptions)

    log.info(output.join('\n'))

    await uploadVideoOnPeerTube({
      youtubeDL,
      cwd,
      url,
      username,
      password,
      videoInfo: normalizeObject(videoInfo),
      videoPath: path
    })
  } catch (err) {
    log.error(err.message)
  }
}
2019-06-13 11:09:38 +02:00
/**
 * Uploads a downloaded video file on PeerTube, then removes the local files.
 *
 * The upload attributes are built from the youtube-dl information (name,
 * category, licence, nsfw, description, tags) and handed to
 * buildVideoAttributesFromCommander() together with the command-line program.
 * The thumbnail, when there is one, is downloaded into `cwd` and used as both
 * thumbnail and preview.
 *
 * When the upload fails with a message containing "401", a new access token is
 * requested and the upload is retried once. Any other upload error ends the
 * process through exitError().
 *
 * @param parameters.youtubeDL helper used to compute the original publication date
 * @param parameters.videoInfo youtube-dl information of the video
 * @param parameters.videoPath path of the downloaded video file
 * @param parameters.cwd directory in which the thumbnail is stored
 * @param parameters.url URL of the PeerTube server
 * @param parameters.username PeerTube account used for the upload
 * @param parameters.password password of that account
 */
async function uploadVideoOnPeerTube (parameters: {
  youtubeDL: YoutubeDL
  videoInfo: any
  videoPath: string
  cwd: string
  url: string
  username: string
  password: string
}) {
  const { youtubeDL, videoInfo, videoPath, cwd, url, username, password } = parameters

  const category = await getCategory(videoInfo.categories, url)
  const licence = getLicence(videoInfo.license)

  // Keep at most 5 tags whose length is strictly between the configured bounds
  let tags = []
  if (Array.isArray(videoInfo.tags)) {
    tags = videoInfo.tags
      .filter(t => t.length < CONSTRAINTS_FIELDS.VIDEOS.TAG.max && t.length > CONSTRAINTS_FIELDS.VIDEOS.TAG.min)
      .map(t => t.normalize())
      .slice(0, 5)
  }

  // The thumbnail file name is derived from the thumbnail URL
  let thumbnailfile
  if (videoInfo.thumbnail) {
    thumbnailfile = join(cwd, sha256(videoInfo.thumbnail) + '.jpg')

    await doRequestAndSaveToFile(videoInfo.thumbnail, thumbnailfile)
  }

  const originallyPublishedAt = youtubeDL.buildOriginallyPublishedAt(videoInfo)

  const defaultAttributes = {
    name: truncate(videoInfo.title, {
      length: CONSTRAINTS_FIELDS.VIDEOS.NAME.max,
      separator: /,? +/,
      omission: ' […]'
    }),
    category,
    licence,
    nsfw: isNSFW(videoInfo),
    description: videoInfo.description,
    tags
  }

  let accessToken = await getAccessTokenOrDie(url, username, password)
  const server = buildServer(url, accessToken)

  const videoAttributes = await buildVideoAttributesFromCommander(server, program, defaultAttributes)

  Object.assign(videoAttributes, {
    originallyPublishedAt: originallyPublishedAt ? originallyPublishedAt.toISOString() : null,
    thumbnailfile,
    previewfile: thumbnailfile,
    fixture: videoPath
  })

  log.info('\nUploading on PeerTube video "%s".', videoAttributes.name)

  try {
    await uploadVideo(url, accessToken, videoAttributes)
  } catch (err) {
    if (err.message.includes('401')) {
      log.info('Got 401 Unauthorized, token may have expired, renewing token and retry.')

      accessToken = await getAccessTokenOrDie(url, username, password)

      // A failure of this second attempt is propagated to the caller
      await uploadVideo(url, accessToken, videoAttributes)
    } else {
      exitError(err.message)
    }
  }

  // The local copies are only removed once the upload succeeded
  await remove(videoPath)
  if (thumbnailfile) await remove(thumbnailfile)

  // NOTE(review): logged at "warn" level, presumably so that it is displayed
  // whatever the verbosity is - confirm against getLogger()
  log.warn('Uploaded video "%s"!\n', videoAttributes.name)
}
2019-06-13 11:09:38 +02:00
/* ---------------------------------------------------------- */
/**
 * Maps the first category reported by youtube-dl to a category id of the server.
 *
 * "News & Politics" is mapped to the hard-coded id 11 without any request.
 * Otherwise the categories of the server are fetched and compared by label,
 * case-insensitively.
 *
 * @param categories category labels reported by youtube-dl (may be missing or empty)
 * @param url URL of the PeerTube server
 * @returns the category id, or undefined when there is no category or no match
 */
async function getCategory (categories: string[], url: string) {
  if (!categories) return undefined

  const categoryString = categories[0]

  // An empty list has no first entry: return now instead of requesting the
  // server and failing on `undefined.toLowerCase()`
  if (!categoryString) return undefined

  if (categoryString === 'News & Politics') return 11

  const res = await getVideoCategories(url)
  const categoriesServer = res.body

  const wanted = categoryString.toLowerCase()

  for (const key of Object.keys(categoriesServer)) {
    const categoryServer = categoriesServer[key]

    if (wanted === categoryServer.toLowerCase()) return parseInt(key, 10)
  }

  return undefined
}
/**
 * Maps the licence label reported by youtube-dl to a licence id.
 *
 * Only the "Creative Commons Attribution" licence is recognized, and it is
 * mapped to id 1. Both spellings ("licence" and "license") are accepted
 * because the label depends on the language variant of the source page.
 *
 * @param licence licence label reported by youtube-dl (may be missing)
 * @returns 1 for a Creative Commons Attribution licence, undefined otherwise
 */
function getLicence (licence: string) {
  if (!licence) return undefined

  if (/Creative Commons Attribution licen[cs]e/.test(licence)) return 1

  return undefined
}
2018-02-12 11:02:14 +01:00
/**
 * Returns a shallow copy of `obj` in which every string value has been
 * Unicode-normalized (String.prototype.normalize, default form).
 *
 * The "resolution" key is dropped. Values that are not strings are copied
 * as they are (same reference).
 *
 * @param obj object returned by youtube-dl
 * @returns the normalized copy
 */
function normalizeObject (obj: any) {
  const newObj: any = {}

  for (const key of Object.keys(obj)) {
    // Deprecated key
    if (key === 'resolution') continue

    const value = obj[key]

    newObj[key] = typeof value === 'string'
      ? value.normalize()
      : value
  }

  return newObj
}
/**
 * Fetches the complete youtube-dl information of a playlist entry.
 *
 * The URL computed by buildUrl() is added to the result as `url`, and the
 * result goes through normalizeObject().
 *
 * The youtube-dl binary is awaited before the callback API is wrapped in a
 * promise, so that a failure to get the binary rejects the returned promise
 * instead of leaving it pending forever.
 *
 * @param info playlist entry returned by youtube-dl
 * @returns the normalized information of the video
 */
async function fetchObject (info: any) {
  const url = buildUrl(info)
  const youtubeDL = await YoutubeDL.safeGetYoutubeDL()

  return new Promise<any>((res, rej) => {
    youtubeDL.getInfo(url, undefined, processOptions, (err, videoInfo) => {
      if (err) return rej(err)

      const videoInfoWithUrl = Object.assign(videoInfo, { url })
      return res(normalizeObject(videoInfoWithUrl))
    })
  })
}
/**
 * Picks the URL of a video from a youtube-dl entry.
 *
 * Order of preference: `webpage_url`, then `url`, each of them only when it is
 * a string starting with "http://" or "https://". When none qualifies, a
 * YouTube watch URL is built from `info.id`.
 *
 * @param info entry returned by youtube-dl
 * @returns the URL of the video
 */
function buildUrl (info: any) {
  // A value that is not a string is ignored instead of raising a TypeError
  const isHttpUrl = (value: unknown): value is string => {
    return typeof value === 'string' && /^https?:\/\//.test(value)
  }

  if (isHttpUrl(info.webpage_url)) return info.webpage_url

  if (isHttpUrl(info.url)) return info.url

  // It seems youtube-dl does not return the video url
  return 'https://www.youtube.com/watch?v=' + info.id
}
2018-02-20 18:56:43 +01:00
/**
 * Tells whether a video should be flagged as NSFW: its `age_limit` is 16 or more.
 *
 * The result is the raw value of the `&&` expression: a missing or falsy
 * `age_limit` is returned as it is (undefined, null, 0) rather than `false`.
 *
 * @param info youtube-dl information of the video
 */
function isNSFW (info: any) {
  return info.age_limit && info.age_limit >= 16
}
/**
 * Normalizes a URL given on the command line: trailing slashes are removed and
 * "https://" is prepended when the URL starts with neither "http://" nor
 * "https://".
 *
 * @param url URL typed by the user
 * @returns the normalized URL
 */
function normalizeTargetUrl (url: string) {
  let normalizedUrl = url.replace(/\/+$/, '')

  if (!normalizedUrl.startsWith('http://') && !normalizedUrl.startsWith('https://')) {
    normalizedUrl = 'https://' + normalizedUrl
  }

  return normalizedUrl
}
2019-06-13 11:09:38 +02:00
/**
 * Asks for the password on the terminal with the `prompt` module.
 *
 * The field is declared as hidden and required.
 *
 * @returns the password typed by the user; rejects with the error reported by `prompt`
 */
async function promptPassword () {
  const passwordSchema = {
    properties: {
      password: { hidden: true, required: true }
    }
  }

  return new Promise<string>((resolve, reject) => {
    prompt.start()

    prompt.get(passwordSchema, (err, result) => {
      if (err) {
        reject(err)
        return
      }

      resolve(result.password)
    })
  })
}
/**
 * Option parser of `--since` and `--until`.
 *
 * The whole value must have the YYYY-MM-DD shape: a value that only contains
 * such a date somewhere is rejected. An invalid value ends the process through
 * exitError().
 *
 * @param dateAsStr value typed by the user
 * @returns the parsed date, with hours, minutes and seconds set to 0 (local time)
 */
function parseDate (dateAsStr: string): Date {
  if (!/^\d{4}-\d{2}-\d{2}$/.test(dateAsStr)) {
    exitError(`Invalid date passed: ${dateAsStr}. Expected format: YYYY-MM-DD. See help for usage.`)
  }

  const date = new Date(dateAsStr)

  // NOTE(review): a date-only string is parsed as UTC while setHours() works in
  // local time, so the resulting instant depends on the time zone - confirm
  // that this is the intended behaviour
  date.setHours(0, 0, 0)

  // Catches well-formed but impossible dates such as 2021-13-45
  if (isNaN(date.getTime())) {
    exitError(`Invalid date passed: ${dateAsStr}. See help for usage.`)
  }

  return date
}
/**
 * Formats a date for the log messages: the date part (before the "T") of its
 * ISO 8601 representation, which is expressed in UTC.
 *
 * @param date date to format
 * @returns the formatted date
 */
function formatDate (date: Date): string {
  return date.toISOString().split('T')[0]
}
/**
 * Option parser of `--wait-interval`: converts a number of seconds into
 * milliseconds.
 *
 * A value that is not a number, or that is not strictly positive, ends the
 * process through exitError().
 *
 * @param secondsAsStr value typed by the user
 * @returns the duration in milliseconds
 */
function convertIntoMs (secondsAsStr: string): number {
  const seconds = parseInt(secondsAsStr, 10)

  // parseInt() returns NaN for a value that does not start with a number, and
  // NaN is not caught by the "<= 0" comparison alone
  if (Number.isNaN(seconds) || seconds <= 0) {
    exitError(`Invalid duration passed: ${seconds}. Expected duration to be strictly positive and in seconds`)
  }

  return Math.round(seconds * 1000)
}
/**
 * Prints an error on stderr, then ends the process with exit code -1.
 *
 * @param message message, or format string, given to console.error
 * @param meta additional values given to console.error after the message
 */
function exitError (message: string, ...meta: any[]) {
  // use console.error instead of log.error here
  const printed: any[] = [ message, ...meta ]
  console.error(...printed)

  process.exit(-1)
}
2020-11-17 15:28:24 +01:00
/**
 * Promise wrapper around `youtubeDL.getInfo()`.
 *
 * The call always carries the '-j', '--flat-playlist' and '--playlist-reverse'
 * options, followed by the extra arguments given by the caller.
 *
 * @param youtubeDL youtube-dl binary wrapper exposing `getInfo`
 * @param url URL to inspect
 * @param args extra youtube-dl arguments
 * @returns whatever `getInfo` passes to its callback; rejects with its error
 */
function getYoutubeDLInfo (youtubeDL: any, url: string, args: string[]) {
  return new Promise<any>((resolve, reject) => {
    const infoOptions = [ '-j', '--flat-playlist', '--playlist-reverse', ...args ]

    youtubeDL.getInfo(url, infoOptions, processOptions, (err, info) => {
      if (err) {
        reject(err)
        return
      }

      resolve(info)
    })
  })
}