PeerTube/server/tools/peertube-import-videos.ts

400 lines
11 KiB
TypeScript
Raw Normal View History

import { registerTSPaths } from '../helpers/register-ts-paths'
registerTSPaths()
2018-02-09 16:47:06 +01:00
import * as program from 'commander'
import { join } from 'path'
2018-02-14 15:56:07 +01:00
import { doRequestAndSaveToFile } from '../helpers/requests'
import { CONSTRAINTS_FIELDS } from '../initializers/constants'
import { getClient, getVideoCategories, login, searchVideoWithSort, uploadVideo } from '../../shared/extra-utils/index'
import { truncate } from 'lodash'
2018-05-10 23:59:28 +02:00
import * as prompt from 'prompt'
import { accessSync, constants } from 'fs'
2018-08-27 16:23:34 +02:00
import { remove } from 'fs-extra'
import { sha256 } from '../helpers/core-utils'
import { buildOriginallyPublishedAt, safeGetYoutubeDL } from '../helpers/youtube-dl'
2020-01-31 16:56:52 +01:00
import { buildCommonVideoOptions, buildVideoAttributesFromCommander, getLogger, getServerCredentials } from './cli'
2019-06-13 11:09:38 +02:00
// Username/password pair carried through run() and handed to login()
interface UserInfo {
  username: string
  password: string
}
// Passed as the process-options argument of every youtubeDL.getInfo/exec call in this file
const processOptions = { maxBuffer: Number.POSITIVE_INFINITY }
2018-02-09 16:47:06 +01:00
// CLI definition: the shared video options from ./cli plus the flags specific to this import tool.
// Anything left in command.args after parsing is forwarded to youtube-dl by run() and processVideo().
let command = program
  .name('import-videos')

command = buildCommonVideoOptions(command)

command
  .option('-u, --url <url>', 'Server url')
  .option('-U, --username <username>', 'Username')
  .option('-p, --password <token>', 'Password')
  .option('--target-url <targetUrl>', 'Video target URL')
  // --since/--until are converted to Date objects by parseDate
  .option('--since <since>', 'Publication date (inclusive) since which the videos can be imported (YYYY-MM-DD)', parseDate)
  .option('--until <until>', 'Publication date (inclusive) until which the videos can be imported (YYYY-MM-DD)', parseDate)
  // No parser is registered for --first/--last: their values stay strings
  .option('--first <first>', 'Process first n elements of returned playlist')
  .option('--last <last>', 'Process last n elements of returned playlist')
  .option('-T, --tmpdir <tmpdir>', 'Working directory', __dirname)
  .usage("[global options] [ -- youtube-dl options]")
  .parse(process.argv)
2020-01-31 16:56:52 +01:00
const log = getLogger(program['verbose'])

// Entry point: resolve the server credentials, validate the CLI arguments, then start the import
getServerCredentials(command)
  .then(({ url, username, password }) => {
    if (!program['targetUrl']) {
      exitError('--target-url field is required.')
    }

    // The working directory must be readable and writable: videos and thumbnails are stored there
    try {
      accessSync(program['tmpdir'], constants.R_OK | constants.W_OK)
    } catch (e) {
      exitError('--tmpdir %s: directory does not exist or is not accessible', program['tmpdir'])
    }

    url = normalizeTargetUrl(url)
    program['targetUrl'] = normalizeTargetUrl(program['targetUrl'])

    const user = { username, password }

    run(url, user)
      .catch(err => exitError(err))
  })
  // Exit with a non-zero status (instead of only logging) so callers can detect the failure
  .catch(err => exitError(err))
2018-02-09 16:47:06 +01:00
2019-06-13 11:09:38 +02:00
/**
 * Lists the entries behind --target-url with youtube-dl, then processes them
 * one after the other with processVideo(). Terminates the process with exit
 * code 0 once every entry has been attempted.
 *
 * @param url  normalized URL of the PeerTube instance to upload to
 * @param user credentials; the password is prompted for when it is empty
 */
async function run (url: string, user: UserInfo) {
  if (!user.password) {
    user.password = await promptPassword()
  }

  const youtubeDL = await safeGetYoutubeDL()

  const options = [ '-j', '--flat-playlist', '--playlist-reverse', ...command.args ]

  // Wrap the callback API so that any later failure rejects run() itself
  // (and reaches the caller's catch) instead of being lost inside a callback
  const info = await new Promise<any>(res => {
    youtubeDL.getInfo(program['targetUrl'], options, processOptions, (err, result) => {
      if (err) {
        exitError(err.stderr + ' ' + err.message)
      }

      return res(result)
    })
  })

  // [].concat() wraps a lone object and flattens an array, so both shapes end up as an array
  let infoArray: any[] = [].concat(info)

  // --first/--last arrive as strings (no parser registered); slice() and unary minus coerce them
  if (program['first']) {
    infoArray = infoArray.slice(0, program['first'])
  } else if (program['last']) {
    infoArray = infoArray.slice(-program['last'])
  }

  // Normalize utf8 fields
  infoArray = infoArray.map(i => normalizeObject(i))

  log.info('Will download and upload %d videos.\n', infoArray.length)

  for (const info of infoArray) {
    try {
      await processVideo({
        cwd: program['tmpdir'],
        url,
        user,
        youtubeInfo: info
      })
    } catch (err) {
      // A single failing video must not abort the whole import; report why it failed
      console.error('Cannot process video.', { info, url }, err)
    }
  }

  log.info('Video/s for user %s imported: %s', user.username, program['targetUrl'])
  process.exit(0)
}
2019-06-13 11:09:38 +02:00
/**
 * Imports a single playlist entry: fetches its full metadata, skips it when it
 * falls outside the --since/--until window or when a video with the same title
 * is already found on the instance, otherwise downloads it into `cwd` and
 * uploads it.
 *
 * Download failures are logged and the entry is skipped. Metadata, search and
 * upload failures reject the returned promise so the caller can report them.
 */
async function processVideo (parameters: {
  cwd: string
  url: string
  user: { username: string, password: string }
  youtubeInfo: any
}): Promise<void> {
  const { youtubeInfo, cwd, url, user } = parameters

  log.debug('Fetching object.', youtubeInfo)

  const videoInfo = await fetchObject(youtubeInfo)
  log.debug('Fetched object.', videoInfo)

  if (program['since']) {
    if (buildOriginallyPublishedAt(videoInfo).getTime() < program['since'].getTime()) {
      log.info('Video "%s" has been published before "%s", don\'t upload it.\n',
        videoInfo.title, formatDate(program['since']))
      return
    }
  }

  if (program['until']) {
    if (buildOriginallyPublishedAt(videoInfo).getTime() > program['until'].getTime()) {
      log.info('Video "%s" has been published after "%s", don\'t upload it.\n',
        videoInfo.title, formatDate(program['until']))
      return
    }
  }

  const result = await searchVideoWithSort(url, videoInfo.title, '-match')

  log.info('############################################################\n')

  // Duplicate detection is based on an exact title match among the search results
  if (result.body.data.find(v => v.name === videoInfo.title)) {
    log.info('Video "%s" already exists, don\'t reupload it.\n', videoInfo.title)
    return
  }

  // The file name is derived from the video URL hash
  const path = join(cwd, sha256(videoInfo.url) + '.mp4')

  log.info('Downloading video "%s"...', videoInfo.title)

  const options = [ '-f', 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best', ...command.args, '-o', path ]

  let output: string[] | undefined
  try {
    const youtubeDL = await safeGetYoutubeDL()

    // Resolves with undefined (after logging) when youtube-dl reports a download error
    output = await new Promise<string[] | undefined>(res => {
      youtubeDL.exec(videoInfo.url, options, processOptions, (err, out) => {
        if (err) {
          log.error(err)
          return res(undefined)
        }

        return res(out)
      })
    })
  } catch (err) {
    log.error(err.message)
    return
  }

  if (!output) return

  log.info(output.join('\n'))

  // Awaited outside of the youtube-dl callback so that an upload failure rejects this function
  await uploadVideoOnPeerTube({
    cwd,
    url,
    user,
    videoInfo: normalizeObject(videoInfo),
    videoPath: path
  })
}
2019-06-13 11:09:38 +02:00
/**
 * Uploads an already downloaded video file to the PeerTube instance at `url`.
 *
 * Builds the upload attributes from the youtube-dl metadata (`videoInfo`) and
 * the command line options, downloads the thumbnail into `cwd` when one is
 * advertised, uploads (retrying once with a fresh token when the error message
 * contains '401'), then removes the video and thumbnail files.
 */
async function uploadVideoOnPeerTube (parameters: {
  videoInfo: any
  videoPath: string
  cwd: string
  url: string
  user: { username: string, password: string }
}) {
  const { videoInfo, videoPath, cwd, url, user } = parameters

  const category = await getCategory(videoInfo.categories, url)
  const licence = getLicence(videoInfo.license)

  let tags: string[] = []
  if (Array.isArray(videoInfo.tags)) {
    // NOTE(review): both bounds are exclusive here, so tags whose length equals
    // TAG.min or TAG.max are dropped — confirm this matches the server constraints
    tags = videoInfo.tags
      .filter(t => t.length < CONSTRAINTS_FIELDS.VIDEOS.TAG.max && t.length > CONSTRAINTS_FIELDS.VIDEOS.TAG.min)
      .map(t => t.normalize())
      .slice(0, 5)
  }

  let thumbnailfile: string | undefined
  if (videoInfo.thumbnail) {
    thumbnailfile = join(cwd, sha256(videoInfo.thumbnail) + '.jpg')

    await doRequestAndSaveToFile({
      method: 'GET',
      uri: videoInfo.thumbnail
    }, thumbnailfile)
  }

  const originallyPublishedAt = buildOriginallyPublishedAt(videoInfo)

  // Values derived from the remote metadata; handed to buildVideoAttributesFromCommander as defaults
  const defaultAttributes = {
    name: truncate(videoInfo.title, {
      length: CONSTRAINTS_FIELDS.VIDEOS.NAME.max,
      separator: /,? +/,
      omission: ' […]'
    }),
    category,
    licence,
    nsfw: isNSFW(videoInfo),
    description: videoInfo.description,
    tags
  }

  const videoAttributes = await buildVideoAttributesFromCommander(url, program, defaultAttributes)

  Object.assign(videoAttributes, {
    originallyPublishedAt: originallyPublishedAt ? originallyPublishedAt.toISOString() : null,
    thumbnailfile,
    // The same image is used as thumbnail and preview
    previewfile: thumbnailfile,
    fixture: videoPath
  })

  log.info('\nUploading on PeerTube video "%s".', videoAttributes.name)

  let accessToken = await getAccessTokenOrDie(url, user)

  try {
    await uploadVideo(url, accessToken, videoAttributes)
  } catch (err) {
    if (err.message.includes('401')) {
      log.info('Got 401 Unauthorized, token may have expired, renewing token and retry.')

      accessToken = await getAccessTokenOrDie(url, user)

      await uploadVideo(url, accessToken, videoAttributes)
    } else {
      exitError(err.message)
    }
  }

  await remove(videoPath)
  if (thumbnailfile) await remove(thumbnailfile)

  log.warn('Uploaded video "%s"!\n', videoAttributes.name)
}
2019-06-13 11:09:38 +02:00
/* ---------------------------------------------------------- */
/**
 * Maps the first remote category label to a category id of the target server.
 *
 * @param categories category labels from the remote metadata (only the first one is used)
 * @param url        URL of the server whose category list is queried
 * @returns the numeric category id, or undefined when there is no label or no
 *          case-insensitive match among the server categories
 */
async function getCategory (categories: string[], url: string) {
  // Also covers an empty array, which previously crashed on `undefined.toLowerCase()`
  const categoryString = categories?.[0]
  if (!categoryString) return undefined

  // Hard-coded mapping: this label is returned as id 11 without asking the server
  if (categoryString === 'News & Politics') return 11

  const res = await getVideoCategories(url)
  const categoriesServer = res.body

  const wanted = categoryString.toLowerCase()
  for (const key of Object.keys(categoriesServer)) {
    const categoryServer = categoriesServer[key]

    if (wanted === categoryServer.toLowerCase()) return parseInt(key, 10)
  }

  return undefined
}
/**
 * Maps the remote licence label to a PeerTube licence id.
 *
 * @returns 1 when the label mentions the Creative Commons Attribution licence,
 *          undefined for a missing or any other label
 */
function getLicence (licence: string) {
  if (!licence) return undefined

  // Accept both the British ("licence") and the American ("license") spelling of the label
  if (/Creative Commons Attribution licen[cs]e/.test(licence)) return 1

  return undefined
}
2018-02-12 11:02:14 +01:00
/**
 * Returns a shallow copy of `obj` in which every string value has been passed
 * through String.prototype.normalize(). Non-string values are copied as is and
 * the 'resolution' key is dropped. The input object is not modified.
 */
function normalizeObject (obj: any) {
  const newObj: any = {}

  for (const key of Object.keys(obj)) {
    // Deprecated key
    if (key === 'resolution') continue

    const value = obj[key]

    if (typeof value === 'string') {
      newObj[key] = value.normalize()
    } else {
      newObj[key] = value
    }
  }

  return newObj
}
/**
 * Fetches the full youtube-dl metadata of one playlist entry.
 *
 * @param info entry as returned by the playlist listing
 * @returns the metadata with string fields normalized and a `url` property set
 *          to the URL computed by buildUrl()
 */
async function fetchObject (info: any): Promise<any> {
  const url = buildUrl(info)

  // Awaited outside of the Promise executor: a rejection here now rejects
  // fetchObject() instead of leaving its promise pending forever
  const youtubeDL = await safeGetYoutubeDL()

  return new Promise<any>((res, rej) => {
    youtubeDL.getInfo(url, undefined, processOptions, (err, videoInfo) => {
      if (err) return rej(err)

      try {
        const videoInfoWithUrl = Object.assign(videoInfo, { url })
        return res(normalizeObject(videoInfoWithUrl))
      } catch (normalizeErr) {
        // Do not let an exception escape from the youtube-dl callback
        return rej(normalizeErr)
      }
    })
  })
}
/**
 * Picks the URL to fetch for a playlist entry: `webpage_url` when it is an
 * http(s) URL, else `url` when it is an http(s) URL, else a YouTube watch URL
 * built from `id`.
 */
function buildUrl (info: any) {
  const webpageUrl = info.webpage_url as string
  if (webpageUrl?.match(/^https?:\/\//)) return webpageUrl

  const url = info.url as string
  if (url?.match(/^https?:\/\//)) return url

  // It seems youtube-dl does not return the video url
  return 'https://www.youtube.com/watch?v=' + info.id
}
2018-02-20 18:56:43 +01:00
/**
 * Tells whether the remote metadata marks the video as age restricted.
 *
 * @returns true when `age_limit` is set and at least 16, false otherwise
 *          (always a real boolean, never 0/undefined/null)
 */
function isNSFW (info: any): boolean {
  return Boolean(info.age_limit && info.age_limit >= 16)
}
/**
 * Strips trailing slashes from `url` and prefixes it with 'https://' when it
 * does not already start with an http:// or https:// scheme.
 */
function normalizeTargetUrl (url: string) {
  let normalizedUrl = url.replace(/\/+$/, '')

  // Case-insensitive so that e.g. 'HTTPS://host' is not turned into 'https://HTTPS://host'
  if (!/^https?:\/\//i.test(normalizedUrl)) {
    normalizedUrl = 'https://' + normalizedUrl
  }

  return normalizedUrl
}
2019-06-13 11:09:38 +02:00
/**
 * Asks for the password on the terminal through the `prompt` module.
 * Resolves with the entered value; rejects with the error reported by prompt.get().
 */
async function promptPassword () {
  // Single field, flagged as hidden and required in the prompt schema
  const passwordSchema = {
    properties: {
      password: { hidden: true, required: true }
    }
  }

  return new Promise<string>((resolve, reject) => {
    prompt.start()

    prompt.get(passwordSchema, (error, answers) => {
      if (!error) return resolve(answers.password)

      return reject(error)
    })
  })
}
/**
 * Fetches the OAuth client of the server at `url`, logs `user` in and returns
 * the access token. When the login call fails, the process is terminated
 * through exitError(). A failure of the client lookup itself is not caught
 * here and rejects the returned promise.
 */
async function getAccessTokenOrDie (url: string, user: UserInfo) {
  const resClient = await getClient(url)
  const client = {
    id: resClient.body.client_id,
    secret: resClient.body.client_secret
  }

  try {
    const res = await login(url, client, user)
    return res.body.access_token
  } catch (err) {
    exitError('Cannot authenticate. Please check your username/password.')
  }
}
/**
 * Parses a YYYY-MM-DD command line value into a Date set to midnight, local
 * time, of that calendar day. Invalid input terminates the process through
 * exitError().
 *
 * The date is built from its components rather than with `new Date(string)`:
 * a date-only ISO string is parsed as UTC midnight, so zeroing the local hours
 * afterwards selected the previous day in time zones west of UTC.
 */
function parseDate (dateAsStr: string): Date {
  // Anchored: the whole value must be a date, not merely contain one
  const parts = /^(\d{4})-(\d{2})-(\d{2})$/.exec(dateAsStr)
  if (!parts) {
    exitError(`Invalid date passed: ${dateAsStr}. Expected format: YYYY-MM-DD. See help for usage.`)
    // Not reached when exitError terminates the process; keeps the return type honest
    return new Date(NaN)
  }

  const year = Number(parts[1])
  const monthIndex = Number(parts[2]) - 1
  const day = Number(parts[3])

  const date = new Date(0)
  // setFullYear (unlike the Date constructor) does not remap years 0-99 to 1900-1999
  date.setFullYear(year, monthIndex, day)
  date.setHours(0, 0, 0, 0)

  // Out-of-range components (month 13, February 31, ...) roll over: detect that and refuse
  const rolledOver = date.getFullYear() !== year || date.getMonth() !== monthIndex || date.getDate() !== day
  if (isNaN(date.getTime()) || rolledOver) {
    exitError(`Invalid date passed: ${dateAsStr}. See help for usage.`)
  }

  return date
}
/**
 * Formats `date` as YYYY-MM-DD using its local calendar day. parseDate()
 * zeroes the local hours of its result, and the UTC-based toISOString() would
 * print the previous day for such dates in time zones east of UTC.
 */
function formatDate (date: Date): string {
  const pad = (value: number, width: number) => String(value).padStart(width, '0')

  return [
    pad(date.getFullYear(), 4),
    pad(date.getMonth() + 1, 2),
    pad(date.getDate(), 2)
  ].join('-')
}
/**
 * Prints `message` (followed by any extra `meta` values) with console.error,
 * then terminates the process with exit code -1.
 */
function exitError (message: string, ...meta: any[]) {
  // console.error is used on purpose here rather than log.error
  const consoleArgs = [ message, ...meta ]
  console.error(...consoleArgs)

  process.exit(-1)
}