mirror of https://github.com/Chocobozzz/PeerTube
chore: add ctranslate2 and timestamped
parent
47095673b3
commit
2e242129b9
|
@ -0,0 +1,59 @@
|
|||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { expect } from 'chai'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { rm, mkdir, readFile } from 'node:fs/promises'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { transcriberFactory } from '@peertube/peertube-transcription'
|
||||
|
||||
/**
 * Integration suite exercising every listed transcription engine through `transcriberFactory`.
 *
 * For each engine name it checks that a transcriber can be created, then transcribes the
 * `video_short.mp4` fixture to VTT inside a scratch directory and verifies the returned
 * transcript description and the file written on disk.
 */
describe('Transcribers', function () {
  // Scratch directory created before the suite and removed once it is finished.
  const transcriptDirectory = join(root(), 'test-transcript')
  // Expected output location: the fixture base name (`video_short`) with the requested format.
  const vttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
  const transcribers = [
    'openai-whisper',
    'whisper-ctranslate2',
    'whisper-timestamped'
  ]

  before(async function () {
    await mkdir(transcriptDirectory, { recursive: true })
  })

  transcribers.forEach(function (transcriberName) {
    describe(`${transcriberName}`, function () {
      it(`Should instanciate`, function () {
        transcriberFactory.createFromEngineName(transcriberName)
      })

      it('Should run transcription on a media file without raising any errors', async function () {
        const transcriber = transcriberFactory.createFromEngineName(
          transcriberName,
          createLogger(),
          transcriptDirectory
        )
        const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
        const transcript = await transcriber.transcribe(
          mediaFilePath,
          { name: 'tiny' },
          'fr',
          'vtt'
        )

        // The deep equality already covers `transcript.path`, no separate path assertion is needed.
        expect(transcript).to.deep.equals({
          path: vttTranscriptPath,
          language: 'fr',
          format: 'vtt'
        })

        expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true

        // Check the transcript actually has content instead of dumping it to the console.
        const transcriptContent = await readFile(transcript.path, 'utf8')
        expect(transcriptContent, `Transcript file ${transcript.path} is empty.`).to.not.be.empty

        // Remove the file so the next engine cannot pass on a stale transcript.
        await rm(transcript.path)
      })
    })
  })

  after(async function () {
    // `force` keeps the cleanup from failing when the directory is already gone.
    await rm(transcriptDirectory, { recursive: true, force: true })
  })
})
|
|
@ -1,28 +0,0 @@
|
|||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { expect } from 'chai'
|
||||
import { remove, pathExistsSync } from 'fs-extra/esm.js'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { transcriberFactory } from '@peertube/transcription'
|
||||
|
||||
describe('Open AI Transcriber', function () {
|
||||
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
|
||||
|
||||
it('Should instanciate', function () {
|
||||
transcriberFactory.createFromEngineName('faster-whisper')
|
||||
})
|
||||
|
||||
it('Should run transcription on a media file without raising any errors', async function () {
|
||||
const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), transcriptDirectory)
|
||||
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
|
||||
const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
|
||||
expect(transcript.path).to.equals(vttTranscriptPath)
|
||||
expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`)
|
||||
})
|
||||
|
||||
after(async function () {
|
||||
await remove(transcriptDirectory)
|
||||
})
|
||||
})
|
|
@ -1,28 +0,0 @@
|
|||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { expect } from 'chai'
|
||||
import { remove, pathExistsSync } from 'fs-extra/esm.js'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { transcriberFactory } from '@peertube/transcription'
|
||||
|
||||
describe('Open AI Transcriber', function () {
|
||||
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
|
||||
|
||||
it('Should instanciate', function () {
|
||||
transcriberFactory.createFromEngineName('openai-whisper')
|
||||
})
|
||||
|
||||
it('Should run transcription on a media file without raising any errors', async function () {
|
||||
const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), transcriptDirectory)
|
||||
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
|
||||
const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
|
||||
expect(transcript.path).to.equals(vttTranscriptPath)
|
||||
expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`)
|
||||
})
|
||||
|
||||
after(async function () {
|
||||
await remove(transcriptDirectory)
|
||||
})
|
||||
})
|
|
@ -1,34 +0,0 @@
|
|||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { expect } from 'chai'
|
||||
import { remove, pathExistsSync } from 'fs-extra/esm'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { transcriberFactory } from '@peertube/peertube-transcription'
|
||||
|
||||
describe('Transcribers', function () {
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
|
||||
const transcribers = [
|
||||
'openai-whisper',
|
||||
'faster-whisper'
|
||||
]
|
||||
|
||||
transcribers.forEach(function (transcriber) {
|
||||
it(`Should instanciate a ${transcriber} transcriber`, function () {
|
||||
transcriberFactory.createFromEngineName('openai-whisper')
|
||||
})
|
||||
|
||||
it('Should run transcription on a media file without raising any errors', async function () {
|
||||
const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), transcriptDirectory)
|
||||
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
|
||||
const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
|
||||
expect(transcript.path).to.equals(vttTranscriptPath)
|
||||
expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`)
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
after(async function () {
|
||||
await remove(transcriptDirectory)
|
||||
})
|
||||
})
|
|
@ -0,0 +1,13 @@
|
|||
import { basename, extname } from 'path'
|
||||
|
||||
/**
 * Splits a file path into the parts of its final segment.
 *
 * @param path file path to inspect; only its last segment is used
 * @returns an object with:
 *  - `extension`: the extension including the leading dot (empty string when there is none)
 *  - `baseName`: the last path segment without that extension
 *  - `name`: `baseName` followed by `extension`
 */
export const getFileInfo = (path: string): { extension: string, baseName: string, name: string } => {
  const extension = extname(path)
  // Passing the extension makes `basename` strip it from the last segment.
  const baseName = basename(path, extension)
  const name = `${baseName}${extension}`

  return {
    extension,
    baseName,
    name
  }
}
|
|
@ -1,6 +1,9 @@
|
|||
import { Logger, createLogger } from 'winston'
|
||||
import { TranscriptionEngine } from './transcription-engine.js'
|
||||
import { TransformersTranscriber, OpenaiTranscriber } from './whisper/index.js'
|
||||
import {
|
||||
Ctranslate2Transcriber,
|
||||
OpenaiTranscriber, WhisperTimestampedTranscriber
|
||||
} from './whisper/index.js'
|
||||
import { AbstractTranscriber } from './abstract-transcriber.js'
|
||||
|
||||
export class TranscriberFactory {
|
||||
|
@ -10,19 +13,29 @@ export class TranscriberFactory {
|
|||
this.engines = engines
|
||||
}
|
||||
|
||||
createFromEngineName (engineName: string, logger: Logger = createLogger(), transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY) {
|
||||
createFromEngineName (
|
||||
engineName: string,
|
||||
logger: Logger = createLogger(),
|
||||
transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY
|
||||
) {
|
||||
const engine = this.engines.find(({ name }) => name === engineName)
|
||||
if (!engine) {
|
||||
throw new Error(`Unknow engine ${engineName}`)
|
||||
}
|
||||
|
||||
const transcriberArgs: ConstructorParameters<typeof AbstractTranscriber> = [ engine, logger, transcriptDirectory ]
|
||||
const transcriberArgs: ConstructorParameters<typeof AbstractTranscriber> = [
|
||||
engine,
|
||||
logger,
|
||||
transcriptDirectory
|
||||
]
|
||||
|
||||
switch (engineName) {
|
||||
case 'whisper':
|
||||
case 'openai-whisper':
|
||||
return new OpenaiTranscriber(...transcriberArgs)
|
||||
case 'transformers':
|
||||
return new TransformersTranscriber(...transcriberArgs)
|
||||
case 'whisper-ctranslate2':
|
||||
return new Ctranslate2Transcriber(...transcriberArgs)
|
||||
case 'whisper-timestamped':
|
||||
return new WhisperTimestampedTranscriber(...transcriberArgs)
|
||||
default:
|
||||
throw new Error(`Unimplemented engine ${engineName}`)
|
||||
}
|
||||
|
|
|
@ -11,16 +11,16 @@ export const engines: TranscriptionEngine[] = [
|
|||
license : 'MIT',
|
||||
supportedModelFormats: [ 'ONNX' ]
|
||||
},
|
||||
{
|
||||
name : 'transformers',
|
||||
description : 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
|
||||
type: 'binary',
|
||||
language : 'python',
|
||||
requirements : [],
|
||||
forgeURL : '',
|
||||
license : '',
|
||||
supportedModelFormats: [ 'ONNX' ]
|
||||
},
|
||||
// {
|
||||
// name : 'transformers',
|
||||
// description : 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
|
||||
// type: 'binary',
|
||||
// language : 'python',
|
||||
// requirements : [],
|
||||
// forgeURL : '',
|
||||
// license : '',
|
||||
// supportedModelFormats: [ 'ONNX' ]
|
||||
// },
|
||||
{
|
||||
name: 'openai-whisper',
|
||||
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
|
||||
|
@ -42,5 +42,16 @@ export const engines: TranscriptionEngine[] = [
|
|||
forgeURL: 'https://github.com/openai/whisper',
|
||||
license: 'MIT',
|
||||
supportedModelFormats: [ 'CTranslate2' ]
|
||||
},
|
||||
{
|
||||
name: 'whisper-timestamped',
|
||||
description: '',
|
||||
requirements: [ 'python' ],
|
||||
language: 'python',
|
||||
type: 'binary',
|
||||
binary: 'whisper-ctranslate2',
|
||||
forgeURL: 'https://github.com/openai/whisper',
|
||||
license: 'MIT',
|
||||
supportedModelFormats: [ 'CTranslate2' ]
|
||||
}
|
||||
]
|
||||
|
|
|
@ -3,8 +3,9 @@ import { $ } from 'execa'
|
|||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { getFileInfo } from '../../file-utils.js'
|
||||
|
||||
export class FasterWhisperTranscriber extends AbstractTranscriber {
|
||||
export class Ctranslate2Transcriber extends AbstractTranscriber {
|
||||
async transcribe (
|
||||
mediaFilePath: string,
|
||||
model: TranscriptionModel,
|
||||
|
@ -12,8 +13,9 @@ export class FasterWhisperTranscriber extends AbstractTranscriber {
|
|||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
const $$ = $({ verbose: true })
|
||||
const { baseName } = getFileInfo(mediaFilePath)
|
||||
|
||||
await $$`whisper ${[
|
||||
await $$`whisper-ctranslate2 ${[
|
||||
mediaFilePath,
|
||||
'--model',
|
||||
model.name,
|
||||
|
@ -27,7 +29,7 @@ export class FasterWhisperTranscriber extends AbstractTranscriber {
|
|||
|
||||
return {
|
||||
language,
|
||||
path: join(this.transcriptDirectory, `test.${format}`),
|
||||
path: join(this.transcriptDirectory, `${baseName}.${format}`),
|
||||
format
|
||||
}
|
||||
}
|
|
@ -1,3 +1,5 @@
|
|||
export * from './ctranslate2-transcriber.js'
|
||||
export * from './transformers-js-transcriber.js'
|
||||
export * from './transformers-transcriber.js'
|
||||
export * from './openai-transcriber.js'
|
||||
export * from './timestamped-transcriber.js'
|
||||
|
|
|
@ -3,6 +3,7 @@ import { $ } from 'execa'
|
|||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { getFileInfo } from '../../file-utils.js'
|
||||
|
||||
export class OpenaiTranscriber extends AbstractTranscriber {
|
||||
async transcribe (
|
||||
|
@ -11,9 +12,12 @@ export class OpenaiTranscriber extends AbstractTranscriber {
|
|||
language: string,
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
// Shall we run the command with `{ shell: true }` to get the same error as in sh ?
|
||||
// ex: ENOENT => Command not found
|
||||
const $$ = $({ verbose: true })
|
||||
const { baseName } = getFileInfo(mediaFilePath)
|
||||
|
||||
await $$`whisper ${[
|
||||
const { stdout } = await $$`whisper ${[
|
||||
mediaFilePath,
|
||||
'--model',
|
||||
model.name,
|
||||
|
@ -22,12 +26,14 @@ export class OpenaiTranscriber extends AbstractTranscriber {
|
|||
'--output_dir',
|
||||
this.transcriptDirectory
|
||||
]}`
|
||||
console.log(stdout)
|
||||
|
||||
await $$`ls ${this.transcriptDirectory}`
|
||||
const { stdout: lsStdout } = await $$`ls ${this.transcriptDirectory}`
|
||||
console.log(lsStdout)
|
||||
|
||||
return {
|
||||
language,
|
||||
path: join(this.transcriptDirectory, `test.${format}`),
|
||||
path: join(this.transcriptDirectory, `${baseName}.${format}`),
|
||||
format
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
import assert from 'node:assert'
|
||||
import { join } from 'node:path'
|
||||
import { existsSync } from 'node:fs'
|
||||
import { rename } from 'node:fs/promises'
|
||||
import { $ } from 'execa'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { getFileInfo } from '../../file-utils.js'
|
||||
|
||||
/**
 * Transcriber that shells out to the `whisper_timestamped` command line tool.
 */
export class WhisperTimestampedTranscriber extends AbstractTranscriber {
  /**
   * Runs `whisper_timestamped` on a media file and returns a description of the transcript.
   *
   * @param mediaFilePath path of the media file handed to the CLI
   * @param model model whose `name` is passed as `--model`
   * @param language language reported in the returned transcript
   * @param format extension of the transcript file to return (`vtt` by default)
   * @returns the transcript path (`<transcriptDirectory>/<media base name>.<format>`), language and format
   * @throws AssertionError when the expected output file is missing after the run;
   *         the promise also rejects if the command or the rename fails
   */
  async transcribe (
    mediaFilePath: string,
    model: TranscriptionModel,
    language: string,
    format: TranscriptFormat = 'vtt'
  ): Promise<Transcript> {
    const $$ = $({ verbose: true })
    const { baseName, name } = getFileInfo(mediaFilePath)

    // NOTE(review): `language` is not forwarded to the CLI, it is only echoed back in the
    // result — confirm whether a language option should be passed here.
    await $$`whisper_timestamped ${[
      mediaFilePath,
      '--model',
      model.name,
      '--output_format',
      'all',
      '--output_dir',
      this.transcriptDirectory
    ]}`

    // This code expects the output file to keep the media extension (`<name>.<format>`),
    // then renames it to `<baseName>.<format>`.
    const internalTranscriptPath = join(this.transcriptDirectory, `${name}.${format}`)
    const transcriptPath = join(this.transcriptDirectory, `${baseName}.${format}`)

    assert(
      existsSync(internalTranscriptPath),
      `whisper_timestamped did not produce the expected transcript file ${internalTranscriptPath}`
    )

    await rename(internalTranscriptPath, transcriptPath)

    return {
      language,
      path: transcriptPath,
      format
    }
  }
}
|
|
@ -1,19 +1,18 @@
|
|||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { Promise } from 'bluebird'
|
||||
import { TranscriptionModel } from "../../transcription-model.js";
|
||||
import { AbstractTranscriber } from "../../abstract-transcriber.js";
|
||||
import { Transcript, TranscriptFormat } from "../../transcript.js";
|
||||
|
||||
// Disable local models
|
||||
// env.allowLocalModels = true
|
||||
|
||||
export class TransformersJsTranscriber extends AbstractTranscriber {
|
||||
async transcribe (
|
||||
async transcribe(
|
||||
mediaFilePath: string,
|
||||
model: TranscriptionModel,
|
||||
language: string,
|
||||
format: TranscriptFormat = 'vtt'
|
||||
format: TranscriptFormat = "vtt",
|
||||
): Promise<Transcript> {
|
||||
return Promise.resolve(undefined)
|
||||
return Promise.resolve(undefined);
|
||||
// return pipeline('automatic-speech-recognition', 'no_attentions', {
|
||||
// // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
|
||||
// revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main'
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { $ } from 'execa'
|
||||
import { join } from 'path'
|
||||
|
||||
export class TransformersTranscriber extends AbstractTranscriber {
|
||||
async transcribe (
|
||||
|
@ -9,6 +11,33 @@ export class TransformersTranscriber extends AbstractTranscriber {
|
|||
language: string,
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
return Promise.resolve(undefined)
|
||||
const $$ = $({ verbose: true })
|
||||
// const ffmpegChildProcess = $$`ffmpeg ${[
|
||||
// '-i',
|
||||
// mediaFilePath,
|
||||
// '-vn', // no video
|
||||
// '-ar',
|
||||
// 16000, // set the audio sampling frequency
|
||||
// '-ac',
|
||||
// '1', // set the number of audio channels to 1 since Vosk is expecting mono
|
||||
// '-bufsize',
|
||||
// 1000, // set a buffer size to provide a steady flow of frames
|
||||
// '-'
|
||||
// ]}`
|
||||
|
||||
await $$`transformers-cli ${[
|
||||
'--task',
|
||||
'automatic-speech-recognition',
|
||||
'--model',
|
||||
'openai/whisper-tiny',
|
||||
'--input',
|
||||
mediaFilePath
|
||||
]}`
|
||||
|
||||
return {
|
||||
language,
|
||||
path: join(this.transcriptDirectory, `test.${format}`),
|
||||
format
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue