mirror of https://github.com/Chocobozzz/PeerTube
chore(test): implement formats test of all implementations
Also compare the results of the other implementations to those of the reference implementation
parent
b615bf1523
commit
27d0e66698
|
@ -10,35 +10,29 @@ config.truncateThreshold = 0
|
|||
|
||||
describe('Open AI Whisper transcriber', function () {
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
const expectedVttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
|
||||
const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
|
||||
|
||||
before(async function () {
|
||||
await mkdir(transcriptDirectory, { recursive: true })
|
||||
})
|
||||
|
||||
it('Should transcribe a media file', async function () {
|
||||
const transcriber = new OpenaiTranscriber(
|
||||
{
|
||||
name: 'openai-whisper',
|
||||
requirements: [],
|
||||
language: '',
|
||||
type: 'binary',
|
||||
license: '',
|
||||
binary: 'whisper',
|
||||
supportedModelFormats: [ 'PyTorch' ]
|
||||
},
|
||||
createLogger(),
|
||||
transcriptDirectory
|
||||
)
|
||||
const transcript = await transcriber.transcribe(
|
||||
buildAbsoluteFixturePath('video_short.mp4'),
|
||||
{ name: 'tiny' },
|
||||
'fr',
|
||||
'vtt'
|
||||
)
|
||||
|
||||
before(async function () {
|
||||
await mkdir(transcriptDirectory, { recursive: true })
|
||||
})
|
||||
|
||||
it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath)
|
||||
expect(transcript).to.deep.equals({
|
||||
path: expectedVttTranscriptPath,
|
||||
language: 'fr',
|
||||
path: join(transcriptDirectory, 'video_short.vtt'),
|
||||
language: 'en',
|
||||
format: 'vtt'
|
||||
})
|
||||
|
||||
|
@ -54,6 +48,39 @@ You
|
|||
)
|
||||
})
|
||||
|
||||
it('May produce a transcript file in the `srt` format', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
|
||||
expect(transcript).to.deep.equals({
|
||||
path: join(transcriptDirectory, 'video_short.srt'),
|
||||
language: 'en',
|
||||
format: 'srt'
|
||||
})
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(
|
||||
`1
|
||||
00:00:00,000 --> 00:00:02,000
|
||||
You
|
||||
|
||||
`
|
||||
)
|
||||
})
|
||||
|
||||
it('May produce a transcript file in the `txt` format', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')
|
||||
expect(transcript).to.deep.equals({
|
||||
path: join(transcriptDirectory, 'video_short.txt'),
|
||||
language: 'en',
|
||||
format: 'txt'
|
||||
})
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(`You
|
||||
`)
|
||||
})
|
||||
|
||||
after(async function () {
|
||||
await rm(transcriptDirectory, { recursive: true, force: true })
|
||||
})
|
||||
|
|
|
@ -4,56 +4,116 @@ import { expect, config } from 'chai'
|
|||
import { existsSync } from 'node:fs'
|
||||
import { mkdir, readFile, rm } from 'node:fs/promises'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { OpenaiTranscriber } from '@peertube/peertube-transcription'
|
||||
import { OpenaiTranscriber, WhisperTimestampedTranscriber } from '@peertube/peertube-transcription'
|
||||
|
||||
config.truncateThreshold = 0
|
||||
|
||||
describe('Linto timestamped Whisper transcriber', function () {
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
const expectedVttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
|
||||
|
||||
before(async function () {
|
||||
await mkdir(transcriptDirectory, { recursive: true })
|
||||
})
|
||||
|
||||
it('Should transcribe a media file', async function () {
|
||||
const transcriber = new OpenaiTranscriber(
|
||||
const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
|
||||
const transcriber = new WhisperTimestampedTranscriber(
|
||||
{
|
||||
name: 'timestamped-whisper',
|
||||
name: 'whisper-timestamped',
|
||||
requirements: [],
|
||||
language: '',
|
||||
type: 'binary',
|
||||
license: '',
|
||||
binary: 'whisper_timestamped',
|
||||
supportedModelFormats: [ 'PyTorch' ]
|
||||
},
|
||||
createLogger(),
|
||||
transcriptDirectory
|
||||
)
|
||||
|
||||
before(async function () {
|
||||
await mkdir(transcriptDirectory, { recursive: true })
|
||||
})
|
||||
|
||||
it('Should transcribe a media file and produce transcript file in th `vtt` format by default', async function () {
|
||||
const transcript = await transcriber.transcribe(
|
||||
buildAbsoluteFixturePath('video_short.mp4'),
|
||||
shortVideoPath,
|
||||
{ name: 'tiny' },
|
||||
'fr',
|
||||
'vtt'
|
||||
)
|
||||
|
||||
expect(transcript).to.deep.equals({
|
||||
path: expectedVttTranscriptPath,
|
||||
path: join(transcriptDirectory, 'video_short.vtt'),
|
||||
language: 'fr',
|
||||
format: 'vtt'
|
||||
})
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
|
||||
// Whisper timestamped should produce a transcript with millisecond-precision timestamps.
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(
|
||||
`WEBVTT
|
||||
|
||||
00:00.000 --> 00:02.000
|
||||
You
|
||||
00:02.480 --> 00:02.500
|
||||
you
|
||||
|
||||
`
|
||||
)
|
||||
})
|
||||
|
||||
it('May produce a transcript file in the `srt` format', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
|
||||
expect(transcript).to.deep.equals({
|
||||
path: join(transcriptDirectory, 'video_short.srt'),
|
||||
language: 'en',
|
||||
format: 'srt'
|
||||
})
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(
|
||||
`1
|
||||
00:00:02,480 --> 00:00:02,500
|
||||
you
|
||||
|
||||
`
|
||||
)
|
||||
})
|
||||
|
||||
it('May produce a transcript file in `txt` format', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')
|
||||
expect(transcript).to.deep.equals({
|
||||
path: join(transcriptDirectory, 'video_short.txt'),
|
||||
language: 'en',
|
||||
format: 'txt'
|
||||
})
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(`You
|
||||
`)
|
||||
})
|
||||
|
||||
it('Should produce the same transcript text as openai-whisper given the same parameters', async function () {
|
||||
const transcribeArguments: Parameters<typeof transcriber.transcribe> = [
|
||||
shortVideoPath,
|
||||
{ name: 'tiny' },
|
||||
'en',
|
||||
'txt'
|
||||
]
|
||||
const transcript = await transcriber.transcribe(...transcribeArguments)
|
||||
const openaiTranscriber = new OpenaiTranscriber(
|
||||
{
|
||||
name: 'openai-whisper',
|
||||
requirements: [],
|
||||
type: 'binary',
|
||||
binary: 'whisper',
|
||||
supportedModelFormats: [ 'PyTorch' ]
|
||||
},
|
||||
createLogger(),
|
||||
join(transcriptDirectory, 'openai-whisper')
|
||||
)
|
||||
const openaiTranscript = await openaiTranscriber.transcribe(...transcribeArguments)
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(await readFile(openaiTranscript.path, 'utf8'))
|
||||
})
|
||||
|
||||
after(async function () {
|
||||
await rm(transcriptDirectory, { recursive: true, force: true })
|
||||
})
|
||||
|
|
|
@ -4,41 +4,34 @@ import { expect, config } from 'chai'
|
|||
import { existsSync } from 'node:fs'
|
||||
import { mkdir, readFile, rm } from 'node:fs/promises'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { OpenaiTranscriber } from '@peertube/peertube-transcription'
|
||||
import { Ctranslate2Transcriber, OpenaiTranscriber } from '@peertube/peertube-transcription'
|
||||
|
||||
config.truncateThreshold = 0
|
||||
|
||||
describe('Whisper CTranslate2 transcriber', function () {
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
const expectedVttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
|
||||
|
||||
before(async function () {
|
||||
await mkdir(transcriptDirectory, { recursive: true })
|
||||
})
|
||||
|
||||
it('Should transcribe a media file', async function () {
|
||||
const transcriber = new OpenaiTranscriber(
|
||||
const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
|
||||
const transcriber = new Ctranslate2Transcriber(
|
||||
{
|
||||
name: 'whisper-ctranslate2',
|
||||
name: 'anyNameShouldBeFineReally',
|
||||
requirements: [],
|
||||
language: '',
|
||||
type: 'binary',
|
||||
license: '',
|
||||
binary: 'whisper-ctranslate2',
|
||||
supportedModelFormats: []
|
||||
},
|
||||
createLogger(),
|
||||
transcriptDirectory
|
||||
)
|
||||
const transcript = await transcriber.transcribe(
|
||||
buildAbsoluteFixturePath('video_short.mp4'),
|
||||
{ name: 'tiny' },
|
||||
'fr',
|
||||
'vtt'
|
||||
)
|
||||
|
||||
before(async function () {
|
||||
await mkdir(transcriptDirectory, { recursive: true })
|
||||
})
|
||||
|
||||
it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' })
|
||||
expect(transcript).to.deep.equals({
|
||||
path: expectedVttTranscriptPath,
|
||||
language: 'fr',
|
||||
path: join(transcriptDirectory, 'video_short.vtt'),
|
||||
language: 'en',
|
||||
format: 'vtt'
|
||||
})
|
||||
|
||||
|
@ -54,6 +47,65 @@ You
|
|||
)
|
||||
})
|
||||
|
||||
it('May produce a transcript file in the `srt` format', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt')
|
||||
expect(transcript).to.deep.equals({
|
||||
path: join(transcriptDirectory, 'video_short.srt'),
|
||||
language: 'en',
|
||||
format: 'srt'
|
||||
})
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(
|
||||
`1
|
||||
00:00:00,000 --> 00:00:02,000
|
||||
You
|
||||
|
||||
`
|
||||
)
|
||||
})
|
||||
|
||||
it('May produce a transcript file in the `txt` format', async function () {
|
||||
const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt')
|
||||
expect(transcript).to.deep.equals({
|
||||
path: join(transcriptDirectory, 'video_short.txt'),
|
||||
language: 'en',
|
||||
format: 'txt'
|
||||
})
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(`You
|
||||
`)
|
||||
})
|
||||
|
||||
it('Should produce the same transcript text as openai-whisper given the same parameters', async function () {
|
||||
const transcribeArguments: Parameters<typeof transcriber.transcribe> = [
|
||||
shortVideoPath,
|
||||
{ name: 'tiny' },
|
||||
'en',
|
||||
'txt'
|
||||
]
|
||||
const transcript = await transcriber.transcribe(...transcribeArguments)
|
||||
const openaiTranscriber = new OpenaiTranscriber(
|
||||
{
|
||||
name: 'openai-whisper',
|
||||
requirements: [],
|
||||
type: 'binary',
|
||||
binary: 'whisper',
|
||||
supportedModelFormats: [ 'PyTorch' ]
|
||||
},
|
||||
createLogger(),
|
||||
join(transcriptDirectory, 'openai-whisper')
|
||||
)
|
||||
const openaiTranscript = await openaiTranscriber.transcribe(...transcribeArguments)
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-unused-expressions
|
||||
expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
|
||||
expect(await readFile(transcript.path, 'utf8')).to.equal(await readFile(openaiTranscript.path, 'utf8'))
|
||||
})
|
||||
|
||||
after(async function () {
|
||||
await rm(transcriptDirectory, { recursive: true, force: true })
|
||||
})
|
||||
|
|
|
@ -6,11 +6,11 @@ import { ModelFormat } from './transcription-model.js'
|
|||
export interface TranscriptionEngine {
|
||||
name: string
|
||||
description?: string
|
||||
language: string
|
||||
language?: string
|
||||
requirements: string[]
|
||||
type: 'binary' | 'bindings' | 'ws'
|
||||
binary?: string
|
||||
license: string
|
||||
binary: string
|
||||
license?: string
|
||||
forgeURL?: string
|
||||
supportedModelFormats: ModelFormat[]
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@ export const engines: TranscriptionEngine[] = [
|
|||
name : 'whisper-cpp',
|
||||
description : 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
|
||||
type: 'binary',
|
||||
binary: 'main',
|
||||
language : 'cpp',
|
||||
requirements : [],
|
||||
forgeURL : 'https://github.com/ggerganov/whisper.cpp',
|
||||
|
@ -49,7 +50,7 @@ export const engines: TranscriptionEngine[] = [
|
|||
requirements: [ 'python' ],
|
||||
language: 'python',
|
||||
type: 'binary',
|
||||
binary: 'whisper-ctranslate2',
|
||||
binary: 'whisper_timestamped',
|
||||
forgeURL: 'https://github.com/openai/whisper',
|
||||
license: 'MIT',
|
||||
supportedModelFormats: [ 'CTranslate2' ]
|
||||
|
|
|
@ -1,37 +1,5 @@
|
|||
import { join } from 'path'
|
||||
import { $ } from 'execa'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { getFileInfo } from '../../file-utils.js'
|
||||
import { OpenaiTranscriber } from './openai-transcriber.js'
|
||||
|
||||
export class Ctranslate2Transcriber extends AbstractTranscriber {
|
||||
async transcribe (
|
||||
mediaFilePath: string,
|
||||
model: TranscriptionModel,
|
||||
language: string,
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
this.createPerformanceMark()
|
||||
const $$ = $({ verbose: true })
|
||||
const { baseName } = getFileInfo(mediaFilePath)
|
||||
export class Ctranslate2Transcriber extends OpenaiTranscriber {
|
||||
|
||||
await $$`whisper-ctranslate2 ${[
|
||||
mediaFilePath,
|
||||
'--model',
|
||||
model.name,
|
||||
'--output_format',
|
||||
'all',
|
||||
'--output_dir',
|
||||
this.transcriptDirectory
|
||||
]}`
|
||||
|
||||
this.measurePerformanceMark()
|
||||
|
||||
return {
|
||||
language,
|
||||
path: join(this.transcriptDirectory, `${baseName}.${format}`),
|
||||
format
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,8 +8,8 @@ import { getFileInfo } from '../../file-utils.js'
|
|||
export class OpenaiTranscriber extends AbstractTranscriber {
|
||||
async transcribe (
|
||||
mediaFilePath: string,
|
||||
model: TranscriptionModel,
|
||||
language: string,
|
||||
model: TranscriptionModel = { name: 'tiny' },
|
||||
language: string = 'en',
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
this.createPerformanceMark()
|
||||
|
@ -18,14 +18,16 @@ export class OpenaiTranscriber extends AbstractTranscriber {
|
|||
const $$ = $({ verbose: true })
|
||||
const { baseName } = getFileInfo(mediaFilePath)
|
||||
|
||||
await $$`whisper ${[
|
||||
await $$`${this.engine.binary} ${[
|
||||
mediaFilePath,
|
||||
'--model',
|
||||
model.name,
|
||||
'--output_format',
|
||||
'all',
|
||||
'--output_dir',
|
||||
this.transcriptDirectory
|
||||
this.transcriptDirectory,
|
||||
'--language',
|
||||
language
|
||||
]}`
|
||||
|
||||
this.measurePerformanceMark()
|
||||
|
|
|
@ -5,10 +5,10 @@ import { rename } from 'node:fs/promises'
|
|||
import { $ } from 'execa'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { getFileInfo } from '../../file-utils.js'
|
||||
import { OpenaiTranscriber } from './openai-transcriber.js'
|
||||
|
||||
export class WhisperTimestampedTranscriber extends AbstractTranscriber {
|
||||
export class WhisperTimestampedTranscriber extends OpenaiTranscriber {
|
||||
async transcribe (
|
||||
mediaFilePath: string,
|
||||
model: TranscriptionModel,
|
||||
|
@ -19,7 +19,7 @@ export class WhisperTimestampedTranscriber extends AbstractTranscriber {
|
|||
|
||||
const $$ = $({ verbose: true })
|
||||
const { baseName, name } = getFileInfo(mediaFilePath)
|
||||
await $$`whisper_timestamped ${[
|
||||
await $$`${this.engine.binary} ${[
|
||||
mediaFilePath,
|
||||
'--model',
|
||||
model.name,
|
||||
|
@ -31,7 +31,8 @@ export class WhisperTimestampedTranscriber extends AbstractTranscriber {
|
|||
|
||||
const internalTranscriptPath = join(this.transcriptDirectory, `${name}.${format}`)
|
||||
const transcriptPath = join(this.transcriptDirectory, `${baseName}.${format}`)
|
||||
assert(existsSync(internalTranscriptPath), '')
|
||||
// Whisper timestamped is expected to name its output after the full media file name, extension included (e.g. video.mp4.vtt)
|
||||
assert(existsSync(internalTranscriptPath), `${internalTranscriptPath} file doesn't exist.`)
|
||||
await rename(internalTranscriptPath, transcriptPath)
|
||||
|
||||
this.measurePerformanceMark()
|
||||
|
|
Loading…
Reference in New Issue