diff --git a/packages/tests/src/transcription/benchmark.spec.ts b/packages/tests/src/transcription/benchmark.spec.ts index 8427f61bf..f82ce8c92 100644 --- a/packages/tests/src/transcription/benchmark.spec.ts +++ b/packages/tests/src/transcription/benchmark.spec.ts @@ -84,23 +84,21 @@ describe('Transcribers benchmark', function () { }) transcribers.forEach(function (transcriberName) { - describe(`${transcriberName}`, function () { - it('Should run a benchmark on each transcriber implementation', async function () { - this.timeout(45000) - const transcriber = transcriberFactory.createFromEngineName( - transcriberName, - createLogger(), - transcriptDirectory - ) - const transcriptFile = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'txt') - const evaluator = new TranscriptFileEvaluator(referenceTranscriptFile, transcriptFile) - await new Promise(resolve => setTimeout(resolve, 1)) + it(`Run ${transcriberName} transcriber benchmark without issue`, async function () { + this.timeout(45000) + const transcriber = transcriberFactory.createFromEngineName( + transcriberName, + createLogger(), + transcriptDirectory + ) + const transcriptFile = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'txt') + const evaluator = new TranscriptFileEvaluator(referenceTranscriptFile, transcriptFile) + await new Promise(resolve => setTimeout(resolve, 1)) - benchmark = benchmarkReducer(benchmark, transcriberName, { - engine: transcriber.engine, - WER: await evaluator.wer(), - CER: await evaluator.cer() - }) + benchmark = benchmarkReducer(benchmark, transcriberName, { + engine: transcriber.engine, + WER: await evaluator.wer(), + CER: await evaluator.cer() }) }) }) diff --git a/packages/tests/src/transcription/transcript/transcript-file.spec.ts b/packages/tests/src/transcription/transcript/transcript-file.spec.ts index f689f0104..be334d08c 100644 --- a/packages/tests/src/transcription/transcript/transcript-file.spec.ts +++ 
b/packages/tests/src/transcription/transcript/transcript-file.spec.ts @@ -22,31 +22,5 @@ describe('Transcript File', function () { }) expect(await transcript1.equals(transcript2)).to.be.true - - const reference = new TranscriptFile({ - path: buildAbsoluteFixturePath('transcription/transcript/reference.txt'), - language: 'fr', - format: 'txt' - }) - const hypothesis = await TranscriptFile.write({ - path: buildAbsoluteFixturePath('transcription/transcript/openai.txt'), - content: `Communiquez lors d'une classe transplante. Utilisez les photos prises lors de cette classe pour raconter quotidiennement le séjour vécu. -C'est le scénario P-Dagujic présenté par monsieur Navoli, professeur ainsi que le 3 sur une école alimentaire de Montpellier. -La première application a utilisé ce ralame déatec. L'enseignant va alors transférer les différentes photos réalisés lors de la classe transplante. -Dans un dossier, spécifique pour que les élèves puissent le retrouver plus facilement. Il téléverse donc ses photos dans le dossier, dans le venté, dans la médiatèque de la classe. -Pour terminer, il s'assure que le dossier soit bien ouvert aux utilisateurs afin que tout le monde puisse l'utiliser. -Les élèves par la suite utilisera le blog. A partir de leurs nantes, il pourront se loi de parposte rédigeant un article d'un reinté. -Ils illustront ses articles à l'aide des photos de que mon numérique mise à n'accélier dans le venté. -Pour se faire, il pourront utiliser les diteurs avancés qui les renvèrent directement dans la médiatèque de la classe où il pourront retrouver le dossier créé par leurs enseignants. -Une fois leur article terminée, les élèves soumétront se lui-ci au professeur qui pourra soit la noté pour correction ou le public. -Ensuite, il pourront lire et commenter ce de leurs camarades ou répondre aux commentaires de la veille. 
-`, - format: 'txt', - language: 'fr' - }) - - const output = await reference.evaluate(hypothesis) - - console.log(output) }) }) diff --git a/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts index 7d30c3741..e07d2297f 100644 --- a/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts +++ b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts @@ -31,13 +31,13 @@ describe('Open AI Whisper transcriber', function () { it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () { const transcript = await transcriber.transcribe(shortVideoPath) - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.vtt'), language: 'en', format: 'vtt' }))).to.be.true - expect(await transcript.read()).to.equal( + expect(await transcript.read()).to.equals( `WEBVTT 00:00.000 --> 00:02.000 @@ -49,7 +49,7 @@ You it('May produce a transcript file in the `srt` format', async function () { const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt') - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.srt'), language: 'en', format: 'srt' @@ -66,7 +66,7 @@ You it('May produce a transcript file in the `txt` format', async function () { const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt') - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.txt'), language: 'en', format: 'txt' @@ -77,13 +77,13 @@ You }) it('May transcribe a media file using a local PyTorch model', async function () { - await 
transcriber.transcribe(frVideoPath, { name: 'myLocalModel', path: buildAbsoluteFixturePath('transcription/tiny.pt') }, 'fr') + await transcriber.transcribe(frVideoPath, { name: 'myLocalModel', path: buildAbsoluteFixturePath('transcription/models/tiny.pt') }, 'fr') }) it('May transcribe a media file in french', async function () { this.timeout(45000) const transcript = await transcriber.transcribe(frVideoPath, { name: 'tiny' }, 'fr', 'txt') - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'), language: 'fr', format: 'txt' @@ -107,7 +107,7 @@ Ensuite, il pourront lire et commenter ce de leurs camarades ou répondre aux co it('May transcribe a media file in french with small model', async function () { this.timeout(400000) const transcript = await transcriber.transcribe(frVideoPath, { name: 'small' }, 'fr', 'txt') - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'), language: 'fr', format: 'txt' diff --git a/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts b/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts index 12cd7897c..febb28dcf 100644 --- a/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts +++ b/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts @@ -35,7 +35,7 @@ describe('Linto timestamped Whisper transcriber', function () { 'fr' ) - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.vtt'), language: 'fr', format: 'vtt' @@ -53,7 +53,7 @@ you it('May produce a transcript file in the `srt` format with a ms precision', async function () { const transcript = await 
transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt') - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.srt'), language: 'en', format: 'srt' @@ -70,7 +70,7 @@ you it('May produce a transcript file in `txt` format', async function () { const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt') - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.txt'), language: 'en', format: 'txt' @@ -83,7 +83,7 @@ you it('May transcribe a media file in french', async function () { this.timeout(45000) const transcript = await transcriber.transcribe(frVideoPath, { name: 'tiny' }, 'fr', 'txt') - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'), language: 'fr', format: 'txt' diff --git a/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts b/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts index 081a4a8d9..591b811cb 100644 --- a/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts +++ b/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts @@ -30,7 +30,7 @@ describe('Whisper CTranslate2 transcriber', function () { it('Should transcribe a media file and provide a valid path to a transcript file in `vtt` format by default', async function () { const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }) - expect(transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.vtt') }))).to.be.true + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.vtt') }))).to.be.true expect(await readFile(transcript.path, 
'utf8')).to.equal( `WEBVTT @@ -43,7 +43,7 @@ You it('May produce a transcript file in the `srt` format', async function () { const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'srt') - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.srt'), format: 'srt' }))).to.be.true @@ -59,7 +59,7 @@ You it('May produce a transcript file in the `txt` format', async function () { const transcript = await transcriber.transcribe(shortVideoPath, { name: 'tiny' }, 'en', 'txt') - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.txt'), format: 'txt' }))).to.be.true @@ -71,11 +71,11 @@ You it('May transcribe a media file using a local CTranslate2 model', async function () { const transcript = await transcriber.transcribe( shortVideoPath, - { name: 'myLocalModel', path: buildAbsoluteFixturePath('transcription/faster-whisper-tiny') }, + { name: 'myLocalModel', path: buildAbsoluteFixturePath('transcription/models/faster-whisper-tiny') }, 'en', 'txt' ) - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'video_short.txt'), format: 'txt' }))).to.be.true @@ -87,7 +87,7 @@ You it('May transcribe a media file in french', async function () { this.timeout(45000) const transcript = await transcriber.transcribe(frVideoPath, { name: 'tiny' }, 'fr', 'txt') - expect(transcript.equals(new TranscriptFile({ + expect(await transcript.equals(new TranscriptFile({ path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'), language: 'fr', format: 'txt' @@ -128,7 +128,7 @@ Ensuite, il pourront lire et commenter ce de leur camarade, on répondra au comm ) const openaiTranscript = await openaiTranscriber.transcribe(...transcribeArguments) - expect(transcript.equals(openaiTranscript)) 
+ expect(await transcript.equals(openaiTranscript)).to.be.true }) after(async function () {