From c289c867414f11a2ff1e53887e484f607bb42844 Mon Sep 17 00:00:00 2001
From: Chocobozzz
Date: Wed, 3 Jul 2024 15:07:01 +0200
Subject: [PATCH] Add vad_filter to ctranslate transcriber

Helps us to correctly detect the language if there is no voice in the first 30 seconds
Also helps to lower hallucinations
---
 packages/tests/src/shared/transcription.ts | 6 +++++-
 .../src/whisper/transcriber/ctranslate2-transcriber.ts | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/packages/tests/src/shared/transcription.ts b/packages/tests/src/shared/transcription.ts
index 0bc040062..5561684ad 100644
--- a/packages/tests/src/shared/transcription.ts
+++ b/packages/tests/src/shared/transcription.ts
@@ -28,7 +28,11 @@ export function getCustomModelPath (modelName: CustomModelName) {
 
 // ---------------------------------------------------------------------------
 
-export async function checkAutoCaption (servers: PeerTubeServer[], uuid: string, captionContains = 'WEBVTT\n\n00:00.000 --> 00:') {
+export async function checkAutoCaption (
+ servers: PeerTubeServer[],
+ uuid: string,
+ captionContains = new RegExp('^WEBVTT\\n\\n00:00.\\d{3} --> 00:')
+) {
 for (const server of servers) {
 const body = await server.captions.list({ videoId: uuid })
 expect(body.total).to.equal(1)
diff --git a/packages/transcription/src/whisper/transcriber/ctranslate2-transcriber.ts b/packages/transcription/src/whisper/transcriber/ctranslate2-transcriber.ts
index 87b424f46..de0a9c508 100644
--- a/packages/transcription/src/whisper/transcriber/ctranslate2-transcriber.ts
+++ b/packages/transcription/src/whisper/transcriber/ctranslate2-transcriber.ts
@@ -35,6 +35,8 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber {
 ...modelArgs,
 '--word_timestamps',
 'True',
+ '--vad_filter',
+ 'true',
 '--output_format',
 'all',
 '--output_dir',