Add vad_filter to the ctranslate2 transcriber

Helps us correctly detect the language when there is no voice in the
first 30 seconds.

It also helps reduce hallucinations.
pull/6416/merge
Chocobozzz 2024-07-03 15:07:01 +02:00
parent fb5236f2af
commit c289c86741
No known key found for this signature in database
GPG Key ID: 583A612D890159BE
2 changed files with 7 additions and 1 deletions

View File

@@ -28,7 +28,11 @@ export function getCustomModelPath (modelName: CustomModelName) {
// ---------------------------------------------------------------------------
export async function checkAutoCaption (servers: PeerTubeServer[], uuid: string, captionContains = 'WEBVTT\n\n00:00.000 --> 00:') {
export async function checkAutoCaption (
servers: PeerTubeServer[],
uuid: string,
captionContains = new RegExp('^WEBVTT\\n\\n00:00.\\d{3} --> 00:')
) {
for (const server of servers) {
const body = await server.captions.list({ videoId: uuid })
expect(body.total).to.equal(1)

View File

@@ -35,6 +35,8 @@ export class Ctranslate2Transcriber extends OpenaiTranscriber {
...modelArgs,
'--word_timestamps',
'True',
'--vad_filter',
'true',
'--output_format',
'all',
'--output_dir',