chore: add individual tests for others transcribers

2024-04-22 14:52:08 +02:00 · 2024-04-22 14:52:08 +02:00 · d3b73e875a
parent 9f652db162
commit d3b73e875a
5 changed files with 65 additions and 5 deletions
--- a/.github/workflows/transcription.yml
+++ b/.github/workflows/transcription.yml
@ -27,4 +27,4 @@ jobs:

      - name: Run transcription tests
        run: |
-          npm run mocha -- --exit --bail packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts
+          npm run mocha -- --exit --bail packages/tests/src/transcription/**/*.spec.ts
--- a/packages/tests/src/transcription/benchmark.spec.ts
+++ b/packages/tests/src/transcription/benchmark.spec.ts
--- a/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts
+++ b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts
@ -8,7 +8,7 @@ import { OpenaiTranscriber } from '@peertube/peertube-transcription'

 config.truncateThreshold = 0

-describe('Open AI transcriber', function () {
+describe('Open AI Whisper transcriber', function () {
  const transcriptDirectory = join(root(), 'test-transcript')
  const expectedVttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')

--- a/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts
+++ b/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts
@ -0,0 +1,60 @@
+import { createLogger } from 'winston'
+import { join } from 'path'
+import { expect, config } from 'chai'
+import { existsSync } from 'node:fs'
+import { mkdir, readFile, rm } from 'node:fs/promises'
+import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
+import { OpenaiTranscriber } from '@peertube/peertube-transcription'
+
+config.truncateThreshold = 0
+
+describe('Linto timestamped Whisper transcriber', function () {
+  const transcriptDirectory = join(root(), 'test-transcript')
+  const expectedVttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
+
+  before(async function () {
+    await mkdir(transcriptDirectory, { recursive: true })
+  })
+
+  it('Should transcribe a media file', async function () {
+    const transcriber = new OpenaiTranscriber(
+      {
+        name: 'timestamped-whisper',
+        requirements: [],
+        language: '',
+        type: 'binary',
+        license: '',
+        supportedModelFormats: [ 'PyTorch' ]
+      },
+      createLogger(),
+      transcriptDirectory
+    )
+    const transcript = await transcriber.transcribe(
+      buildAbsoluteFixturePath('video_short.mp4'),
+      { name: 'tiny' },
+      'fr',
+      'vtt'
+    )
+
+    expect(transcript).to.deep.equals({
+      path: expectedVttTranscriptPath,
+      language: 'fr',
+      format: 'vtt'
+    })
+
+    // eslint-disable-next-line @typescript-eslint/no-unused-expressions
+    expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
+    expect(await readFile(transcript.path, 'utf8')).to.equal(
+      `WEBVTT
+
+00:00.000 --> 00:02.000
+You
+
+`
+    )
+  })
+
+  after(async function () {
+    await rm(transcriptDirectory, { recursive: true, force: true })
+  })
+})
--- a/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts
+++ b/packages/tests/src/transcription/whisper/transcriber/whisper-ctranslate2.spec.ts
@ -8,7 +8,7 @@ import { OpenaiTranscriber } from '@peertube/peertube-transcription'

 config.truncateThreshold = 0

-describe('Open AI transcriber', function () {
+describe('Whisper CTranslate2 transcriber', function () {
  const transcriptDirectory = join(root(), 'test-transcript')
  const expectedVttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')

@ -19,12 +19,12 @@ describe('Open AI transcriber', function () {
  it('Should transcribe a media file', async function () {
    const transcriber = new OpenaiTranscriber(
      {
-        name: 'openai-whisper',
+        name: 'whisper-ctranslate2',
        requirements: [],
        language: '',
        type: 'binary',
        license: '',
-        supportedModelFormats: [ 'PyTorch' ]
+        supportedModelFormats: []
      },
      createLogger(),
      transcriptDirectory