chore(test): add more test cases with other languages, model sizes and a local model

2024-04-23 18:18:50 +02:00 · 2024-04-23 18:18:50 +02:00 · ae8ce3b696
parent 27d0e66698
commit ae8ce3b696
10 changed files with 187 additions and 76 deletions
--- a/packages/tests/fixtures/transcription/README.md
+++ b/packages/tests/fixtures/transcription/README.md
@ -0,0 +1,5 @@
+
+CC BY-NC-SA 4.0 Deed
+Attribution-NonCommercial-ShareAlike 4.0 International
+communiquer-lors-dune-classe-transplantee.mp4
+https://podeduc.apps.education.fr/numerique-educatif/video/21893-communiquer-lors-dune-classe-transplantee/
--- a/packages/tests/fixtures/transcription/communiquer-lors-dune-classe-transplantee.mp4
+++ b/packages/tests/fixtures/transcription/communiquer-lors-dune-classe-transplantee.mp4
--- a/packages/tests/fixtures/transcription/tiny-ctranslate2.bin
+++ b/packages/tests/fixtures/transcription/tiny-ctranslate2.bin
--- a/packages/tests/fixtures/transcription/tiny.pt
+++ b/packages/tests/fixtures/transcription/tiny.pt
--- a/packages/tests/src/transcription/benchmark.ts
+++ b/packages/tests/src/transcription/benchmark.ts
@ -1,48 +1,53 @@
 import { createLogger } from 'winston'
 import { join } from 'path'
-import { expect } from 'chai'
-import { existsSync } from 'node:fs'
-import { rm, mkdir, readFile } from 'node:fs/promises'
+import { rm, mkdir } from 'node:fs/promises'
 import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
-import { toHumanReadable, transcriberFactory } from '@peertube/peertube-transcription'
+import { toHumanReadable, transcriberFactory, TranscriptionEngine } from '@peertube/peertube-transcription'
 import { performance, PerformanceObserver } from 'node:perf_hooks'
+import { CpuInfo, CpuUsage } from 'node:os'

-// const WER_TOLERANCE = 1
-// const CER_TOLERANCE = 1
+const WER_TOLERANCE = 1
+const CER_TOLERANCE = 1
+
+interface TestResult {
+  WER: number
+  CER: number
+  duration: number
+  engine: TranscriptionEngine
+  dataThroughput: number // relevant ?
+  cpus: CpuInfo[]
+  cpuUsages: CpuUsage[]
+  /**
+   * {
+   *  rss: 4935680,
+   *  heapTotal: 1826816,
+   *  heapUsed: 650472,
+   *  external: 49879,
+   *  arrayBuffers: 9386
+   * }
+   *
+   * - `heapTotal` and `heapUsed` refer to V8's memory usage.
+   * - `external` refers to the memory usage of C++ objects bound to JavaScript objects managed by V8.
+   * - `rss`, Resident Set Size, is the amount of space occupied in the main memory device
+   * (that is a subset of the total allocated memory) for the process, including all C++ and JavaScript objects and code.
+   * - `arrayBuffers` refers to memory allocated for ArrayBuffers and SharedArrayBuffers, including all Node.js Buffers.
+   * This is also included in the external value.
+   * When Node.js is used as an embedded library, this value may be 0 because allocations for ArrayBuffers may not be tracked in that case.
+   *
+   * When using Worker threads, rss will be a value that is valid for the entire process,
+   * while the other fields will only refer to the current thread.
+   *
+   * The process.memoryUsage() method iterates over each page to gather information about memory usage
+   * which might be slow depending on the program memory allocations.
+   */
+  memoryUsages: Record<number, MemoryUsage>
+}
+
+// var os = require('os');
 //
-// interface TestResult {
-//   WER: number
-//   CER: number
-//   duration: number
-//   engine: TranscriptionEngine
-//   dataThroughput: number // relevant ?
-//   cpus: CpuInfo[]
-//   cpuUsages: CpuUsage[]
-//   memoryUsages: Record<number, MemoryUsage>
-//   // Prints:
-//   // {
-//   //  rss: 4935680,
-//   //  heapTotal: 1826816,
-//   //  heapUsed: 650472,
-//   //  external: 49879,
-//   //  arrayBuffers: 9386
-//   // }
-//
-//   // heapTotal and heapUsed refer to V8's memory usage.
-//   // external refers to the memory usage of C++ objects bound to JavaScript objects managed by V8.
-//   // rss, Resident Set Size, is the amount of space occupied in the main memory device (that is a subset of the total allocated memory) for the process, including all C++ and JavaScript objects and code.
-//   // arrayBuffers refers to memory allocated for ArrayBuffers and SharedArrayBuffers, including all Node.js Buffers. This is also included in the external value. When Node.js is used as an embedded library, this value may be 0 because allocations for ArrayBuffers may not be tracked in that case.
-//   //
-//   // When using Worker threads, rss will be a value that is valid for the entire process, while the other fields will only refer to the current thread.
-//   //
-//   // The process.memoryUsage() method iterates over each page to gather information about memory usage which might be slow depending on the program memory allocations.
-// }
-//
-// // var os = require('os');
-// //
-// console.log(cpus())
-// // console.log(os.totalmem());
-// // console.log(os.freemem())
+// console.log(os.cpus())
+// console.log(os.totalmem());
+// console.log(os.freemem())
 //
 // const testsResults: Record<string, TestResult> = {
 //   cpus: []
@ -58,9 +63,8 @@ import { performance, PerformanceObserver } from 'node:perf_hooks'
 //   return testResults
 // }

-describe('Transcribers', function () {
+describe('Transcribers benchmark', function () {
  const transcriptDirectory = join(root(), 'test-transcript')
-  const expectedVttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
  const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
  const transcribers = [
    'openai-whisper',
@ -77,40 +81,17 @@ describe('Transcribers', function () {
        .forEach((entry) => console.log(`Transcription ${entry.name} took ${toHumanReadable(entry.duration)}`))
    })
    performanceObserver.observe({ type: 'measure' })
-
-    // console.table
  })

  transcribers.forEach(function (transcriberName) {
    describe(`${transcriberName}`, function () {
-      it(`Should instanciate`, function () {
-        transcriberFactory.createFromEngineName(transcriberName)
-      })
-
      it('Should run transcription on a media file without raising any errors', async function () {
        const transcriber = transcriberFactory.createFromEngineName(
          transcriberName,
          createLogger(),
          transcriptDirectory
        )
-        const transcript = await transcriber.transcribe(
-          mediaFilePath,
-          { name: 'tiny' },
-          'fr',
-          'vtt'
-        )
-        expect(transcript).to.deep.equals({
-          path: expectedVttTranscriptPath,
-          language: 'fr',
-          format: 'vtt'
-        })
-        expect(transcript.path).to.equals(expectedVttTranscriptPath)
-
-        // eslint-disable-next-line @typescript-eslint/no-unused-expressions
-        expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
-        expect(await readFile(transcript.path, 'utf8'), `Transcript file ${transcript.path} doesn't exist.`).to.equal('...')
-
-        await rm(transcript.path)
+        await transcriber.transcribe(mediaFilePath)
      })
    })
  })
--- a/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts
+++ b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts
@ -11,6 +11,7 @@ config.truncateThreshold = 0
 describe('Open AI Whisper transcriber', function () {
  const transcriptDirectory = join(root(), 'test-transcript')
  const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
+  const frVideoPath = buildAbsoluteFixturePath('transcription/communiquer-lors-dune-classe-transplantee.mp4')

  const transcriber = new OpenaiTranscriber(
    {
@ -81,6 +82,68 @@ You
 `)
  })

+  it('May transcribe a media file using a local PyTorch model', async function () {
+    await transcriber.transcribe(frVideoPath, { name: 'myLocalModel', path: buildAbsoluteFixturePath('transcription/tiny.pt') }, 'fr')
+  })
+
+  it('May transcribe a media file in french', async function () {
+    this.timeout(45000)
+    const transcript = await transcriber.transcribe(frVideoPath, { name: 'tiny' }, 'fr', 'txt')
+    expect(transcript).to.deep.equals({
+      path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'),
+      language: 'fr',
+      format: 'txt'
+    })
+
+    // eslint-disable-next-line @typescript-eslint/no-unused-expressions
+    expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
+    expect(await readFile(transcript.path, 'utf8')).to.equal(
+      `Communiquez lors d'une classe transplante. Utilisez les photos prises lors de cette classe pour raconter quotidiennement le séjour vécu.
+C'est le scénario P-Dagujic présenté par monsieur Navoli, professeur ainsi que le 3 sur une école alimentaire de Montpellier.
+La première application a utilisé ce ralame déatec. L'enseignant va alors transférer les différentes photos réalisés lors de la classe transplante.
+Dans un dossier, spécifique pour que les élèves puissent le retrouver plus facilement. Il téléverse donc ses photos dans le dossier, dans le venté, dans la médiatèque de la classe.
+Pour terminer, il s'assure que le dossier soit bien ouvert aux utilisateurs afin que tout le monde puisse l'utiliser.
+Les élèves par la suite utilisera le blog. A partir de leurs nantes, il pourront se loi de parposte rédigeant un article d'un reinté.
+Ils illustront ses articles à l'aide des photos de que mon numérique mise à n'accélier dans le venté.
+Pour se faire, il pourront utiliser les diteurs avancés qui les renvèrent directement dans la médiatèque de la classe où il pourront retrouver le dossier créé par leurs enseignants.
+Une fois leur article terminée, les élèves soumétront se lui-ci au professeur qui pourra soit la noté pour correction ou le public.
+Ensuite, il pourront lire et commenter ce de leurs camarades ou répondre aux commentaires de la veille.
+`
+    )
+  })
+
+  it('May transcribe a media file in french with small model', async function () {
+    this.timeout(300000)
+    const transcript = await transcriber.transcribe(frVideoPath, { name: 'small' }, 'fr', 'txt')
+    expect(transcript).to.deep.equals({
+      path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'),
+      language: 'fr',
+      format: 'txt'
+    })
+
+    // eslint-disable-next-line @typescript-eslint/no-unused-expressions
+    expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
+    expect(await readFile(transcript.path, 'utf8')).to.equal(
+      `Communiquer lors d'une classe transplantée. Utiliser les photos prises lors de cette classe
+pour raconter quotidiennement le séjour vécu. C'est le scénario pédagogique présenté
+par M. Navoli, professeur en cycle 3 sur une école élémentaire de Montpellier.
+La première application à utiliser sera la médiathèque. L'enseignant va alors transférer
+les différentes photos réalisées lors de la classe transplantée dans un dossier spécifique
+pour que les élèves puissent le retrouver plus facilement. Ils téléversent donc ces
+photos dans le dossier, dans le NT, dans la médiathèque de la classe. Pour terminer,
+ils s'assurent que le dossier soit bien ouvert aux utilisateurs afin que tout le monde
+puisse l'utiliser. Les élèves, par la suite, utiliseront le blog. A partir de leur note,
+ils pourront, seul ou à deux par postes, rédiger un article dans leur NT. Ils illustreront
+ces articles à l'aide des photos et documents numériques mis en accès libre dans le NT.
+Pour ce faire, ils pourront utiliser l'éditeur avancé qui les renverra directement dans
+la médiathèque de la classe où ils pourront retrouver le dossier créé par leur enseignant.
+Une fois leur article terminé, les élèves soulèteront celui-ci au professeur qui pourra
+soit la noter pour correction ou le publier. Ensuite, ils pourront lire et commenter ceux
+de leur camarade, ou répondre au commentaire de la veille.
+`
+    )
+  })
+
  after(async function () {
    await rm(transcriptDirectory, { recursive: true, force: true })
  })
--- a/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts
+++ b/packages/tests/src/transcription/whisper/transcriber/timestamped-transcriber.spec.ts
@ -11,6 +11,7 @@ config.truncateThreshold = 0
 describe('Linto timestamped Whisper transcriber', function () {
  const transcriptDirectory = join(root(), 'test-transcript')
  const shortVideoPath = buildAbsoluteFixturePath('video_short.mp4')
+  const frVideoPath = buildAbsoluteFixturePath('transcription/communiquer-lors-dune-classe-transplantee.mp4')
  const transcriber = new WhisperTimestampedTranscriber(
    {
      name: 'whisper-timestamped',
@ -84,18 +85,54 @@ you

    // eslint-disable-next-line @typescript-eslint/no-unused-expressions
    expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
-    expect(await readFile(transcript.path, 'utf8')).to.equal(`You
-  `)
+    expect(await readFile(transcript.path, 'utf8')).to.equal(`you
+`)
+  })
+
+  it('May transcribe a media file in french', async function () {
+    this.timeout(45000)
+    const transcript = await transcriber.transcribe(frVideoPath, { name: 'tiny' }, 'fr', 'txt')
+    expect(transcript).to.deep.equals({
+      path: join(transcriptDirectory, 'communiquer-lors-dune-classe-transplantee.txt'),
+      language: 'fr',
+      format: 'txt'
+    })
+
+    // eslint-disable-next-line @typescript-eslint/no-unused-expressions
+    expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
+    expect(await readFile(transcript.path, 'utf8')).to.equal(
+      `...
+Communiquez lors du ne class et transplanté.
+Utilisez les photos prises lors de cette classe pour raconter quotidiennement le seuil jour vécu.
+C'est le scénario P.D. à Goujit présenté par M.I.N.A.Voli,
+professeur en cycle 3 sur une école émenteur de Montpellier.
+La première application a utilisé ce ralame de Yatek.
+L'enseignant va alors transférer les différentes photos réalisés lors de la classe transplantée dans un dossier,
+spécifique pour que les élèves puissent le retrouver plus facilement.
+Il t'éleverce donc ses photos dans le dossier, dans le venté, dans la médiatèque de la classe.
+Pour terminer, il s'assure que le dossier soit bien ouvert aux utilisateurs afin que tout le monde puisse l'utiliser.
+Les élèves par la suite utiliseront le blog.
+À partir de leur note, il pourront se loi de par poste rédigène article dans le reinté.
+Ils illustront ses articles à l'aide des photos de commun numérique mise à n'accélier dans la même thé.
+Pour se faire, il pourront utiliser les dites ravences qui les renvèrent directement dans la médiatèque de la classe,
+où ils pourront retrouver le dossier créé par leur enseignon.
+Une fois leur article terminée, les élèves soumétront se lui-ci au professeur,
+qui pourra soit la noter pour correction ou le public.
+Ensuite, il pourront lire et commenter ce de leur camarade, ou répondre au commentaire de la veille.
+`
+    )
  })

  it('Should produce the same transcript text as openai-whisper given the same parameters', async function () {
-    const transcribeArguments: Parameters<typeof transcriber.transcribe> = [
+    const transcribeParameters: Parameters<typeof transcriber.transcribe> = [
      shortVideoPath,
      { name: 'tiny' },
      'en',
      'txt'
    ]
-    const transcript = await transcriber.transcribe(...transcribeArguments)
+
+    const transcript = await transcriber.transcribe(...transcribeParameters)
+
    const openaiTranscriber = new OpenaiTranscriber(
      {
        name: 'openai-whisper',
@ -107,10 +144,8 @@ you
      createLogger(),
      join(transcriptDirectory, 'openai-whisper')
    )
-    const openaiTranscript = await openaiTranscriber.transcribe(...transcribeArguments)
+    const openaiTranscript = await openaiTranscriber.transcribe(...transcribeParameters)

-    // eslint-disable-next-line @typescript-eslint/no-unused-expressions
-    expect(existsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist.`).to.be.true
    expect(await readFile(transcript.path, 'utf8')).to.equal(await readFile(openaiTranscript.path, 'utf8'))
  })

--- a/packages/transcription/README.md
+++ b/packages/transcription/README.md
@ -1,7 +1,7 @@

 DeepLearningFramework vs training libraries

-
+https://github.com/openai/whisper/blob/main/whisper/__init__.py#L144


 ```typescript
@ -24,3 +24,25 @@ const deepLearningFrameworks: DeepLearningFramework = [
 What about the lifecycle of each transcriber ?
 - install => installer
 - update => udpater
+
+For the **Python** packages :
+1. Install
+```sh
+pip install <package-name>
+```
+The package version should be constrained to a version compatible with our wrapper.
+We could also attempt to run our tests against different versions of the library to be future-ready.
+
+2. Update
+```sh
+pip install -U <package-name>
+```
+
+> Need the package name somewhere in the model
+>
+>
+### Whisper timestamped discrepancies
+- Lower case instead of upper case
+- missing .json file
+- binary name is awkward: the package is named whisper-timestamped but the binary is named whisper_timestamped
+> https://github.com/linto-ai/whisper-timestamped/issues?q=is:issue+author:lutangar
--- a/packages/transcription/src/transcription-engine.ts
+++ b/packages/transcription/src/transcription-engine.ts
@ -2,8 +2,9 @@ import { ModelFormat } from './transcription-model.js'

 /**
 * The engine, or framework.
+ *
 */
-export interface TranscriptionEngine {
+export class TranscriptionEngine {
  name: string
  description?: string
  language?: string
@ -14,6 +15,10 @@ export interface TranscriptionEngine {
  forgeURL?: string
  supportedModelFormats: ModelFormat[]

+  constructor (parameters: TranscriptionEngine) {
+    Object.assign(this, parameters)
+  }
+
  // There could be a default models.
  // There could be a list of default models
 }
--- a/packages/transcription/src/whisper/transcriber/openai-transcriber.ts
+++ b/packages/transcription/src/whisper/transcriber/openai-transcriber.ts
@ -21,9 +21,9 @@ export class OpenaiTranscriber extends AbstractTranscriber {
    await $$`${this.engine.binary} ${[
      mediaFilePath,
      '--model',
-      model.name,
+      model?.path || model.name,
      '--output_format',
-      'all',
+      format,
      '--output_dir',
      this.transcriptDirectory,
      '--language',