From 47095673b3cc792733b97e1c4ba058b6d1a6d2d4 Mon Sep 17 00:00:00 2001
From: lutangar <johan@larriereguichet.fr>
Date: Tue, 16 Apr 2024 17:49:04 +0200
Subject: [PATCH] chore: fiddling around some more

---
 .../whisper/engine/python.spec.ts             | 33 ----------
 .../faster-whisper-transcriber.spec.ts        | 28 ++++++++
 .../transcriber/openai-transcriber.spec.ts    | 28 ++++++++
 .../whisper/transcribers.spec.ts              | 34 ++++++++++
 packages/tests/tsconfig.json                  |  3 +-
 packages/transcription/package.json           |  2 +-
 .../transcription/src/abstract-transcriber.ts | 39 +++++++++++
 packages/transcription/src/index.ts           |  8 ++-
 packages/transcription/src/installer.ts       |  0
 packages/transcription/src/model-factory.ts   |  9 +++
 .../transcription/src/transcriber-factory.ts  | 30 +++++++++
 packages/transcription/src/transcript.ts      |  3 +
 .../transcription/src/transcription-engine.ts | 41 ++++--------
 .../transcription/src/transcription-model.ts  |  4 +-
 .../transcription/src/transcription-result.ts |  1 -
 packages/transcription/src/update.ts          |  0
 .../src/whisper/{engine => }/README.md        |  0
 .../transcription/src/whisper/engine/cpp.ts   | 38 -----------
 .../src/whisper/engine/engines.ts             | 12 ----
 .../transcription/src/whisper/engine/index.ts |  4 --
 .../src/whisper/engine/python.ts              | 65 -------------------
 .../src/whisper/engine/transformers-js.ts     | 42 ------------
 .../src/whisper/engine/transformers.ts        | 38 -----------
 packages/transcription/src/whisper/engines.ts | 46 +++++++++++++
 packages/transcription/src/whisper/index.ts   |  3 +-
 .../transcriber/faster-whisper-transcriber.ts | 34 ++++++++++
 .../src/whisper/transcriber/index.ts          |  3 +
 .../whisper/transcriber/openai-transcriber.ts | 34 ++++++++++
 .../transformers-js-transcriber.ts            | 22 +++++++
 .../transcriber/transformers-transcriber.ts   | 14 ++++
 packages/transcription/tsconfig.types.json    | 10 +++
 server/tsconfig.json                          |  1 +
 tsconfig.eslint.json                          |  1 +
 33 files changed, 364 insertions(+), 266 deletions(-)
 delete mode 100644 packages/tests/src/transcription/whisper/engine/python.spec.ts
 create mode 100644 packages/tests/src/transcription/whisper/transcriber/faster-whisper-transcriber.spec.ts
 create mode 100644 packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts
 create mode 100644 packages/tests/src/transcription/whisper/transcribers.spec.ts
 create mode 100644 packages/transcription/src/abstract-transcriber.ts
 create mode 100644 packages/transcription/src/installer.ts
 create mode 100644 packages/transcription/src/model-factory.ts
 create mode 100644 packages/transcription/src/transcriber-factory.ts
 create mode 100644 packages/transcription/src/transcript.ts
 delete mode 100644 packages/transcription/src/transcription-result.ts
 create mode 100644 packages/transcription/src/update.ts
 rename packages/transcription/src/whisper/{engine => }/README.md (100%)
 delete mode 100644 packages/transcription/src/whisper/engine/cpp.ts
 delete mode 100644 packages/transcription/src/whisper/engine/engines.ts
 delete mode 100644 packages/transcription/src/whisper/engine/index.ts
 delete mode 100644 packages/transcription/src/whisper/engine/python.ts
 delete mode 100644 packages/transcription/src/whisper/engine/transformers-js.ts
 delete mode 100644 packages/transcription/src/whisper/engine/transformers.ts
 create mode 100644 packages/transcription/src/whisper/engines.ts
 create mode 100644 packages/transcription/src/whisper/transcriber/faster-whisper-transcriber.ts
 create mode 100644 packages/transcription/src/whisper/transcriber/index.ts
 create mode 100644 packages/transcription/src/whisper/transcriber/openai-transcriber.ts
 create mode 100644 packages/transcription/src/whisper/transcriber/transformers-js-transcriber.ts
 create mode 100644 packages/transcription/src/whisper/transcriber/transformers-transcriber.ts
 create mode 100644 packages/transcription/tsconfig.types.json

diff --git a/packages/tests/src/transcription/whisper/engine/python.spec.ts b/packages/tests/src/transcription/whisper/engine/python.spec.ts
deleted file mode 100644
index e6b4df6f4..000000000
--- a/packages/tests/src/transcription/whisper/engine/python.spec.ts
+++ /dev/null
@@ -1,33 +0,0 @@
-import { join } from 'path'
-import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
-import { remove, pathExistsSync } from 'fs-extra/esm.js'
-import { $ } from 'execa'
-import { expect } from 'chai'
-import { WhisperEngine } from '@peertube/transcription'
-
-describe('Whisper', function () {
-  const transcriptDirectory = join(root(), 'test-transcript')
-  const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
-
-  it('Should be present on the system', async function () {
-    await $`whisper`
-  })
-
-  it('Should run transcription on a media file without raising any errors', async function () {
-    const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
-    const whisperEngine = new WhisperEngine({ transcriptDirectory })
-    await whisperEngine.transcribe('tiny', mediaFilePath)
-  })
-
-  it('Should be create a vtt transcript file', async function () {
-    const mediaFilePath = buildAbsoluteFixturePath('video_very_long_10p.mp4')
-    const whisperEngine = new WhisperEngine({ transcriptDirectory })
-    const { } = await whisperEngine.transcribe('tiny', mediaFilePath)
-
-    expect(pathExistsSync(vttTranscriptPath)).to.be.true
-  })
-
-  after(async function () {
-    await remove(transcriptDirectory)
-  })
-})
diff --git a/packages/tests/src/transcription/whisper/transcriber/faster-whisper-transcriber.spec.ts b/packages/tests/src/transcription/whisper/transcriber/faster-whisper-transcriber.spec.ts
new file mode 100644
index 000000000..da30ea5aa
--- /dev/null
+++ b/packages/tests/src/transcription/whisper/transcriber/faster-whisper-transcriber.spec.ts
@@ -0,0 +1,32 @@
+import { createLogger } from 'winston'
+import { join } from 'path'
+import { expect } from 'chai'
+import { remove, pathExistsSync } from 'fs-extra/esm'
+import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
+import { transcriberFactory } from '@peertube/peertube-transcription'
+
+// Integration spec: drives the `whisper-ctranslate2` engine through the transcriber factory.
+describe('Faster Whisper Transcriber', function () {
+  // Must match an engine name declared in the engines registry
+  const engineName = 'whisper-ctranslate2'
+  const transcriptDirectory = join(root(), 'test-transcript')
+  // Whisper command lines name the transcript after the media file: video_short.mp4 => video_short.vtt
+  const vttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
+
+  it('Should instanciate', function () {
+    transcriberFactory.createFromEngineName(engineName)
+  })
+
+  it('Should run transcription on a media file without raising any errors', async function () {
+    const transcriber = transcriberFactory.createFromEngineName(engineName, createLogger(), transcriptDirectory)
+    const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
+    const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
+    expect(transcript.path).to.equals(vttTranscriptPath)
+    // `expect(value, message)` alone checks nothing: the assertion has to be chained
+    expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
+  })
+
+  after(async function () {
+    await remove(transcriptDirectory)
+  })
+})
diff --git a/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts
new file mode 100644
index 000000000..cc0721c6f
--- /dev/null
+++ b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts
@@ -0,0 +1,32 @@
+import { createLogger } from 'winston'
+import { join } from 'path'
+import { expect } from 'chai'
+import { remove, pathExistsSync } from 'fs-extra/esm'
+import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
+import { transcriberFactory } from '@peertube/peertube-transcription'
+
+// Integration spec: drives the `openai-whisper` engine through the transcriber factory.
+describe('Open AI Transcriber', function () {
+  // Must match an engine name declared in the engines registry
+  const engineName = 'openai-whisper'
+  const transcriptDirectory = join(root(), 'test-transcript')
+  // The whisper command line names the transcript after the media file: video_short.mp4 => video_short.vtt
+  const vttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
+
+  it('Should instanciate', function () {
+    transcriberFactory.createFromEngineName(engineName)
+  })
+
+  it('Should run transcription on a media file without raising any errors', async function () {
+    const transcriber = transcriberFactory.createFromEngineName(engineName, createLogger(), transcriptDirectory)
+    const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
+    const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
+    expect(transcript.path).to.equals(vttTranscriptPath)
+    // `expect(value, message)` alone checks nothing: the assertion has to be chained
+    expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
+  })
+
+  after(async function () {
+    await remove(transcriptDirectory)
+  })
+})
diff --git a/packages/tests/src/transcription/whisper/transcribers.spec.ts b/packages/tests/src/transcription/whisper/transcribers.spec.ts
new file mode 100644
index 000000000..e1dd9c9b1
--- /dev/null
+++ b/packages/tests/src/transcription/whisper/transcribers.spec.ts
@@ -0,0 +1,36 @@
+import { createLogger } from 'winston'
+import { join } from 'path'
+import { expect } from 'chai'
+import { remove, pathExistsSync } from 'fs-extra/esm'
+import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
+import { transcriberFactory } from '@peertube/peertube-transcription'
+
+describe('Transcribers', function () {
+  const transcriptDirectory = join(root(), 'test-transcript')
+  // Whisper command lines name the transcript after the media file: video_short.mp4 => video_short.vtt
+  const vttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
+  // Engine names, as declared in the engines registry
+  const engineNames = [
+    'openai-whisper',
+    'whisper-ctranslate2'
+  ]
+
+  engineNames.forEach(function (engineName) {
+    it(`Should instanciate a ${engineName} transcriber`, function () {
+      transcriberFactory.createFromEngineName(engineName)
+    })
+
+    it(`Should run ${engineName} transcription on a media file without raising any errors`, async function () {
+      const transcriber = transcriberFactory.createFromEngineName(engineName, createLogger(), transcriptDirectory)
+      const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
+      const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
+      expect(transcript.path).to.equals(vttTranscriptPath)
+      expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
+    })
+  })
+
+  // Clean up after each test so a transcript left by one engine cannot satisfy the next one
+  afterEach(async function () {
+    await remove(transcriptDirectory)
+  })
+})
diff --git a/packages/tests/tsconfig.json b/packages/tests/tsconfig.json
index 148647e84..6737ea215 100644
--- a/packages/tests/tsconfig.json
+++ b/packages/tests/tsconfig.json
@@ -6,7 +6,8 @@
     "tsBuildInfoFile": "./dist/.tsbuildinfo",
     "paths": {
       "@tests/*": [ "./src/*" ],
-      "@server/*": [ "../../server/core/*" ]
+      "@server/*": [ "../../server/core/*" ],
+      "@peertube/peertube-transcription": [ "../transcription" ]
     }
   },
   "references": [
diff --git a/packages/transcription/package.json b/packages/transcription/package.json
index 1ebbeb2ce..366a08686 100644
--- a/packages/transcription/package.json
+++ b/packages/transcription/package.json
@@ -1,5 +1,5 @@
 {
-  "name": "@peertube/transcription",
+  "name": "@peertube/peertube-transcription",
   "private": true,
   "version": "0.0.0",
   "main": "dist/index.js",
diff --git a/packages/transcription/src/abstract-transcriber.ts b/packages/transcription/src/abstract-transcriber.ts
new file mode 100644
index 000000000..4ba314132
--- /dev/null
+++ b/packages/transcription/src/abstract-transcriber.ts
@@ -0,0 +1,61 @@
+import { Logger } from 'winston'
+import { join } from 'path'
+import { root } from '@peertube/peertube-node-utils'
+import { TranscriptionEngine } from './transcription-engine.js'
+import { TranscriptionModel } from './transcription-model.js'
+import { Transcript, TranscriptFormat } from './transcript.js'
+import { existsSync } from 'fs'
+
+/**
+ * Base class of every transcriber: binds an engine description to a logger
+ * and to the directory where transcripts are written.
+ */
+export abstract class AbstractTranscriber {
+  /** Directory used when the caller does not provide one. */
+  public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')
+
+  engine: TranscriptionEngine
+  logger: Logger
+  transcriptDirectory: string
+
+  constructor (
+    engine: TranscriptionEngine,
+    logger: Logger,
+    transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY
+  ) {
+    this.engine = engine
+    this.logger = logger
+    this.transcriptDirectory = transcriptDirectory
+  }
+
+  /** Placeholder: no detection is performed, always resolves to an empty string. */
+  detectLanguage () {
+    return Promise.resolve('')
+  }
+
+  /** Placeholder: only probes the model file, nothing is loaded yet. */
+  loadModel (model: TranscriptionModel) {
+    // `path` is optional on a model: skip the filesystem probe when it is missing
+    if (model.path && existsSync(model.path)) { /* empty */ }
+  }
+
+  /**
+   * Tells whether the bound engine can run the given model.
+   * A model without a declared format is never considered supported.
+   */
+  supports (model: TranscriptionModel) {
+    // Rely on what the engine declares rather than on a hard-coded format
+    return model.format !== undefined && this.engine.supportedModelFormats.includes(model.format)
+  }
+
+  /**
+   * Transcribes a media file.
+   *
+   * @param mediaFilePath path of the media file to transcribe
+   * @param model model to run
+   * @param language language code supplied by the caller (e.g. 'fr')
+   * @param format format of the transcript to return
+   * @returns a description of the produced transcript
+   */
+  abstract transcribe (mediaFilePath: string, model: TranscriptionModel, language: string, format: TranscriptFormat): Promise<Transcript>
+}
diff --git a/packages/transcription/src/index.ts b/packages/transcription/src/index.ts
index 1cf9ecd1b..1099f77b5 100644
--- a/packages/transcription/src/index.ts
+++ b/packages/transcription/src/index.ts
@@ -1,4 +1,8 @@
-export * from './whisper/index.js'
+import { TranscriberFactory } from './transcriber-factory.js'
+import { engines } from './whisper/index.js'
+
 export * from './transcription-engine.js'
 export * from './transcription-model.js'
-export * from './transcription-result.js'
+export * from './transcript.js'
+
+export const transcriberFactory = new TranscriberFactory(engines) // shared factory, built from the engines exported by ./whisper/index.js
diff --git a/packages/transcription/src/installer.ts b/packages/transcription/src/installer.ts
new file mode 100644
index 000000000..e69de29bb
diff --git a/packages/transcription/src/model-factory.ts b/packages/transcription/src/model-factory.ts
new file mode 100644
index 000000000..fbdc5abed
--- /dev/null
+++ b/packages/transcription/src/model-factory.ts
@@ -0,0 +1,9 @@
+import { TranscriptionModel } from './transcription-model.js'
+
+export class ModelFactory {
+  /** Builds a model descriptor that only carries the given name. */
+  createModelFromName (name: string): TranscriptionModel {
+    const model: TranscriptionModel = { name }
+    return model
+  }
+}
diff --git a/packages/transcription/src/transcriber-factory.ts b/packages/transcription/src/transcriber-factory.ts
new file mode 100644
index 000000000..230bfaa0e
--- /dev/null
+++ b/packages/transcription/src/transcriber-factory.ts
@@ -0,0 +1,45 @@
+import { Logger, createLogger } from 'winston'
+import { TranscriptionEngine } from './transcription-engine.js'
+import { TransformersTranscriber, OpenaiTranscriber } from './whisper/index.js'
+import { FasterWhisperTranscriber } from './whisper/transcriber/faster-whisper-transcriber.js'
+import { AbstractTranscriber } from './abstract-transcriber.js'
+
+/**
+ * Creates transcribers from the name of one of the engines it was given.
+ */
+export class TranscriberFactory {
+  engines: TranscriptionEngine[]
+
+  constructor (engines: TranscriptionEngine[]) {
+    this.engines = engines
+  }
+
+  /**
+   * Instantiates the transcriber bound to the engine called `engineName`.
+   *
+   * @param engineName name of a registered engine, e.g. 'openai-whisper'
+   * @param logger logger handed to the transcriber
+   * @param transcriptDirectory directory handed to the transcriber for its transcripts
+   * @throws Error when the engine is not registered or has no transcriber implementation
+   */
+  createFromEngineName (engineName: string, logger: Logger = createLogger(), transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY) {
+    const engine = this.engines.find(({ name }) => name === engineName)
+    if (!engine) {
+      throw new Error(`Unknow engine ${engineName}`)
+    }
+
+    const transcriberArgs: ConstructorParameters<typeof AbstractTranscriber> = [ engine, logger, transcriptDirectory ]
+
+    // Case labels must be names declared in the engines registry, any other label is unreachable
+    switch (engineName) {
+      case 'openai-whisper':
+        return new OpenaiTranscriber(...transcriberArgs)
+      case 'whisper-ctranslate2':
+        return new FasterWhisperTranscriber(...transcriberArgs)
+      case 'transformers':
+        return new TransformersTranscriber(...transcriberArgs)
+      default:
+        throw new Error(`Unimplemented engine ${engineName}`)
+    }
+  }
+}
diff --git a/packages/transcription/src/transcript.ts b/packages/transcription/src/transcript.ts
new file mode 100644
index 000000000..2a8c9449a
--- /dev/null
+++ b/packages/transcription/src/transcript.ts
@@ -0,0 +1,3 @@
+export type TranscriptFormat = 'txt' | 'vtt' | 'srt' // file formats a transcript can be requested in
+// Result of a transcription: path of the transcript file, its format and, optionally, its language
+export type Transcript = { path: string, language?: string, format: TranscriptFormat }
diff --git a/packages/transcription/src/transcription-engine.ts b/packages/transcription/src/transcription-engine.ts
index f924abd75..33d9c66b9 100644
--- a/packages/transcription/src/transcription-engine.ts
+++ b/packages/transcription/src/transcription-engine.ts
@@ -1,32 +1,19 @@
-import { join } from 'path'
-import { root } from '@peertube/peertube-node-utils'
-import { TranscriptionModel } from './transcription-model.js'
-import { TranscriptionResult } from './transcription-result.js'
+import { ModelFormat } from './transcription-model.js'
 
-export abstract class TranscriptionEngine {
-  public name: string
-  public description: string
-  public language: string
-  public requirements: string[]
-  public type: 'binary' | 'bindings' | 'ws'
-  public license: string
-  public forgeURL: string
+/**
+ * Static description of a transcription engine (or framework): data fields only, no behaviour.
+ */
+export interface TranscriptionEngine {
+  name: string // identifier used to look an engine up, e.g. by TranscriberFactory
+  description: string
+  language: string // implementation language of the engine, e.g. 'python' or 'cpp'
+  requirements: string[]
+  type: 'binary' | 'bindings' | 'ws'
+  binary?: string // presumably the executable to run for `binary` engines — TODO confirm
+  license: string
+  forgeURL: string // URL of the engine's source repository
+  supportedModelFormats: ModelFormat[] // model formats the engine declares it can run
 
-  public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')
   // There could be a default models.
   // There could be a list of default models
-
-  public abstract transcribe (
-    model: TranscriptionModel | string,
-    mediaFilePath: string,
-    language: string,
-    outputFormat: string
-  ): Promise<TranscriptionResult>
-  public abstract loadModel (model: TranscriptionModel)
-  public abstract detectLanguage (): Promise<string>
-  public abstract supports (model: TranscriptionModel): boolean
-
-  static getModelName (model: TranscriptionModel | string) {
-    return typeof model === 'string' ? model : model.name
-  }
 }
diff --git a/packages/transcription/src/transcription-model.ts b/packages/transcription/src/transcription-model.ts
index b76bff159..3a9a02e32 100644
--- a/packages/transcription/src/transcription-model.ts
+++ b/packages/transcription/src/transcription-model.ts
@@ -41,9 +41,11 @@
 // .'PyTorch' | 'GGML' | 'ONNX' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
 // https://towardsdatascience.com/guide-to-file-formats-for-machine-learning-columnar-training-inferencing-and-the-feature-store-2e0c3d18d4f9
 
+export type ModelFormat = 'PyTorch' | 'GGML' | 'ONNX' | 'CTranslate2' // other formats, not part of the union: CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
+
 export abstract class TranscriptionModel {
   name: string
-  format?: 'PyTorch' | 'GGML' | 'ONNX' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
+  format?: ModelFormat
   path?: string
   url?: string
 
diff --git a/packages/transcription/src/transcription-result.ts b/packages/transcription/src/transcription-result.ts
deleted file mode 100644
index 4f00a3883..000000000
--- a/packages/transcription/src/transcription-result.ts
+++ /dev/null
@@ -1 +0,0 @@
-export type TranscriptionResult = { transcriptFilePath: string, language?: string }
diff --git a/packages/transcription/src/update.ts b/packages/transcription/src/update.ts
new file mode 100644
index 000000000..e69de29bb
diff --git a/packages/transcription/src/whisper/engine/README.md b/packages/transcription/src/whisper/README.md
similarity index 100%
rename from packages/transcription/src/whisper/engine/README.md
rename to packages/transcription/src/whisper/README.md
diff --git a/packages/transcription/src/whisper/engine/cpp.ts b/packages/transcription/src/whisper/engine/cpp.ts
deleted file mode 100644
index 85f3093ac..000000000
--- a/packages/transcription/src/whisper/engine/cpp.ts
+++ /dev/null
@@ -1,38 +0,0 @@
-import { existsSync } from 'fs'
-import { TranscriptionModel } from '../../transcription-model.js'
-import { TranscriptionEngine } from '../../transcription-engine.js'
-import { Promise } from 'bluebird'
-import { TranscriptionResult } from '../../transcription-result.js'
-
-export class WhisperCppEngine implements TranscriptionEngine {
-  name = 'transformers'
-  description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
-  type: 'binary'
-  language = 'cpp'
-  requirements = []
-  forgeURL = 'https://github.com/ggerganov/whisper.cpp'
-  license = 'MIT'
-
-  detectLanguage () {
-    return Promise.resolve('')
-  }
-
-  loadModel (model: TranscriptionModel) {
-    if (existsSync(model.path)) { /* empty */ }
-  }
-
-  supports (model: TranscriptionModel) {
-    return true
-  }
-
-  transcribe (
-    model: TranscriptionModel | string,
-    mediaFilePath: string,
-    language: string,
-    outputFormat: string
-  ): Promise<TranscriptionResult> {
-    return Promise.resolve(undefined)
-  }
-}
-
-export const whisperCppEngine = new WhisperCppEngine()
diff --git a/packages/transcription/src/whisper/engine/engines.ts b/packages/transcription/src/whisper/engine/engines.ts
deleted file mode 100644
index 739db1326..000000000
--- a/packages/transcription/src/whisper/engine/engines.ts
+++ /dev/null
@@ -1,12 +0,0 @@
-import { TranscriptionEngine } from '../../transcription-engine.js'
-import { whisperEngine } from './python.js'
-import { whisperCppEngine } from './cpp.js'
-import { transformers } from './transformers.js'
-import { transformersJs } from './transformers-js.js'
-
-export const engines: TranscriptionEngine[] = [
-  whisperCppEngine,
-  whisperEngine,
-  transformers,
-  transformersJs
-]
diff --git a/packages/transcription/src/whisper/engine/index.ts b/packages/transcription/src/whisper/engine/index.ts
deleted file mode 100644
index 4f3dac3b4..000000000
--- a/packages/transcription/src/whisper/engine/index.ts
+++ /dev/null
@@ -1,4 +0,0 @@
-export * from './cpp.js'
-export * from './python.js'
-export * from './transformers.js'
-export * from './transformers-js.js'
diff --git a/packages/transcription/src/whisper/engine/python.ts b/packages/transcription/src/whisper/engine/python.ts
deleted file mode 100644
index 20694b359..000000000
--- a/packages/transcription/src/whisper/engine/python.ts
+++ /dev/null
@@ -1,65 +0,0 @@
-import { existsSync } from 'fs'
-import { join } from 'path'
-import { ChildProcess } from 'child_process'
-import { $ } from 'execa'
-import { TranscriptionEngine } from '../../transcription-engine.js'
-import { TranscriptionModel } from '../../transcription-model.js'
-import { TranscriptionResult } from '../../transcription-result.js'
-
-type TranscriptFormat = 'txt' | 'vtt' | 'srt'
-
-export class WhisperEngine implements TranscriptionEngine {
-  name: 'whisper'
-  description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
-  requirements: ['python', 'pyTorch', 'ffmpeg']
-  language: 'python'
-  type: 'binary'
-  binary: string
-  forgeURL: 'https://github.com/openai/whisper'
-  license: 'MIT'
-  process?: ChildProcess
-  transcriptDirectory: string
-
-  public constructor (transcriptDirectory: WhisperEngine['transcriptDirectory'] = TranscriptionEngine.DEFAULT_TRANSCRIPT_DIRECTORY) {
-    this.transcriptDirectory = transcriptDirectory
-  }
-
-  detectLanguage () {
-    return Promise.resolve('')
-  }
-
-  loadModel (model: TranscriptionModel) {
-    if (existsSync(model.path)) { /* empty */ }
-  }
-
-  supports (model: TranscriptionModel) {
-    return model.format === 'PyTorch'
-  }
-
-  async transcribe (
-    model: TranscriptionModel | string,
-    mediaFilePath: string,
-    format: TranscriptFormat = 'vtt'
-  ): Promise<TranscriptionResult> {
-    const $$ = $({ verbose: true })
-
-    await $$`whisper ${[
-      mediaFilePath,
-      '--model',
-      TranscriptionEngine.getModelName(model),
-      '--output_format',
-      'all',
-      '--output_dir',
-      this.transcriptDirectory
-    ]}`
-
-    await $$`ls ${this.transcriptDirectory}`
-
-    return {
-      language: '',
-      transcriptFilePath: join(this.transcriptDirectory, `test.${format}`)
-    }
-  }
-}
-
-export const whisperEngine = new WhisperEngine()
diff --git a/packages/transcription/src/whisper/engine/transformers-js.ts b/packages/transcription/src/whisper/engine/transformers-js.ts
deleted file mode 100644
index 0978f43df..000000000
--- a/packages/transcription/src/whisper/engine/transformers-js.ts
+++ /dev/null
@@ -1,42 +0,0 @@
-// import { pipeline, env } from '@xenova/transformers'
-import { TranscriptionModel } from '../../transcription-model.js'
-import { TranscriptionEngine } from '../../transcription-engine.js'
-import { TranscriptionResult } from '../../transcription-result.js'
-import { Promise } from 'bluebird'
-
-// Disable local models
-// env.allowLocalModels = true
-
-class TransformersJs implements TranscriptionEngine {
-  name = 'transformers.js'
-  description = ''
-  requirements = []
-  language = 'js'
-  forgeURL: string
-  license: string
-  type: 'bindings'
-
-  transcribe (
-    model: TranscriptionModel | string,
-    mediaFilePath: string,
-    language: string, outputFormat: string): Promise<TranscriptionResult> {
-    return Promise.resolve(undefined)
-    // return pipeline('automatic-speech-recognition', 'no_attentions', {
-    //   // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
-    //   revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main'
-    // })
-  }
-
-  detectLanguage (): Promise<string> {
-    return Promise.resolve('')
-  }
-
-  loadModel (model: TranscriptionModel) {
-  }
-
-  supports (model: TranscriptionModel): boolean {
-    return false
-  }
-}
-
-export const transformersJs = new TransformersJs()
diff --git a/packages/transcription/src/whisper/engine/transformers.ts b/packages/transcription/src/whisper/engine/transformers.ts
deleted file mode 100644
index adf7b0669..000000000
--- a/packages/transcription/src/whisper/engine/transformers.ts
+++ /dev/null
@@ -1,38 +0,0 @@
-import { TranscriptionEngine } from '../../transcription-engine.js'
-import { TranscriptionModel } from '../../transcription-model.js'
-import { existsSync } from 'fs'
-import { TranscriptionResult } from '../../transcription-result.js'
-import { Promise } from 'bluebird'
-
-export class Transformers implements TranscriptionEngine {
-  name = 'transformers'
-  description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
-  type: 'binary'
-  language = 'cpp'
-  requirements = []
-  forgeURL = 'https://github.com/ggerganov/whisper.cpp'
-  license = 'MIT'
-
-  supports (model: TranscriptionModel) {
-    return true
-  }
-
-  detectLanguage () {
-    return Promise.resolve('')
-  }
-
-  loadModel (model: TranscriptionModel) {
-    if (existsSync(model.path)) { /* empty */ }
-  }
-
-  transcribe (
-    model: TranscriptionModel | string,
-    mediaFilePath: string,
-    language: string,
-    outputFormat: string
-  ): Promise<TranscriptionResult> {
-    return Promise.resolve(undefined)
-  }
-}
-
-export const transformers = new Transformers()
diff --git a/packages/transcription/src/whisper/engines.ts b/packages/transcription/src/whisper/engines.ts
new file mode 100644
index 000000000..2f422f93c
--- /dev/null
+++ b/packages/transcription/src/whisper/engines.ts
@@ -0,0 +1,52 @@
+import { TranscriptionEngine } from '../transcription-engine.js'
+
+/**
+ * Registry of the known Whisper-based engines.
+ * Entries are looked up by `name`, e.g. by TranscriberFactory.
+ */
+export const engines: TranscriptionEngine[] = [
+  {
+    name: 'whisper-cpp',
+    description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
+    type: 'binary',
+    language: 'cpp',
+    requirements: [],
+    forgeURL: 'https://github.com/ggerganov/whisper.cpp',
+    license: 'MIT',
+    // whisper.cpp loads models converted to its own ggml format
+    supportedModelFormats: [ 'GGML' ]
+  },
+  {
+    name: 'transformers',
+    // NOTE(review): description looks copied from whisper.cpp, forgeURL and license are still empty
+    description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
+    type: 'binary',
+    language: 'python',
+    requirements: [],
+    forgeURL: '',
+    license: '',
+    supportedModelFormats: [ 'ONNX' ]
+  },
+  {
+    name: 'openai-whisper',
+    description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
+    requirements: [ 'python', 'pyTorch', 'ffmpeg' ],
+    language: 'python',
+    type: 'binary',
+    binary: 'whisper',
+    forgeURL: 'https://github.com/openai/whisper',
+    license: 'MIT',
+    supportedModelFormats: [ 'PyTorch' ]
+  },
+  {
+    name: 'whisper-ctranslate2',
+    description: '',
+    requirements: [ 'python' ],
+    language: 'python',
+    type: 'binary',
+    binary: 'whisper-ctranslate2',
+    forgeURL: 'https://github.com/Softcatala/whisper-ctranslate2',
+    license: 'MIT',
+    supportedModelFormats: [ 'CTranslate2' ]
+  }
+]
diff --git a/packages/transcription/src/whisper/index.ts b/packages/transcription/src/whisper/index.ts
index d3cdbb358..ba4581d7f 100644
--- a/packages/transcription/src/whisper/index.ts
+++ b/packages/transcription/src/whisper/index.ts
@@ -1 +1,2 @@
-export * from './engine/index.js'
+export * from './transcriber/index.js'
+export * from './engines.js'
diff --git a/packages/transcription/src/whisper/transcriber/faster-whisper-transcriber.ts b/packages/transcription/src/whisper/transcriber/faster-whisper-transcriber.ts
new file mode 100644
index 000000000..f1a049710
--- /dev/null
+++ b/packages/transcription/src/whisper/transcriber/faster-whisper-transcriber.ts
@@ -0,0 +1,48 @@
+import { join, parse } from 'path'
+import { $ } from 'execa'
+import { TranscriptionModel } from '../../transcription-model.js'
+import { Transcript, TranscriptFormat } from '../../transcript.js'
+import { AbstractTranscriber } from '../../abstract-transcriber.js'
+
+/**
+ * Transcriber running the binary declared by the bound engine, `whisper-ctranslate2` by default.
+ */
+export class FasterWhisperTranscriber extends AbstractTranscriber {
+  /**
+   * Transcribes `mediaFilePath` and resolves once the command has exited.
+   *
+   * @param mediaFilePath path of the media file to transcribe
+   * @param model model to run, only its `name` is passed to the command
+   * @param language copied as is into the returned transcript, not passed to the command
+   * @param format selects which of the generated files the returned transcript points to
+   * @returns the transcript expected in `transcriptDirectory`
+   */
+  async transcribe (
+    mediaFilePath: string,
+    model: TranscriptionModel,
+    language: string,
+    format: TranscriptFormat = 'vtt'
+  ): Promise<Transcript> {
+    const $$ = $({ verbose: true })
+    // Run the engine's own binary rather than OpenAI's `whisper`
+    const binary = this.engine.binary ?? 'whisper-ctranslate2'
+
+    // Every output format is generated, `format` only selects the returned file
+    await $$`${binary} ${[
+      mediaFilePath,
+      '--model',
+      model.name,
+      '--output_format',
+      'all',
+      '--output_dir',
+      this.transcriptDirectory
+    ]}`
+
+    return {
+      language,
+      // The command names its output after the media file: video.mp4 => video.vtt
+      path: join(this.transcriptDirectory, `${parse(mediaFilePath).name}.${format}`),
+      format
+    }
+  }
+}
diff --git a/packages/transcription/src/whisper/transcriber/index.ts b/packages/transcription/src/whisper/transcriber/index.ts
new file mode 100644
index 000000000..b4e6e5710
--- /dev/null
+++ b/packages/transcription/src/whisper/transcriber/index.ts
@@ -0,0 +1,5 @@
+// Barrel file: one export per transcriber implementation of this directory
+export * from './faster-whisper-transcriber.js'
+export * from './transformers-js-transcriber.js'
+export * from './transformers-transcriber.js'
+export * from './openai-transcriber.js'
diff --git a/packages/transcription/src/whisper/transcriber/openai-transcriber.ts b/packages/transcription/src/whisper/transcriber/openai-transcriber.ts
new file mode 100644
index 000000000..40c70131e
--- /dev/null
+++ b/packages/transcription/src/whisper/transcriber/openai-transcriber.ts
@@ -0,0 +1,48 @@
+import { join, parse } from 'path'
+import { $ } from 'execa'
+import { TranscriptionModel } from '../../transcription-model.js'
+import { Transcript, TranscriptFormat } from '../../transcript.js'
+import { AbstractTranscriber } from '../../abstract-transcriber.js'
+
+/**
+ * Transcriber running the binary declared by the bound engine, `whisper` by default.
+ */
+export class OpenaiTranscriber extends AbstractTranscriber {
+  /**
+   * Transcribes `mediaFilePath` and resolves once the command has exited.
+   *
+   * @param mediaFilePath path of the media file to transcribe
+   * @param model model to run, only its `name` is passed to the command
+   * @param language copied as is into the returned transcript, not passed to the command
+   * @param format selects which of the generated files the returned transcript points to
+   * @returns the transcript expected in `transcriptDirectory`
+   */
+  async transcribe (
+    mediaFilePath: string,
+    model: TranscriptionModel,
+    language: string,
+    format: TranscriptFormat = 'vtt'
+  ): Promise<Transcript> {
+    const $$ = $({ verbose: true })
+    // Prefer the binary declared by the bound engine, fall back to the `whisper` command
+    const binary = this.engine.binary ?? 'whisper'
+
+    // Every output format is generated, `format` only selects the returned file
+    await $$`${binary} ${[
+      mediaFilePath,
+      '--model',
+      model.name,
+      '--output_format',
+      'all',
+      '--output_dir',
+      this.transcriptDirectory
+    ]}`
+
+    return {
+      language,
+      // The command names its output after the media file: video.mp4 => video.vtt
+      path: join(this.transcriptDirectory, `${parse(mediaFilePath).name}.${format}`),
+      format
+    }
+  }
+}
diff --git a/packages/transcription/src/whisper/transcriber/transformers-js-transcriber.ts b/packages/transcription/src/whisper/transcriber/transformers-js-transcriber.ts
new file mode 100644
index 000000000..c7bb9ab1c
--- /dev/null
+++ b/packages/transcription/src/whisper/transcriber/transformers-js-transcriber.ts
@@ -0,0 +1,28 @@
+import { TranscriptionModel } from '../../transcription-model.js'
+import { AbstractTranscriber } from '../../abstract-transcriber.js'
+import { Transcript, TranscriptFormat } from '../../transcript.js'
+
+// Disable local models
+// env.allowLocalModels = true
+
+/**
+ * Placeholder transcriber for transformers.js: no transcription is performed yet.
+ */
+export class TransformersJsTranscriber extends AbstractTranscriber {
+  /**
+   * Not implemented yet: resolves to `undefined` whatever the arguments.
+   */
+  async transcribe (
+    mediaFilePath: string,
+    model: TranscriptionModel,
+    language: string,
+    format: TranscriptFormat = 'vtt'
+  ): Promise<Transcript> {
+    // `Promise` must be the global one here: an async method cannot be annotated with bluebird's Promise
+    return Promise.resolve(undefined)
+    // return pipeline('automatic-speech-recognition', 'no_attentions', {
+    //   // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
+    //   revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main'
+    // })
+  }
+}
diff --git a/packages/transcription/src/whisper/transcriber/transformers-transcriber.ts b/packages/transcription/src/whisper/transcriber/transformers-transcriber.ts
new file mode 100644
index 000000000..6341e5bef
--- /dev/null
+++ b/packages/transcription/src/whisper/transcriber/transformers-transcriber.ts
@@ -0,0 +1,18 @@
+import { TranscriptionModel } from '../../transcription-model.js'
+import { AbstractTranscriber } from '../../abstract-transcriber.js'
+import { Transcript, TranscriptFormat } from '../../transcript.js'
+
+/**
+ * Placeholder transcriber: nothing is transcribed yet.
+ */
+export class TransformersTranscriber extends AbstractTranscriber {
+  async transcribe (
+    mediaFilePath: string,
+    model: TranscriptionModel,
+    language: string,
+    format: TranscriptFormat = 'vtt'
+  ): Promise<Transcript> {
+    // Whatever the arguments, the returned promise resolves to undefined
+    return undefined
+  }
+}
diff --git a/packages/transcription/tsconfig.types.json b/packages/transcription/tsconfig.types.json
new file mode 100644
index 000000000..9edb53ece
--- /dev/null
+++ b/packages/transcription/tsconfig.types.json
@@ -0,0 +1,10 @@
+{
+  "extends": "./tsconfig.json",
+  "compilerOptions": {
+    "outDir": "../types-generator/dist/peertube-transcription",
+    "tsBuildInfoFile": "../types-generator/dist/peertube-transcription/.tsbuildinfo",
+    "stripInternal": true,
+    "removeComments": false,
+    "emitDeclarationOnly": true
+  }
+}
diff --git a/server/tsconfig.json b/server/tsconfig.json
index 21442d082..ed0bfca48 100644
--- a/server/tsconfig.json
+++ b/server/tsconfig.json
@@ -14,6 +14,7 @@
     { "path": "../packages/ffmpeg" },
     { "path": "../packages/models" },
     { "path": "../packages/node-utils" },
+    { "path": "../packages/transcription" },
     { "path": "../packages/typescript-utils" }
   ],
   "include": [
diff --git a/tsconfig.eslint.json b/tsconfig.eslint.json
index c2e868173..772a9fcbc 100644
--- a/tsconfig.eslint.json
+++ b/tsconfig.eslint.json
@@ -27,6 +27,7 @@
     { "path": "./packages/models" },
     { "path": "./packages/node-utils" },
     { "path": "./packages/server-commands" },
+    { "path": "./packages/transcription" },
     { "path": "./packages/typescript-utils" }
   ]
 }