chore: fiddling around some more

pull/6303/head
lutangar 2024-04-16 17:49:04 +02:00
parent fbc6ca2270
commit 47095673b3
33 changed files with 364 additions and 266 deletions

View File

@ -1,33 +0,0 @@
import { join } from 'path'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { remove, pathExistsSync } from 'fs-extra/esm'
import { $ } from 'execa'
import { expect } from 'chai'
import { WhisperEngine } from '@peertube/transcription'
describe('Whisper', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
it('Should be present on the system', async function () {
await $`whisper`
})
it('Should run transcription on a media file without raising any errors', async function () {
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
const whisperEngine = new WhisperEngine({ transcriptDirectory })
await whisperEngine.transcribe('tiny', mediaFilePath)
})
it('Should create a VTT transcript file', async function () {
const mediaFilePath = buildAbsoluteFixturePath('video_very_long_10p.mp4')
const whisperEngine = new WhisperEngine({ transcriptDirectory })
await whisperEngine.transcribe('tiny', mediaFilePath)
expect(pathExistsSync(vttTranscriptPath)).to.be.true
})
after(async function () {
await remove(transcriptDirectory)
})
})

View File

@ -0,0 +1,28 @@
import { createLogger } from 'winston'
import { join } from 'path'
import { expect } from 'chai'
import { remove, pathExistsSync } from 'fs-extra/esm'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { transcriberFactory } from '@peertube/peertube-transcription'
describe('Whisper CTranslate2 Transcriber', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
it('Should instantiate', function () {
transcriberFactory.createFromEngineName('whisper-ctranslate2')
})
it('Should run transcription on a media file without raising any errors', async function () {
const transcriber = transcriberFactory.createFromEngineName('whisper-ctranslate2', createLogger(), transcriptDirectory)
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
expect(transcript.path).to.equals(vttTranscriptPath)
expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
})
after(async function () {
await remove(transcriptDirectory)
})
})

View File

@ -0,0 +1,28 @@
import { createLogger } from 'winston'
import { join } from 'path'
import { expect } from 'chai'
import { remove, pathExistsSync } from 'fs-extra/esm'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { transcriberFactory } from '@peertube/peertube-transcription'
describe('OpenAI Transcriber', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
it('Should instantiate', function () {
transcriberFactory.createFromEngineName('openai-whisper')
})
it('Should run transcription on a media file without raising any errors', async function () {
const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), transcriptDirectory)
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
expect(transcript.path).to.equals(vttTranscriptPath)
expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
})
after(async function () {
await remove(transcriptDirectory)
})
})

View File

@ -0,0 +1,34 @@
import { createLogger } from 'winston'
import { join } from 'path'
import { expect } from 'chai'
import { remove, pathExistsSync } from 'fs-extra/esm'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { transcriberFactory } from '@peertube/peertube-transcription'
describe('Transcribers', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
const transcribers = [
'openai-whisper',
'whisper-ctranslate2'
]
transcribers.forEach(function (transcriberName) {
it(`Should instantiate a ${transcriberName} transcriber`, function () {
transcriberFactory.createFromEngineName(transcriberName)
})
it(`Should run ${transcriberName} transcription on a media file without raising any errors`, async function () {
const transcriber = transcriberFactory.createFromEngineName(transcriberName, createLogger(), transcriptDirectory)
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
expect(transcript.path).to.equals(vttTranscriptPath)
expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
})
})
after(async function () {
await remove(transcriptDirectory)
})
})

View File

@ -6,7 +6,8 @@
"tsBuildInfoFile": "./dist/.tsbuildinfo",
"paths": {
"@tests/*": [ "./src/*" ],
"@server/*": [ "../../server/core/*" ]
"@server/*": [ "../../server/core/*" ],
"@peertube/peertube-transcription": [ "../transcription" ]
}
},
"references": [

View File

@ -1,5 +1,5 @@
{
"name": "@peertube/transcription",
"name": "@peertube/peertube-transcription",
"private": true,
"version": "0.0.0",
"main": "dist/index.js",

View File

@ -0,0 +1,39 @@
import { Logger } from 'winston'
import { join } from 'path'
import { root } from '@peertube/peertube-node-utils'
import { TranscriptionEngine } from './transcription-engine.js'
import { TranscriptionModel } from './transcription-model.js'
import { Transcript, TranscriptFormat } from './transcript.js'
import { existsSync } from 'fs'
export abstract class AbstractTranscriber {
public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')
engine: TranscriptionEngine
logger: Logger
transcriptDirectory: string
constructor (
engine: TranscriptionEngine,
logger: Logger,
transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY
) {
this.engine = engine
this.logger = logger
this.transcriptDirectory = transcriptDirectory
}
detectLanguage () {
return Promise.resolve('')
}
loadModel (model: TranscriptionModel) {
// TODO: actually load the model; for now only check that the file exists
if (model.path && existsSync(model.path)) { /* empty */ }
}
supports (model: TranscriptionModel) {
return model.format === 'PyTorch'
}
abstract transcribe (mediaFilePath: string, model: TranscriptionModel, language: string, format: TranscriptFormat): Promise<Transcript>
}
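
For reference, a concrete transcriber only has to implement transcribe(). A minimal hypothetical sketch (DummyTranscriber and its canned output are illustrative, not part of this commit):

import { join } from 'path'
import { TranscriptionModel } from './transcription-model.js'
import { Transcript, TranscriptFormat } from './transcript.js'
import { AbstractTranscriber } from './abstract-transcriber.js'

// Hypothetical subclass: pretends a transcript was produced without running any engine.
export class DummyTranscriber extends AbstractTranscriber {
  async transcribe (
    mediaFilePath: string,
    model: TranscriptionModel,
    language: string,
    format: TranscriptFormat = 'vtt'
  ): Promise<Transcript> {
    return { language, path: join(this.transcriptDirectory, `dummy.${format}`), format }
  }
}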

View File

@ -1,4 +1,8 @@
export * from './whisper/index.js'
import { TranscriberFactory } from './transcriber-factory.js'
import { engines } from './whisper/index.js'
export * from './transcription-engine.js'
export * from './transcription-model.js'
export * from './transcription-result.js'
export * from './transcript.js'
export const transcriberFactory = new TranscriberFactory(engines)

View File

View File

@ -0,0 +1,9 @@
import { TranscriptionModel } from './transcription-model.js'
export class ModelFactory {
createModelFromName (name: string): TranscriptionModel {
return {
name
}
}
}
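
As it stands the factory only wraps the short model name; a usage sketch (the import path is assumed, since model-factory.ts is not re-exported in the hunks shown here):

import { ModelFactory } from './model-factory.js' // path assumed

const model = new ModelFactory().createModelFromName('tiny')
// => { name: 'tiny' } (format, path and url are left undefined)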

View File

@ -0,0 +1,30 @@
import { Logger, createLogger } from 'winston'
import { TranscriptionEngine } from './transcription-engine.js'
import { TransformersTranscriber, OpenaiTranscriber, FasterWhisperTranscriber } from './whisper/index.js'
import { AbstractTranscriber } from './abstract-transcriber.js'
export class TranscriberFactory {
engines: TranscriptionEngine[]
constructor (engines: TranscriptionEngine[]) {
this.engines = engines
}
createFromEngineName (engineName: string, logger: Logger = createLogger(), transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY) {
const engine = this.engines.find(({ name }) => name === engineName)
if (!engine) {
throw new Error(`Unknown engine ${engineName}`)
}
const transcriberArgs: ConstructorParameters<typeof AbstractTranscriber> = [ engine, logger, transcriptDirectory ]
switch (engineName) {
case 'openai-whisper':
return new OpenaiTranscriber(...transcriberArgs)
case 'whisper-ctranslate2':
return new FasterWhisperTranscriber(...transcriberArgs)
case 'transformers':
return new TransformersTranscriber(...transcriberArgs)
default:
throw new Error(`Unimplemented engine ${engineName}`)
}
}
}
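
A usage sketch, assuming the transcriberFactory export from the package index shown in this commit; engine names must match the entries declared in engines.ts:

import { createLogger } from 'winston'
import { transcriberFactory } from '@peertube/peertube-transcription'

// Resolve a transcriber by engine name; throws on unknown or unimplemented engines.
const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), '/tmp/transcripts')
const transcript = await transcriber.transcribe('/tmp/video_short.mp4', { name: 'tiny' }, 'fr', 'vtt')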

View File

@ -0,0 +1,3 @@
export type TranscriptFormat = 'txt' | 'vtt' | 'srt'
export type Transcript = { path: string, language?: string, format: TranscriptFormat }
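
For instance, a French WebVTT transcript would be described as:

import { Transcript } from '@peertube/peertube-transcription'

// A transcript value as returned by a transcriber:
const transcript: Transcript = { path: '/tmp/transcripts/test.vtt', language: 'fr', format: 'vtt' }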

View File

@ -1,32 +1,19 @@
import { join } from 'path'
import { root } from '@peertube/peertube-node-utils'
import { TranscriptionModel } from './transcription-model.js'
import { TranscriptionResult } from './transcription-result.js'
import { ModelFormat } from './transcription-model.js'
export abstract class TranscriptionEngine {
public name: string
public description: string
public language: string
public requirements: string[]
public type: 'binary' | 'bindings' | 'ws'
public license: string
public forgeURL: string
/**
* The engine, or framework.
*/
export interface TranscriptionEngine {
name: string
description: string
language: string
requirements: string[]
type: 'binary' | 'bindings' | 'ws'
binary?: string
license: string
forgeURL: string
supportedModelFormats: ModelFormat[]
public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')
// There could be a default model.
// There could be a list of default models
public abstract transcribe (
model: TranscriptionModel | string,
mediaFilePath: string,
language: string,
outputFormat: string
): Promise<TranscriptionResult>
public abstract loadModel (model: TranscriptionModel)
public abstract detectLanguage (): Promise<string>
public abstract supports (model: TranscriptionModel): boolean
static getModelName (model: TranscriptionModel | string) {
return typeof model === 'string' ? model : model.name
}
}

View File

@ -41,9 +41,11 @@
// .'PyTorch' | 'GGML' | 'ONNX' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
// https://towardsdatascience.com/guide-to-file-formats-for-machine-learning-columnar-training-inferencing-and-the-feature-store-2e0c3d18d4f9
export type ModelFormat = 'PyTorch' | 'GGML' | 'ONNX' | 'CTranslate2' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
export abstract class TranscriptionModel {
name: string
format?: 'PyTorch' | 'GGML' | 'ONNX' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
format?: ModelFormat
path?: string
url?: string
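
Because TypeScript typing is structural, plain object literals satisfy this type even though it is declared as a class, which is how the specs above build their models:

import { TranscriptionModel } from '@peertube/peertube-transcription'

// Only `name` is mandatory; the other fields describe where and how the model is stored.
const tiny: TranscriptionModel = { name: 'tiny' }
const local: TranscriptionModel = { name: 'large-v2', format: 'PyTorch', path: '/models/large-v2.pt' }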

View File

@ -1 +0,0 @@
export type TranscriptionResult = { transcriptFilePath: string, language?: string }

View File

View File

@ -1,38 +0,0 @@
import { existsSync } from 'fs'
import { TranscriptionModel } from '../../transcription-model.js'
import { TranscriptionEngine } from '../../transcription-engine.js'
import { Promise } from 'bluebird'
import { TranscriptionResult } from '../../transcription-result.js'
export class WhisperCppEngine implements TranscriptionEngine {
name = 'whisper-cpp'
description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
type: 'binary'
language = 'cpp'
requirements = []
forgeURL = 'https://github.com/ggerganov/whisper.cpp'
license = 'MIT'
detectLanguage () {
return Promise.resolve('')
}
loadModel (model: TranscriptionModel) {
if (existsSync(model.path)) { /* empty */ }
}
supports (model: TranscriptionModel) {
return true
}
transcribe (
model: TranscriptionModel | string,
mediaFilePath: string,
language: string,
outputFormat: string
): Promise<TranscriptionResult> {
return Promise.resolve(undefined)
}
}
export const whisperCppEngine = new WhisperCppEngine()

View File

@ -1,12 +0,0 @@
import { TranscriptionEngine } from '../../transcription-engine.js'
import { whisperEngine } from './python.js'
import { whisperCppEngine } from './cpp.js'
import { transformers } from './transformers.js'
import { transformersJs } from './transformers-js.js'
export const engines: TranscriptionEngine[] = [
whisperCppEngine,
whisperEngine,
transformers,
transformersJs
]

View File

@ -1,4 +0,0 @@
export * from './cpp.js'
export * from './python.js'
export * from './transformers.js'
export * from './transformers-js.js'

View File

@ -1,65 +0,0 @@
import { existsSync } from 'fs'
import { join } from 'path'
import { ChildProcess } from 'child_process'
import { $ } from 'execa'
import { TranscriptionEngine } from '../../transcription-engine.js'
import { TranscriptionModel } from '../../transcription-model.js'
import { TranscriptionResult } from '../../transcription-result.js'
type TranscriptFormat = 'txt' | 'vtt' | 'srt'
export class WhisperEngine implements TranscriptionEngine {
name: 'whisper'
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
requirements: ['python', 'pyTorch', 'ffmpeg']
language: 'python'
type: 'binary'
binary: string
forgeURL: 'https://github.com/openai/whisper'
license: 'MIT'
process?: ChildProcess
transcriptDirectory: string
public constructor (transcriptDirectory: WhisperEngine['transcriptDirectory'] = TranscriptionEngine.DEFAULT_TRANSCRIPT_DIRECTORY) {
this.transcriptDirectory = transcriptDirectory
}
detectLanguage () {
return Promise.resolve('')
}
loadModel (model: TranscriptionModel) {
if (existsSync(model.path)) { /* empty */ }
}
supports (model: TranscriptionModel) {
return model.format === 'PyTorch'
}
async transcribe (
model: TranscriptionModel | string,
mediaFilePath: string,
format: TranscriptFormat = 'vtt'
): Promise<TranscriptionResult> {
const $$ = $({ verbose: true })
await $$`whisper ${[
mediaFilePath,
'--model',
TranscriptionEngine.getModelName(model),
'--output_format',
'all',
'--output_dir',
this.transcriptDirectory
]}`
await $$`ls ${this.transcriptDirectory}`
return {
language: '',
transcriptFilePath: join(this.transcriptDirectory, `test.${format}`)
}
}
}
export const whisperEngine = new WhisperEngine()

View File

@ -1,42 +0,0 @@
// import { pipeline, env } from '@xenova/transformers'
import { TranscriptionModel } from '../../transcription-model.js'
import { TranscriptionEngine } from '../../transcription-engine.js'
import { TranscriptionResult } from '../../transcription-result.js'
import { Promise } from 'bluebird'
// Disable local models
// env.allowLocalModels = true
class TransformersJs implements TranscriptionEngine {
name = 'transformers.js'
description = ''
requirements = []
language = 'js'
forgeURL: string
license: string
type: 'bindings'
transcribe (
model: TranscriptionModel | string,
mediaFilePath: string,
language: string, outputFormat: string): Promise<TranscriptionResult> {
return Promise.resolve(undefined)
// return pipeline('automatic-speech-recognition', 'no_attentions', {
// // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
// revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main'
// })
}
detectLanguage (): Promise<string> {
return Promise.resolve('')
}
loadModel (model: TranscriptionModel) {
}
supports (model: TranscriptionModel): boolean {
return false
}
}
export const transformersJs = new TransformersJs()

View File

@ -1,38 +0,0 @@
import { TranscriptionEngine } from '../../transcription-engine.js'
import { TranscriptionModel } from '../../transcription-model.js'
import { existsSync } from 'fs'
import { TranscriptionResult } from '../../transcription-result.js'
import { Promise } from 'bluebird'
export class Transformers implements TranscriptionEngine {
name = 'transformers'
description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
type: 'binary'
language = 'cpp'
requirements = []
forgeURL = 'https://github.com/ggerganov/whisper.cpp'
license = 'MIT'
supports (model: TranscriptionModel) {
return true
}
detectLanguage () {
return Promise.resolve('')
}
loadModel (model: TranscriptionModel) {
if (existsSync(model.path)) { /* empty */ }
}
transcribe (
model: TranscriptionModel | string,
mediaFilePath: string,
language: string,
outputFormat: string
): Promise<TranscriptionResult> {
return Promise.resolve(undefined)
}
}
export const transformers = new Transformers()

View File

@ -0,0 +1,46 @@
import { TranscriptionEngine } from '../transcription-engine.js'
export const engines: TranscriptionEngine[] = [
{
name: 'whisper-cpp',
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
type: 'binary',
language: 'cpp',
requirements: [],
forgeURL: 'https://github.com/ggerganov/whisper.cpp',
license: 'MIT',
supportedModelFormats: [ 'GGML' ]
},
{
name: 'transformers',
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
type: 'binary',
language: 'python',
requirements: [],
forgeURL: '',
license: '',
supportedModelFormats: [ 'ONNX' ]
},
{
name: 'openai-whisper',
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
requirements: [ 'python', 'pyTorch', 'ffmpeg' ],
language: 'python',
type: 'binary',
binary: 'whisper',
forgeURL: 'https://github.com/openai/whisper',
license: 'MIT',
supportedModelFormats: [ 'PyTorch' ]
},
{
name: 'whisper-ctranslate2',
description: '',
requirements: [ 'python' ],
language: 'python',
type: 'binary',
binary: 'whisper-ctranslate2',
forgeURL: 'https://github.com/Softcatala/whisper-ctranslate2',
license: 'MIT',
supportedModelFormats: [ 'CTranslate2' ]
}
]
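
These descriptors are the single source of truth for engine names: TranscriberFactory first looks the requested name up here, then instantiates the matching transcriber. A lookup sketch:

import { engines } from '@peertube/peertube-transcription'

// Inspect an engine descriptor the way the factory does.
const engine = engines.find(({ name }) => name === 'openai-whisper')
console.log(engine?.binary, engine?.supportedModelFormats) // 'whisper' [ 'PyTorch' ]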

View File

@ -1 +1,2 @@
export * from './engine/index.js'
export * from './transcriber/index.js'
export * from './engines.js'

View File

@ -0,0 +1,34 @@
import { join } from 'path'
import { $ } from 'execa'
import { TranscriptionModel } from '../../transcription-model.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
export class FasterWhisperTranscriber extends AbstractTranscriber {
async transcribe (
mediaFilePath: string,
model: TranscriptionModel,
language: string,
format: TranscriptFormat = 'vtt'
): Promise<Transcript> {
const $$ = $({ verbose: true })
// faster-whisper is exposed through the whisper-ctranslate2 CLI (see engines.ts)
await $$`whisper-ctranslate2 ${[
mediaFilePath,
'--model',
model.name,
'--output_format',
'all',
'--output_dir',
this.transcriptDirectory
]}`
// debug: list the files produced by the CLI
await $$`ls ${this.transcriptDirectory}`
return {
language,
// FIXME: the CLI names outputs after the input media file; 'test' matches the current fixture expectation
path: join(this.transcriptDirectory, `test.${format}`),
format
}
}
}

View File

@ -0,0 +1,3 @@
export * from './transformers-js-transcriber.js'
export * from './transformers-transcriber.js'
export * from './openai-transcriber.js'

View File

@ -0,0 +1,34 @@
import { join } from 'path'
import { $ } from 'execa'
import { TranscriptionModel } from '../../transcription-model.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
export class OpenaiTranscriber extends AbstractTranscriber {
async transcribe (
mediaFilePath: string,
model: TranscriptionModel,
language: string,
format: TranscriptFormat = 'vtt'
): Promise<Transcript> {
const $$ = $({ verbose: true })
await $$`whisper ${[
mediaFilePath,
'--model',
model.name,
'--output_format',
'all',
'--output_dir',
this.transcriptDirectory
]}`
// debug: list the files produced by the CLI
await $$`ls ${this.transcriptDirectory}`
return {
language,
// FIXME: whisper names outputs after the input media file; 'test' matches the current fixture expectation
path: join(this.transcriptDirectory, `test.${format}`),
format
}
}
}
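
A direct usage sketch, assuming the whisper CLI is installed and on the PATH; the execa template above expands to roughly: whisper <file> --model tiny --output_format all --output_dir <dir>:

import { createLogger } from 'winston'
import { engines, OpenaiTranscriber } from '@peertube/peertube-transcription'

const engine = engines.find(({ name }) => name === 'openai-whisper')
if (!engine) throw new Error('openai-whisper engine descriptor not found')

// Spawns: whisper /tmp/video_short.mp4 --model tiny --output_format all --output_dir /tmp/transcripts
const transcriber = new OpenaiTranscriber(engine, createLogger(), '/tmp/transcripts')
const transcript = await transcriber.transcribe('/tmp/video_short.mp4', { name: 'tiny' }, 'fr', 'vtt')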

View File

@ -0,0 +1,22 @@
import { TranscriptionModel } from '../../transcription-model.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
// Disable local models
// env.allowLocalModels = true
export class TransformersJsTranscriber extends AbstractTranscriber {
async transcribe (
mediaFilePath: string,
model: TranscriptionModel,
language: string,
format: TranscriptFormat = 'vtt'
): Promise<Transcript> {
throw new Error('transformers.js transcriber is not implemented yet')
// return pipeline('automatic-speech-recognition', 'no_attentions', {
// // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
// revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main'
// })
}
}

View File

@ -0,0 +1,14 @@
import { TranscriptionModel } from '../../transcription-model.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
export class TransformersTranscriber extends AbstractTranscriber {
async transcribe (
mediaFilePath: string,
model: TranscriptionModel,
language: string,
format: TranscriptFormat = 'vtt'
): Promise<Transcript> {
throw new Error('transformers transcriber is not implemented yet')
}
}

View File

@ -0,0 +1,10 @@
{
"extends": "./tsconfig.json",
"compilerOptions": {
"outDir": "../types-generator/dist/peertube-transcription",
"tsBuildInfoFile": "../types-generator/dist/peertube-transcription/.tsbuildinfo",
"stripInternal": true,
"removeComments": false,
"emitDeclarationOnly": true
}
}

View File

@ -14,6 +14,7 @@
{ "path": "../packages/ffmpeg" },
{ "path": "../packages/models" },
{ "path": "../packages/node-utils" },
{ "path": "../packages/transcription" },
{ "path": "../packages/typescript-utils" }
],
"include": [

View File

@ -27,6 +27,7 @@
{ "path": "./packages/models" },
{ "path": "./packages/node-utils" },
{ "path": "./packages/server-commands" },
{ "path": "./packages/transcription" },
{ "path": "./packages/typescript-utils" }
]
}