From 47095673b3cc792733b97e1c4ba058b6d1a6d2d4 Mon Sep 17 00:00:00 2001 From: lutangar Date: Tue, 16 Apr 2024 17:49:04 +0200 Subject: [PATCH] chore: fiddling around some more --- .../whisper/engine/python.spec.ts | 33 ---------- .../faster-whisper-transcriber.spec.ts | 28 ++++++++ .../transcriber/openai-transcriber.spec.ts | 28 ++++++++ .../whisper/transcribers.spec.ts | 34 ++++++++++ packages/tests/tsconfig.json | 3 +- packages/transcription/package.json | 2 +- .../transcription/src/abstract-transcriber.ts | 39 +++++++++++ packages/transcription/src/index.ts | 8 ++- packages/transcription/src/installer.ts | 0 packages/transcription/src/model-factory.ts | 9 +++ .../transcription/src/transcriber-factory.ts | 30 +++++++++ packages/transcription/src/transcript.ts | 3 + .../transcription/src/transcription-engine.ts | 41 ++++-------- .../transcription/src/transcription-model.ts | 4 +- .../transcription/src/transcription-result.ts | 1 - packages/transcription/src/update.ts | 0 .../src/whisper/{engine => }/README.md | 0 .../transcription/src/whisper/engine/cpp.ts | 38 ----------- .../src/whisper/engine/engines.ts | 12 ---- .../transcription/src/whisper/engine/index.ts | 4 -- .../src/whisper/engine/python.ts | 65 ------------------- .../src/whisper/engine/transformers-js.ts | 42 ------------ .../src/whisper/engine/transformers.ts | 38 ----------- packages/transcription/src/whisper/engines.ts | 46 +++++++++++++ packages/transcription/src/whisper/index.ts | 3 +- .../transcriber/faster-whisper-transcriber.ts | 34 ++++++++++ .../src/whisper/transcriber/index.ts | 3 + .../whisper/transcriber/openai-transcriber.ts | 34 ++++++++++ .../transformers-js-transcriber.ts | 22 +++++++ .../transcriber/transformers-transcriber.ts | 14 ++++ packages/transcription/tsconfig.types.json | 10 +++ server/tsconfig.json | 1 + tsconfig.eslint.json | 1 + 33 files changed, 364 insertions(+), 266 deletions(-) delete mode 100644 packages/tests/src/transcription/whisper/engine/python.spec.ts create mode 100644 packages/tests/src/transcription/whisper/transcriber/faster-whisper-transcriber.spec.ts create mode 100644 packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts create mode 100644 packages/tests/src/transcription/whisper/transcribers.spec.ts create mode 100644 packages/transcription/src/abstract-transcriber.ts create mode 100644 packages/transcription/src/installer.ts create mode 100644 packages/transcription/src/model-factory.ts create mode 100644 packages/transcription/src/transcriber-factory.ts create mode 100644 packages/transcription/src/transcript.ts delete mode 100644 packages/transcription/src/transcription-result.ts create mode 100644 packages/transcription/src/update.ts rename packages/transcription/src/whisper/{engine => }/README.md (100%) delete mode 100644 packages/transcription/src/whisper/engine/cpp.ts delete mode 100644 packages/transcription/src/whisper/engine/engines.ts delete mode 100644 packages/transcription/src/whisper/engine/index.ts delete mode 100644 packages/transcription/src/whisper/engine/python.ts delete mode 100644 packages/transcription/src/whisper/engine/transformers-js.ts delete mode 100644 packages/transcription/src/whisper/engine/transformers.ts create mode 100644 packages/transcription/src/whisper/engines.ts create mode 100644 packages/transcription/src/whisper/transcriber/faster-whisper-transcriber.ts create mode 100644 packages/transcription/src/whisper/transcriber/index.ts create mode 100644 packages/transcription/src/whisper/transcriber/openai-transcriber.ts create mode 100644 packages/transcription/src/whisper/transcriber/transformers-js-transcriber.ts create mode 100644 packages/transcription/src/whisper/transcriber/transformers-transcriber.ts create mode 100644 packages/transcription/tsconfig.types.json diff --git a/packages/tests/src/transcription/whisper/engine/python.spec.ts b/packages/tests/src/transcription/whisper/engine/python.spec.ts deleted file mode 100644 index e6b4df6f4..000000000 --- a/packages/tests/src/transcription/whisper/engine/python.spec.ts +++ /dev/null @@ -1,33 +0,0 @@ -import { join } from 'path' -import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils' -import { remove, pathExistsSync } from 'fs-extra/esm.js' -import { $ } from 'execa' -import { expect } from 'chai' -import { WhisperEngine } from '@peertube/transcription' - -describe('Whisper', function () { - const transcriptDirectory = join(root(), 'test-transcript') - const vttTranscriptPath = join(transcriptDirectory, 'test.vtt') - - it('Should be present on the system', async function () { - await $`whisper` - }) - - it('Should run transcription on a media file without raising any errors', async function () { - const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4') - const whisperEngine = new WhisperEngine({ transcriptDirectory }) - await whisperEngine.transcribe('tiny', mediaFilePath) - }) - - it('Should be create a vtt transcript file', async function () { - const mediaFilePath = buildAbsoluteFixturePath('video_very_long_10p.mp4') - const whisperEngine = new WhisperEngine({ transcriptDirectory }) - const { } = await whisperEngine.transcribe('tiny', mediaFilePath) - - expect(pathExistsSync(vttTranscriptPath)).to.be.true - }) - - after(async function () { - await remove(transcriptDirectory) - }) -}) diff --git a/packages/tests/src/transcription/whisper/transcriber/faster-whisper-transcriber.spec.ts b/packages/tests/src/transcription/whisper/transcriber/faster-whisper-transcriber.spec.ts new file mode 100644 index 000000000..da30ea5aa --- /dev/null +++ b/packages/tests/src/transcription/whisper/transcriber/faster-whisper-transcriber.spec.ts @@ -0,0 +1,28 @@ +import { createLogger } from 'winston' +import { join } from 'path' +import { expect } from 'chai' +import { remove, pathExistsSync } from 'fs-extra/esm.js' +import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils' +import { transcriberFactory } from '@peertube/transcription' + +describe('Open AI Transcriber', function () { + + const transcriptDirectory = join(root(), 'test-transcript') + const vttTranscriptPath = join(transcriptDirectory, 'test.vtt') + + it('Should instanciate', function () { + transcriberFactory.createFromEngineName('faster-whisper') + }) + + it('Should run transcription on a media file without raising any errors', async function () { + const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), transcriptDirectory) + const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4') + const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt') + expect(transcript.path).to.equals(vttTranscriptPath) + expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`) + }) + + after(async function () { + await remove(transcriptDirectory) + }) +}) diff --git a/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts new file mode 100644 index 000000000..cc0721c6f --- /dev/null +++ b/packages/tests/src/transcription/whisper/transcriber/openai-transcriber.spec.ts @@ -0,0 +1,28 @@ +import { createLogger } from 'winston' +import { join } from 'path' +import { expect } from 'chai' +import { remove, pathExistsSync } from 'fs-extra/esm.js' +import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils' +import { transcriberFactory } from '@peertube/transcription' + +describe('Open AI Transcriber', function () { + + const transcriptDirectory = join(root(), 'test-transcript') + const vttTranscriptPath = join(transcriptDirectory, 'test.vtt') + + it('Should instanciate', function () { + transcriberFactory.createFromEngineName('openai-whisper') + }) + + it('Should run transcription on a media file without raising any errors', async function () { + const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), transcriptDirectory) + const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4') + const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt') + expect(transcript.path).to.equals(vttTranscriptPath) + expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`) + }) + + after(async function () { + await remove(transcriptDirectory) + }) +}) diff --git a/packages/tests/src/transcription/whisper/transcribers.spec.ts b/packages/tests/src/transcription/whisper/transcribers.spec.ts new file mode 100644 index 000000000..e1dd9c9b1 --- /dev/null +++ b/packages/tests/src/transcription/whisper/transcribers.spec.ts @@ -0,0 +1,34 @@ +import { createLogger } from 'winston' +import { join } from 'path' +import { expect } from 'chai' +import { remove, pathExistsSync } from 'fs-extra/esm' +import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils' +import { transcriberFactory } from '@peertube/peertube-transcription' + +describe('Transcribers', function () { + const transcriptDirectory = join(root(), 'test-transcript') + const vttTranscriptPath = join(transcriptDirectory, 'test.vtt') + const transcribers = [ + 'openai-whisper', + 'faster-whisper' + ] + + transcribers.forEach(function (transcriber) { + it(`Should instanciate a ${transcriber} transcriber`, function () { + transcriberFactory.createFromEngineName('openai-whisper') + }) + + it('Should run transcription on a media file without raising any errors', async function () { + const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), transcriptDirectory) + const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4') + const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt') + expect(transcript.path).to.equals(vttTranscriptPath) + expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`) + }) + + }) + + after(async function () { + await remove(transcriptDirectory) + }) +}) diff --git a/packages/tests/tsconfig.json b/packages/tests/tsconfig.json index 148647e84..6737ea215 100644 --- a/packages/tests/tsconfig.json +++ b/packages/tests/tsconfig.json @@ -6,7 +6,8 @@ "tsBuildInfoFile": "./dist/.tsbuildinfo", "paths": { "@tests/*": [ "./src/*" ], - "@server/*": [ "../../server/core/*" ] + "@server/*": [ "../../server/core/*" ], + "@peertube/peertube-transcription": [ "../transcription" ] } }, "references": [ diff --git a/packages/transcription/package.json b/packages/transcription/package.json index 1ebbeb2ce..366a08686 100644 --- a/packages/transcription/package.json +++ b/packages/transcription/package.json @@ -1,5 +1,5 @@ { - "name": "@peertube/transcription", + "name": "@peertube/peertube-transcription", "private": true, "version": "0.0.0", "main": "dist/index.js", diff --git a/packages/transcription/src/abstract-transcriber.ts b/packages/transcription/src/abstract-transcriber.ts new file mode 100644 index 000000000..4ba314132 --- /dev/null +++ b/packages/transcription/src/abstract-transcriber.ts @@ -0,0 +1,39 @@ +import { Logger } from 'winston' +import { join } from 'path' +import { root } from '@peertube/peertube-node-utils' +import { TranscriptionEngine } from './transcription-engine.js' +import { TranscriptionModel } from './transcription-model.js' +import { Transcript, TranscriptFormat } from './transcript.js' +import { existsSync } from 'fs' + +export abstract class AbstractTranscriber { + public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts') + + engine: TranscriptionEngine + logger: Logger + transcriptDirectory: string + + constructor ( + engine: TranscriptionEngine, + logger: Logger, + transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY + ) { + this.engine = engine + this.logger = logger + this.transcriptDirectory = transcriptDirectory + } + + detectLanguage () { + return Promise.resolve('') + } + + loadModel (model: TranscriptionModel) { + if (existsSync(model.path)) { /* empty */ } + } + + supports (model: TranscriptionModel) { + return model.format === 'PyTorch' + } + + abstract transcribe (mediaFilePath: string, model: TranscriptionModel, language: string, format: TranscriptFormat): Promise +} diff --git a/packages/transcription/src/index.ts b/packages/transcription/src/index.ts index 1cf9ecd1b..1099f77b5 100644 --- a/packages/transcription/src/index.ts +++ b/packages/transcription/src/index.ts @@ -1,4 +1,8 @@ -export * from './whisper/index.js' +import { TranscriberFactory } from './transcriber-factory.js' +import { engines } from './whisper/index.js' + export * from './transcription-engine.js' export * from './transcription-model.js' -export * from './transcription-result.js' +export * from './transcript.js' + +export const transcriberFactory = new TranscriberFactory(engines) diff --git a/packages/transcription/src/installer.ts b/packages/transcription/src/installer.ts new file mode 100644 index 000000000..e69de29bb diff --git a/packages/transcription/src/model-factory.ts b/packages/transcription/src/model-factory.ts new file mode 100644 index 000000000..fbdc5abed --- /dev/null +++ b/packages/transcription/src/model-factory.ts @@ -0,0 +1,9 @@ +import { TranscriptionModel } from './transcription-model.js' + +export class ModelFactory { + createModelFromName (name: string): TranscriptionModel { + return { + name + } + } +} diff --git a/packages/transcription/src/transcriber-factory.ts b/packages/transcription/src/transcriber-factory.ts new file mode 100644 index 000000000..230bfaa0e --- /dev/null +++ b/packages/transcription/src/transcriber-factory.ts @@ -0,0 +1,30 @@ +import { Logger, createLogger } from 'winston' +import { TranscriptionEngine } from './transcription-engine.js' +import { TransformersTranscriber, OpenaiTranscriber } from './whisper/index.js' +import { AbstractTranscriber } from './abstract-transcriber.js' + +export class TranscriberFactory { + engines: TranscriptionEngine[] + + constructor (engines: TranscriptionEngine[]) { + this.engines = engines + } + + createFromEngineName (engineName: string, logger: Logger = createLogger(), transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY) { + const engine = this.engines.find(({ name }) => name === engineName) + if (!engine) { + throw new Error(`Unknow engine ${engineName}`) + } + + const transcriberArgs: ConstructorParameters = [ engine, logger, transcriptDirectory ] + + switch (engineName) { + case 'whisper': + return new OpenaiTranscriber(...transcriberArgs) + case 'transformers': + return new TransformersTranscriber(...transcriberArgs) + default: + throw new Error(`Unimplemented engine ${engineName}`) + } + } +} diff --git a/packages/transcription/src/transcript.ts b/packages/transcription/src/transcript.ts new file mode 100644 index 000000000..2a8c9449a --- /dev/null +++ b/packages/transcription/src/transcript.ts @@ -0,0 +1,3 @@ +export type TranscriptFormat = 'txt' | 'vtt' | 'srt' + +export type Transcript = { path: string, language?: string, format: TranscriptFormat } diff --git a/packages/transcription/src/transcription-engine.ts b/packages/transcription/src/transcription-engine.ts index f924abd75..33d9c66b9 100644 --- a/packages/transcription/src/transcription-engine.ts +++ b/packages/transcription/src/transcription-engine.ts @@ -1,32 +1,19 @@ -import { join } from 'path' -import { root } from '@peertube/peertube-node-utils' -import { TranscriptionModel } from './transcription-model.js' -import { TranscriptionResult } from './transcription-result.js' +import { ModelFormat } from './transcription-model.js' -export abstract class TranscriptionEngine { - public name: string - public description: string - public language: string - public requirements: string[] - public type: 'binary' | 'bindings' | 'ws' - public license: string - public forgeURL: string +/** + * The engine, or framework. + */ +export interface TranscriptionEngine { + name: string + description: string + language: string + requirements: string[] + type: 'binary' | 'bindings' | 'ws' + binary?: string + license: string + forgeURL: string + supportedModelFormats: ModelFormat[] - public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts') // There could be a default models. // There could be a list of default models - - public abstract transcribe ( - model: TranscriptionModel | string, - mediaFilePath: string, - language: string, - outputFormat: string - ): Promise - public abstract loadModel (model: TranscriptionModel) - public abstract detectLanguage (): Promise - public abstract supports (model: TranscriptionModel): boolean - - static getModelName (model: TranscriptionModel | string) { - return typeof model === 'string' ? model : model.name - } } diff --git a/packages/transcription/src/transcription-model.ts b/packages/transcription/src/transcription-model.ts index b76bff159..3a9a02e32 100644 --- a/packages/transcription/src/transcription-model.ts +++ b/packages/transcription/src/transcription-model.ts @@ -41,9 +41,11 @@ // .'PyTorch' | 'GGML' | 'ONNX' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark // https://towardsdatascience.com/guide-to-file-formats-for-machine-learning-columnar-training-inferencing-and-the-feature-store-2e0c3d18d4f9 +export type ModelFormat = 'PyTorch' | 'GGML' | 'ONNX' | 'CTranslate2' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark + export abstract class TranscriptionModel { name: string - format?: 'PyTorch' | 'GGML' | 'ONNX' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark + format?: ModelFormat path?: string url?: string diff --git a/packages/transcription/src/transcription-result.ts b/packages/transcription/src/transcription-result.ts deleted file mode 100644 index 4f00a3883..000000000 --- a/packages/transcription/src/transcription-result.ts +++ /dev/null @@ -1 +0,0 @@ -export type TranscriptionResult = { transcriptFilePath: string, language?: string } diff --git a/packages/transcription/src/update.ts b/packages/transcription/src/update.ts new file mode 100644 index 000000000..e69de29bb diff --git a/packages/transcription/src/whisper/engine/README.md b/packages/transcription/src/whisper/README.md similarity index 100% rename from packages/transcription/src/whisper/engine/README.md rename to packages/transcription/src/whisper/README.md diff --git a/packages/transcription/src/whisper/engine/cpp.ts b/packages/transcription/src/whisper/engine/cpp.ts deleted file mode 100644 index 85f3093ac..000000000 --- a/packages/transcription/src/whisper/engine/cpp.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { existsSync } from 'fs' -import { TranscriptionModel } from '../../transcription-model.js' -import { TranscriptionEngine } from '../../transcription-engine.js' -import { Promise } from 'bluebird' -import { TranscriptionResult } from '../../transcription-result.js' - -export class WhisperCppEngine implements TranscriptionEngine { - name = 'transformers' - description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model' - type: 'binary' - language = 'cpp' - requirements = [] - forgeURL = 'https://github.com/ggerganov/whisper.cpp' - license = 'MIT' - - detectLanguage () { - return Promise.resolve('') - } - - loadModel (model: TranscriptionModel) { - if (existsSync(model.path)) { /* empty */ } - } - - supports (model: TranscriptionModel) { - return true - } - - transcribe ( - model: TranscriptionModel | string, - mediaFilePath: string, - language: string, - outputFormat: string - ): Promise { - return Promise.resolve(undefined) - } -} - -export const whisperCppEngine = new WhisperCppEngine() diff --git a/packages/transcription/src/whisper/engine/engines.ts b/packages/transcription/src/whisper/engine/engines.ts deleted file mode 100644 index 739db1326..000000000 --- a/packages/transcription/src/whisper/engine/engines.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { TranscriptionEngine } from '../../transcription-engine.js' -import { whisperEngine } from './python.js' -import { whisperCppEngine } from './cpp.js' -import { transformers } from './transformers.js' -import { transformersJs } from './transformers-js.js' - -export const engines: TranscriptionEngine[] = [ - whisperCppEngine, - whisperEngine, - transformers, - transformersJs -] diff --git a/packages/transcription/src/whisper/engine/index.ts b/packages/transcription/src/whisper/engine/index.ts deleted file mode 100644 index 4f3dac3b4..000000000 --- a/packages/transcription/src/whisper/engine/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export * from './cpp.js' -export * from './python.js' -export * from './transformers.js' -export * from './transformers-js.js' diff --git a/packages/transcription/src/whisper/engine/python.ts b/packages/transcription/src/whisper/engine/python.ts deleted file mode 100644 index 20694b359..000000000 --- a/packages/transcription/src/whisper/engine/python.ts +++ /dev/null @@ -1,65 +0,0 @@ -import { existsSync } from 'fs' -import { join } from 'path' -import { ChildProcess } from 'child_process' -import { $ } from 'execa' -import { TranscriptionEngine } from '../../transcription-engine.js' -import { TranscriptionModel } from '../../transcription-model.js' -import { TranscriptionResult } from '../../transcription-result.js' - -type TranscriptFormat = 'txt' | 'vtt' | 'srt' - -export class WhisperEngine implements TranscriptionEngine { - name: 'whisper' - description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model' - requirements: ['python', 'pyTorch', 'ffmpeg'] - language: 'python' - type: 'binary' - binary: string - forgeURL: 'https://github.com/openai/whisper' - license: 'MIT' - process?: ChildProcess - transcriptDirectory: string - - public constructor (transcriptDirectory: WhisperEngine['transcriptDirectory'] = TranscriptionEngine.DEFAULT_TRANSCRIPT_DIRECTORY) { - this.transcriptDirectory = transcriptDirectory - } - - detectLanguage () { - return Promise.resolve('') - } - - loadModel (model: TranscriptionModel) { - if (existsSync(model.path)) { /* empty */ } - } - - supports (model: TranscriptionModel) { - return model.format === 'PyTorch' - } - - async transcribe ( - model: TranscriptionModel | string, - mediaFilePath: string, - format: TranscriptFormat = 'vtt' - ): Promise { - const $$ = $({ verbose: true }) - - await $$`whisper ${[ - mediaFilePath, - '--model', - TranscriptionEngine.getModelName(model), - '--output_format', - 'all', - '--output_dir', - this.transcriptDirectory - ]}` - - await $$`ls ${this.transcriptDirectory}` - - return { - language: '', - transcriptFilePath: join(this.transcriptDirectory, `test.${format}`) - } - } -} - -export const whisperEngine = new WhisperEngine() diff --git a/packages/transcription/src/whisper/engine/transformers-js.ts b/packages/transcription/src/whisper/engine/transformers-js.ts deleted file mode 100644 index 0978f43df..000000000 --- a/packages/transcription/src/whisper/engine/transformers-js.ts +++ /dev/null @@ -1,42 +0,0 @@ -// import { pipeline, env } from '@xenova/transformers' -import { TranscriptionModel } from '../../transcription-model.js' -import { TranscriptionEngine } from '../../transcription-engine.js' -import { TranscriptionResult } from '../../transcription-result.js' -import { Promise } from 'bluebird' - -// Disable local models -// env.allowLocalModels = true - -class TransformersJs implements TranscriptionEngine { - name = 'transformers.js' - description = '' - requirements = [] - language = 'js' - forgeURL: string - license: string - type: 'bindings' - - transcribe ( - model: TranscriptionModel | string, - mediaFilePath: string, - language: string, outputFormat: string): Promise { - return Promise.resolve(undefined) - // return pipeline('automatic-speech-recognition', 'no_attentions', { - // // For medium models, we need to load the `no_attentions` revision to avoid running out of memory - // revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main' - // }) - } - - detectLanguage (): Promise { - return Promise.resolve('') - } - - loadModel (model: TranscriptionModel) { - } - - supports (model: TranscriptionModel): boolean { - return false - } -} - -export const transformersJs = new TransformersJs() diff --git a/packages/transcription/src/whisper/engine/transformers.ts b/packages/transcription/src/whisper/engine/transformers.ts deleted file mode 100644 index adf7b0669..000000000 --- a/packages/transcription/src/whisper/engine/transformers.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { TranscriptionEngine } from '../../transcription-engine.js' -import { TranscriptionModel } from '../../transcription-model.js' -import { existsSync } from 'fs' -import { TranscriptionResult } from '../../transcription-result.js' -import { Promise } from 'bluebird' - -export class Transformers implements TranscriptionEngine { - name = 'transformers' - description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model' - type: 'binary' - language = 'cpp' - requirements = [] - forgeURL = 'https://github.com/ggerganov/whisper.cpp' - license = 'MIT' - - supports (model: TranscriptionModel) { - return true - } - - detectLanguage () { - return Promise.resolve('') - } - - loadModel (model: TranscriptionModel) { - if (existsSync(model.path)) { /* empty */ } - } - - transcribe ( - model: TranscriptionModel | string, - mediaFilePath: string, - language: string, - outputFormat: string - ): Promise { - return Promise.resolve(undefined) - } -} - -export const transformers = new Transformers() diff --git a/packages/transcription/src/whisper/engines.ts b/packages/transcription/src/whisper/engines.ts new file mode 100644 index 000000000..2f422f93c --- /dev/null +++ b/packages/transcription/src/whisper/engines.ts @@ -0,0 +1,46 @@ +import { TranscriptionEngine } from '../transcription-engine.js' + +export const engines: TranscriptionEngine[] = [ + { + name : 'whisper-cpp', + description : 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model', + type: 'binary', + language : 'cpp', + requirements : [], + forgeURL : 'https://github.com/ggerganov/whisper.cpp', + license : 'MIT', + supportedModelFormats: [ 'ONNX' ] + }, + { + name : 'transformers', + description : 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model', + type: 'binary', + language : 'python', + requirements : [], + forgeURL : '', + license : '', + supportedModelFormats: [ 'ONNX' ] + }, + { + name: 'openai-whisper', + description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model', + requirements: [ 'python', 'pyTorch', 'ffmpeg' ], + language: 'python', + type: 'binary', + binary: 'whisper', + forgeURL: 'https://github.com/openai/whisper', + license: 'MIT', + supportedModelFormats: [ 'PyTorch' ] + }, + { + name: 'whisper-ctranslate2', + description: '', + requirements: [ 'python' ], + language: 'python', + type: 'binary', + binary: 'whisper-ctranslate2', + forgeURL: 'https://github.com/openai/whisper', + license: 'MIT', + supportedModelFormats: [ 'CTranslate2' ] + } +] diff --git a/packages/transcription/src/whisper/index.ts b/packages/transcription/src/whisper/index.ts index d3cdbb358..ba4581d7f 100644 --- a/packages/transcription/src/whisper/index.ts +++ b/packages/transcription/src/whisper/index.ts @@ -1 +1,2 @@ -export * from './engine/index.js' +export * from './transcriber/index.js' +export * from './engines.js' diff --git a/packages/transcription/src/whisper/transcriber/faster-whisper-transcriber.ts b/packages/transcription/src/whisper/transcriber/faster-whisper-transcriber.ts new file mode 100644 index 000000000..f1a049710 --- /dev/null +++ b/packages/transcription/src/whisper/transcriber/faster-whisper-transcriber.ts @@ -0,0 +1,34 @@ +import { join } from 'path' +import { $ } from 'execa' +import { TranscriptionModel } from '../../transcription-model.js' +import { Transcript, TranscriptFormat } from '../../transcript.js' +import { AbstractTranscriber } from '../../abstract-transcriber.js' + +export class FasterWhisperTranscriber extends AbstractTranscriber { + async transcribe ( + mediaFilePath: string, + model: TranscriptionModel, + language: string, + format: TranscriptFormat = 'vtt' + ): Promise { + const $$ = $({ verbose: true }) + + await $$`whisper ${[ + mediaFilePath, + '--model', + model.name, + '--output_format', + 'all', + '--output_dir', + this.transcriptDirectory + ]}` + + await $$`ls ${this.transcriptDirectory}` + + return { + language, + path: join(this.transcriptDirectory, `test.${format}`), + format + } + } +} diff --git a/packages/transcription/src/whisper/transcriber/index.ts b/packages/transcription/src/whisper/transcriber/index.ts new file mode 100644 index 000000000..b4e6e5710 --- /dev/null +++ b/packages/transcription/src/whisper/transcriber/index.ts @@ -0,0 +1,3 @@ +export * from './transformers-js-transcriber.js' +export * from './transformers-transcriber.js' +export * from './openai-transcriber.js' diff --git a/packages/transcription/src/whisper/transcriber/openai-transcriber.ts b/packages/transcription/src/whisper/transcriber/openai-transcriber.ts new file mode 100644 index 000000000..40c70131e --- /dev/null +++ b/packages/transcription/src/whisper/transcriber/openai-transcriber.ts @@ -0,0 +1,34 @@ +import { join } from 'path' +import { $ } from 'execa' +import { TranscriptionModel } from '../../transcription-model.js' +import { Transcript, TranscriptFormat } from '../../transcript.js' +import { AbstractTranscriber } from '../../abstract-transcriber.js' + +export class OpenaiTranscriber extends AbstractTranscriber { + async transcribe ( + mediaFilePath: string, + model: TranscriptionModel, + language: string, + format: TranscriptFormat = 'vtt' + ): Promise { + const $$ = $({ verbose: true }) + + await $$`whisper ${[ + mediaFilePath, + '--model', + model.name, + '--output_format', + 'all', + '--output_dir', + this.transcriptDirectory + ]}` + + await $$`ls ${this.transcriptDirectory}` + + return { + language, + path: join(this.transcriptDirectory, `test.${format}`), + format + } + } +} diff --git a/packages/transcription/src/whisper/transcriber/transformers-js-transcriber.ts b/packages/transcription/src/whisper/transcriber/transformers-js-transcriber.ts new file mode 100644 index 000000000..c7bb9ab1c --- /dev/null +++ b/packages/transcription/src/whisper/transcriber/transformers-js-transcriber.ts @@ -0,0 +1,22 @@ +import { TranscriptionModel } from '../../transcription-model.js' +import { AbstractTranscriber } from '../../abstract-transcriber.js' +import { Transcript, TranscriptFormat } from '../../transcript.js' +import { Promise } from 'bluebird' + +// Disable local models +// env.allowLocalModels = true + +export class TransformersJsTranscriber extends AbstractTranscriber { + async transcribe ( + mediaFilePath: string, + model: TranscriptionModel, + language: string, + format: TranscriptFormat = 'vtt' + ): Promise { + return Promise.resolve(undefined) + // return pipeline('automatic-speech-recognition', 'no_attentions', { + // // For medium models, we need to load the `no_attentions` revision to avoid running out of memory + // revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main' + // }) + } +} diff --git a/packages/transcription/src/whisper/transcriber/transformers-transcriber.ts b/packages/transcription/src/whisper/transcriber/transformers-transcriber.ts new file mode 100644 index 000000000..6341e5bef --- /dev/null +++ b/packages/transcription/src/whisper/transcriber/transformers-transcriber.ts @@ -0,0 +1,14 @@ +import { TranscriptionModel } from '../../transcription-model.js' +import { AbstractTranscriber } from '../../abstract-transcriber.js' +import { Transcript, TranscriptFormat } from '../../transcript.js' + +export class TransformersTranscriber extends AbstractTranscriber { + async transcribe ( + mediaFilePath: string, + model: TranscriptionModel, + language: string, + format: TranscriptFormat = 'vtt' + ): Promise { + return Promise.resolve(undefined) + } +} diff --git a/packages/transcription/tsconfig.types.json b/packages/transcription/tsconfig.types.json new file mode 100644 index 000000000..9edb53ece --- /dev/null +++ b/packages/transcription/tsconfig.types.json @@ -0,0 +1,10 @@ +{ + "extends": "./tsconfig.json", + "compilerOptions": { + "outDir": "../types-generator/dist/peertube-transcription", + "tsBuildInfoFile": "../types-generator/dist/peertube-transcription/.tsbuildinfo", + "stripInternal": true, + "removeComments": false, + "emitDeclarationOnly": true + } +} diff --git a/server/tsconfig.json b/server/tsconfig.json index 21442d082..ed0bfca48 100644 --- a/server/tsconfig.json +++ b/server/tsconfig.json @@ -14,6 +14,7 @@ { "path": "../packages/ffmpeg" }, { "path": "../packages/models" }, { "path": "../packages/node-utils" }, + { "path": "../packages/transcription" }, { "path": "../packages/typescript-utils" } ], "include": [ diff --git a/tsconfig.eslint.json b/tsconfig.eslint.json index c2e868173..772a9fcbc 100644 --- a/tsconfig.eslint.json +++ b/tsconfig.eslint.json @@ -27,6 +27,7 @@ { "path": "./packages/models" }, { "path": "./packages/node-utils" }, { "path": "./packages/server-commands" }, + { "path": "./packages/transcription" }, { "path": "./packages/typescript-utils" } ] }