chore: fiddling around some more

pull/6303/head
lutangar 2024-04-16 17:49:04 +02:00
parent fbc6ca2270
commit 47095673b3
33 changed files with 364 additions and 266 deletions

View File

@ -1,33 +0,0 @@
import { join } from 'path'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { remove, pathExistsSync } from 'fs-extra/esm'
import { $ } from 'execa'
import { expect } from 'chai'
import { WhisperEngine } from '@peertube/transcription'
describe('Whisper', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
it('Should be present on the system', async function () {
await $`whisper`
})
it('Should run transcription on a media file without raising any errors', async function () {
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
const whisperEngine = new WhisperEngine({ transcriptDirectory })
await whisperEngine.transcribe('tiny', mediaFilePath)
})
it('Should create a VTT transcript file', async function () {
const mediaFilePath = buildAbsoluteFixturePath('video_very_long_10p.mp4')
const whisperEngine = new WhisperEngine({ transcriptDirectory })
await whisperEngine.transcribe('tiny', mediaFilePath)
expect(pathExistsSync(vttTranscriptPath)).to.be.true
})
after(async function () {
await remove(transcriptDirectory)
})
})

View File

@ -0,0 +1,28 @@
import { createLogger } from 'winston'
import { join } from 'path'
import { expect } from 'chai'
import { remove, pathExistsSync } from 'fs-extra/esm'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { transcriberFactory } from '@peertube/peertube-transcription'
describe('Whisper CTranslate2 Transcriber', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
it('Should instantiate', function () {
transcriberFactory.createFromEngineName('whisper-ctranslate2')
})
it('Should run transcription on a media file without raising any errors', async function () {
const transcriber = transcriberFactory.createFromEngineName('whisper-ctranslate2', createLogger(), transcriptDirectory)
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
expect(transcript.path).to.equals(vttTranscriptPath)
expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
})
after(async function () {
await remove(transcriptDirectory)
})
})

View File

@ -0,0 +1,28 @@
import { createLogger } from 'winston'
import { join } from 'path'
import { expect } from 'chai'
import { remove, pathExistsSync } from 'fs-extra/esm'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { transcriberFactory } from '@peertube/peertube-transcription'
describe('OpenAI Transcriber', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
it('Should instantiate', function () {
transcriberFactory.createFromEngineName('openai-whisper')
})
it('Should run transcription on a media file without raising any errors', async function () {
const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), transcriptDirectory)
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
expect(transcript.path).to.equals(vttTranscriptPath)
expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
})
after(async function () {
await remove(transcriptDirectory)
})
})

View File

@ -0,0 +1,34 @@
import { createLogger } from 'winston'
import { join } from 'path'
import { expect } from 'chai'
import { remove, pathExistsSync } from 'fs-extra/esm'
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
import { transcriberFactory } from '@peertube/peertube-transcription'
describe('Transcribers', function () {
const transcriptDirectory = join(root(), 'test-transcript')
const vttTranscriptPath = join(transcriptDirectory, 'test.vtt')
const transcribers = [
'openai-whisper',
'whisper-ctranslate2'
]
transcribers.forEach(function (transcriberName) {
it(`Should instantiate a ${transcriberName} transcriber`, function () {
transcriberFactory.createFromEngineName(transcriberName)
})
it(`Should run ${transcriberName} transcription on a media file without raising any errors`, async function () {
const transcriber = transcriberFactory.createFromEngineName(transcriberName, createLogger(), transcriptDirectory)
const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')
expect(transcript.path).to.equals(vttTranscriptPath)
expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
})
})
after(async function () {
await remove(transcriptDirectory)
})
})

View File

@ -6,7 +6,8 @@
"tsBuildInfoFile": "./dist/.tsbuildinfo",
"paths": {
"@tests/*": [ "./src/*" ],
"@server/*": [ "../../server/core/*" ]
"@server/*": [ "../../server/core/*" ],
"@peertube/peertube-transcription": [ "../transcription" ]
}
},
"references": [

View File

@ -1,5 +1,5 @@
{
"name": "@peertube/transcription",
"name": "@peertube/peertube-transcription",
"private": true,
"version": "0.0.0",
"main": "dist/index.js",

View File

@ -0,0 +1,39 @@
import { Logger } from 'winston'
import { join } from 'path'
import { root } from '@peertube/peertube-node-utils'
import { TranscriptionEngine } from './transcription-engine.js'
import { TranscriptionModel } from './transcription-model.js'
import { Transcript, TranscriptFormat } from './transcript.js'
import { existsSync } from 'fs'
export abstract class AbstractTranscriber {
public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')
engine: TranscriptionEngine
logger: Logger
transcriptDirectory: string
constructor (
engine: TranscriptionEngine,
logger: Logger,
transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY
) {
this.engine = engine
this.logger = logger
this.transcriptDirectory = transcriptDirectory
}
detectLanguage () {
return Promise.resolve('')
}
loadModel (model: TranscriptionModel) {
// TODO: actually load the model; for now only check that the file exists
if (model.path && existsSync(model.path)) { /* empty */ }
}
supports (model: TranscriptionModel) {
return model.format === 'PyTorch'
}
abstract transcribe (mediaFilePath: string, model: TranscriptionModel, language: string, format: TranscriptFormat): Promise<Transcript>
}
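
For reference, a concrete transcriber only has to implement transcribe(). A minimal hypothetical sketch (DummyTranscriber and its canned output are illustrative, not part of this commit):

import { join } from 'path'
import { TranscriptionModel } from './transcription-model.js'
import { Transcript, TranscriptFormat } from './transcript.js'
import { AbstractTranscriber } from './abstract-transcriber.js'

// Hypothetical subclass: pretends a transcript was produced without running any engine.
export class DummyTranscriber extends AbstractTranscriber {
  async transcribe (
    mediaFilePath: string,
    model: TranscriptionModel,
    language: string,
    format: TranscriptFormat = 'vtt'
  ): Promise<Transcript> {
    return { language, path: join(this.transcriptDirectory, `dummy.${format}`), format }
  }
}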

View File

@ -1,4 +1,8 @@
export * from './whisper/index.js'
import { TranscriberFactory } from './transcriber-factory.js'
import { engines } from './whisper/index.js'
export * from './transcription-engine.js'
export * from './transcription-model.js'
export * from './transcription-result.js'
export * from './transcript.js'
export const transcriberFactory = new TranscriberFactory(engines)

View File

View File

@ -0,0 +1,9 @@
import { TranscriptionModel } from './transcription-model.js'
export class ModelFactory {
createModelFromName (name: string): TranscriptionModel {
return {
name
}
}
}
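
As it stands the factory only wraps the short model name; a usage sketch (the import path is assumed, since model-factory.ts is not re-exported in the hunks shown here):

import { ModelFactory } from './model-factory.js' // path assumed

const model = new ModelFactory().createModelFromName('tiny')
// => { name: 'tiny' } (format, path and url are left undefined)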

View File

@ -0,0 +1,30 @@
import { Logger, createLogger } from 'winston'
import { TranscriptionEngine } from './transcription-engine.js'
import { TransformersTranscriber, OpenaiTranscriber, FasterWhisperTranscriber } from './whisper/index.js'
import { AbstractTranscriber } from './abstract-transcriber.js'
export class TranscriberFactory {
engines: TranscriptionEngine[]
constructor (engines: TranscriptionEngine[]) {
this.engines = engines
}
createFromEngineName (engineName: string, logger: Logger = createLogger(), transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY) {
const engine = this.engines.find(({ name }) => name === engineName)
if (!engine) {
throw new Error(`Unknown engine ${engineName}`)
}
const transcriberArgs: ConstructorParameters<typeof AbstractTranscriber> = [ engine, logger, transcriptDirectory ]
switch (engineName) {
case 'openai-whisper':
return new OpenaiTranscriber(...transcriberArgs)
case 'whisper-ctranslate2':
return new FasterWhisperTranscriber(...transcriberArgs)
case 'transformers':
return new TransformersTranscriber(...transcriberArgs)
default:
throw new Error(`Unimplemented engine ${engineName}`)
}
}
}
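
A usage sketch, assuming the transcriberFactory export from the package index shown in this commit; engine names must match the entries declared in engines.ts:

import { createLogger } from 'winston'
import { transcriberFactory } from '@peertube/peertube-transcription'

// Resolve a transcriber by engine name; throws on unknown or unimplemented engines.
const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), '/tmp/transcripts')
const transcript = await transcriber.transcribe('/tmp/video_short.mp4', { name: 'tiny' }, 'fr', 'vtt')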

View File

@ -0,0 +1,3 @@
export type TranscriptFormat = 'txt' | 'vtt' | 'srt'
export type Transcript = { path: string, language?: string, format: TranscriptFormat }
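
For instance, a French WebVTT transcript would be described as:

import { Transcript } from '@peertube/peertube-transcription'

// A transcript value as returned by a transcriber:
const transcript: Transcript = { path: '/tmp/transcripts/test.vtt', language: 'fr', format: 'vtt' }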

View File

@ -1,32 +1,19 @@
import { join } from 'path'
import { root } from '@peertube/peertube-node-utils'
import { TranscriptionModel } from './transcription-model.js'
import { TranscriptionResult } from './transcription-result.js'
import { ModelFormat } from './transcription-model.js'
export abstract class TranscriptionEngine {
public name: string
public description: string
public language: string
public requirements: string[]
public type: 'binary' | 'bindings' | 'ws'
public license: string
public forgeURL: string
/**
* The engine, or framework.
*/
export interface TranscriptionEngine {
name: string
description: string
language: string
requirements: string[]
type: 'binary' | 'bindings' | 'ws'
binary?: string
license: string
forgeURL: string
supportedModelFormats: ModelFormat[]
public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')
// There could be a default model.
// There could be a list of default models
public abstract transcribe (
model: TranscriptionModel | string,
mediaFilePath: string,
language: string,
outputFormat: string
): Promise<TranscriptionResult>
public abstract loadModel (model: TranscriptionModel)
public abstract detectLanguage (): Promise<string>
public abstract supports (model: TranscriptionModel): boolean
static getModelName (model: TranscriptionModel | string) {
return typeof model === 'string' ? model : model.name
}
}

View File

@ -41,9 +41,11 @@
// .'PyTorch' | 'GGML' | 'ONNX' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
// https://towardsdatascience.com/guide-to-file-formats-for-machine-learning-columnar-training-inferencing-and-the-feature-store-2e0c3d18d4f9
export type ModelFormat = 'PyTorch' | 'GGML' | 'ONNX' | 'CTranslate2' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
export abstract class TranscriptionModel {
name: string
format?: 'PyTorch' | 'GGML' | 'ONNX' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
format?: ModelFormat
path?: string
url?: string
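
Because TypeScript typing is structural, plain object literals satisfy this type even though it is declared as a class, which is how the specs above build their models:

import { TranscriptionModel } from '@peertube/peertube-transcription'

// Only `name` is mandatory; the other fields describe where and how the model is stored.
const tiny: TranscriptionModel = { name: 'tiny' }
const local: TranscriptionModel = { name: 'large-v2', format: 'PyTorch', path: '/models/large-v2.pt' }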

View File

@ -1 +0,0 @@
export type TranscriptionResult = { transcriptFilePath: string, language?: string }

View File

View File

@ -1,38 +0,0 @@
import { existsSync } from 'fs'
import { TranscriptionModel } from '../../transcription-model.js'
import { TranscriptionEngine } from '../../transcription-engine.js'
import { Promise } from 'bluebird'
import { TranscriptionResult } from '../../transcription-result.js'
export class WhisperCppEngine implements TranscriptionEngine {
name = 'whisper-cpp'
description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
type: 'binary'
language = 'cpp'
requirements = []
forgeURL = 'https://github.com/ggerganov/whisper.cpp'
license = 'MIT'
detectLanguage () {
return Promise.resolve('')
}
loadModel (model: TranscriptionModel) {
if (existsSync(model.path)) { /* empty */ }
}
supports (model: TranscriptionModel) {
return true
}
transcribe (
model: TranscriptionModel | string,
mediaFilePath: string,
language: string,
outputFormat: string
): Promise<TranscriptionResult> {
return Promise.resolve(undefined)
}
}
export const whisperCppEngine = new WhisperCppEngine()

View File

@ -1,12 +0,0 @@
import { TranscriptionEngine } from '../../transcription-engine.js'
import { whisperEngine } from './python.js'
import { whisperCppEngine } from './cpp.js'
import { transformers } from './transformers.js'
import { transformersJs } from './transformers-js.js'
export const engines: TranscriptionEngine[] = [
whisperCppEngine,
whisperEngine,
transformers,
transformersJs
]

View File

@ -1,4 +0,0 @@
export * from './cpp.js'
export * from './python.js'
export * from './transformers.js'
export * from './transformers-js.js'

View File

@ -1,65 +0,0 @@
import { existsSync } from 'fs'
import { join } from 'path'
import { ChildProcess } from 'child_process'
import { $ } from 'execa'
import { TranscriptionEngine } from '../../transcription-engine.js'
import { TranscriptionModel } from '../../transcription-model.js'
import { TranscriptionResult } from '../../transcription-result.js'
type TranscriptFormat = 'txt' | 'vtt' | 'srt'
export class WhisperEngine implements TranscriptionEngine {
name: 'whisper'
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
requirements: ['python', 'pyTorch', 'ffmpeg']
language: 'python'
type: 'binary'
binary: string
forgeURL: 'https://github.com/openai/whisper'
license: 'MIT'
process?: ChildProcess
transcriptDirectory: string
public constructor (transcriptDirectory: WhisperEngine['transcriptDirectory'] = TranscriptionEngine.DEFAULT_TRANSCRIPT_DIRECTORY) {
this.transcriptDirectory = transcriptDirectory
}
detectLanguage () {
return Promise.resolve('')
}
loadModel (model: TranscriptionModel) {
if (existsSync(model.path)) { /* empty */ }
}
supports (model: TranscriptionModel) {
return model.format === 'PyTorch'
}
async transcribe (
model: TranscriptionModel | string,
mediaFilePath: string,
format: TranscriptFormat = 'vtt'
): Promise<TranscriptionResult> {
const $$ = $({ verbose: true })
await $$`whisper ${[
mediaFilePath,
'--model',
TranscriptionEngine.getModelName(model),
'--output_format',
'all',
'--output_dir',
this.transcriptDirectory
]}`
await $$`ls ${this.transcriptDirectory}`
return {
language: '',
transcriptFilePath: join(this.transcriptDirectory, `test.${format}`)
}
}
}
export const whisperEngine = new WhisperEngine()

View File

@ -1,42 +0,0 @@
// import { pipeline, env } from '@xenova/transformers'
import { TranscriptionModel } from '../../transcription-model.js'
import { TranscriptionEngine } from '../../transcription-engine.js'
import { TranscriptionResult } from '../../transcription-result.js'
import { Promise } from 'bluebird'
// Disable local models
// env.allowLocalModels = true
class TransformersJs implements TranscriptionEngine {
name = 'transformers.js'
description = ''
requirements = []
language = 'js'
forgeURL: string
license: string
type: 'bindings'
transcribe (
model: TranscriptionModel | string,
mediaFilePath: string,
language: string, outputFormat: string): Promise<TranscriptionResult> {
return Promise.resolve(undefined)
// return pipeline('automatic-speech-recognition', 'no_attentions', {
// // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
// revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main'
// })
}
detectLanguage (): Promise<string> {
return Promise.resolve('')
}
loadModel (model: TranscriptionModel) {
}
supports (model: TranscriptionModel): boolean {
return false
}
}
export const transformersJs = new TransformersJs()

View File

@ -1,38 +0,0 @@
import { TranscriptionEngine } from '../../transcription-engine.js'
import { TranscriptionModel } from '../../transcription-model.js'
import { existsSync } from 'fs'
import { TranscriptionResult } from '../../transcription-result.js'
import { Promise } from 'bluebird'
export class Transformers implements TranscriptionEngine {
name = 'transformers'
description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
type: 'binary'
language = 'cpp'
requirements = []
forgeURL = 'https://github.com/ggerganov/whisper.cpp'
license = 'MIT'
supports (model: TranscriptionModel) {
return true
}
detectLanguage () {
return Promise.resolve('')
}
loadModel (model: TranscriptionModel) {
if (existsSync(model.path)) { /* empty */ }
}
transcribe (
model: TranscriptionModel | string,
mediaFilePath: string,
language: string,
outputFormat: string
): Promise<TranscriptionResult> {
return Promise.resolve(undefined)
}
}
export const transformers = new Transformers()

View File

@ -0,0 +1,46 @@
import { TranscriptionEngine } from '../transcription-engine.js'
export const engines: TranscriptionEngine[] = [
{
name: 'whisper-cpp',
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
type: 'binary',
language: 'cpp',
requirements: [],
forgeURL: 'https://github.com/ggerganov/whisper.cpp',
license: 'MIT',
supportedModelFormats: [ 'GGML' ]
},
{
name: 'transformers',
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
type: 'binary',
language: 'python',
requirements: [],
forgeURL: '',
license: '',
supportedModelFormats: [ 'ONNX' ]
},
{
name: 'openai-whisper',
description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
requirements: [ 'python', 'pyTorch', 'ffmpeg' ],
language: 'python',
type: 'binary',
binary: 'whisper',
forgeURL: 'https://github.com/openai/whisper',
license: 'MIT',
supportedModelFormats: [ 'PyTorch' ]
},
{
name: 'whisper-ctranslate2',
description: '',
requirements: [ 'python' ],
language: 'python',
type: 'binary',
binary: 'whisper-ctranslate2',
forgeURL: 'https://github.com/Softcatala/whisper-ctranslate2',
license: 'MIT',
supportedModelFormats: [ 'CTranslate2' ]
}
]
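
These descriptors are the single source of truth for engine names: TranscriberFactory first looks the requested name up here, then instantiates the matching transcriber. A lookup sketch:

import { engines } from '@peertube/peertube-transcription'

// Inspect an engine descriptor the way the factory does.
const engine = engines.find(({ name }) => name === 'openai-whisper')
console.log(engine?.binary, engine?.supportedModelFormats) // 'whisper' [ 'PyTorch' ]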

View File

@ -1 +1,2 @@
export * from './engine/index.js'
export * from './transcriber/index.js'
export * from './engines.js'

View File

@ -0,0 +1,34 @@
import { join } from 'path'
import { $ } from 'execa'
import { TranscriptionModel } from '../../transcription-model.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
export class FasterWhisperTranscriber extends AbstractTranscriber {
async transcribe (
mediaFilePath: string,
model: TranscriptionModel,
language: string,
format: TranscriptFormat = 'vtt'
): Promise<Transcript> {
const $$ = $({ verbose: true })
// faster-whisper is exposed through the whisper-ctranslate2 CLI (see engines.ts)
await $$`whisper-ctranslate2 ${[
mediaFilePath,
'--model',
model.name,
'--output_format',
'all',
'--output_dir',
this.transcriptDirectory
]}`
// debug: list the files produced by the CLI
await $$`ls ${this.transcriptDirectory}`
return {
language,
// FIXME: the CLI names outputs after the input media file; 'test' matches the current fixture expectation
path: join(this.transcriptDirectory, `test.${format}`),
format
}
}
}

View File

@ -0,0 +1,3 @@
export * from './transformers-js-transcriber.js'
export * from './transformers-transcriber.js'
export * from './openai-transcriber.js'

View File

@ -0,0 +1,34 @@
import { join } from 'path'
import { $ } from 'execa'
import { TranscriptionModel } from '../../transcription-model.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
export class OpenaiTranscriber extends AbstractTranscriber {
async transcribe (
mediaFilePath: string,
model: TranscriptionModel,
language: string,
format: TranscriptFormat = 'vtt'
): Promise<Transcript> {
const $$ = $({ verbose: true })
await $$`whisper ${[
mediaFilePath,
'--model',
model.name,
'--output_format',
'all',
'--output_dir',
this.transcriptDirectory
]}`
// debug: list the files produced by the CLI
await $$`ls ${this.transcriptDirectory}`
return {
language,
// FIXME: whisper names outputs after the input media file; 'test' matches the current fixture expectation
path: join(this.transcriptDirectory, `test.${format}`),
format
}
}
}
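
A direct usage sketch, assuming the whisper CLI is installed and on the PATH; the execa template above expands to roughly: whisper <file> --model tiny --output_format all --output_dir <dir>:

import { createLogger } from 'winston'
import { engines, OpenaiTranscriber } from '@peertube/peertube-transcription'

const engine = engines.find(({ name }) => name === 'openai-whisper')
if (!engine) throw new Error('openai-whisper engine descriptor not found')

// Spawns: whisper /tmp/video_short.mp4 --model tiny --output_format all --output_dir /tmp/transcripts
const transcriber = new OpenaiTranscriber(engine, createLogger(), '/tmp/transcripts')
const transcript = await transcriber.transcribe('/tmp/video_short.mp4', { name: 'tiny' }, 'fr', 'vtt')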

View File

@ -0,0 +1,22 @@
import { TranscriptionModel } from '../../transcription-model.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
// Disable local models
// env.allowLocalModels = true
export class TransformersJsTranscriber extends AbstractTranscriber {
async transcribe (
mediaFilePath: string,
model: TranscriptionModel,
language: string,
format: TranscriptFormat = 'vtt'
): Promise<Transcript> {
throw new Error('transformers.js transcriber is not implemented yet')
// return pipeline('automatic-speech-recognition', 'no_attentions', {
// // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
// revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main'
// })
}
}

View File

@ -0,0 +1,14 @@
import { TranscriptionModel } from '../../transcription-model.js'
import { AbstractTranscriber } from '../../abstract-transcriber.js'
import { Transcript, TranscriptFormat } from '../../transcript.js'
export class TransformersTranscriber extends AbstractTranscriber {
async transcribe (
mediaFilePath: string,
model: TranscriptionModel,
language: string,
format: TranscriptFormat = 'vtt'
): Promise<Transcript> {
throw new Error('transformers transcriber is not implemented yet')
}
}

View File

@ -0,0 +1,10 @@
{
"extends": "./tsconfig.json",
"compilerOptions": {
"outDir": "../types-generator/dist/peertube-transcription",
"tsBuildInfoFile": "../types-generator/dist/peertube-transcription/.tsbuildinfo",
"stripInternal": true,
"removeComments": false,
"emitDeclarationOnly": true
}
}

View File

@ -14,6 +14,7 @@
{ "path": "../packages/ffmpeg" },
{ "path": "../packages/models" },
{ "path": "../packages/node-utils" },
{ "path": "../packages/transcription" },
{ "path": "../packages/typescript-utils" }
],
"include": [

View File

@ -27,6 +27,7 @@
{ "path": "./packages/models" },
{ "path": "./packages/node-utils" },
{ "path": "./packages/server-commands" },
{ "path": "./packages/transcription" },
{ "path": "./packages/typescript-utils" }
]
}