mirror of https://github.com/Chocobozzz/PeerTube
chore: fiddling around some more
parent
fbc6ca2270
commit
47095673b3
|
@ -1,33 +0,0 @@
|
|||
import { join } from 'path'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { remove, pathExistsSync } from 'fs-extra/esm.js'
|
||||
import { $ } from 'execa'
|
||||
import { expect } from 'chai'
|
||||
import { WhisperEngine } from '@peertube/transcription'
|
||||
|
||||
/**
 * Smoke tests for the Whisper engine: the CLI must be installed and a transcription
 * run must leave a VTT transcript in the transcript directory.
 */
describe('Whisper', function () {
  const transcriptDirectory = join(root(), 'test-transcript')

  it('Should be present on the system', async function () {
    // Without arguments the CLI exits with a usage error, which execa reports as a rejection.
    // `--help` exits successfully when the binary is installed.
    await $`whisper --help`
  })

  it('Should run transcription on a media file without raising any errors', async function () {
    const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')
    // The engine constructor takes the directory itself, not an options object
    const whisperEngine = new WhisperEngine(transcriptDirectory)

    await whisperEngine.transcribe('tiny', mediaFilePath)
  })

  it('Should create a vtt transcript file', async function () {
    const mediaFilePath = buildAbsoluteFixturePath('video_very_long_10p.mp4')
    const whisperEngine = new WhisperEngine(transcriptDirectory)

    await whisperEngine.transcribe('tiny', mediaFilePath)

    // The whisper CLI names its output files after the input media file
    const vttTranscriptPath = join(transcriptDirectory, 'video_very_long_10p.vtt')
    expect(pathExistsSync(vttTranscriptPath), `Transcript file ${vttTranscriptPath} doesn't exist`).to.be.true
  })

  after(async function () {
    await remove(transcriptDirectory)
  })
})
|
|
@ -0,0 +1,28 @@
|
|||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { expect } from 'chai'
|
||||
import { remove, pathExistsSync } from 'fs-extra/esm.js'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { transcriberFactory } from '@peertube/transcription'
|
||||
|
||||
/**
 * Checks that a transcriber can be created from the factory and that a transcription
 * run produces a VTT transcript on disk.
 */
// NOTE(review): the suite title says "Open AI" but the instantiation test asks for 'faster-whisper',
// while the transcription test uses 'openai-whisper' — confirm which engine this file targets.
describe('Open AI Transcriber', function () {
  const transcriptDirectory = join(root(), 'test-transcript')
  // The whisper CLI names the transcript after the media file (video_short.mp4 -> video_short.vtt)
  const vttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')

  it('Should instantiate', function () {
    transcriberFactory.createFromEngineName('faster-whisper')
  })

  it('Should run transcription on a media file without raising any errors', async function () {
    const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), transcriptDirectory)
    const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')

    const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')

    expect(transcript.path).to.equals(vttTranscriptPath)
    // `expect(value, message)` alone asserts nothing: the assertion has to be chained
    expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
  })

  after(async function () {
    await remove(transcriptDirectory)
  })
})
|
|
@ -0,0 +1,28 @@
|
|||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { expect } from 'chai'
|
||||
import { remove, pathExistsSync } from 'fs-extra/esm.js'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { transcriberFactory } from '@peertube/transcription'
|
||||
|
||||
/**
 * Checks that the OpenAI Whisper transcriber can be created from the factory and that a
 * transcription run produces a VTT transcript on disk.
 */
describe('Open AI Transcriber', function () {
  const transcriptDirectory = join(root(), 'test-transcript')
  // The whisper CLI names the transcript after the media file (video_short.mp4 -> video_short.vtt)
  const vttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')

  it('Should instantiate', function () {
    transcriberFactory.createFromEngineName('openai-whisper')
  })

  it('Should run transcription on a media file without raising any errors', async function () {
    const transcriber = transcriberFactory.createFromEngineName('openai-whisper', createLogger(), transcriptDirectory)
    const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')

    const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')

    expect(transcript.path).to.equals(vttTranscriptPath)
    // `expect(value, message)` alone asserts nothing: the assertion has to be chained
    expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
  })

  after(async function () {
    await remove(transcriptDirectory)
  })
})
|
|
@ -0,0 +1,34 @@
|
|||
import { createLogger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { expect } from 'chai'
|
||||
import { remove, pathExistsSync } from 'fs-extra/esm'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { transcriberFactory } from '@peertube/peertube-transcription'
|
||||
|
||||
/**
 * Runs the same instantiation and transcription checks against every listed engine name.
 */
describe('Transcribers', function () {
  const transcriptDirectory = join(root(), 'test-transcript')
  // The whisper CLI names the transcript after the media file (video_short.mp4 -> video_short.vtt)
  const vttTranscriptPath = join(transcriptDirectory, 'video_short.vtt')
  // NOTE(review): 'faster-whisper' must match the name of a registered engine — confirm against the engine list
  const transcribers = [
    'openai-whisper',
    'faster-whisper'
  ]

  transcribers.forEach(function (transcriberName) {
    it(`Should instantiate a ${transcriberName} transcriber`, function () {
      // Use the engine under test rather than a hard-coded one
      transcriberFactory.createFromEngineName(transcriberName)
    })

    // The engine name is part of the title so each generated test is identifiable in reports
    it(`Should run ${transcriberName} transcription on a media file without raising any errors`, async function () {
      const transcriber = transcriberFactory.createFromEngineName(transcriberName, createLogger(), transcriptDirectory)
      const mediaFilePath = buildAbsoluteFixturePath('video_short.mp4')

      const transcript = await transcriber.transcribe(mediaFilePath, { name: 'tiny' }, 'fr', 'vtt')

      expect(transcript.path).to.equals(vttTranscriptPath)
      // `expect(value, message)` alone asserts nothing: the assertion has to be chained
      expect(pathExistsSync(transcript.path), `Transcript file ${transcript.path} doesn't exist`).to.be.true
    })
  })

  after(async function () {
    await remove(transcriptDirectory)
  })
})
|
|
@ -6,7 +6,8 @@
|
|||
"tsBuildInfoFile": "./dist/.tsbuildinfo",
|
||||
"paths": {
|
||||
"@tests/*": [ "./src/*" ],
|
||||
"@server/*": [ "../../server/core/*" ]
|
||||
"@server/*": [ "../../server/core/*" ],
|
||||
"@peertube/peertube-transcription": [ "../transcription" ]
|
||||
}
|
||||
},
|
||||
"references": [
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
{
|
||||
"name": "@peertube/transcription",
|
||||
"name": "@peertube/peertube-transcription",
|
||||
"private": true,
|
||||
"version": "0.0.0",
|
||||
"main": "dist/index.js",
|
||||
|
|
|
@ -0,0 +1,39 @@
|
|||
import { Logger } from 'winston'
|
||||
import { join } from 'path'
|
||||
import { root } from '@peertube/peertube-node-utils'
|
||||
import { TranscriptionEngine } from './transcription-engine.js'
|
||||
import { TranscriptionModel } from './transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from './transcript.js'
|
||||
import { existsSync } from 'fs'
|
||||
|
||||
/**
 * Base class of every transcriber: holds the engine description, a logger and the
 * directory transcripts are written to. Concrete classes implement `transcribe`.
 */
export abstract class AbstractTranscriber {
  /** Directory used when the caller does not provide one. */
  public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')

  engine: TranscriptionEngine
  logger: Logger
  transcriptDirectory: string

  /**
   * @param engine description of the engine this transcriber drives
   * @param logger logger made available to the transcriber
   * @param transcriptDirectory directory transcripts are written to
   */
  constructor (
    engine: TranscriptionEngine,
    logger: Logger,
    transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY
  ) {
    this.engine = engine
    this.logger = logger
    this.transcriptDirectory = transcriptDirectory
  }

  /** Placeholder: always resolves to an empty string. */
  detectLanguage () {
    return Promise.resolve('')
  }

  /** Placeholder: only probes whether the model file exists, nothing is loaded yet. */
  loadModel (model: TranscriptionModel) {
    // `path` is optional on the model, so guard before touching the file system
    if (model.path && existsSync(model.path)) { /* empty */ }
  }

  /**
   * @returns true when the model format is one of the formats declared by the engine.
   * A model without a format is never reported as supported.
   */
  supports (model: TranscriptionModel) {
    return model.format !== undefined && this.engine.supportedModelFormats.includes(model.format)
  }

  /**
   * Transcribes a media file.
   *
   * @param mediaFilePath path of the media file to transcribe
   * @param model model to run
   * @param language language of the media
   * @param format transcript format to produce
   */
  abstract transcribe (mediaFilePath: string, model: TranscriptionModel, language: string, format: TranscriptFormat): Promise<Transcript>
}
|
|
@ -1,4 +1,8 @@
|
|||
export * from './whisper/index.js'
|
||||
import { TranscriberFactory } from './transcriber-factory.js'
|
||||
import { engines } from './whisper/index.js'
|
||||
|
||||
export * from './transcription-engine.js'
|
||||
export * from './transcription-model.js'
|
||||
export * from './transcription-result.js'
|
||||
export * from './transcript.js'
|
||||
|
||||
export const transcriberFactory = new TranscriberFactory(engines)
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
import { TranscriptionModel } from './transcription-model.js'
|
||||
|
||||
/**
 * Builds transcription model descriptors.
 */
export class ModelFactory {
  /**
   * @param name name of the model
   * @returns a model descriptor carrying only that name
   */
  createModelFromName (name: string): TranscriptionModel {
    const model: TranscriptionModel = { name }

    return model
  }
}
|
|
@ -0,0 +1,30 @@
|
|||
import { Logger, createLogger } from 'winston'
|
||||
import { TranscriptionEngine } from './transcription-engine.js'
|
||||
import { TransformersTranscriber, OpenaiTranscriber } from './whisper/index.js'
|
||||
import { AbstractTranscriber } from './abstract-transcriber.js'
|
||||
|
||||
/**
 * Creates transcribers from the name of a registered engine.
 */
export class TranscriberFactory {
  engines: TranscriptionEngine[]

  /**
   * @param engines engines a transcriber can be created for
   */
  constructor (engines: TranscriptionEngine[]) {
    this.engines = engines
  }

  /**
   * @param engineName name of a registered engine
   * @param logger logger handed to the transcriber
   * @param transcriptDirectory directory the transcriber writes transcripts to
   * @throws Error when no engine has that name, or when no transcriber is implemented for it
   */
  createFromEngineName (
    engineName: string,
    logger: Logger = createLogger(),
    transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY
  ) {
    const engine = this.engines.find(({ name }) => name === engineName)
    if (!engine) {
      throw new Error(`Unknown engine ${engineName}`)
    }

    const transcriberArgs: ConstructorParameters<typeof AbstractTranscriber> = [ engine, logger, transcriptDirectory ]

    switch (engineName) {
      // 'openai-whisper' is the name used by the engine list; 'whisper' is kept for callers
      // that register the engine under its former name
      case 'openai-whisper':
      case 'whisper':
        return new OpenaiTranscriber(...transcriberArgs)
      case 'transformers':
        return new TransformersTranscriber(...transcriberArgs)
      default:
        throw new Error(`Unimplemented engine ${engineName}`)
    }
  }
}
|
|
@ -0,0 +1,3 @@
|
|||
/** Transcript file formats, named after their file extension. */
export type TranscriptFormat = 'txt' | 'vtt' | 'srt'
|
||||
|
||||
/** A transcript file: its path, its format and, optionally, its language. */
export type Transcript = { path: string, language?: string, format: TranscriptFormat }
|
|
@ -1,32 +1,19 @@
|
|||
import { join } from 'path'
|
||||
import { root } from '@peertube/peertube-node-utils'
|
||||
import { TranscriptionModel } from './transcription-model.js'
|
||||
import { TranscriptionResult } from './transcription-result.js'
|
||||
import { ModelFormat } from './transcription-model.js'
|
||||
|
||||
export abstract class TranscriptionEngine {
|
||||
public name: string
|
||||
public description: string
|
||||
public language: string
|
||||
public requirements: string[]
|
||||
public type: 'binary' | 'bindings' | 'ws'
|
||||
public license: string
|
||||
public forgeURL: string
|
||||
/**
|
||||
* The engine, or framework.
|
||||
*/
|
||||
export interface TranscriptionEngine {
|
||||
name: string
|
||||
description: string
|
||||
language: string
|
||||
requirements: string[]
|
||||
type: 'binary' | 'bindings' | 'ws'
|
||||
binary?: string
|
||||
license: string
|
||||
forgeURL: string
|
||||
supportedModelFormats: ModelFormat[]
|
||||
|
||||
public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')
|
||||
// There could be a default models.
|
||||
// There could be a list of default models
|
||||
|
||||
public abstract transcribe (
|
||||
model: TranscriptionModel | string,
|
||||
mediaFilePath: string,
|
||||
language: string,
|
||||
outputFormat: string
|
||||
): Promise<TranscriptionResult>
|
||||
public abstract loadModel (model: TranscriptionModel)
|
||||
public abstract detectLanguage (): Promise<string>
|
||||
public abstract supports (model: TranscriptionModel): boolean
|
||||
|
||||
static getModelName (model: TranscriptionModel | string) {
|
||||
return typeof model === 'string' ? model : model.name
|
||||
}
|
||||
}
|
||||
|
|
|
@ -41,9 +41,11 @@
|
|||
// .'PyTorch' | 'GGML' | 'ONNX' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
|
||||
// https://towardsdatascience.com/guide-to-file-formats-for-machine-learning-columnar-training-inferencing-and-the-feature-store-2e0c3d18d4f9
|
||||
|
||||
export type ModelFormat = 'PyTorch' | 'GGML' | 'ONNX' | 'CTranslate2' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
|
||||
|
||||
export abstract class TranscriptionModel {
|
||||
name: string
|
||||
format?: 'PyTorch' | 'GGML' | 'ONNX' // CoreML, OpenVino, Scikit-Learn, TensorFlow/Keras, PySpark
|
||||
format?: ModelFormat
|
||||
path?: string
|
||||
url?: string
|
||||
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
/** Result of a transcription: the path of the transcript file and, optionally, its language. */
export type TranscriptionResult = { transcriptFilePath: string, language?: string }
|
|
@ -1,38 +0,0 @@
|
|||
import { existsSync } from 'fs'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { TranscriptionEngine } from '../../transcription-engine.js'
|
||||
import { Promise } from 'bluebird'
|
||||
import { TranscriptionResult } from '../../transcription-result.js'
|
||||
|
||||
/**
 * Engine description for whisper.cpp. Transcription itself is not implemented yet.
 */
export class WhisperCppEngine implements TranscriptionEngine {
  // Engines are looked up by name, so the name must not collide with the 'transformers' engine
  name = 'whisper-cpp'
  description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
  // Assigned (not only declared) so the property actually exists at runtime
  type = 'binary' as const
  language = 'cpp'
  requirements = []
  forgeURL = 'https://github.com/ggerganov/whisper.cpp'
  license = 'MIT'

  /** Placeholder: always resolves to an empty string. */
  detectLanguage () {
    return Promise.resolve('')
  }

  /** Placeholder: only probes whether the model file exists, nothing is loaded yet. */
  loadModel (model: TranscriptionModel) {
    if (existsSync(model.path)) { /* empty */ }
  }

  /** Placeholder: reports every model as supported. */
  supports (model: TranscriptionModel) {
    return true
  }

  /** Not implemented: resolves to `undefined`. */
  transcribe (
    model: TranscriptionModel | string,
    mediaFilePath: string,
    language: string,
    outputFormat: string
  ): Promise<TranscriptionResult> {
    return Promise.resolve(undefined)
  }
}
|
||||
|
||||
export const whisperCppEngine = new WhisperCppEngine()
|
|
@ -1,12 +0,0 @@
|
|||
import { TranscriptionEngine } from '../../transcription-engine.js'
|
||||
import { whisperEngine } from './python.js'
|
||||
import { whisperCppEngine } from './cpp.js'
|
||||
import { transformers } from './transformers.js'
|
||||
import { transformersJs } from './transformers-js.js'
|
||||
|
||||
export const engines: TranscriptionEngine[] = [
|
||||
whisperCppEngine,
|
||||
whisperEngine,
|
||||
transformers,
|
||||
transformersJs
|
||||
]
|
|
@ -1,4 +0,0 @@
|
|||
export * from './cpp.js'
|
||||
export * from './python.js'
|
||||
export * from './transformers.js'
|
||||
export * from './transformers-js.js'
|
|
@ -1,65 +0,0 @@
|
|||
import { existsSync } from 'fs'
|
||||
import { join } from 'path'
|
||||
import { ChildProcess } from 'child_process'
|
||||
import { $ } from 'execa'
|
||||
import { TranscriptionEngine } from '../../transcription-engine.js'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { TranscriptionResult } from '../../transcription-result.js'
|
||||
|
||||
type TranscriptFormat = 'txt' | 'vtt' | 'srt'
|
||||
|
||||
/**
 * Engine driving the OpenAI `whisper` command line client.
 */
export class WhisperEngine implements TranscriptionEngine {
  // These fields are assigned (not only declared) so they actually exist at runtime:
  // engines are looked up by `name`
  name = 'whisper'
  description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
  requirements = [ 'python', 'pyTorch', 'ffmpeg' ]
  language = 'python'
  type = 'binary' as const
  binary = 'whisper'
  forgeURL = 'https://github.com/openai/whisper'
  license = 'MIT'
  process?: ChildProcess
  transcriptDirectory: string

  /**
   * @param transcriptDirectory directory the CLI writes transcripts to
   */
  public constructor (transcriptDirectory: WhisperEngine['transcriptDirectory'] = TranscriptionEngine.DEFAULT_TRANSCRIPT_DIRECTORY) {
    this.transcriptDirectory = transcriptDirectory
  }

  /** Placeholder: always resolves to an empty string. */
  detectLanguage () {
    return Promise.resolve('')
  }

  /** Placeholder: only probes whether the model file exists, nothing is loaded yet. */
  loadModel (model: TranscriptionModel) {
    if (existsSync(model.path)) { /* empty */ }
  }

  /** @returns true for PyTorch models only */
  supports (model: TranscriptionModel) {
    return model.format === 'PyTorch'
  }

  /**
   * Runs the whisper CLI on a media file, writing every output format to the transcript directory.
   *
   * @param model model, or model name, to run
   * @param mediaFilePath path of the media file to transcribe
   * @param format format of the transcript whose path is returned
   * @returns the path of the transcript in the requested format; `language` is left empty
   */
  async transcribe (
    model: TranscriptionModel | string,
    mediaFilePath: string,
    format: TranscriptFormat = 'vtt'
  ): Promise<TranscriptionResult> {
    const $$ = $({ verbose: true })

    await $$`whisper ${[
      mediaFilePath,
      '--model',
      TranscriptionEngine.getModelName(model),
      '--output_format',
      'all',
      '--output_dir',
      this.transcriptDirectory
    ]}`

    return {
      language: '',
      transcriptFilePath: join(this.transcriptDirectory, `${this.getTranscriptBaseName(mediaFilePath)}.${format}`)
    }
  }

  /** The CLI names its output after the media file: file name without directory nor extension. */
  private getTranscriptBaseName (mediaFilePath: string) {
    const fileName = mediaFilePath.split(/[\\/]/).pop() ?? ''
    const extensionIndex = fileName.lastIndexOf('.')

    // A leading dot (hidden file) is part of the name, not an extension
    return extensionIndex > 0 ? fileName.slice(0, extensionIndex) : fileName
  }
}
|
||||
|
||||
export const whisperEngine = new WhisperEngine()
|
|
@ -1,42 +0,0 @@
|
|||
// import { pipeline, env } from '@xenova/transformers'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { TranscriptionEngine } from '../../transcription-engine.js'
|
||||
import { TranscriptionResult } from '../../transcription-result.js'
|
||||
import { Promise } from 'bluebird'
|
||||
|
||||
// Disable local models
|
||||
// env.allowLocalModels = true
|
||||
|
||||
/**
 * Engine description for transformers.js. Transcription itself is not implemented yet.
 */
class TransformersJs implements TranscriptionEngine {
  name = 'transformers.js'
  description = ''
  requirements = []
  language = 'js'
  // These fields are assigned (not only declared) so they actually exist at runtime
  forgeURL = 'https://github.com/xenova/transformers.js'
  license = 'Apache-2.0'
  type = 'bindings' as const

  /** Not implemented: resolves to `undefined`. */
  transcribe (
    model: TranscriptionModel | string,
    mediaFilePath: string,
    language: string,
    outputFormat: string
  ): Promise<TranscriptionResult> {
    return Promise.resolve(undefined)
    // return pipeline('automatic-speech-recognition', 'no_attentions', {
    //   // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
    //   revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main'
    // })
  }

  /** Placeholder: always resolves to an empty string. */
  detectLanguage (): Promise<string> {
    return Promise.resolve('')
  }

  /** Placeholder: does nothing. */
  loadModel (model: TranscriptionModel) {
  }

  /** Placeholder: reports every model as unsupported. */
  supports (model: TranscriptionModel): boolean {
    return false
  }
}
|
||||
|
||||
export const transformersJs = new TransformersJs()
|
|
@ -1,38 +0,0 @@
|
|||
import { TranscriptionEngine } from '../../transcription-engine.js'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { existsSync } from 'fs'
|
||||
import { TranscriptionResult } from '../../transcription-result.js'
|
||||
import { Promise } from 'bluebird'
|
||||
|
||||
/**
 * Engine description for the Hugging Face Transformers library. Transcription itself is
 * not implemented yet.
 */
export class Transformers implements TranscriptionEngine {
  name = 'transformers'
  // NOTE(review): description is shared with the whisper.cpp engine — confirm the intended wording
  description = 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model'
  // Assigned (not only declared) so the property actually exists at runtime
  type = 'binary' as const
  // Transformers is a Python library; language and forge URL previously described whisper.cpp
  language = 'python'
  requirements = []
  forgeURL = 'https://github.com/huggingface/transformers'
  license = 'Apache-2.0'

  /** Placeholder: reports every model as supported. */
  supports (model: TranscriptionModel) {
    return true
  }

  /** Placeholder: always resolves to an empty string. */
  detectLanguage () {
    return Promise.resolve('')
  }

  /** Placeholder: only probes whether the model file exists, nothing is loaded yet. */
  loadModel (model: TranscriptionModel) {
    if (existsSync(model.path)) { /* empty */ }
  }

  /** Not implemented: resolves to `undefined`. */
  transcribe (
    model: TranscriptionModel | string,
    mediaFilePath: string,
    language: string,
    outputFormat: string
  ): Promise<TranscriptionResult> {
    return Promise.resolve(undefined)
  }
}
|
||||
|
||||
export const transformers = new Transformers()
|
|
@ -0,0 +1,46 @@
|
|||
import { TranscriptionEngine } from '../transcription-engine.js'
|
||||
|
||||
/**
 * Engines a transcriber can be created for. Engines are looked up by `name`.
 */
export const engines: TranscriptionEngine[] = [
  {
    name: 'whisper-cpp',
    description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
    type: 'binary',
    language: 'cpp',
    requirements: [],
    forgeURL: 'https://github.com/ggerganov/whisper.cpp',
    license: 'MIT',
    // whisper.cpp loads models converted to the ggml format
    supportedModelFormats: [ 'GGML' ]
  },
  {
    name: 'transformers',
    description: 'Hugging Face Transformers library',
    type: 'binary',
    language: 'python',
    requirements: [],
    forgeURL: 'https://github.com/huggingface/transformers',
    license: 'Apache-2.0',
    // NOTE(review): confirm the formats this engine is meant to accept
    supportedModelFormats: [ 'ONNX' ]
  },
  {
    name: 'openai-whisper',
    description: 'High-performance inference of OpenAI\'s Whisper automatic speech recognition model',
    requirements: [ 'python', 'pyTorch', 'ffmpeg' ],
    language: 'python',
    type: 'binary',
    binary: 'whisper',
    forgeURL: 'https://github.com/openai/whisper',
    license: 'MIT',
    supportedModelFormats: [ 'PyTorch' ]
  },
  {
    name: 'whisper-ctranslate2',
    description: 'Whisper command line client compatible with the original OpenAI client, based on CTranslate2',
    requirements: [ 'python' ],
    language: 'python',
    type: 'binary',
    binary: 'whisper-ctranslate2',
    // whisper-ctranslate2 is a separate project from the OpenAI client
    forgeURL: 'https://github.com/Softcatala/whisper-ctranslate2',
    license: 'MIT',
    supportedModelFormats: [ 'CTranslate2' ]
  }
]
|
|
@ -1 +1,2 @@
|
|||
export * from './engine/index.js'
|
||||
export * from './transcriber/index.js'
|
||||
export * from './engines.js'
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
import { join } from 'path'
|
||||
import { $ } from 'execa'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
|
||||
/**
 * Transcriber driving the `whisper-ctranslate2` command line client (faster-whisper).
 */
export class FasterWhisperTranscriber extends AbstractTranscriber {
  /**
   * Runs the CLI on a media file, writing every output format to the transcript directory.
   *
   * @param mediaFilePath path of the media file to transcribe
   * @param model model to run, identified by its name
   * @param language language reported in the returned transcript
   * @param format format of the transcript whose path is returned
   * @returns the transcript in the requested format
   */
  // NOTE(review): `language` is not forwarded to the CLI, it is only echoed in the result — confirm intent
  async transcribe (
    mediaFilePath: string,
    model: TranscriptionModel,
    language: string,
    format: TranscriptFormat = 'vtt'
  ): Promise<Transcript> {
    const $$ = $({ verbose: true })

    // This transcriber targets whisper-ctranslate2, not the OpenAI `whisper` binary
    await $$`whisper-ctranslate2 ${[
      mediaFilePath,
      '--model',
      model.name,
      '--output_format',
      'all',
      '--output_dir',
      this.transcriptDirectory
    ]}`

    return {
      language,
      path: join(this.transcriptDirectory, `${this.getTranscriptBaseName(mediaFilePath)}.${format}`),
      format
    }
  }

  /** The CLI names its output after the media file: file name without directory nor extension. */
  private getTranscriptBaseName (mediaFilePath: string) {
    const fileName = mediaFilePath.split(/[\\/]/).pop() ?? ''
    const extensionIndex = fileName.lastIndexOf('.')

    // A leading dot (hidden file) is part of the name, not an extension
    return extensionIndex > 0 ? fileName.slice(0, extensionIndex) : fileName
  }
}
|
|
@ -0,0 +1,3 @@
|
|||
export * from './transformers-js-transcriber.js'
|
||||
export * from './transformers-transcriber.js'
|
||||
export * from './openai-transcriber.js'
|
|
@ -0,0 +1,34 @@
|
|||
import { join } from 'path'
|
||||
import { $ } from 'execa'
|
||||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
|
||||
/**
 * Transcriber driving the OpenAI `whisper` command line client.
 */
export class OpenaiTranscriber extends AbstractTranscriber {
  /**
   * Runs the CLI on a media file, writing every output format to the transcript directory.
   *
   * @param mediaFilePath path of the media file to transcribe
   * @param model model to run, identified by its name
   * @param language language reported in the returned transcript
   * @param format format of the transcript whose path is returned
   * @returns the transcript in the requested format
   */
  // NOTE(review): `language` is not forwarded to the CLI, it is only echoed in the result — confirm intent
  async transcribe (
    mediaFilePath: string,
    model: TranscriptionModel,
    language: string,
    format: TranscriptFormat = 'vtt'
  ): Promise<Transcript> {
    const $$ = $({ verbose: true })

    await $$`whisper ${[
      mediaFilePath,
      '--model',
      model.name,
      '--output_format',
      'all',
      '--output_dir',
      this.transcriptDirectory
    ]}`

    return {
      language,
      path: join(this.transcriptDirectory, `${this.getTranscriptBaseName(mediaFilePath)}.${format}`),
      format
    }
  }

  /** The CLI names its output after the media file: file name without directory nor extension. */
  private getTranscriptBaseName (mediaFilePath: string) {
    const fileName = mediaFilePath.split(/[\\/]/).pop() ?? ''
    const extensionIndex = fileName.lastIndexOf('.')

    // A leading dot (hidden file) is part of the name, not an extension
    return extensionIndex > 0 ? fileName.slice(0, extensionIndex) : fileName
  }
}
|
|
@ -0,0 +1,22 @@
|
|||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
import { Promise } from 'bluebird'
|
||||
|
||||
// Disable local models
|
||||
// env.allowLocalModels = true
|
||||
|
||||
/**
 * Transcriber meant to rely on transformers.js. Not implemented yet.
 */
export class TransformersJsTranscriber extends AbstractTranscriber {
  /**
   * Not implemented: resolves to `undefined` whatever the arguments.
   */
  async transcribe (
    mediaFilePath: string,
    model: TranscriptionModel,
    language: string,
    format: TranscriptFormat = 'vtt'
  ): Promise<Transcript> {
    const notImplemented = Promise.resolve(undefined)

    return notImplemented
    // return pipeline('automatic-speech-recognition', 'no_attentions', {
    //   // For medium models, we need to load the `no_attentions` revision to avoid running out of memory
    //   revision: [].includes('/whisper-medium') ? 'no_attentions' : 'main'
    // })
  }
}
|
|
@ -0,0 +1,14 @@
|
|||
import { TranscriptionModel } from '../../transcription-model.js'
|
||||
import { AbstractTranscriber } from '../../abstract-transcriber.js'
|
||||
import { Transcript, TranscriptFormat } from '../../transcript.js'
|
||||
|
||||
/**
 * Transcriber meant to rely on the transformers engine. Not implemented yet.
 */
export class TransformersTranscriber extends AbstractTranscriber {
  /**
   * Not implemented: resolves to `undefined` whatever the arguments.
   */
  async transcribe (
    mediaFilePath: string,
    model: TranscriptionModel,
    language: string,
    format: TranscriptFormat = 'vtt'
  ): Promise<Transcript> {
    const notImplemented = Promise.resolve(undefined)

    return notImplemented
  }
}
|
|
@ -0,0 +1,10 @@
|
|||
{
|
||||
"extends": "./tsconfig.json",
|
||||
"compilerOptions": {
|
||||
"outDir": "../types-generator/dist/peertube-transcription",
|
||||
"tsBuildInfoFile": "../types-generator/dist/peertube-transcription/.tsbuildinfo",
|
||||
"stripInternal": true,
|
||||
"removeComments": false,
|
||||
"emitDeclarationOnly": true
|
||||
}
|
||||
}
|
|
@ -14,6 +14,7 @@
|
|||
{ "path": "../packages/ffmpeg" },
|
||||
{ "path": "../packages/models" },
|
||||
{ "path": "../packages/node-utils" },
|
||||
{ "path": "../packages/transcription" },
|
||||
{ "path": "../packages/typescript-utils" }
|
||||
],
|
||||
"include": [
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
{ "path": "./packages/models" },
|
||||
{ "path": "./packages/node-utils" },
|
||||
{ "path": "./packages/server-commands" },
|
||||
{ "path": "./packages/transcription" },
|
||||
{ "path": "./packages/typescript-utils" }
|
||||
]
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue