mirror of https://github.com/Chocobozzz/PeerTube
chore: add performance markers
parent
2e242129b9
commit
5d1d0ab565
|
@ -4,7 +4,8 @@ import { expect } from 'chai'
|
|||
import { existsSync } from 'node:fs'
|
||||
import { rm, mkdir, readFile } from 'node:fs/promises'
|
||||
import { buildAbsoluteFixturePath, root } from '@peertube/peertube-node-utils'
|
||||
import { transcriberFactory } from '@peertube/peertube-transcription'
|
||||
import { toHumanReadable, transcriberFactory } from '@peertube/peertube-transcription'
|
||||
import { performance, PerformanceObserver } from 'node:perf_hooks'
|
||||
|
||||
describe('Transcribers', function () {
|
||||
const transcriptDirectory = join(root(), 'test-transcript')
|
||||
|
@ -17,6 +18,13 @@ describe('Transcribers', function () {
|
|||
|
||||
before(async function () {
|
||||
await mkdir(transcriptDirectory, { recursive: true })
|
||||
|
||||
const performanceObserver = new PerformanceObserver((items) => {
|
||||
items
|
||||
.getEntries()
|
||||
.forEach((entry) => console.log(`Transcription ${entry.name} took ${toHumanReadable(entry.duration)}`))
|
||||
})
|
||||
performanceObserver.observe({ type: 'measure' })
|
||||
})
|
||||
|
||||
transcribers.forEach(function (transcriberName) {
|
||||
|
@ -55,5 +63,6 @@ describe('Transcribers', function () {
|
|||
|
||||
after(async function () {
|
||||
await rm(transcriptDirectory, { recursive: true, force: true })
|
||||
performance.clearMarks()
|
||||
})
|
||||
})
|
||||
|
|
|
@ -5,6 +5,9 @@ import { TranscriptionEngine } from './transcription-engine.js'
|
|||
import { TranscriptionModel } from './transcription-model.js'
|
||||
import { Transcript, TranscriptFormat } from './transcript.js'
|
||||
import { existsSync } from 'fs'
|
||||
import { PerformanceObserver } from 'node:perf_hooks'
|
||||
import short from 'short-uuid'
|
||||
import assert from 'node:assert'
|
||||
|
||||
export abstract class AbstractTranscriber {
|
||||
public static DEFAULT_TRANSCRIPT_DIRECTORY = join(root(), 'dist', 'transcripts')
|
||||
|
@ -12,15 +15,19 @@ export abstract class AbstractTranscriber {
|
|||
engine: TranscriptionEngine
|
||||
logger: Logger
|
||||
transcriptDirectory: string
|
||||
performanceObserver?: PerformanceObserver
|
||||
runId?: string
|
||||
|
||||
constructor (
|
||||
engine: TranscriptionEngine,
|
||||
logger: Logger,
|
||||
transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY
|
||||
transcriptDirectory: string = AbstractTranscriber.DEFAULT_TRANSCRIPT_DIRECTORY,
|
||||
performanceObserver?: PerformanceObserver
|
||||
) {
|
||||
this.engine = engine
|
||||
this.logger = logger
|
||||
this.transcriptDirectory = transcriptDirectory
|
||||
this.performanceObserver = performanceObserver
|
||||
}
|
||||
|
||||
detectLanguage () {
|
||||
|
@ -35,5 +42,44 @@ export abstract class AbstractTranscriber {
|
|||
return model.format === 'PyTorch'
|
||||
}
|
||||
|
||||
/**
 * Begins timing a transcription run.
 *
 * Assigns a fresh `runId` (a short uuid followed by the engine name) and records
 * the "started" performance mark for that run.
 */
createPerformanceMark () {
  const uniquePrefix = short.uuid()
  this.runId = `${uniquePrefix}-${this.engine.name}`

  const startMarkName = this.getStartPerformanceMarkName()
  performance.mark(startMarkName)
}
|
||||
|
||||
/**
 * Ends timing of the current transcription run.
 *
 * Records the "ended" mark, then creates a `measure` entry named after `runId`
 * spanning the "started" and "ended" marks, so that any `PerformanceObserver`
 * observing `measure` entries gets notified.
 *
 * Never throws: timing is best-effort, so any failure (for instance when no run
 * was started and `runId` is unset) is logged at `error` level instead.
 */
measurePerformanceMark () {
  try {
    const { runId } = this
    assert(runId, 'Each transcription run should have an id.')

    const startMarkName = this.getStartPerformanceMarkName()
    const endMarkName = this.getEndPerformanceMarkName()

    performance.mark(endMarkName)
    performance.measure(runId, startMarkName, endMarkName)

    // The marks are only needed to build the measure: drop them so they do not
    // pile up in the performance timeline run after run.
    performance.clearMarks(startMarkName)
    performance.clearMarks(endMarkName)
  } catch (e) {
    // The caught value is `unknown`: log a string rather than the raw value.
    this.logger.log({ level: 'error', message: e instanceof Error ? e.message : String(e) })
  }
}
|
||||
|
||||
/**
 * Name of the performance mark recorded when the current run starts.
 *
 * @returns the run id suffixed with `-started`
 * @throws AssertionError when no run id has been set
 */
getStartPerformanceMarkName () {
  const hasRunId = !!this.runId
  assert(hasRunId, 'Each transcription run should have an id.')

  return this.runId + '-started'
}
|
||||
|
||||
/**
 * Name of the performance mark recorded when the current run ends.
 *
 * @returns the run id suffixed with `-ended`
 * @throws AssertionError when no run id has been set
 */
getEndPerformanceMarkName () {
  const hasRunId = !!this.runId
  assert(hasRunId, 'Each transcription run should have an id.')

  return this.runId + '-ended'
}
|
||||
|
||||
/**
 * Placeholder: currently does nothing.
 *
 * The body only holds a commented-out sketch of a `PerformanceObserver` that would
 * log the duration of each measured transcription and then clear the marks.
 */
perf () {
  // const transcriptionPerformanceObserver = new PerformanceObserver((items) => {
  //   items
  //     .getEntries()
  //     .forEach((entry) => logger.debug(`Transcription n°${entry.name} took ${toHumanReadable(entry.duration)}`, entry))
  //   performance.clearMarks()
  // })
}
|
||||
|
||||
abstract transcribe (mediaFilePath: string, model: TranscriptionModel, language: string, format: TranscriptFormat): Promise<Transcript>
|
||||
}
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
import { toHumanReadable, toTimecode } from './duration';
|
||||
|
||||
// Unit tests for the duration formatting helpers.
describe('duration', () => {
  // Milliseconds are rendered as space-separated "<n>h <n>m <n>s" words.
  test('toHumanReadable', async () => {
    const minuteInMs = 60000;
    const hourAndMinuteInMs = minuteInMs * 60 + minuteInMs;

    expect(toHumanReadable(minuteInMs)).toEqual('1m');
    expect(toHumanReadable(hourAndMinuteInMs)).toEqual('1h 1m');
  });

  // Seconds (given as strings, possibly fractional) are rendered as "HH:MM:SS".
  test('toTimecode', async () => {
    const aboutOneMinute = '60.41545';
    expect(toTimecode(aboutOneMinute)).toEqual('00:01:00');

    const oneHour = '3600';
    expect(toTimecode(oneHour)).toEqual('01:00:00');
  });
});
|
|
@ -0,0 +1,35 @@
|
|||
/**
 * One component of a human readable duration: an amount and the unit suffix
 * appended to it (for instance `{ duration: 2, unit: 'h' }` for `2h`).
 */
export interface DurationDescriptor {
  duration: number
  unit: string
}

/**
 * Formats a duration given in milliseconds as space-separated words, for
 * instance `1h 1m 5s`. Components equal to zero are omitted.
 *
 * Hours are not capped: 25 hours is rendered as `25h`, not wrapped to `1h`.
 *
 * @param ms duration in milliseconds
 * @returns the formatted duration, or an empty string when there is nothing to
 * display (less than one second, negative, `NaN` or infinite input)
 */
export function toHumanReadable (ms: number) {
  // Work on whole seconds, the smallest unit displayed.
  const totalSeconds = Math.floor(ms / 1000)

  if (!Number.isFinite(totalSeconds) || totalSeconds <= 0) return ''

  // Plain arithmetic rather than a Date: a Date would wrap the hours every 24h.
  const durationDescriptors: DurationDescriptor[] = [
    { duration: Math.floor(totalSeconds / 3600), unit: 'h' },
    { duration: Math.floor((totalSeconds % 3600) / 60), unit: 'm' },
    { duration: totalSeconds % 60, unit: 's' }
  ]

  return durationDescriptors
    .map(toWords)
    .filter((words) => words)
    .join(' ')
}

/**
 * Renders a single duration component.
 *
 * @returns the amount immediately followed by its unit (`3s`), or an empty
 * string when the amount is not strictly positive
 */
export function toWords ({ duration, unit }: DurationDescriptor) {
  return duration > 0 ? `${duration}${unit}` : ''
}
|
||||
|
||||
/**
 * Formats a number of seconds as an `HH:MM:SS` timecode.
 *
 * Fractional seconds are truncated and negative values are clamped to
 * `00:00:00`. Hours are not capped: 25 hours is rendered as `25:00:00`.
 * The result does not depend on the time zone of the host.
 *
 * @param s seconds, as a number or a numeric string (parsed with `parseFloat`)
 * @returns the timecode; `NaN:NaN:NaN` when `s` cannot be parsed to a finite number
 */
export function toTimecode (s: number | string) {
  const parsed = parseFloat(s.toString())
  const totalSeconds = Number.isFinite(parsed) ? Math.max(0, Math.trunc(parsed)) : NaN

  // Plain arithmetic rather than a local-time Date: a Date would wrap the hours
  // every 24h and make the result depend on the host time zone.
  const hours = Math.floor(totalSeconds / 3600)
  const minutes = Math.floor((totalSeconds % 3600) / 60)
  const seconds = totalSeconds % 60

  return `${padLeft(hours)}:${padLeft(minutes)}:${padLeft(seconds)}`
}

/**
 * Left-pads the decimal representation of `value` with zeros up to `length` characters.
 */
function padLeft (value: number, length = 2): string {
  return value.toString().padStart(length, '0')
}
|
|
@ -1,6 +1,8 @@
|
|||
import { TranscriberFactory } from './transcriber-factory.js'
|
||||
import { engines } from './whisper/index.js'
|
||||
|
||||
export * from './duration.js'
|
||||
|
||||
export * from './transcription-engine.js'
|
||||
export * from './transcription-model.js'
|
||||
export * from './transcript.js'
|
||||
|
|
|
@ -12,6 +12,7 @@ export class Ctranslate2Transcriber extends AbstractTranscriber {
|
|||
language: string,
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
this.createPerformanceMark()
|
||||
const $$ = $({ verbose: true })
|
||||
const { baseName } = getFileInfo(mediaFilePath)
|
||||
|
||||
|
@ -25,7 +26,7 @@ export class Ctranslate2Transcriber extends AbstractTranscriber {
|
|||
this.transcriptDirectory
|
||||
]}`
|
||||
|
||||
await $$`ls ${this.transcriptDirectory}`
|
||||
this.measurePerformanceMark()
|
||||
|
||||
return {
|
||||
language,
|
||||
|
|
|
@ -12,12 +12,13 @@ export class OpenaiTranscriber extends AbstractTranscriber {
|
|||
language: string,
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
this.createPerformanceMark()
|
||||
// Shall we run the command with `{ shell: true }` to get the same error as in sh ?
|
||||
// ex: ENOENT => Command not found
|
||||
const $$ = $({ verbose: true })
|
||||
const { baseName } = getFileInfo(mediaFilePath)
|
||||
|
||||
const { stdout } = await $$`whisper ${[
|
||||
await $$`whisper ${[
|
||||
mediaFilePath,
|
||||
'--model',
|
||||
model.name,
|
||||
|
@ -26,10 +27,8 @@ export class OpenaiTranscriber extends AbstractTranscriber {
|
|||
'--output_dir',
|
||||
this.transcriptDirectory
|
||||
]}`
|
||||
console.log(stdout)
|
||||
|
||||
const { stdout: lsStdout } = await $$`ls ${this.transcriptDirectory}`
|
||||
console.log(lsStdout)
|
||||
this.measurePerformanceMark()
|
||||
|
||||
return {
|
||||
language,
|
||||
|
|
|
@ -15,6 +15,8 @@ export class WhisperTimestampedTranscriber extends AbstractTranscriber {
|
|||
language: string,
|
||||
format: TranscriptFormat = 'vtt'
|
||||
): Promise<Transcript> {
|
||||
this.createPerformanceMark()
|
||||
|
||||
const $$ = $({ verbose: true })
|
||||
const { baseName, name } = getFileInfo(mediaFilePath)
|
||||
await $$`whisper_timestamped ${[
|
||||
|
@ -30,9 +32,9 @@ export class WhisperTimestampedTranscriber extends AbstractTranscriber {
|
|||
const internalTranscriptPath = join(this.transcriptDirectory, `${name}.${format}`)
|
||||
const transcriptPath = join(this.transcriptDirectory, `${baseName}.${format}`)
|
||||
assert(existsSync(internalTranscriptPath), '')
|
||||
|
||||
await rename(internalTranscriptPath, transcriptPath)
|
||||
await $$`ls ${this.transcriptDirectory}`
|
||||
|
||||
this.measurePerformanceMark()
|
||||
|
||||
return {
|
||||
language,
|
||||
|
|
Loading…
Reference in New Issue