2023-07-31 14:34:36 +02:00
|
|
|
import { readdir } from 'fs/promises'
|
2023-02-17 09:21:11 +01:00
|
|
|
import { constants, NodeGCPerformanceDetail, PerformanceObserver } from 'perf_hooks'
|
2022-07-19 11:58:19 +02:00
|
|
|
import * as process from 'process'
|
2022-11-15 13:27:46 +01:00
|
|
|
import { Meter, ObservableResult } from '@opentelemetry/api'
|
2022-09-09 11:11:47 +02:00
|
|
|
import { ExplicitBucketHistogramAggregation } from '@opentelemetry/sdk-metrics'
|
2023-07-31 14:34:36 +02:00
|
|
|
import { View } from '@opentelemetry/sdk-metrics/build/src/view/View.js'
|
|
|
|
import { logger } from '@server/helpers/logger.js'
|
2022-07-19 11:58:19 +02:00
|
|
|
|
|
|
|
// Thanks to https://github.com/siimon/prom-client
|
|
|
|
// We took their logic and adapted it for opentelemetry
|
|
|
|
// Try to keep consistency with their metric name/description so it's easier to process (grafana dashboard template etc)
|
|
|
|
|
|
|
|
export class NodeJSObserversBuilder {
|
|
|
|
|
2022-08-12 16:41:29 +02:00
|
|
|
constructor (private readonly meter: Meter) {
|
|
|
|
}
|
|
|
|
|
|
|
|
static getViews () {
|
|
|
|
return [
|
|
|
|
new View({
|
|
|
|
aggregation: new ExplicitBucketHistogramAggregation([ 0.001, 0.01, 0.1, 1, 2, 5 ]),
|
|
|
|
instrumentName: 'nodejs_gc_duration_seconds'
|
|
|
|
})
|
|
|
|
]
|
2022-07-19 11:58:19 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
buildObservers () {
|
|
|
|
this.buildCPUObserver()
|
|
|
|
this.buildMemoryObserver()
|
|
|
|
|
|
|
|
this.buildHandlesObserver()
|
|
|
|
this.buildFileDescriptorsObserver()
|
|
|
|
|
|
|
|
this.buildGCObserver()
|
|
|
|
this.buildEventLoopLagObserver()
|
|
|
|
|
|
|
|
this.buildLibUVActiveRequestsObserver()
|
|
|
|
this.buildActiveResourcesObserver()
|
|
|
|
}
|
|
|
|
|
|
|
|
private buildCPUObserver () {
|
|
|
|
const cpuTotal = this.meter.createObservableCounter('process_cpu_seconds_total', {
|
|
|
|
description: 'Total user and system CPU time spent in seconds.'
|
|
|
|
})
|
|
|
|
const cpuUser = this.meter.createObservableCounter('process_cpu_user_seconds_total', {
|
|
|
|
description: 'Total user CPU time spent in seconds.'
|
|
|
|
})
|
|
|
|
const cpuSystem = this.meter.createObservableCounter('process_cpu_system_seconds_total', {
|
|
|
|
description: 'Total system CPU time spent in seconds.'
|
|
|
|
})
|
|
|
|
|
|
|
|
let lastCpuUsage = process.cpuUsage()
|
|
|
|
|
|
|
|
this.meter.addBatchObservableCallback(observableResult => {
|
|
|
|
const cpuUsage = process.cpuUsage()
|
|
|
|
|
|
|
|
const userUsageMicros = cpuUsage.user - lastCpuUsage.user
|
|
|
|
const systemUsageMicros = cpuUsage.system - lastCpuUsage.system
|
|
|
|
|
|
|
|
lastCpuUsage = cpuUsage
|
|
|
|
|
|
|
|
observableResult.observe(cpuTotal, (userUsageMicros + systemUsageMicros) / 1e6)
|
|
|
|
observableResult.observe(cpuUser, userUsageMicros / 1e6)
|
|
|
|
observableResult.observe(cpuSystem, systemUsageMicros / 1e6)
|
|
|
|
|
|
|
|
}, [ cpuTotal, cpuUser, cpuSystem ])
|
|
|
|
}
|
|
|
|
|
|
|
|
private buildMemoryObserver () {
|
|
|
|
this.meter.createObservableGauge('nodejs_memory_usage_bytes', {
|
|
|
|
description: 'Memory'
|
|
|
|
}).addCallback(observableResult => {
|
|
|
|
const current = process.memoryUsage()
|
|
|
|
|
|
|
|
observableResult.observe(current.heapTotal, { memoryType: 'heapTotal' })
|
|
|
|
observableResult.observe(current.heapUsed, { memoryType: 'heapUsed' })
|
|
|
|
observableResult.observe(current.arrayBuffers, { memoryType: 'arrayBuffers' })
|
|
|
|
observableResult.observe(current.external, { memoryType: 'external' })
|
|
|
|
observableResult.observe(current.rss, { memoryType: 'rss' })
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
private buildHandlesObserver () {
|
|
|
|
if (typeof (process as any)._getActiveHandles !== 'function') return
|
|
|
|
|
|
|
|
this.meter.createObservableGauge('nodejs_active_handles_total', {
|
|
|
|
description: 'Total number of active handles.'
|
|
|
|
}).addCallback(observableResult => {
|
|
|
|
const handles = (process as any)._getActiveHandles()
|
|
|
|
|
|
|
|
observableResult.observe(handles.length)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
private buildGCObserver () {
|
|
|
|
const kinds = {
|
|
|
|
[constants.NODE_PERFORMANCE_GC_MAJOR]: 'major',
|
|
|
|
[constants.NODE_PERFORMANCE_GC_MINOR]: 'minor',
|
|
|
|
[constants.NODE_PERFORMANCE_GC_INCREMENTAL]: 'incremental',
|
|
|
|
[constants.NODE_PERFORMANCE_GC_WEAKCB]: 'weakcb'
|
|
|
|
}
|
|
|
|
|
|
|
|
const histogram = this.meter.createHistogram('nodejs_gc_duration_seconds', {
|
|
|
|
description: 'Garbage collection duration by kind, one of major, minor, incremental or weakcb'
|
|
|
|
})
|
|
|
|
|
|
|
|
const obs = new PerformanceObserver(list => {
|
|
|
|
const entry = list.getEntries()[0]
|
|
|
|
|
2023-10-11 13:59:24 +02:00
|
|
|
const kind = kinds[(entry.detail as NodeGCPerformanceDetail).kind]
|
2022-07-19 11:58:19 +02:00
|
|
|
|
|
|
|
// Convert duration from milliseconds to seconds
|
|
|
|
histogram.record(entry.duration / 1000, {
|
|
|
|
kind
|
|
|
|
})
|
|
|
|
})
|
|
|
|
|
|
|
|
obs.observe({ entryTypes: [ 'gc' ] })
|
|
|
|
}
|
|
|
|
|
|
|
|
private buildEventLoopLagObserver () {
|
|
|
|
const reportEventloopLag = (start: [ number, number ], observableResult: ObservableResult, res: () => void) => {
|
|
|
|
const delta = process.hrtime(start)
|
|
|
|
const nanosec = delta[0] * 1e9 + delta[1]
|
|
|
|
const seconds = nanosec / 1e9
|
|
|
|
|
|
|
|
observableResult.observe(seconds)
|
|
|
|
|
|
|
|
res()
|
|
|
|
}
|
|
|
|
|
|
|
|
this.meter.createObservableGauge('nodejs_eventloop_lag_seconds', {
|
|
|
|
description: 'Lag of event loop in seconds.'
|
|
|
|
}).addCallback(observableResult => {
|
|
|
|
return new Promise(res => {
|
|
|
|
const start = process.hrtime()
|
|
|
|
|
|
|
|
setImmediate(reportEventloopLag, start, observableResult, res)
|
|
|
|
})
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
private buildFileDescriptorsObserver () {
|
|
|
|
this.meter.createObservableGauge('process_open_fds', {
|
|
|
|
description: 'Number of open file descriptors.'
|
|
|
|
}).addCallback(async observableResult => {
|
|
|
|
try {
|
|
|
|
const fds = await readdir('/proc/self/fd')
|
|
|
|
observableResult.observe(fds.length - 1)
|
|
|
|
} catch (err) {
|
|
|
|
logger.debug('Cannot list file descriptors of current process for OpenTelemetry.', { err })
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
private buildLibUVActiveRequestsObserver () {
|
|
|
|
if (typeof (process as any)._getActiveRequests !== 'function') return
|
|
|
|
|
|
|
|
this.meter.createObservableGauge('nodejs_active_requests_total', {
|
|
|
|
description: 'Total number of active libuv requests.'
|
2022-07-19 15:34:44 +02:00
|
|
|
}).addCallback(observableResult => {
|
2022-07-19 11:58:19 +02:00
|
|
|
const requests = (process as any)._getActiveRequests()
|
|
|
|
|
|
|
|
observableResult.observe(requests.length)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
private buildActiveResourcesObserver () {
|
|
|
|
if (typeof (process as any).getActiveResourcesInfo !== 'function') return
|
|
|
|
|
|
|
|
const grouped = this.meter.createObservableCounter('nodejs_active_resources', {
|
|
|
|
description: 'Number of active resources that are currently keeping the event loop alive, grouped by async resource type.'
|
|
|
|
})
|
|
|
|
const total = this.meter.createObservableCounter('nodejs_active_resources_total', {
|
|
|
|
description: 'Total number of active resources.'
|
|
|
|
})
|
|
|
|
|
|
|
|
this.meter.addBatchObservableCallback(observableResult => {
|
|
|
|
const resources = (process as any).getActiveResourcesInfo()
|
|
|
|
|
|
|
|
const data = {}
|
|
|
|
|
|
|
|
for (let i = 0; i < resources.length; i++) {
|
|
|
|
const resource = resources[i]
|
|
|
|
|
|
|
|
if (data[resource] === undefined) data[resource] = 0
|
|
|
|
data[resource] += 1
|
|
|
|
}
|
|
|
|
|
|
|
|
for (const type of Object.keys(data)) {
|
|
|
|
observableResult.observe(grouped, data[type], { type })
|
|
|
|
}
|
|
|
|
|
|
|
|
observableResult.observe(total, resources.length)
|
|
|
|
}, [ grouped, total ])
|
|
|
|
}
|
|
|
|
}
|