Guard registration of process-wide metrics by existence of the requisite /proc entries

pull/1177/head
Paul "LeoNerd" Evans 2016-10-19 15:34:38 +01:00
parent 981f852d54
commit 1b179455fc
1 changed files with 45 additions and 40 deletions

View File

@ -115,6 +115,10 @@ def render_all():
TICKS_PER_SEC = 100 TICKS_PER_SEC = 100
BYTES_PER_PAGE = 4096 BYTES_PER_PAGE = 4096
HAVE_PROC_STAT = os.path.exists("/proc/stat")
HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat")
HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits")
rusage = None rusage = None
stats = None stats = None
fd_counts = None fd_counts = None
@ -122,13 +126,11 @@ fd_counts = None
# In order to report process_start_time_seconds we need to know the machine's # In order to report process_start_time_seconds we need to know the machine's
# boot time, because the value in /proc/self/stat is relative to this # boot time, because the value in /proc/self/stat is relative to this
boot_time = None boot_time = None
try: if HAVE_PROC_STAT:
with open("/proc/stat") as _procstat: with open("/proc/stat") as _procstat:
for line in _procstat: for line in _procstat:
if line.startswith("btime "): if line.startswith("btime "):
boot_time = int(line.split()[1]) boot_time = int(line.split()[1])
except IOError:
pass
TYPES = { TYPES = {
stat.S_IFSOCK: "SOCK", stat.S_IFSOCK: "SOCK",
@ -144,11 +146,12 @@ def update_resource_metrics():
global rusage global rusage
rusage = getrusage(RUSAGE_SELF) rusage = getrusage(RUSAGE_SELF)
global stats if HAVE_PROC_SELF_STAT:
with open("/proc/self/stat") as s: global stats
line = s.read() with open("/proc/self/stat") as s:
# line is PID (command) more stats go here ... line = s.read()
stats = line.split(") ", 1)[1].split(" ") # line is PID (command) more stats go here ...
stats = line.split(") ", 1)[1].split(" ")
global fd_counts global fd_counts
fd_counts = _process_fds() fd_counts = _process_fds()
@ -195,43 +198,45 @@ get_metrics_for("process").register_callback("fds", _process_fds, labels=["type"
## New prometheus-standard metric names ## New prometheus-standard metric names
process_metrics = get_metrics_for("process"); process_metrics = get_metrics_for("process");
process_metrics.register_callback( if HAVE_PROC_SELF_STAT:
"cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC process_metrics.register_callback(
) "cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC
process_metrics.register_callback( )
"cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC process_metrics.register_callback(
) "cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC
process_metrics.register_callback( )
"cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC process_metrics.register_callback(
) "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC
)
process_metrics.register_callback( process_metrics.register_callback(
"virtual_memory_bytes", lambda: int(stats[20]) "virtual_memory_bytes", lambda: int(stats[20])
) )
process_metrics.register_callback( process_metrics.register_callback(
"resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE
) )
process_metrics.register_callback( process_metrics.register_callback(
"open_fds", lambda: sum(fd_counts.values()) "open_fds", lambda: sum(fd_counts.values())
) )
def _get_max_fds(): process_metrics.register_callback(
with open("/proc/self/limits") as limits: "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC
for line in limits: )
if not line.startswith("Max open files "):
continue
# Line is Max open files $SOFT $HARD
return int(line.split()[3])
return None
process_metrics.register_callback( if HAVE_PROC_SELF_LIMITS:
"max_fds", lambda: _get_max_fds() def _get_max_fds():
) with open("/proc/self/limits") as limits:
for line in limits:
if not line.startswith("Max open files "):
continue
# Line is Max open files $SOFT $HARD
return int(line.split()[3])
return None
process_metrics.register_callback( process_metrics.register_callback(
"start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC "max_fds", lambda: _get_max_fds()
) )
reactor_metrics = get_metrics_for("reactor") reactor_metrics = get_metrics_for("reactor")
tick_time = reactor_metrics.register_distribution("tick_time") tick_time = reactor_metrics.register_distribution("tick_time")