From 1b179455fce9aaf8ce8d1f28f0e552efdee2b7f9 Mon Sep 17 00:00:00 2001 From: "Paul \"LeoNerd\" Evans" Date: Wed, 19 Oct 2016 15:34:38 +0100 Subject: [PATCH] Guard registration of process-wide metrics by existence of the requisite /proc entries --- synapse/metrics/__init__.py | 85 ++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 40 deletions(-) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index a79fb026d4..323328061a 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -115,6 +115,10 @@ def render_all(): TICKS_PER_SEC = 100 BYTES_PER_PAGE = 4096 +HAVE_PROC_STAT = os.path.exists("/proc/stat") +HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") +HAVE_PROC_SELF_LIMITS = os.path.exists("/proc/self/limits") + rusage = None stats = None fd_counts = None @@ -122,13 +126,11 @@ fd_counts = None # In order to report process_start_time_seconds we need to know the machine's # boot time, because the value in /proc/self/stat is relative to this boot_time = None -try: +if HAVE_PROC_STAT: with open("/proc/stat") as _procstat: for line in _procstat: if line.startswith("btime "): boot_time = int(line.split()[1]) -except IOError: - pass TYPES = { stat.S_IFSOCK: "SOCK", @@ -144,11 +146,12 @@ def update_resource_metrics(): global rusage rusage = getrusage(RUSAGE_SELF) - global stats - with open("/proc/self/stat") as s: - line = s.read() - # line is PID (command) more stats go here ... - stats = line.split(") ", 1)[1].split(" ") + if HAVE_PROC_SELF_STAT: + global stats + with open("/proc/self/stat") as s: + line = s.read() + # line is PID (command) more stats go here ... + stats = line.split(") ", 1)[1].split(" ") global fd_counts fd_counts = _process_fds() @@ -195,43 +198,45 @@ get_metrics_for("process").register_callback("fds", _process_fds, labels=["type" ## New prometheus-standard metric names process_metrics = get_metrics_for("process"); -process_metrics.register_callback( - "cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC -) -process_metrics.register_callback( - "cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC -) -process_metrics.register_callback( - "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC -) +if HAVE_PROC_SELF_STAT: + process_metrics.register_callback( + "cpu_user_seconds_total", lambda: float(stats[11]) / TICKS_PER_SEC + ) + process_metrics.register_callback( + "cpu_system_seconds_total", lambda: float(stats[12]) / TICKS_PER_SEC + ) + process_metrics.register_callback( + "cpu_seconds_total", lambda: (float(stats[11]) + float(stats[12])) / TICKS_PER_SEC + ) -process_metrics.register_callback( - "virtual_memory_bytes", lambda: int(stats[20]) -) -process_metrics.register_callback( - "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE -) + process_metrics.register_callback( + "virtual_memory_bytes", lambda: int(stats[20]) + ) + process_metrics.register_callback( + "resident_memory_bytes", lambda: int(stats[21]) * BYTES_PER_PAGE + ) -process_metrics.register_callback( - "open_fds", lambda: sum(fd_counts.values()) -) + process_metrics.register_callback( + "open_fds", lambda: sum(fd_counts.values()) + ) -def _get_max_fds(): - with open("/proc/self/limits") as limits: - for line in limits: - if not line.startswith("Max open files "): - continue - # Line is Max open files $SOFT $HARD - return int(line.split()[3]) - return None + process_metrics.register_callback( + "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC + ) -process_metrics.register_callback( - "max_fds", lambda: _get_max_fds() -) +if HAVE_PROC_SELF_LIMITS: + def _get_max_fds(): + with open("/proc/self/limits") as limits: + for line in limits: + if not line.startswith("Max open files "): + continue + # Line is Max open files $SOFT $HARD + return int(line.split()[3]) + return None -process_metrics.register_callback( - "start_time_seconds", lambda: boot_time + int(stats[19]) / TICKS_PER_SEC -) + process_metrics.register_callback( + "max_fds", lambda: _get_max_fds() + ) reactor_metrics = get_metrics_for("reactor") tick_time = reactor_metrics.register_distribution("tick_time")