206 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			Python
		
	
	
			
		
		
	
	
			206 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			Python
		
	
	
| # Copyright 2015-2022 The Matrix.org Foundation C.I.C.
 | |
| #
 | |
| # Licensed under the Apache License, Version 2.0 (the "License");
 | |
| # you may not use this file except in compliance with the License.
 | |
| # You may obtain a copy of the License at
 | |
| #
 | |
| #     http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| 
 | |
| 
 | |
| import gc
 | |
| import logging
 | |
| import platform
 | |
| import time
 | |
| from typing import Iterable
 | |
| 
 | |
| from prometheus_client.core import (
 | |
|     REGISTRY,
 | |
|     CounterMetricFamily,
 | |
|     Gauge,
 | |
|     GaugeMetricFamily,
 | |
|     Histogram,
 | |
|     Metric,
 | |
| )
 | |
| 
 | |
| from twisted.internet import task
 | |
| 
 | |
| from synapse.metrics._types import Collector
 | |
| 
 | |
| """Prometheus metrics for garbage collection"""
 | |
| 
 | |
| 
 | |
| logger = logging.getLogger(__name__)
 | |
| 
 | |
| # The minimum time in seconds between GCs for each generation, regardless of the current GC
 | |
| # thresholds and counts.
 | |
| MIN_TIME_BETWEEN_GCS = (1.0, 10.0, 30.0)
 | |
| 
 | |
| running_on_pypy = platform.python_implementation() == "PyPy"
 | |
| 
 | |
| #
 | |
| # Python GC metrics
 | |
| #
 | |
| 
 | |
| gc_unreachable = Gauge("python_gc_unreachable_total", "Unreachable GC objects", ["gen"])
 | |
| gc_time = Histogram(
 | |
|     "python_gc_time",
 | |
|     "Time taken to GC (sec)",
 | |
|     ["gen"],
 | |
|     buckets=[
 | |
|         0.0025,
 | |
|         0.005,
 | |
|         0.01,
 | |
|         0.025,
 | |
|         0.05,
 | |
|         0.10,
 | |
|         0.25,
 | |
|         0.50,
 | |
|         1.00,
 | |
|         2.50,
 | |
|         5.00,
 | |
|         7.50,
 | |
|         15.00,
 | |
|         30.00,
 | |
|         45.00,
 | |
|         60.00,
 | |
|     ],
 | |
| )
 | |
| 
 | |
| 
 | |
| class GCCounts(Collector):
 | |
|     def collect(self) -> Iterable[Metric]:
 | |
|         cm = GaugeMetricFamily("python_gc_counts", "GC object counts", labels=["gen"])
 | |
|         for n, m in enumerate(gc.get_count()):
 | |
|             cm.add_metric([str(n)], m)
 | |
| 
 | |
|         yield cm
 | |
| 
 | |
| 
 | |
| def install_gc_manager() -> None:
 | |
|     """Disable automatic GC, and replace it with a task that runs every 100ms
 | |
| 
 | |
|     This means that (a) we can limit how often GC runs; (b) we can get some metrics
 | |
|     about GC activity.
 | |
| 
 | |
|     It does nothing on PyPy.
 | |
|     """
 | |
| 
 | |
|     if running_on_pypy:
 | |
|         return
 | |
| 
 | |
|     REGISTRY.register(GCCounts())
 | |
| 
 | |
|     gc.disable()
 | |
| 
 | |
|     # The time (in seconds since the epoch) of the last time we did a GC for each generation.
 | |
|     _last_gc = [0.0, 0.0, 0.0]
 | |
| 
 | |
|     def _maybe_gc() -> None:
 | |
|         # Check if we need to do a manual GC (since its been disabled), and do
 | |
|         # one if necessary. Note we go in reverse order as e.g. a gen 1 GC may
 | |
|         # promote an object into gen 2, and we don't want to handle the same
 | |
|         # object multiple times.
 | |
|         threshold = gc.get_threshold()
 | |
|         counts = gc.get_count()
 | |
|         end = time.time()
 | |
|         for i in (2, 1, 0):
 | |
|             # We check if we need to do one based on a straightforward
 | |
|             # comparison between the threshold and count. We also do an extra
 | |
|             # check to make sure that we don't a GC too often.
 | |
|             if threshold[i] < counts[i] and MIN_TIME_BETWEEN_GCS[i] < end - _last_gc[i]:
 | |
|                 if i == 0:
 | |
|                     logger.debug("Collecting gc %d", i)
 | |
|                 else:
 | |
|                     logger.info("Collecting gc %d", i)
 | |
| 
 | |
|                 start = time.time()
 | |
|                 unreachable = gc.collect(i)
 | |
|                 end = time.time()
 | |
| 
 | |
|                 _last_gc[i] = end
 | |
| 
 | |
|                 gc_time.labels(i).observe(end - start)
 | |
|                 gc_unreachable.labels(i).set(unreachable)
 | |
| 
 | |
|     gc_task = task.LoopingCall(_maybe_gc)
 | |
|     gc_task.start(0.1)
 | |
| 
 | |
| 
 | |
| #
 | |
| # PyPy GC / memory metrics
 | |
| #
 | |
| 
 | |
| 
 | |
| class PyPyGCStats(Collector):
 | |
|     def collect(self) -> Iterable[Metric]:
 | |
| 
 | |
|         # @stats is a pretty-printer object with __str__() returning a nice table,
 | |
|         # plus some fields that contain data from that table.
 | |
|         # unfortunately, fields are pretty-printed themselves (i. e. '4.5MB').
 | |
|         stats = gc.get_stats(memory_pressure=False)  # type: ignore
 | |
|         # @s contains same fields as @stats, but as actual integers.
 | |
|         s = stats._s  # type: ignore
 | |
| 
 | |
|         # also note that field naming is completely braindead
 | |
|         # and only vaguely correlates with the pretty-printed table.
 | |
|         # >>>> gc.get_stats(False)
 | |
|         # Total memory consumed:
 | |
|         #     GC used:            8.7MB (peak: 39.0MB)        # s.total_gc_memory, s.peak_memory
 | |
|         #        in arenas:            3.0MB                  # s.total_arena_memory
 | |
|         #        rawmalloced:          1.7MB                  # s.total_rawmalloced_memory
 | |
|         #        nursery:              4.0MB                  # s.nursery_size
 | |
|         #     raw assembler used: 31.0kB                      # s.jit_backend_used
 | |
|         #     -----------------------------
 | |
|         #     Total:              8.8MB                       # stats.memory_used_sum
 | |
|         #
 | |
|         #     Total memory allocated:
 | |
|         #     GC allocated:            38.7MB (peak: 41.1MB)  # s.total_allocated_memory, s.peak_allocated_memory
 | |
|         #        in arenas:            30.9MB                 # s.peak_arena_memory
 | |
|         #        rawmalloced:          4.1MB                  # s.peak_rawmalloced_memory
 | |
|         #        nursery:              4.0MB                  # s.nursery_size
 | |
|         #     raw assembler allocated: 1.0MB                  # s.jit_backend_allocated
 | |
|         #     -----------------------------
 | |
|         #     Total:                   39.7MB                 # stats.memory_allocated_sum
 | |
|         #
 | |
|         #     Total time spent in GC:  0.073                  # s.total_gc_time
 | |
| 
 | |
|         pypy_gc_time = CounterMetricFamily(
 | |
|             "pypy_gc_time_seconds_total",
 | |
|             "Total time spent in PyPy GC",
 | |
|             labels=[],
 | |
|         )
 | |
|         pypy_gc_time.add_metric([], s.total_gc_time / 1000)
 | |
|         yield pypy_gc_time
 | |
| 
 | |
|         pypy_mem = GaugeMetricFamily(
 | |
|             "pypy_memory_bytes",
 | |
|             "Memory tracked by PyPy allocator",
 | |
|             labels=["state", "class", "kind"],
 | |
|         )
 | |
|         # memory used by JIT assembler
 | |
|         pypy_mem.add_metric(["used", "", "jit"], s.jit_backend_used)
 | |
|         pypy_mem.add_metric(["allocated", "", "jit"], s.jit_backend_allocated)
 | |
|         # memory used by GCed objects
 | |
|         pypy_mem.add_metric(["used", "", "arenas"], s.total_arena_memory)
 | |
|         pypy_mem.add_metric(["allocated", "", "arenas"], s.peak_arena_memory)
 | |
|         pypy_mem.add_metric(["used", "", "rawmalloced"], s.total_rawmalloced_memory)
 | |
|         pypy_mem.add_metric(["allocated", "", "rawmalloced"], s.peak_rawmalloced_memory)
 | |
|         pypy_mem.add_metric(["used", "", "nursery"], s.nursery_size)
 | |
|         pypy_mem.add_metric(["allocated", "", "nursery"], s.nursery_size)
 | |
|         # totals
 | |
|         pypy_mem.add_metric(["used", "totals", "gc"], s.total_gc_memory)
 | |
|         pypy_mem.add_metric(["allocated", "totals", "gc"], s.total_allocated_memory)
 | |
|         pypy_mem.add_metric(["used", "totals", "gc_peak"], s.peak_memory)
 | |
|         pypy_mem.add_metric(["allocated", "totals", "gc_peak"], s.peak_allocated_memory)
 | |
|         yield pypy_mem
 | |
| 
 | |
| 
 | |
| if running_on_pypy:
 | |
|     REGISTRY.register(PyPyGCStats())
 |