From d0de452d1222ada8d219a8c5bc42498a89e5ecea Mon Sep 17 00:00:00 2001 From: Sean Quah <8349537+squahtx@users.noreply.github.com> Date: Fri, 19 May 2023 11:17:12 +0100 Subject: [PATCH] Fix `HomeServer`s leaking during `trial` test runs (#15630) This change fixes two memory leaks during `trial` test runs. Garbage collection is disabled during each test case and a gen-0 GC is run at the end of each test. However, when the gen-0 GC is run, the `TestCase` object usually still holds references to the `HomeServer` used during the test. As a result, the `HomeServer` gets promoted to gen-1 and then never garbage collected. Fix this by periodically running full GCs. Additionally, fix `HomeServer`s leaking after tests that touch inbound federation due to `FederationRateLimiter`s adding themselves to a global set, by turning the set into a `WeakSet`. Resolves #15622. Signed-off-by: Sean Quah --- changelog.d/15630.misc | 1 + synapse/util/ratelimitutils.py | 6 +++++- tests/unittest.py | 11 +++++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) create mode 100644 changelog.d/15630.misc diff --git a/changelog.d/15630.misc b/changelog.d/15630.misc new file mode 100644 index 0000000000..a30304bfd6 --- /dev/null +++ b/changelog.d/15630.misc @@ -0,0 +1 @@ +Fix two memory leaks in `trial` test runs. diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py index f262bf95a0..2ad55ac13e 100644 --- a/synapse/util/ratelimitutils.py +++ b/synapse/util/ratelimitutils.py @@ -25,10 +25,12 @@ from typing import ( Iterator, List, Mapping, + MutableSet, Optional, Set, Tuple, ) +from weakref import WeakSet from prometheus_client.core import Counter from typing_extensions import ContextManager @@ -86,7 +88,9 @@ queue_wait_timer = Histogram( ) -_rate_limiter_instances: Set["FederationRateLimiter"] = set() +# This must be a `WeakSet`, otherwise we indirectly hold on to entire `HomeServer`s +# during trial test runs and leak a lot of memory. +_rate_limiter_instances: MutableSet["FederationRateLimiter"] = WeakSet() # Protects the _rate_limiter_instances set from concurrent access _rate_limiter_instances_lock = threading.Lock() diff --git a/tests/unittest.py b/tests/unittest.py index b6fdf69635..623c5a75a2 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -229,13 +229,20 @@ class TestCase(unittest.TestCase): # # The easiest way to do this would be to do a full GC after each test # run, but that is very expensive. Instead, we disable GC (above) for - # the duration of the test so that we only need to run a gen-0 GC, which - # is a lot quicker. + # the duration of the test and only run a gen-0 GC, which is a lot + # quicker. This doesn't clean up everything, since the TestCase + # instance still holds references to objects created during the test, + # such as HomeServers, so we do a full GC every so often. @around(self) def tearDown(orig: Callable[[], R]) -> R: ret = orig() gc.collect(0) + # Run a full GC every 50 gen-0 GCs. + gen0_stats = gc.get_stats()[0] + gen0_collections = gen0_stats["collections"] + if gen0_collections % 50 == 0: + gc.collect() gc.enable() set_current_context(SENTINEL_CONTEXT)