Avoid incrementing bg process utime/stime counters by negative durations (#14323)

pull/14340/head
David Robertson 2022-10-31 13:02:07 +00:00 committed by GitHub
parent 7911e2835d
commit 2bb2c32e8e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 35 additions and 5 deletions

1
changelog.d/14323.bugfix Normal file
View File

@ -0,0 +1 @@
Fix a bug introduced in Synapse 0.34.0rc2 where logs could include error spam when background processes are measured as taking a negative amount of time.

View File

@ -56,7 +56,6 @@ exclude = (?x)
|tests/rest/media/v1/test_media_storage.py
|tests/server.py
|tests/server_notices/test_resource_limits_server_notices.py
|tests/test_metrics.py
|tests/test_state.py
|tests/test_terms_auth.py
|tests/util/caches/test_cached_call.py
@ -106,6 +105,9 @@ disallow_untyped_defs = False
[mypy-tests.handlers.test_user_directory]
disallow_untyped_defs = True
[mypy-tests.metrics.test_background_process_metrics]
disallow_untyped_defs = True
[mypy-tests.push.test_bulk_push_rule_evaluator]
disallow_untyped_defs = True

View File

@ -174,8 +174,10 @@ class _BackgroundProcess:
diff = new_stats - self._reported_stats
self._reported_stats = new_stats
_background_process_ru_utime.labels(self.desc).inc(diff.ru_utime)
_background_process_ru_stime.labels(self.desc).inc(diff.ru_stime)
# For unknown reasons, the difference in times can be negative. See comment in
# synapse.http.request_metrics.RequestMetrics.update_metrics.
_background_process_ru_utime.labels(self.desc).inc(max(diff.ru_utime, 0))
_background_process_ru_stime.labels(self.desc).inc(max(diff.ru_stime, 0))
_background_process_db_txn_count.labels(self.desc).inc(diff.db_txn_count)
_background_process_db_txn_duration.labels(self.desc).inc(
diff.db_txn_duration_sec

View File

View File

@ -0,0 +1,19 @@
from unittest import TestCase as StdlibTestCase
from unittest.mock import Mock
from synapse.logging.context import ContextResourceUsage, LoggingContext
from synapse.metrics.background_process_metrics import _BackgroundProcess
class TestBackgroundProcessMetrics(StdlibTestCase):
def test_update_metrics_with_negative_time_diff(self) -> None:
"""We should ignore negative reported utime and stime differences"""
usage = ContextResourceUsage()
usage.ru_stime = usage.ru_utime = -1.0
mock_logging_context = Mock(spec=LoggingContext)
mock_logging_context.get_resource_usage.return_value = usage
process = _BackgroundProcess("test process", mock_logging_context)
# Should not raise
process.update_metrics()

View File

@ -12,6 +12,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing_extensions import Protocol
try:
from importlib import metadata
except ImportError:
@ -52,7 +54,11 @@ def get_sample_labels_value(sample):
class TestMauLimit(unittest.TestCase):
def test_basic(self):
gauge = InFlightGauge(
class MetricEntry(Protocol):
foo: int
bar: int
gauge: InFlightGauge[MetricEntry] = InFlightGauge(
"test1", "", labels=["test_label"], sub_metrics=["foo", "bar"]
)
@ -146,7 +152,7 @@ class CacheMetricsTests(unittest.HomeserverTestCase):
Caches produce metrics reflecting their state when scraped.
"""
CACHE_NAME = "cache_metrics_test_fgjkbdfg"
cache = DeferredCache(CACHE_NAME, max_entries=777)
cache: DeferredCache[str, str] = DeferredCache(CACHE_NAME, max_entries=777)
items = {
x.split(b"{")[0].decode("ascii"): x.split(b" ")[1].decode("ascii")