diff --git a/synapse/http/server.py b/synapse/http/server.py index 9edb957ec2..165c684d0d 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -102,6 +102,10 @@ response_db_txn_duration = metrics.register_counter( ), ) +# seconds spent waiting for a db connection, when processing this request +response_db_sched_duration = metrics.register_counter( + "response_db_sched_duration_seconds", labels=["method", "servlet", "tag"] +) _next_request_id = 0 @@ -372,6 +376,9 @@ class RequestMetrics(object): response_db_txn_duration.inc_by( context.db_txn_duration_ms / 1000., request.method, self.name, tag ) + response_db_sched_duration.inc_by( + context.db_sched_duration_ms / 1000., request.method, self.name, tag + ) class RootRedirect(resource.Resource): diff --git a/synapse/http/site.py b/synapse/http/site.py index dc64f0f6f5..e422c8dfae 100644 --- a/synapse/http/site.py +++ b/synapse/http/site.py @@ -67,13 +67,14 @@ class SynapseRequest(Request): ru_utime, ru_stime = context.get_resource_usage() db_txn_count = context.db_txn_count db_txn_duration_ms = context.db_txn_duration_ms + db_sched_duration_ms = context.db_sched_duration_ms except Exception: ru_utime, ru_stime = (0, 0) db_txn_count, db_txn_duration_ms = (0, 0) self.site.access_logger.info( "%s - %s - {%s}" - " Processed request: %dms (%dms, %dms) (%dms/%d)" + " Processed request: %dms (%dms, %dms) (%dms/%dms/%d)" " %sB %s \"%s %s %s\" \"%s\"", self.getClientIP(), self.site.site_tag, @@ -81,6 +82,7 @@ class SynapseRequest(Request): int(time.time() * 1000) - self.start_time, int(ru_utime * 1000), int(ru_stime * 1000), + db_sched_duration_ms, db_txn_duration_ms, int(db_txn_count), self.sentLength, diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 986617674c..68125006eb 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -347,7 +347,9 @@ class SQLBaseStore(object): def inner_func(conn, *args, **kwargs): with LoggingContext("runWithConnection") as context: - sql_scheduling_timer.inc_by(time.time() * 1000 - start_time) + sched_duration_ms = time.time() * 1000 - start_time + sql_scheduling_timer.inc_by(sched_duration_ms) + current_context.add_database_scheduled(sched_duration_ms) if self.database_engine.is_connection_closed(conn): logger.debug("Reconnecting closed database connection") diff --git a/synapse/util/logcontext.py b/synapse/util/logcontext.py index a78e53812f..94fa7cac98 100644 --- a/synapse/util/logcontext.py +++ b/synapse/util/logcontext.py @@ -59,7 +59,8 @@ class LoggingContext(object): __slots__ = [ "previous_context", "name", "ru_stime", "ru_utime", - "db_txn_count", "db_txn_duration_ms", "usage_start", "usage_end", + "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", + "usage_start", "usage_end", "main_thread", "alive", "request", "tag", ] @@ -86,6 +87,9 @@ class LoggingContext(object): def add_database_transaction(self, duration_ms): pass + def add_database_scheduled(self, sched_ms): + pass + def __nonzero__(self): return False @@ -101,6 +105,9 @@ class LoggingContext(object): # ms spent waiting for db txns, excluding scheduling time self.db_txn_duration_ms = 0 + # ms spent waiting for db txns to be scheduled + self.db_sched_duration_ms = 0 + self.usage_start = None self.usage_end = None self.main_thread = threading.current_thread() @@ -210,6 +217,15 @@ class LoggingContext(object): self.db_txn_count += 1 self.db_txn_duration_ms += duration_ms + def add_database_scheduled(self, sched_ms): + """Record a use of the database pool + + Args: + sched_ms (int): number of milliseconds it took us to get a + connection + """ + self.db_sched_duration_ms += sched_ms + class LoggingContextFilter(logging.Filter): """Logging filter that adds values from the current logging context to each diff --git a/synapse/util/metrics.py b/synapse/util/metrics.py index d25629cc50..059bb7fedf 100644 --- a/synapse/util/metrics.py +++ b/synapse/util/metrics.py @@ -80,6 +80,11 @@ block_db_txn_duration = metrics.register_counter( ), ) +# seconds spent waiting for a db connection, in this block +block_db_sched_duration = metrics.register_counter( + "block_db_sched_duration_seconds", labels=["block_name"], +) + def measure_func(name): def wrapper(func): @@ -96,7 +101,9 @@ def measure_func(name): class Measure(object): __slots__ = [ "clock", "name", "start_context", "start", "new_context", "ru_utime", - "ru_stime", "db_txn_count", "db_txn_duration_ms", "created_context" + "ru_stime", + "db_txn_count", "db_txn_duration_ms", "db_sched_duration_ms", + "created_context", ] def __init__(self, clock, name): @@ -117,6 +124,7 @@ class Measure(object): self.ru_utime, self.ru_stime = self.start_context.get_resource_usage() self.db_txn_count = self.start_context.db_txn_count self.db_txn_duration_ms = self.start_context.db_txn_duration_ms + self.db_sched_duration_ms = self.start_context.db_sched_duration_ms def __exit__(self, exc_type, exc_val, exc_tb): if isinstance(exc_type, Exception) or not self.start_context: @@ -149,6 +157,10 @@ class Measure(object): (context.db_txn_duration_ms - self.db_txn_duration_ms) / 1000., self.name ) + block_db_sched_duration.inc_by( + (context.db_sched_duration_ms - self.db_sched_duration_ms) / 1000., + self.name + ) if self.created_context: self.start_context.__exit__(exc_type, exc_val, exc_tb)