From 18279631e9555bd9032b993074e62c7af886d9cd Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 23 Aug 2023 16:24:30 +0100 Subject: [PATCH] Fix rare deadlock when using read/write locks (#16169) --- changelog.d/16169.bugfix | 1 + .../02_read_write_locks_deadlock.sql.postgres | 37 ---------- .../04_read_write_locks_deadlock.sql.postgres | 71 +++++++++++++++++++ 3 files changed, 72 insertions(+), 37 deletions(-) create mode 100644 changelog.d/16169.bugfix delete mode 100644 synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres create mode 100644 synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres diff --git a/changelog.d/16169.bugfix b/changelog.d/16169.bugfix new file mode 100644 index 0000000000..ed8830692f --- /dev/null +++ b/changelog.d/16169.bugfix @@ -0,0 +1 @@ +Fix a rare race that could block new events from being sent for up to two minutes. Introduced in v1.90.0. diff --git a/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres b/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres deleted file mode 100644 index 401c42e18a..0000000000 --- a/synapse/storage/schema/main/delta/80/02_read_write_locks_deadlock.sql.postgres +++ /dev/null @@ -1,37 +0,0 @@ -/* Copyright 2023 The Matrix.org Foundation C.I.C - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - --- To avoid the possibility of a deadlock, lock the --- `worker_read_write_locks_mode` table so that we serialize inserts/deletes --- for a specific lock name/key. - -CREATE OR REPLACE FUNCTION delete_read_write_lock_parent_before() RETURNS trigger AS $$ -BEGIN - -- `PERFORM` is a `SELECT` which discards the rows. - PERFORM * FROM worker_read_write_locks_mode - WHERE - lock_name = OLD.lock_name - AND lock_key = OLD.lock_key - FOR UPDATE; - - RETURN OLD; -END -$$ -LANGUAGE plpgsql; - -DROP TRIGGER IF EXISTS delete_read_write_lock_parent_before_trigger ON worker_read_write_locks; -CREATE TRIGGER delete_read_write_lock_parent_before_trigger BEFORE DELETE ON worker_read_write_locks - FOR EACH ROW - EXECUTE PROCEDURE delete_read_write_lock_parent_before(); diff --git a/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres b/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres new file mode 100644 index 0000000000..0eb459c0b9 --- /dev/null +++ b/synapse/storage/schema/main/delta/80/04_read_write_locks_deadlock.sql.postgres @@ -0,0 +1,71 @@ +/* Copyright 2023 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +-- Remove a previous attempt to avoid deadlocks +DROP TRIGGER IF EXISTS delete_read_write_lock_parent_before_trigger ON worker_read_write_locks; +DROP FUNCTION IF EXISTS delete_read_write_lock_parent_before; + + +-- Ensure that we keep `worker_read_write_locks_mode` up to date whenever a lock +-- is released (i.e. a row deleted from `worker_read_write_locks`). Either we +-- update the `worker_read_write_locks_mode.token` to match another instance +-- that has currently acquired the lock, or we delete the row if nobody has +-- currently acquired a lock. +CREATE OR REPLACE FUNCTION delete_read_write_lock_parent() RETURNS trigger AS $$ +DECLARE + new_token TEXT; + mode_row_token TEXT; +BEGIN + -- Only update the token in `_mode` if its our token. This prevents + -- deadlocks. + -- + -- We shove the token into `mode_row_token`, as otherwise postgres complains + -- we're not using the returned data. + SELECT token INTO mode_row_token FROM worker_read_write_locks_mode + WHERE + lock_name = OLD.lock_name + AND lock_key = OLD.lock_key + AND token = OLD.token + FOR UPDATE; + + IF NOT FOUND THEN + RETURN NEW; + END IF; + + SELECT token INTO new_token FROM worker_read_write_locks + WHERE + lock_name = OLD.lock_name + AND lock_key = OLD.lock_key + LIMIT 1 FOR UPDATE SKIP LOCKED; + + IF NOT FOUND THEN + DELETE FROM worker_read_write_locks_mode + WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key AND token = OLD.token; + ELSE + UPDATE worker_read_write_locks_mode + SET token = new_token + WHERE lock_name = OLD.lock_name AND lock_key = OLD.lock_key; + END IF; + + RETURN NEW; +END +$$ +LANGUAGE plpgsql; + +DROP TRIGGER IF EXISTS delete_read_write_lock_parent_trigger ON worker_read_write_locks; +CREATE TRIGGER delete_read_write_lock_parent_trigger AFTER DELETE ON worker_read_write_locks + FOR EACH ROW + EXECUTE PROCEDURE delete_read_write_lock_parent();