diff --git a/synapse/storage/events_bg_updates.py b/synapse/storage/events_bg_updates.py index 5717baf48c..e77a7e28af 100644 --- a/synapse/storage/events_bg_updates.py +++ b/synapse/storage/events_bg_updates.py @@ -71,6 +71,19 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): "redactions_received_ts", self._redactions_received_ts ) + # This index gets deleted in `event_fix_redactions_bytes` update + self.register_background_index_update( + "event_fix_redactions_bytes_create_index", + index_name="redactions_censored_redacts", + table="redactions", + columns=["redacts"], + where_clause="have_censored", + ) + + self.register_background_update_handler( + "event_fix_redactions_bytes", self._event_fix_redactions_bytes + ) + @defer.inlineCallbacks def _background_reindex_fields_sender(self, progress, batch_size): target_min_stream_id = progress["target_min_stream_id_inclusive"] @@ -458,3 +471,33 @@ class EventsBackgroundUpdatesStore(BackgroundUpdateStore): yield self._end_background_update("redactions_received_ts") return count + + @defer.inlineCallbacks + def _event_fix_redactions_bytes(self, progress, batch_size): + """Undoes hex encoded censored redacted event JSON. + """ + + def _event_fix_redactions_bytes_txn(txn): + # This update is quite fast due to new index. + txn.execute( + """ + UPDATE event_json + SET + json = convert_from(json::bytea, 'utf8') + FROM redactions + WHERE + redactions.have_censored + AND event_json.event_id = redactions.redacts + AND json NOT LIKE '{%'; + """ + ) + + txn.execute("DROP INDEX redactions_censored_redacts") + + yield self.runInteraction( + "_event_fix_redactions_bytes", _event_fix_redactions_bytes_txn + ) + + yield self._end_background_update("event_fix_redactions_bytes") + + return 1 diff --git a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres index f7bcc5e2f2..67471f3ef5 100644 --- a/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres +++ b/synapse/storage/schema/delta/56/redaction_censor3_fix_update.sql.postgres @@ -15,12 +15,11 @@ -- There was a bug where we may have updated censored redactions as bytes, --- which can (somehow) cause json to be inserted hex encoded. This goes and --- undoes any such hex encoded JSON. -UPDATE event_json SET json = convert_from(json::bytea, 'utf8') -WHERE event_id IN ( - SELECT event_json.event_id - FROM event_json - INNER JOIN redactions ON (event_json.event_id = redacts) - WHERE have_censored AND json NOT LIKE '{%' -); +-- which can (somehow) cause json to be inserted hex encoded. These updates go +-- and undoes any such hex encoded JSON. + +INSERT into background_updates (update_name, progress_json) + VALUES ('event_fix_redactions_bytes_create_index', '{}'); + +INSERT into background_updates (update_name, progress_json, depends_on) + VALUES ('event_fix_redactions_bytes', '{}', 'event_fix_redactions_bytes_create_index');