Merge pull request #4627 from matrix-org/erikj/user_ips_analyze
Analyze user_ips before running deduplication (pull/4635/head)
						commit
						cf82338930
					
				|  | @ -0,0 +1 @@ | |||
| Improve 'user_ips' table deduplication background update | ||||
|  | @ -65,6 +65,11 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): | |||
|             columns=["last_seen"], | ||||
|         ) | ||||
| 
 | ||||
|         self.register_background_update_handler( | ||||
|             "user_ips_analyze", | ||||
|             self._analyze_user_ip, | ||||
|         ) | ||||
| 
 | ||||
|         self.register_background_update_handler( | ||||
|             "user_ips_remove_dupes", | ||||
|             self._remove_user_ip_dupes, | ||||
|  | @ -108,6 +113,25 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): | |||
|         yield self._end_background_update("user_ips_drop_nonunique_index") | ||||
|         defer.returnValue(1) | ||||
| 
 | ||||
|     @defer.inlineCallbacks | ||||
|     def _analyze_user_ip(self, progress, batch_size): | ||||
|         # Background update to analyze user_ips table before we run the | ||||
|         # deduplication background update. The table may not have been analyzed | ||||
|         # for ages due to the table locks. | ||||
|         # | ||||
|         # This will lock out the naive upserts to user_ips while it happens, but | ||||
|         # the analyze should be quick (28GB table takes ~10s) | ||||
|         def user_ips_analyze(txn): | ||||
|             txn.execute("ANALYZE user_ips") | ||||
| 
 | ||||
|         yield self.runInteraction( | ||||
|             "user_ips_analyze", user_ips_analyze | ||||
|         ) | ||||
| 
 | ||||
|         yield self._end_background_update("user_ips_analyze") | ||||
| 
 | ||||
|         defer.returnValue(1) | ||||
| 
 | ||||
|     @defer.inlineCallbacks | ||||
|     def _remove_user_ip_dupes(self, progress, batch_size): | ||||
|         # This function works by scanning the user_ips table in batches | ||||
|  |  | |||
|  | @ -13,9 +13,13 @@ | |||
|  * limitations under the License. | ||||
|  */ | ||||
| 
 | ||||
| -- delete duplicates | ||||
|  -- analyze user_ips, to help ensure the correct indices are used | ||||
| INSERT INTO background_updates (update_name, progress_json) VALUES | ||||
|   ('user_ips_remove_dupes', '{}'); | ||||
|   ('user_ips_analyze', '{}'); | ||||
| 
 | ||||
| -- delete duplicates | ||||
| INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES | ||||
|   ('user_ips_remove_dupes', '{}', 'user_ips_analyze'); | ||||
| 
 | ||||
| -- add a new unique index to user_ips table | ||||
| INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES | ||||
|  | @ -23,4 +27,4 @@ INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES | |||
| 
 | ||||
| -- drop the old original index | ||||
| INSERT INTO background_updates (update_name, progress_json, depends_on) VALUES | ||||
|   ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); | ||||
|   ('user_ips_drop_nonunique_index', '{}', 'user_ips_device_unique_index'); | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Erik Johnston
						Erik Johnston