Fix user_threepids schema delta

The delta `37/user_threepids.sql` aimed to update all the email
addresses to be lower case, however duplicate emails may exist in the
table already.

This commit adds a step where the delta moves the duplicate emails to a
new `medium` `email_old`. Only the most recently used account keeps the
binding intact. We move rather than delete so that we retain some record
of which emails were associated with which account.
pull/1183/head
Erik Johnston 2016-10-27 14:14:44 +01:00
parent db0609f1ec
commit 1fc1bc2a51
1 changed files with 38 additions and 1 deletions

View File

@ -17,7 +17,44 @@
* Update any email addresses that were stored with mixed case into all * Update any email addresses that were stored with mixed case into all
* lowercase * lowercase
*/ */
UPDATE user_threepids SET address = LOWER(address) where medium = 'email';
-- There may be "duplicate" emails (with different case) already in the table,
-- so we find them and move all but the most recently used account.
UPDATE user_threepids
SET medium = 'email_old'
WHERE medium = 'email'
AND address IN (
-- `user_last_seen` maps user_ids to the last time we saw them
WITH user_last_seen AS (
SELECT user_id, max(last_seen) AS ts FROM user_ips GROUP BY user_id
),
-- `duplicate_addresses` is a table of all the email addresses that
-- appear multiple times and the most recently we saw any of their users
duplicate_addresses AS (
SELECT lower(u1.address) AS address, max(ts.ts) AS max_ts
FROM user_threepids AS u1
INNER JOIN user_threepids AS u2 ON u1.medium = u2.medium AND lower(u1.address) = lower(u2.address) AND u1.address != u2.address
INNER JOIN user_last_seen as ts ON ts.user_id = u1.user_id
WHERE u1.medium = 'email' AND u2.medium = 'email'
GROUP BY lower(u1.address)
)
-- We select all the addresses that are linked to the user_id that is NOT
-- the most recently seen.
SELECT u.address
FROM
user_threepids AS u,
duplicate_addresses,
user_last_seen AS ts
WHERE
lower(u.address) = duplicate_addresses.address
AND u.user_id = ts.user_id
AND ts.ts != max_ts -- NOT the most recently used
);
-- This update is now safe since we've removed the duplicate addresses.
UPDATE user_threepids SET address = LOWER(address) WHERE medium = 'email';
/* Add an index for the select we do on passwored reset */ /* Add an index for the select we do on passwored reset */
CREATE INDEX user_threepids_medium_address on user_threepids (medium, address); CREATE INDEX user_threepids_medium_address on user_threepids (medium, address);