From f8e56778ea5b0577d0d594f4416305b79ee6d281 Mon Sep 17 00:00:00 2001
From: Richard van der Hoff <richard@matrix.org>
Date: Wed, 11 Jan 2017 22:22:11 +0000
Subject: [PATCH] Encryption and decryption for megolm backups

---
 src/utils/MegolmExportEncryption.js        | 312 +++++++++++++++++++++
 test/utils/MegolmExportEncryption-test.js  | 115 ++++++++
 test/utils/generate-megolm-test-vectors.py | 117 ++++++++
 3 files changed, 544 insertions(+)
 create mode 100644 src/utils/MegolmExportEncryption.js
 create mode 100644 test/utils/MegolmExportEncryption-test.js
 create mode 100755 test/utils/generate-megolm-test-vectors.py
diff --git a/src/utils/MegolmExportEncryption.js b/src/utils/MegolmExportEncryption.js
new file mode 100644
index 0000000000..5b2e16ef29
--- /dev/null
+++ b/src/utils/MegolmExportEncryption.js
@@ -0,0 +1,312 @@
+/*
+Copyright 2017 Vector Creations Ltd
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+"use strict";
+
+// polyfill TextEncoder / TextDecoder, but only when the browser lacks them
+let TextEncoder = window.TextEncoder;
+if (!TextEncoder) {
+    TextEncoder = require('./TextEncoderPolyfill');
+}
+let TextDecoder = window.TextDecoder;
+if (!TextDecoder) {
+    TextDecoder = require('./TextDecoderPolyfill');
+}
+
+const subtleCrypto = window.crypto.subtle || window.crypto.webkitSubtle;
+
+/**
+ * Decrypt a megolm key file
+ *
+ * Malformed input (bad armour, short body, unknown version) throws
+ * synchronously; a failed HMAC check rejects the returned promise.
+ *
+ * @param {ArrayBuffer} data  ascii-armoured file, as produced by encryptMegolmKeyFile
+ * @param {String} password
+ * @return {Promise<String>} promise for decrypted output
+ */
+export function decryptMegolmKeyFile(data, password) {
+    const body = unpackMegolmKeyFile(data);
+
+    // check we have a version byte
+    if (body.length < 1) {
+        throw new Error('Invalid file: too short');
+    }
+
+    const version = body[0];
+    if (version !== 1) {
+        throw new Error('Unsupported version');
+    }
+
+    // layout: version(1) | salt(16) | iv(16) | iterations(4) | ciphertext | hmac(32)
+    const ciphertextLength = body.length-(1+16+16+4+32);
+    if (ciphertextLength < 0) {
+        throw new Error('Invalid file: too short');
+    }
+
+    const salt = body.subarray(1, 1+16);
+    const iv = body.subarray(17, 17+16);
+    // '>>> 0' keeps the big-endian count unsigned when its top bit is set
+    const iterations = (body[33] << 24 | body[34] << 16 | body[35] << 8 | body[36]) >>> 0;
+    const ciphertext = body.subarray(37, 37+ciphertextLength);
+    const hmac = body.subarray(-32);
+
+    return deriveKeys(salt, iterations, password).then(([aes_key, sha_key]) => {
+        // the MAC covers everything except the trailing 32-byte HMAC itself
+        const toVerify = body.subarray(0, -32);
+        return subtleCrypto.verify(
+            {name: 'HMAC'},
+            sha_key,
+            hmac,
+            toVerify,
+        ).then((isValid) => {
+            if (!isValid) {
+                throw new Error('Authentication check failed: incorrect password?');
+            }
+
+            return subtleCrypto.decrypt(
+                {name: "AES-CTR", counter: iv, length: 64},
+                aes_key,
+                ciphertext,
+            );
+        });
+    }).then((plaintext) => {
+        return new TextDecoder().decode(new Uint8Array(plaintext));
+    });
+}
+
+
+/**
+ * Encrypt a megolm key file
+ *
+ * The result is the ascii-armoured form of: a version byte (1), a random
+ * 16-byte salt, a random 16-byte IV, the big-endian round count, the AES-CTR
+ * ciphertext, and finally an HMAC over all of the preceding bytes.
+ *
+ * @param {String} data  plaintext to encrypt
+ * @param {String} password  passphrase from which the keys are derived
+ * @param {Object=} options
+ * @param {Number=} options.kdf_rounds Number of iterations to perform of the
+ *    key-derivation function; 100000 when missing (or otherwise falsy).
+ * @return {Promise<ArrayBuffer>} promise for encrypted output
+ */
+export function encryptMegolmKeyFile(data, password, options) {
+    options = options || {};
+    const kdf_rounds = options.kdf_rounds || 100000;
+
+    // fresh random salt and IV for every file
+    const salt = new Uint8Array(16);
+    const iv = new Uint8Array(16);
+    window.crypto.getRandomValues(salt);
+    window.crypto.getRandomValues(iv);
+
+    return deriveKeys(salt, kdf_rounds, password).then(([aes_key, sha_key]) => {
+        // same cipher parameters as decryptMegolmKeyFile uses
+        const aesParams = {name: "AES-CTR", counter: iv, length: 64};
+        const plainBytes = new TextEncoder().encode(data);
+
+        return subtleCrypto.encrypt(aesParams, aes_key, plainBytes).then((encrypted) => {
+            const cipherBytes = new Uint8Array(encrypted);
+            // offset at which the 32-byte HMAC will be appended
+            const macOffset = 1 + salt.length + iv.length + 4 + cipherBytes.length;
+            const packed = new Uint8Array(macOffset + 32);
+
+            let pos = 0;
+            packed[pos++] = 1; // version
+            packed.set(salt, pos);
+            pos += salt.length;
+            packed.set(iv, pos);
+            pos += iv.length;
+            // round count, most significant byte first
+            packed[pos++] = (kdf_rounds >> 24) & 0xff;
+            packed[pos++] = (kdf_rounds >> 16) & 0xff;
+            packed[pos++] = (kdf_rounds >> 8) & 0xff;
+            packed[pos++] = kdf_rounds & 0xff;
+            packed.set(cipherBytes, pos);
+            pos += cipherBytes.length;
+
+            // authenticate everything written so far, then append the MAC
+            const signedPart = packed.subarray(0, pos);
+            return subtleCrypto.sign({name: 'HMAC'}, sha_key, signedPart).then((mac) => {
+                packed.set(new Uint8Array(mac), pos);
+                return packMegolmKeyFile(packed);
+            });
+        });
+    });
+}
+
+/**
+ * Derive the AES and SHA keys for the file
+ *
+ * Runs PBKDF2 (SHA-512) over the password to get 512 bits, then splits them:
+ * the first 32 bytes become the AES-CTR key, the last 32 the HMAC-SHA-256 key.
+ *
+ * @param {Uint8Array} salt  salt for pbkdf
+ * @param {Number} iterations number of pbkdf iterations
+ * @param {String} password  password
+ * @return {Promise<[CryptoKey, CryptoKey]>} promise for [aes key, sha key]
+ */
+function deriveKeys(salt, iterations, password) {
+    const passwordBytes = new TextEncoder().encode(password);
+
+    // step 1: wrap the raw password bytes as PBKDF2 key material
+    const baseKeyProm = subtleCrypto.importKey(
+        'raw',
+        passwordBytes,
+        {name: 'PBKDF2'},
+        false,
+        ['deriveBits']
+    );
+
+    // step 2: stretch it into 512 bits of key material
+    const keybitsProm = baseKeyProm.then((baseKey) => {
+        const pbkdfParams = {name: 'PBKDF2', salt: salt, iterations: iterations, hash: 'SHA-512'};
+        return subtleCrypto.deriveBits(pbkdfParams, baseKey, 512);
+    });
+
+    // step 3: import each half as a non-extractable key for its algorithm
+    return keybitsProm.then((keybits) => {
+        const hmacParams = {name: 'HMAC', hash: {name: 'SHA-256'}};
+        return Promise.all([
+            subtleCrypto.importKey(
+                'raw',
+                keybits.slice(0, 32),
+                {name: 'AES-CTR'},
+                false,
+                ['encrypt', 'decrypt']
+            ),
+            subtleCrypto.importKey(
+                'raw',
+                keybits.slice(32),
+                hmacParams,
+                false,
+                ['sign', 'verify']
+            ),
+        ]);
+    });
+}
+
+const HEADER_LINE = '-----BEGIN MEGOLM SESSION DATA-----'; // line written before the base64 body
+const TRAILER_LINE = '-----END MEGOLM SESSION DATA-----'; // line written after the base64 body
+
+/**
+ * Unbase64 an ascii-armoured megolm key file
+ *
+ * Strips the header and trailer lines, and unbase64s the content. Lines are
+ * compared after trimming whitespace, and anything before the header line is
+ * skipped.
+ *
+ * @param {ArrayBuffer} data  input file
+ * @return {Uint8Array} unbase64ed content
+ * @throws {Error} if the header or the trailer line cannot be found
+ */
+function unpackMegolmKeyFile(data) {
+    // Treat the whole file as one String: it should contain nothing but
+    // ASCII, and a String lets us compare lines against the header/trailer
+    // and hand the body straight to window.atob.
+    const text = new TextDecoder().decode(new Uint8Array(data));
+
+    // Pass 1: walk newline-terminated lines until one matches the header.
+    let cursor = 0;
+    for (;;) {
+        const newline = text.indexOf('\n', cursor);
+        if (newline < 0) {
+            throw new Error('Header line not found');
+        }
+        const candidate = text.slice(cursor, newline).trim();
+        // whatever happens, the next line begins just past this newline
+        cursor = newline + 1;
+
+        if (candidate === HEADER_LINE) {
+            break;
+        }
+    }
+
+    // the base64 body begins on the line after the header
+    const bodyStart = cursor;
+
+    // Pass 2: keep walking until the trailer; unlike the header, the trailer
+    // may be the final line of the file without a terminating newline.
+    for (;;) {
+        const newline = text.indexOf('\n', cursor);
+        const rawLine = newline < 0 ? text.slice(cursor) : text.slice(cursor, newline);
+        if (rawLine.trim() === TRAILER_LINE) {
+            break;
+        }
+        if (newline < 0) {
+            throw new Error('Trailer line not found');
+        }
+        cursor = newline + 1;
+    }
+
+    // cursor now sits at the start of the trailer line, i.e. the end of the body
+    return decodeBase64(text.slice(bodyStart, cursor));
+}
+
+/**
+ * ascii-armour a megolm key file
+ *
+ * base64s the content, and adds header and trailer lines
+ *
+ * @param {Uint8Array} data  raw data
+ * @return {ArrayBuffer} formatted file
+ */
+function packMegolmKeyFile(data) {
+    // Encode in fixed-size chunks rather than all at once, because
+    // encodeBase64 doesn't deal terribly well with large arrays.
+    // NOTE(review): 72 * 4 / 3 is 96 input bytes per chunk, i.e. 128 base64
+    // characters per line; if 72-character lines were intended this should
+    // be 72 * 3 / 4 - confirm before changing the output format.
+    const LINE_LENGTH = (72 * 4 / 3);
+
+    const lines = [HEADER_LINE];
+    for (let offset = 0; offset < data.length; offset += LINE_LENGTH) {
+        lines.push(encodeBase64(data.subarray(offset, offset + LINE_LENGTH)));
+    }
+
+    // the trailing empty entry makes join() end the file with a newline
+    lines.push(TRAILER_LINE, '');
+    return (new TextEncoder().encode(lines.join('\n'))).buffer;
+}
+
+/**
+ * Encode a typed array of uint8 as base64.
+ * @param {Uint8Array} uint8Array The data to encode.
+ * @return {string} The base64.
+ */
+function encodeBase64(uint8Array) {
+    // window.btoa only accepts strings whose code points all lie in the
+    // range 0-255, so build one character per byte (in effect reading the
+    // bytes as Latin-1) and let the builtin encoder do the rest.
+    const binaryString = String.fromCharCode.apply(null, uint8Array);
+    return window.btoa(binaryString);
+}
+
+/**
+ * Decode a base64 string to a typed array of uint8.
+ * @param {string} base64 The base64 to decode.
+ * @return {Uint8Array} The decoded data.
+ */
+function decodeBase64(base64) {
+    // window.atob yields one character (code point 0-255) per decoded byte.
+    const binaryString = window.atob(base64);
+    const byteCount = binaryString.length;
+    const bytes = new Uint8Array(byteCount);
+    for (let pos = 0; pos < byteCount; pos += 1) {
+        bytes[pos] = binaryString.charCodeAt(pos);
+    }
+    return bytes;
+}
diff --git a/test/utils/MegolmExportEncryption-test.js b/test/utils/MegolmExportEncryption-test.js
new file mode 100644
index 0000000000..fa51d83c6d
--- /dev/null
+++ b/test/utils/MegolmExportEncryption-test.js
@@ -0,0 +1,115 @@
+/*
+Copyright 2017 Vector Creations Ltd
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+"use strict";
+
+import * as MegolmExportEncryption from 'utils/MegolmExportEncryption';
+
+import * as testUtils from '../test-utils';
+import expect from 'expect';
+
+// Prefer the browser's TextEncoder; the short-circuit means the polyfill is
+// only required when the native implementation is absent.
+const TextEncoder = (
+    window.TextEncoder || require('utils/TextEncoderPolyfill')
+);
+
+const TEST_VECTORS=[ // each entry is [plaintext, password, armoured key file]
+    [
+        "plain",
+        "password",
+        "-----BEGIN MEGOLM SESSION DATA-----\nAXNhbHRzYWx0c2FsdHNhbHSIiIiIiIiIiIiIiIiIiIiIAAAACmIRUW2OjZ3L2l6j9h0lHlV3M2dx\ncissyYBxjsfsAndErh065A8=\n-----END MEGOLM SESSION DATA-----"
+    ],
+    [
+        "Hello, World",
+        "betterpassword",
+        "-----BEGIN MEGOLM SESSION DATA-----\nAW1vcmVzYWx0bW9yZXNhbHT//////////wAAAAAAAAAAAAAD6KyBpe1Niv5M5NPm4ZATsJo5nghk\nKYu63a0YQ5DRhUWEKk7CcMkrKnAUiZny\n-----END MEGOLM SESSION DATA-----"
+    ],
+    [
+        "alphanumericallyalphanumericallyalphanumericallyalphanumerically",
+        "SWORDFISH",
+        "-----BEGIN MEGOLM SESSION DATA-----\nAXllc3NhbHR5Z29vZG5lc3P//////////wAAAAAAAAAAAAAD6OIW+Je7gwvjd4kYrb+49gKCfExw\nMgJBMD4mrhLkmgAngwR1pHjbWXaoGybtiAYr0moQ93GrBQsCzPbvl82rZhaXO3iH5uHo/RCEpOqp\nPgg29363BGR+/Ripq/VCLKGNbw==\n-----END MEGOLM SESSION DATA-----"
+    ],
+    [
+        "alphanumericallyalphanumericallyalphanumericallyalphanumerically",
+        "passwordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpasswordpassword",
+        "-----BEGIN MEGOLM SESSION DATA-----\nAf//////////////////////////////////////////AAAD6IAZJy7IQ7Y0idqSw/bmpngEEVVh\ngsH+8ptgqxw6ZVWQnohr8JsuwH9SwGtiebZuBu5smPCO+RFVWH2cQYslZijXv/BEH/txvhUrrtCd\nbWnSXS9oymiqwUIGs08sXI33ZA==\n-----END MEGOLM SESSION DATA-----"
+    ]
+]
+;
+
+function stringToArray(s) {
+    const utf8Bytes = new TextEncoder().encode(s);
+    return utf8Bytes.buffer;
+}
+describe('MegolmExportEncryption', function() {
+    beforeEach(function() {
+        testUtils.beforeEach(this);
+    });
+
+    describe('decrypt', function() {
+        it('should handle missing header', function() {
+            const input=stringToArray(`-----`);
+            expect(()=>{MegolmExportEncryption.decryptMegolmKeyFile(input, '');})
+                .toThrow('Header line not found');
+        });
+
+        it('should handle missing trailer', function() {
+            const input=stringToArray(`-----BEGIN MEGOLM SESSION DATA-----
+-----`);
+            expect(()=>{MegolmExportEncryption.decryptMegolmKeyFile(input, '');})
+                .toThrow('Trailer line not found');
+        });
+
+        it('should decrypt a range of inputs', function() {
+            function next(i) {
+                if (i >= TEST_VECTORS.length) {
+                    // all vectors done: resolve so the returned chain completes
+                    return Promise.resolve();
+                }
+
+                const [plain, password, input] = TEST_VECTORS[i];
+                return MegolmExportEncryption.decryptMegolmKeyFile(
+                    stringToArray(input), password
+                ).then((decrypted) => {
+                    expect(decrypted).toEqual(plain);
+                    return next(i+1);
+                });
+            }
+            return next(0);
+        });
+    });
+
+    describe('encrypt', function() {
+        it('should round-trip', function() {
+            const input =
+                  'words words many words in plain text here'.repeat(100);
+
+            const password = 'my super secret passphrase';
+
+            return MegolmExportEncryption.encryptMegolmKeyFile(
+                input, password, {kdf_rounds: 1000},
+            ).then((ciphertext) => {
+                return MegolmExportEncryption.decryptMegolmKeyFile(
+                    ciphertext, password
+                );
+            }).then((plaintext) => {
+                // a failed expectation rejects the promise returned to the runner
+                expect(plaintext).toEqual(input);
+            });
+        });
+    });
+});
diff --git a/test/utils/generate-megolm-test-vectors.py b/test/utils/generate-megolm-test-vectors.py
new file mode 100755
index 0000000000..0ce5f5e4b3
--- /dev/null
+++ b/test/utils/generate-megolm-test-vectors.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+
+from __future__ import print_function
+
+import base64
+import json
+import struct
+
+from cryptography.hazmat import backends
+from cryptography.hazmat.primitives import ciphers, hashes, hmac
+from cryptography.hazmat.primitives.kdf import pbkdf2
+from cryptography.hazmat.primitives.ciphers import algorithms, modes
+
+backend = backends.default_backend()  # shared by the cipher, HMAC and KDF calls below
+
+def parse_u128(s):
+    high, low = struct.unpack(">QQ", s)  # two big-endian unsigned 64-bit words
+    return (high << 64) | low
+
+def encrypt_ctr(key, iv, plaintext, counter_bits=64):
+    """AES-CTR encrypt, wrapping only the low counter_bits bits of the IV.
+
+    Some AES-CTR implementations treat part of the IV as a nonce (constant
+    throughout encryption) and the rest as a counter (incremented for every
+    16-byte block, wrapping after a while); they differ in how many bits go
+    to each part.  The python cryptography library uses the whole IV as a
+    counter, so to match an implementation with a given counter size the
+    wrap-around is emulated here by restarting the cipher at each wrap.
+
+    Args:
+        key: AES key bytes
+        iv: 16-byte initial counter block
+        plaintext: bytes to encrypt
+        counter_bits: how many low bits of the IV act as the counter
+
+    Returns:
+        (bytes) ciphertext
+    """
+    alg = algorithms.AES(key)
+
+    def ctr_chunk(start_iv, chunk):
+        # one CTR pass over chunk, counting up from start_iv
+        enc = ciphers.Cipher(alg, modes.CTR(start_iv), backend=backend).encryptor()
+        return enc.update(chunk) + enc.finalize()
+
+    # number of AES blocks between each counter wrap
+    limit = 1 << counter_bits
+    counter_mask = limit - 1
+
+    # split the IV, read as a 128-bit int, into counter (low) and nonce (high)
+    parsed_iv = parse_u128(iv)
+    counter = parsed_iv & counter_mask
+    nonce = parsed_iv & ~counter_mask
+
+    # bytes that can be encrypted before the counter first wraps
+    first_size = 16 * (limit - counter)
+    result = ctr_chunk(iv, plaintext[:first_size])
+
+    # every later chunk restarts from counter zero with the same nonce
+    wrapped_iv = struct.pack(">QQ", nonce >> 64, nonce & ((1 << 64) - 1))
+    chunk_size = 16 * limit
+    offset = first_size
+    while offset < len(plaintext):
+        result += ctr_chunk(wrapped_iv, plaintext[offset:offset + chunk_size])
+        offset += chunk_size
+
+    return result
+
+def hmac_sha256(key, message):  # finalized HMAC-SHA-256 of message under key
+    mac = hmac.HMAC(key, hashes.SHA256(), backend=backend)
+    mac.update(message)
+    return mac.finalize()
+
+def encrypt(key, iv, salt, plaintext, iterations=1000):
+    """
+    Returns:
+       (bytes) the ascii-armoured key file: header line, base64 body, trailer
+    """
+    if len(salt) != 16:
+        raise Exception("Expected 128 bits of salt - got %i bits" % (len(salt) * 8))
+    if len(iv) != 16:
+        raise Exception("Expected 128 bits of IV - got %i bits" % (len(iv) * 8))
+
+    sha = hashes.SHA512()
+    kdf = pbkdf2.PBKDF2HMAC(sha, 64, salt, iterations, backend)
+    k = kdf.derive(key)
+    aes_key, sha_key = k[0:32], k[32:]
+
+    packed_file = (
+        b"\x01"     # version
+        + salt
+        + iv
+        + struct.pack(">L", iterations)
+        + encrypt_ctr(aes_key, iv, plaintext)
+    )
+    packed_file += hmac_sha256(sha_key, packed_file)
+
+    # base64.encodestring is gone as of Python 3.9; encodebytes is the same function
+    b64encode = getattr(base64, "encodebytes", None) or base64.encodestring
+    return (
+        b"-----BEGIN MEGOLM SESSION DATA-----\n" +
+        b64encode(packed_file) +
+        b"-----END MEGOLM SESSION DATA-----"
+    )
+
+def gen(password, iv, salt, plaintext, iterations=1000):
+    """Build one (plaintext, password, armoured file) test-vector triple."""
+    key_bytes, plain_bytes = password.encode('utf-8'), plaintext.encode('utf-8')
+    armoured = encrypt(key_bytes, iv, salt, plain_bytes, iterations)
+    return (plaintext, password, armoured.decode('utf-8'))
+
+print(json.dumps([  # one gen(password, iv, salt, plaintext[, iterations]) call per vector
+    gen("password", b"\x88" * 16, b"saltsaltsaltsalt", "plain", iterations=10),
+    gen("betterpassword", b"\xFF" * 8 + b"\x00" * 8, b"moresaltmoresalt", "Hello, World"),
+    gen("SWORDFISH", b"\xFF" * 8 + b"\x00" * 8, b"yessaltygoodness", "alphanumerically" * 4),
+    gen("password" * 32, b"\xFF" * 16, b"\xFF" * 16, "alphanumerically" * 4),
+], indent=4))