TextEncoder polyfill
Apparently Safari doesn't sport a TextEncoder, so here's a polyfill for it.
pull/21833/head
parent
07b3c58c61
commit
1d5d44d63d
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
Copyright 2017 Vector Creations Ltd
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
"use strict";
|
||||
|
||||
// Polyfill for TextDecoder.
|
||||
|
||||
// U+FFFD REPLACEMENT CHARACTER: emitted in place of bytes which cannot be
// decoded.
const REPLACEMENT_CHAR = '\uFFFD';

export default class TextDecoder {
    /**
     * Decode a UTF-8 byte array as a javascript string.
     *
     * Malformed input never throws. Instead, each byte consumed as part of an
     * invalid sequence (stray continuation byte, bad continuation, over-long
     * form, encoded surrogate, out-of-range code point or truncated
     * sequence) is replaced with one U+FFFD REPLACEMENT CHARACTER.
     *
     * A byte-order mark at the very start of the input is dropped; elsewhere
     * it is decoded as U+FEFF like any other character.
     *
     * @param {Uint8Array} u8Array UTF-8-encoded input
     * @return {string} the decoded string
     */
    decode(u8Array) {
        let str = '';
        let idx = 0;

        while (idx < u8Array.length) {
            const u0 = u8Array[idx++];
            if (!(u0 & 0x80)) {
                // single-byte (ASCII) character
                str += String.fromCharCode(u0);
                continue;
            }

            if ((u0 & 0xC0) !== 0xC0) {
                // continuation byte where we expect a leading byte
                str += REPLACEMENT_CHAR;
                continue;
            }

            if (u0 > 0xF4) {
                // 0xF5..0xF7 could only start a code point beyond U+10FFFF,
                // and 0xF8 or above would imply a 5-byte or longer encoding;
                // both are invalid and unsupported here.
                str += REPLACEMENT_CHAR;
                continue;
            }

            // reading past the end of the array yields undefined, which is
            // how a truncated sequence is detected below.
            let u1 = u8Array[idx++];
            if (u1 === undefined) {
                str += REPLACEMENT_CHAR;
                continue;
            }

            if ((u1 & 0xC0) !== 0x80) {
                // leading byte where we expect a continuation byte
                str += REPLACEMENT_CHAR.repeat(2);
                continue;
            }
            u1 &= 0x3F;

            if (!(u0 & 0x20)) {
                // two-byte sequence: 110xxxxx 10xxxxxx
                const u = ((u0 & 0x1F) << 6) | u1;
                if (u < 0x80) {
                    // over-long
                    str += REPLACEMENT_CHAR.repeat(2);
                } else {
                    str += String.fromCharCode(u);
                }
                continue;
            }

            let u2 = u8Array[idx++];
            if (u2 === undefined) {
                str += REPLACEMENT_CHAR.repeat(2);
                continue;
            }
            if ((u2 & 0xC0) !== 0x80) {
                // leading byte where we expect a continuation byte
                str += REPLACEMENT_CHAR.repeat(3);
                continue;
            }
            u2 &= 0x3F;

            if (!(u0 & 0x10)) {
                // three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
                const u = ((u0 & 0x0F) << 12) | (u1 << 6) | u2;
                if (u < 0x800) {
                    // over-long
                    str += REPLACEMENT_CHAR.repeat(3);
                } else if (u >= 0xD800 && u <= 0xDFFF) {
                    // UTF-16 surrogates are not valid in UTF-8
                    str += REPLACEMENT_CHAR.repeat(3);
                } else if (u === 0xFEFF && idx === 3) {
                    // byte-order mark at the start of the input: do not add
                    // to output
                } else {
                    str += String.fromCharCode(u);
                }
                continue;
            }

            let u3 = u8Array[idx++];
            if (u3 === undefined) {
                str += REPLACEMENT_CHAR.repeat(3);
                continue;
            }
            if ((u3 & 0xC0) !== 0x80) {
                // leading byte where we expect a continuation byte
                str += REPLACEMENT_CHAR.repeat(4);
                continue;
            }
            u3 &= 0x3F;

            // four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            const u = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | u3;
            if (u < 0x10000) {
                // over-long
                str += REPLACEMENT_CHAR.repeat(4);
                continue;
            }
            if (u > 0x10FFFF) {
                // unicode stops here.
                str += REPLACEMENT_CHAR.repeat(4);
                continue;
            }

            // encode as a utf-16 surrogate pair
            const v = u - 0x10000;
            str += String.fromCharCode(0xD800 | (v >> 10), 0xDC00 | (v & 0x3FF));
        }
        return str;
    }
}
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
Copyright 2017 Vector Creations Ltd
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
"use strict";
|
||||
|
||||
// Polyfill for TextEncoder. Based on emscripten's stringToUTF8Array.
|
||||
|
||||
/**
 * Read the unicode code point which starts at UTF-16 code unit `i` of `str`.
 *
 * A well-formed surrogate pair is combined into a single code point above
 * 0xFFFF (in which case the caller must skip the following code unit). An
 * unpaired surrogate is reported as U+FFFD REPLACEMENT CHARACTER so that it
 * is never combined with, or allowed to swallow, an unrelated neighbour.
 *
 * @param {String} str String to read from
 * @param {Number} i Index of the code unit to start at; must be < str.length
 * @return {Number} Code point in the range 0..0x10FFFF, never a surrogate
 */
function readCodePoint(str, i) {
    const unit = str.charCodeAt(i);
    if (unit < 0xD800 || unit > 0xDFFF) {
        return unit;
    }

    // only a lead surrogate immediately followed by a trail surrogate forms
    // a valid pair
    if (unit <= 0xDBFF && i + 1 < str.length) {
        const next = str.charCodeAt(i + 1);
        if (next >= 0xDC00 && next <= 0xDFFF) {
            return 0x10000 + ((unit & 0x3FF) << 10) + (next & 0x3FF);
        }
    }
    return 0xFFFD;
}

/**
 * Count the bytes needed to encode a javascript string as UTF-8.
 *
 * @param {String} str String to measure
 * @return {Number} Length in bytes of the UTF-8 encoding of `str`
 */
function utf8len(str) {
    let len = 0;
    for (let i = 0; i < str.length; ++i) {
        const u = readCodePoint(str, i);
        if (u <= 0x7F) {
            ++len;
        } else if (u <= 0x7FF) {
            len += 2;
        } else if (u <= 0xFFFF) {
            len += 3;
        } else {
            len += 4;
            // the code point came from a surrogate pair: skip its second half
            ++i;
        }
    }
    return len;
}

export default class TextEncoder {
    /**
     * Encode a javascript string as utf-8
     *
     * Unpaired surrogates in the input are encoded as U+FFFD REPLACEMENT
     * CHARACTER (bytes EF BF BD), so the output is always valid UTF-8.
     *
     * @param {String} str String to encode
     * @return {Uint8Array} UTF-8-encoded output
     */
    encode(str) {
        const outU8Array = new Uint8Array(utf8len(str));
        let outIdx = 0;
        for (let i = 0; i < str.length; ++i) {
            const u = readCodePoint(str, i);
            if (u <= 0x7F) {
                outU8Array[outIdx++] = u;
            } else if (u <= 0x7FF) {
                outU8Array[outIdx++] = 0xC0 | (u >> 6);
                outU8Array[outIdx++] = 0x80 | (u & 63);
            } else if (u <= 0xFFFF) {
                outU8Array[outIdx++] = 0xE0 | (u >> 12);
                outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63);
                outU8Array[outIdx++] = 0x80 | (u & 63);
            } else {
                outU8Array[outIdx++] = 0xF0 | (u >> 18);
                outU8Array[outIdx++] = 0x80 | ((u >> 12) & 63);
                outU8Array[outIdx++] = 0x80 | ((u >> 6) & 63);
                outU8Array[outIdx++] = 0x80 | (u & 63);
                // the code point came from a surrogate pair: skip its second
                // half
                ++i;
            }
        }
        return outU8Array;
    }
}
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
Copyright 2017 Vector Creations Ltd
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
"use strict";
|
||||
|
||||
import TextDecoderPolyfill from 'utils/TextDecoderPolyfill';
|
||||
|
||||
import * as testUtils from '../test-utils';
|
||||
import expect from 'expect';
|
||||
|
||||
// Unit tests for the TextDecoder polyfill: well-formed input, byte-order-mark
// handling, and the replacement characters emitted for malformed input.
describe('textDecoderPolyfill', function() {
    beforeEach(function() {
        // shared per-test setup from the project's test-utils
        testUtils.beforeEach(this);
    });

    it('should correctly decode a range of strings', function() {
        const decoder = new TextDecoderPolyfill();

        // 1-, 2-, 3- and 4-byte sequences respectively
        expect(decoder.decode(Uint8Array.of(65, 66, 67))).toEqual('ABC');
        expect(decoder.decode(Uint8Array.of(0xC3, 0xA6))).toEqual('æ');
        expect(decoder.decode(Uint8Array.of(0xE2, 0x82, 0xAC))).toEqual('€');
        expect(decoder.decode(Uint8Array.of(0xF0, 0x9F, 0x92, 0xA9))).toEqual('\uD83D\uDCA9');
    });

    it('should ignore byte-order marks', function() {
        const decoder = new TextDecoderPolyfill();
        expect(decoder.decode(Uint8Array.of(0xEF, 0xBB, 0xBF, 65)))
            .toEqual('A');
    });

    it('should not ignore byte-order marks in the middle of the array', function() {
        const decoder = new TextDecoderPolyfill();
        expect(decoder.decode(Uint8Array.of(65, 0xEF, 0xBB, 0xBF, 66)))
            .toEqual('A\uFEFFB');
    });

    it('should reject overlong encodings', function() {
        const decoder = new TextDecoderPolyfill();

        // euro, as 4 bytes
        expect(decoder.decode(Uint8Array.of(65, 0xF0, 0x82, 0x82, 0xAC, 67)))
            .toEqual('A\uFFFD\uFFFD\uFFFD\uFFFDC');
    });

    it('should reject 5 and 6-byte encodings', function() {
        const decoder = new TextDecoderPolyfill();

        expect(decoder.decode(Uint8Array.of(65, 0xF8, 0x82, 0x82, 0x82, 0x82, 67)))
            .toEqual('A\uFFFD\uFFFD\uFFFD\uFFFD\uFFFDC');
    });

    it('should reject code points beyond 0x10FFFF', function() {
        const decoder = new TextDecoderPolyfill();

        // these bytes would encode U+120000, which is past the end of unicode
        expect(decoder.decode(Uint8Array.of(0xF4, 0xA0, 0x80, 0x80)))
            .toEqual('\uFFFD\uFFFD\uFFFD\uFFFD');
    });

    it('should cope with end-of-string', function() {
        const decoder = new TextDecoderPolyfill();

        // 2-, 3- and 4-byte sequences, each missing its final byte
        expect(decoder.decode(Uint8Array.of(65, 0xC3)))
            .toEqual('A\uFFFD');

        expect(decoder.decode(Uint8Array.of(65, 0xE2, 0x82)))
            .toEqual('A\uFFFD\uFFFD');

        expect(decoder.decode(Uint8Array.of(65, 0xF0, 0x9F, 0x92)))
            .toEqual('A\uFFFD\uFFFD\uFFFD');
    });
});
|
|
@ -0,0 +1,39 @@
|
|||
/*
|
||||
Copyright 2017 Vector Creations Ltd
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
"use strict";
|
||||
|
||||
import TextEncoderPolyfill from 'utils/TextEncoderPolyfill';
|
||||
|
||||
import * as testUtils from '../test-utils';
|
||||
import expect from 'expect';
|
||||
|
||||
// Unit tests for the TextEncoder polyfill.
describe('textEncoderPolyfill', function() {
    beforeEach(function() {
        // shared per-test setup from the project's test-utils
        testUtils.beforeEach(this);
    });

    it('should correctly encode a range of strings', function() {
        const encoder = new TextEncoderPolyfill();

        // each entry pairs an input string with its expected UTF-8 bytes
        const expectations = [
            ['ABC', [65, 66, 67]],
            ['æ', [0xC3, 0xA6]],
            ['€', [0xE2, 0x82, 0xAC]],
            // PILE OF POO (💩)
            ['\uD83D\uDCA9', [0xF0, 0x9F, 0x92, 0xA9]],
        ];

        for (const [input, bytes] of expectations) {
            expect(encoder.encode(input)).toEqual(Uint8Array.of(...bytes));
        }
    });
});
|
Loading…
Reference in New Issue