Skip to content

Commit 98d0dae

Browse files
authored
Merge pull request #163 from mustafaiman/fixEmojiStringSerialization
fix utf8 serialization/deserialization algorithm
2 parents 251c6d1 + 9bd484c commit 98d0dae

File tree

3 files changed: 98 additions & 25 deletions

src/serialization/ObjectData.ts

Lines changed: 63 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ export class ObjectDataOutput implements DataOutput {
3030
private ensureAvailable(size: number): void {
3131
if (this.available() < size ) {
3232
var newBuffer = new Buffer(this.pos + size);
33-
this.buffer.copy(newBuffer);
33+
this.buffer.copy(newBuffer, 0, 0, this.pos);
3434
this.buffer = newBuffer;
3535
}
3636
}
@@ -62,7 +62,13 @@ export class ObjectDataOutput implements DataOutput {
6262
}
6363

6464
toBuffer(): Buffer {
65-
return this.buffer;
65+
if (this.buffer == null || this.pos === 0) {
66+
return new Buffer(0);
67+
} else {
68+
var snapBuffer = new Buffer(this.pos);
69+
this.buffer.copy(snapBuffer, 0, 0, this.pos);
70+
return snapBuffer;
71+
}
6672
}
6773

6874
write(byte: number|Buffer): void {
@@ -199,8 +205,19 @@ export class ObjectDataOutput implements DataOutput {
199205
writeUTF(val: string): void {
200206
var len = (val != null) ? val.length : BitsUtil.NULL_ARRAY_LENGTH;
201207
this.writeInt(len);
202-
if (len > 0 ) {
203-
this.write(new Buffer(val, 'utf8'));
208+
this.ensureAvailable(len * 3);
209+
for (let i = 0; i < len; i++) {
210+
let ch = val.charCodeAt(i);
211+
if (ch <= 0x007F) {
212+
this.writeByte(ch);
213+
} else if (ch <= 0x07FF) {
214+
this.write(0xC0 | ch >> 6 & 0x1F);
215+
this.write(0x80 | ch & 0x3F);
216+
} else {
217+
this.write(0xE0 | ch >> 12 & 0x0F);
218+
this.write(0x80 | ch >> 6 & 0x3F);
219+
this.write(0x80 | ch & 0x3F);
220+
}
204221
}
205222
}
206223

@@ -472,31 +489,52 @@ export class ObjectDataInput implements DataInput {
472489
}
473490

474491
readUTF(pos?: number): string {
475-
var len = this.readInt(pos);
476-
var readingIndex = this.addOrUndefined(pos, 4);
492+
let len = this.readInt(pos);
493+
let readingIndex = this.addOrUndefined(pos, 4);
477494
if (len === BitsUtil.NULL_ARRAY_LENGTH) {
478495
return null;
479-
} else {
480-
var result: number[] = [];
481-
var leadingByte: number;
482-
var continuationByte: number;
483-
for (var i = 0; i < len; i++) {
484-
leadingByte = this.readByte(readingIndex) & MASK_1BYTE;
485-
readingIndex = this.addOrUndefined(readingIndex, 1);
486-
result.push(leadingByte);
487-
if (leadingByte >= 128) {
488-
while (((leadingByte <<= 1) & MASK_1BYTE) >= 128) {
489-
continuationByte = this.readByte(readingIndex);
490-
readingIndex = this.addOrUndefined(readingIndex, 1);
491-
if (((continuationByte >> 6) & MASK_1BYTE) !== 2) {
492-
throw new Error('String is not properly UTF8 encoded');
493-
}
494-
result.push(continuationByte);
495-
}
496-
}
496+
}
497+
let result: string = '';
498+
let leadingByte: number;
499+
for (let i = 0; i < len; i++) {
500+
let charCode: number;
501+
leadingByte = this.readByte(readingIndex) & MASK_1BYTE;
502+
readingIndex = this.addOrUndefined(readingIndex, 1);
503+
504+
let b = leadingByte & 0xFF;
505+
switch (b >> 4) {
506+
/* tslint:disable:no-switch-case-fall-through */
507+
case 0:
508+
case 1:
509+
case 2:
510+
case 3:
511+
case 4:
512+
case 5:
513+
case 6:
514+
case 7:
515+
charCode = leadingByte;
516+
break;
517+
case 12:
518+
case 13:
519+
let first = (b & 0x1F) << 6;
520+
let second = this.readByte(readingIndex) & 0x3F;
521+
readingIndex = this.addOrUndefined(readingIndex, 1);
522+
charCode = first | second;
523+
break;
524+
case 14:
525+
let first2 = (b & 0x0F) << 12;
526+
let second2 = (this.readByte(readingIndex) & 0x3F) << 6;
527+
readingIndex = this.addOrUndefined(readingIndex, 1);
528+
let third2 = this.readByte(readingIndex) & 0x3F;
529+
readingIndex = this.addOrUndefined(readingIndex, 1);
530+
charCode = (first2 | second2 | third2);
531+
break;
532+
default:
533+
throw new Error('Malformed UTF8 string');
497534
}
498-
return new Buffer(result).toString('utf8');
535+
result += String.fromCharCode(charCode);
499536
}
537+
return result;
500538
}
501539

502540
readUTFArray(pos?: number): string[] {

test/serialization/DefaultSerializersLiveTest.js

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,38 @@ describe('Default serializers with live instance', function() {
4848
})
4949
});
5050

51+
it('emoji string', function () {
52+
return map.put('key', '1⚐中💦2😭‍🙆😔5').then(function () {
53+
return RC.executeOnController(cluster.id, _generateGet('key'), 1);
54+
}).then(function (response) {
55+
return expect(response.result.toString()).to.equal('1⚐中💦2😭‍🙆😔5');
56+
});
57+
});
58+
59+
it('utf8 characters test', function() {
60+
return map.put('key', '\u0040\u0041\u01DF\u06A0\u12E0\u{1D306}').then(function () {
61+
return RC.executeOnController(cluster.id, _generateGet('key'), 1);
62+
}).then(function (response) {
63+
return expect(response.result.toString()).to.equal('\u0040\u0041\u01DF\u06A0\u12E0\u{1D306}');
64+
});
65+
});
66+
67+
it('utf8 characters test with surrogates', function() {
68+
return map.put('key', '\u0040\u0041\u01DF\u06A0\u12E0\uD834\uDF06').then(function () {
69+
return RC.executeOnController(cluster.id, _generateGet('key'), 1);
70+
}).then(function (response) {
71+
return expect(response.result.toString()).to.equal('\u0040\u0041\u01DF\u06A0\u12E0\u{1D306}');
72+
});
73+
});
74+
75+
it('utf8 sample string test', function() {
76+
return map.put('key', 'Iñtërnâtiônàlizætiøn').then(function () {
77+
return RC.executeOnController(cluster.id, _generateGet('key'), 1);
78+
}).then(function (response) {
79+
return expect(response.result.toString()).to.equal('Iñtërnâtiônàlizætiøn');
80+
});
81+
});
82+
5183
it('number', function () {
5284
return map.put('a', 23).then(function () {
5385
return RC.executeOnController(cluster.id, _generateGet('a'), 1);

test/serialization/DefaultSerializersTest.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ describe('Default serializers Test', function() {
2323
[''],
2424
'',
2525
'client',
26+
'1⚐中💦2😭‍🙆😔5',
27+
'Iñtërnâtiônàlizætiøn',
28+
'\u0040\u0041\u01DF\u06A0\u12E0\u{1D306}',
2629
[12, 56, 54, 12],
2730
[43546.6, 2343.4, 8988,4],
2831
[23545798.6],

0 commit comments

Comments
 (0)