Skip to content

Commit de55289

Browse files
Update utils.ts
1 parent aba10c3 commit de55289

File tree

1 file changed

+31
-7
lines changed

1 file changed

+31
-7
lines changed

src/primitives/utils.ts

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -248,37 +248,61 @@ export const toUTF8 = (arr: number[]): string => {
248248
// 1-byte sequence (0xxxxxxx)
249249
if (byte <= 0x7f) {
250250
result += String.fromCharCode(byte)
251-
} else if (byte >= 0xc0 && byte <= 0xdf) {
252-
// 2-byte sequence (110xxxxx 10xxxxxx)
251+
continue
252+
}
253+
254+
// 2-byte sequence (110xxxxx 10xxxxxx)
255+
if (byte >= 0xc0 && byte <= 0xdf) {
256+
if (i + 1 >= arr.length) {
257+
throw new Error("Truncated UTF-8: expected 2 bytes")
258+
}
253259
const byte2 = arr[i + 1]
254260
skip = 1
261+
255262
const codePoint = ((byte & 0x1f) << 6) | (byte2 & 0x3f)
256263
result += String.fromCharCode(codePoint)
257-
} else if (byte >= 0xe0 && byte <= 0xef) {
258-
// 3-byte sequence (1110xxxx 10xxxxxx 10xxxxxx)
264+
continue
265+
}
266+
267+
// 3-byte sequence (1110xxxx 10xxxxxx 10xxxxxx)
268+
if (byte >= 0xe0 && byte <= 0xef) {
269+
if (i + 2 >= arr.length) {
270+
throw new Error("Truncated UTF-8: expected 3 bytes")
271+
}
259272
const byte2 = arr[i + 1]
260273
const byte3 = arr[i + 2]
261274
skip = 2
275+
262276
const codePoint =
263277
((byte & 0x0f) << 12) | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f)
264278
result += String.fromCharCode(codePoint)
265-
} else if (byte >= 0xf0 && byte <= 0xf7) {
266-
// 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
279+
continue
280+
}
281+
282+
// 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
283+
if (byte >= 0xf0 && byte <= 0xf7) {
284+
if (i + 3 >= arr.length) {
285+
throw new Error("Truncated UTF-8: expected 4 bytes")
286+
}
267287
const byte2 = arr[i + 1]
268288
const byte3 = arr[i + 2]
269289
const byte4 = arr[i + 3]
270290
skip = 3
291+
271292
const codePoint =
272293
((byte & 0x07) << 18) |
273294
((byte2 & 0x3f) << 12) |
274295
((byte3 & 0x3f) << 6) |
275296
(byte4 & 0x3f)
276297

277-
// Convert to UTF-16 surrogate pair
278298
const surrogate1 = 0xd800 + ((codePoint - 0x10000) >> 10)
279299
const surrogate2 = 0xdc00 + ((codePoint - 0x10000) & 0x3ff)
280300
result += String.fromCharCode(surrogate1, surrogate2)
301+
continue
281302
}
303+
304+
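// Invalid leading byte for UTF-8: no branch above matched, so the byte is skipped without producing output (the throw below is disabled)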
305+
// throw new Error(`Invalid UTF-8 leading byte: 0x${byte.toString(16)}`)
282306
}
283307

284308
return result

0 commit comments

Comments
 (0)