@@ -248,37 +248,61 @@ export const toUTF8 = (arr: number[]): string => {
248248 // 1-byte sequence (0xxxxxxx)
249249 if ( byte <= 0x7f ) {
250250 result += String . fromCharCode ( byte )
251- } else if ( byte >= 0xc0 && byte <= 0xdf ) {
252- // 2-byte sequence (110xxxxx 10xxxxxx)
251+ continue
252+ }
253+
254+ // 2-byte sequence (110xxxxx 10xxxxxx)
255+ if ( byte >= 0xc0 && byte <= 0xdf ) {
256+ if ( i + 1 >= arr . length ) {
257+ throw new Error ( "Truncated UTF-8: expected 2 bytes" )
258+ }
253259 const byte2 = arr [ i + 1 ]
254260 skip = 1
261+
255262 const codePoint = ( ( byte & 0x1f ) << 6 ) | ( byte2 & 0x3f )
256263 result += String . fromCharCode ( codePoint )
257- } else if ( byte >= 0xe0 && byte <= 0xef ) {
258- // 3-byte sequence (1110xxxx 10xxxxxx 10xxxxxx)
264+ continue
265+ }
266+
267+ // 3-byte sequence (1110xxxx 10xxxxxx 10xxxxxx)
268+ if ( byte >= 0xe0 && byte <= 0xef ) {
269+ if ( i + 2 >= arr . length ) {
270+ throw new Error ( "Truncated UTF-8: expected 3 bytes" )
271+ }
259272 const byte2 = arr [ i + 1 ]
260273 const byte3 = arr [ i + 2 ]
261274 skip = 2
275+
262276 const codePoint =
263277 ( ( byte & 0x0f ) << 12 ) | ( ( byte2 & 0x3f ) << 6 ) | ( byte3 & 0x3f )
264278 result += String . fromCharCode ( codePoint )
265- } else if ( byte >= 0xf0 && byte <= 0xf7 ) {
266- // 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
279+ continue
280+ }
281+
282+ // 4-byte sequence (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx)
283+ if ( byte >= 0xf0 && byte <= 0xf7 ) {
284+ if ( i + 3 >= arr . length ) {
285+ throw new Error ( "Truncated UTF-8: expected 4 bytes" )
286+ }
267287 const byte2 = arr [ i + 1 ]
268288 const byte3 = arr [ i + 2 ]
269289 const byte4 = arr [ i + 3 ]
270290 skip = 3
291+
271292 const codePoint =
272293 ( ( byte & 0x07 ) << 18 ) |
273294 ( ( byte2 & 0x3f ) << 12 ) |
274295 ( ( byte3 & 0x3f ) << 6 ) |
275296 ( byte4 & 0x3f )
276297
277- // Convert to UTF-16 surrogate pair
278298 const surrogate1 = 0xd800 + ( ( codePoint - 0x10000 ) >> 10 )
279299 const surrogate2 = 0xdc00 + ( ( codePoint - 0x10000 ) & 0x3ff )
280300 result += String . fromCharCode ( surrogate1 , surrogate2 )
301+ continue
281302 }
303+
304+ // Invalid UTF-8 leading byte (matches none of the ranges above): currently dropped silently, since the throw below is disabled
305+ // throw new Error(`Invalid UTF-8 leading byte: 0x${byte.toString(16)}`)
282306 }
283307
284308 return result
0 commit comments