@@ -303,12 +303,11 @@ escape_unicode(PyObject *pystr)
303303 return rval ;
304304}
305305
306- #define ESCAPE_BUF_SIZE 200
307-
308306// Take a PyUnicode pystr and write an escaped string to writer. (ensure_ascii)
309307static int
310308write_escaped_ascii (PyUnicodeWriter * writer , PyObject * pystr )
311309{
310+ #define ESCAPE_BUF_SIZE 200
312311 Py_ssize_t i ;
313312 Py_ssize_t input_chars ;
314313 Py_ssize_t buf_len ;
@@ -367,60 +366,74 @@ static int
367366write_escaped_unicode (PyUnicodeWriter * writer , PyObject * pystr )
368367{
369368 Py_ssize_t i ;
370- Py_ssize_t input_size ;
371- Py_ssize_t buf_len ;
372- const unsigned char * input ;
369+ Py_ssize_t input_chars ;
370+ Py_ssize_t chars = 0 ;
371+ const void * input ;
372+ int kind ;
373373 int ret ;
374- unsigned char c = 0 ;
375- char buf [ESCAPE_BUF_SIZE ];
374+ Py_UCS4 output [ESCAPE_BUF_SIZE ];
376375
377- // We don't need to escape non-ASCII chars.
378- // So we just copy UTF-8 from pystr to buf.
379- input = ( const unsigned char * ) PyUnicode_AsUTF8AndSize ( pystr , & input_size );
376+ input_chars = PyUnicode_GET_LENGTH ( pystr );
377+ input = PyUnicode_DATA ( pystr );
378+ kind = PyUnicode_KIND ( pystr );
380379
381380 ret = PyUnicodeWriter_WriteChar (writer , '"' );
382381 if (ret ) return ret ;
383382
384383 // Fast path for strings that don't need escaping at all: e.g. "id", "name"
385- for (i = 0 ; i < input_size ; i ++ ) {
386- c = input [ i ] ;
384+ for (i = 0 ; i < input_chars ; i ++ ) {
385+ Py_UCS4 c = PyUnicode_READ ( kind , input , i ) ;
387386 if (c <= 0x1f || c == '\\' || c == '"' ) {
388387 break ;
389388 }
390389 }
391390 if (i > 0 ) {
392- ret = PyUnicodeWriter_WriteUTF8 (writer , ( const char * ) input , i );
391+ ret = PyUnicodeWriter_WriteSubstring (writer , pystr , 0 , i );
393392 if (ret ) return ret ;
394393 }
395- if (i == input_size ) {
394+ if (i == input_chars ) {
396395 return PyUnicodeWriter_WriteChar (writer , '"' );
397396 }
398397
399- buf_len = ascii_escape_unichar (c , (unsigned char * )buf , 0 );
398+ for (; i < input_chars ; i ++ ) {
399+ Py_UCS4 c = PyUnicode_READ (kind , input , i );
400400
401- for (i ++ ; i < input_size ; i ++ ) {
402- c = input [i ];
403- if (c <= 0x1f || c == '\\' || c == '"' ) {
404- buf_len = ascii_escape_unichar (c , (unsigned char * )buf , buf_len );
405- }
406- else {
407- buf [buf_len ++ ] = c ;
401+ // Same as ENCODE_OUTPUT in escape_unicode
402+ switch (c ) {
403+ case '\\' : output [chars ++ ] = '\\' ; output [chars ++ ] = c ; break ;
404+ case '"' : output [chars ++ ] = '\\' ; output [chars ++ ] = c ; break ;
405+ case '\b' : output [chars ++ ] = '\\' ; output [chars ++ ] = 'b' ; break ;
406+ case '\f' : output [chars ++ ] = '\\' ; output [chars ++ ] = 'f' ; break ;
407+ case '\n' : output [chars ++ ] = '\\' ; output [chars ++ ] = 'n' ; break ;
408+ case '\r' : output [chars ++ ] = '\\' ; output [chars ++ ] = 'r' ; break ;
409+ case '\t' : output [chars ++ ] = '\\' ; output [chars ++ ] = 't' ; break ;
410+ default :
411+ if (c <= 0x1f ) {
412+ output [chars ++ ] = '\\' ;
413+ output [chars ++ ] = 'u' ;
414+ output [chars ++ ] = '0' ;
415+ output [chars ++ ] = '0' ;
416+ output [chars ++ ] = Py_hexdigits [(c >> 4 ) & 0xf ];
417+ output [chars ++ ] = Py_hexdigits [(c ) & 0xf ];
418+ } else {
419+ output [chars ++ ] = c ;
420+ }
408421 }
409422
410- if (buf_len + 6 > ESCAPE_BUF_SIZE ) {
411- ret = PyUnicodeWriter_WriteUTF8 (writer , buf , buf_len );
423+ if (chars + 6 > ESCAPE_BUF_SIZE ) {
424+ ret = PyUnicodeWriter_WriteUCS4 (writer , output , chars );
412425 if (ret ) return ret ;
413- buf_len = 0 ;
426+ chars = 0 ;
414427 }
415428 }
416429
417- assert (buf_len < ESCAPE_BUF_SIZE );
418- buf [ buf_len ++ ] = '"' ;
419- return PyUnicodeWriter_WriteUTF8 (writer , buf , buf_len );
430+ assert (chars < ESCAPE_BUF_SIZE );
431+ output [ chars ++ ] = '"' ;
432+ return PyUnicodeWriter_WriteUCS4 (writer , output , chars );
420433}
421-
422434#undef ESCAPE_BUF_SIZE
423435
436+
424437static void
425438raise_errmsg (const char * msg , PyObject * s , Py_ssize_t end )
426439{
0 commit comments