@@ -2418,11 +2418,7 @@ void validate(S)(in S str) @safe pure
24182418}
24192419
24202420/* =================== Conversion to UTF8 ======================= */
2421-
2422- pure
2423- {
2424-
2425- char [] toUTF8 (return out char [4 ] buf, dchar c) nothrow @nogc @safe
2421+ char [] toUTF8 (return out char [4 ] buf, dchar c) nothrow @nogc @safe pure
24262422{
24272423 if (c <= 0x7F )
24282424 {
@@ -2462,73 +2458,66 @@ char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe
24622458 }
24632459}
24642460
2465- /* ******************
2466- * Encodes string $(D_PARAM s) into UTF-8 and returns the encoded string.
2461+ /**
2462+ * Encodes the elements of `s` to UTF-8 and returns a newly allocated
2463+ * string of the elements.
2464+ *
2465+ * Params:
2466+ * s = the string to encode
2467+ * Returns:
2468+ * A UTF-8 string
2469+ * See_Also:
2470+ * For a lazy, non-allocating version of these functions, see $(LREF byUTF).
24672471 */
2468- string toUTF8 (scope const char [] s) @safe
2472+ string toUTF8 (S)(S s) if (isInputRange ! S && isSomeChar ! (ElementEncodingType ! S))
24692473{
2470- validate(s);
2471- return s.idup;
2472- }
2474+ static if (is (S : string ))
2475+ {
2476+ return s.idup;
2477+ }
2478+ else
2479+ {
2480+ import std.array : appender;
2481+ auto app = appender! string ();
24732482
2474- // / ditto
2475- string toUTF8 (scope const wchar [] s) @safe
2476- {
2477- char [] r;
2478- size_t i;
2479- immutable slen = s.length;
2483+ static if (hasLength! S || isSomeString! S)
2484+ app.reserve (s.length);
24802485
2481- r.length = slen;
2482- for (i = 0 ; i < slen; i++ )
2483- {
2484- immutable c = s[i];
2486+ foreach (c; s.byUTF! char )
2487+ app.put(c);
24852488
2486- if (c <= 0x7F )
2487- r[i] = cast (char )c; // fast path for ascii
2488- else
2489- {
2490- r.length = i;
2491- while (i < slen)
2492- encode(r, decode (s, i));
2493- break ;
2494- }
2489+ return app.data;
24952490 }
2496-
2497- return r;
24982491}
24992492
2500- // / ditto
2501- string toUTF8 ( scope const dchar [] s) @safe
2493+ // /
2494+ @safe pure unittest
25022495{
2503- char [] r;
2504- size_t i;
2505- immutable slen = s.length;
2496+ import std.algorithm.comparison : equal;
25062497
2507- r.length = slen;
2508- for (i = 0 ; i < slen; i++ )
2509- {
2510- immutable c = s[i];
2498+ // The ø is represented by two UTF-8 code units
2499+ assert (" Hellø" w.toUTF8.equal([' H' , ' e' , ' l' , ' l' , 0xC3 , 0xB8 ]));
25112500
2512- if (c <= 0x7F )
2513- r[i] = cast (char )c; // fast path for ascii
2514- else
2515- {
2516- r.length = i;
2517- foreach (dchar d; s[i .. slen])
2518- {
2519- encode(r, d);
2520- }
2521- break ;
2522- }
2523- }
2501+ // 𐐷 is four code units in UTF-8
2502+ assert (" 𐐷" d.toUTF8.equal([0xF0 , 0x90 , 0x90 , 0xB7 ]));
2503+ }
25242504
2525- return r;
2505+ @system pure unittest
2506+ {
2507+ import std.internal.test.dummyrange : ReferenceInputRange;
2508+ import std.algorithm.comparison : equal;
2509+
2510+ auto r1 = new ReferenceInputRange! dchar (" Hellø" );
2511+ auto r2 = new ReferenceInputRange! dchar (" 𐐷" );
2512+
2513+ assert (r1.toUTF8.equal([' H' , ' e' , ' l' , ' l' , 0xC3 , 0xB8 ]));
2514+ assert (r2.toUTF8.equal([0xF0 , 0x90 , 0x90 , 0xB7 ]));
25262515}
25272516
25282517
25292518/* =================== Conversion to UTF16 ======================= */
25302519
2531- wchar [] toUTF16 (return ref wchar [2 ] buf, dchar c) nothrow @nogc @safe
2520+ wchar [] toUTF16 (return ref wchar [2 ] buf, dchar c) nothrow @nogc @safe pure
25322521in
25332522{
25342523 assert (isValidDchar(c));
@@ -2551,7 +2540,7 @@ body
25512540/* ***************
25522541 * Encodes string $(D s) into UTF-16 and returns the encoded string.
25532542 */
2554- wstring toUTF16 (scope const char [] s) @safe
2543+ wstring toUTF16 (scope const char [] s) @safe pure
25552544{
25562545 wchar [] r;
25572546 immutable slen = s.length;
@@ -2577,14 +2566,14 @@ wstring toUTF16(scope const char[] s) @safe
25772566}
25782567
25792568// / ditto
2580- wstring toUTF16 (scope const wchar [] s) @safe
2569+ wstring toUTF16 (scope const wchar [] s) @safe pure
25812570{
25822571 validate(s);
25832572 return s.idup;
25842573}
25852574
25862575// / ditto
2587- wstring toUTF16 (scope const dchar [] s) @safe
2576+ wstring toUTF16 (scope const dchar [] s) @safe pure
25882577{
25892578 wchar [] r;
25902579 immutable slen = s.length;
@@ -2605,7 +2594,7 @@ wstring toUTF16(scope const dchar[] s) @safe
26052594/* ****
26062595 * Encodes string $(D_PARAM s) into UTF-32 and returns the encoded string.
26072596 */
2608- dstring toUTF32 (scope const char [] s) @safe
2597+ dstring toUTF32 (scope const char [] s) @safe pure
26092598{
26102599 dchar [] r;
26112600 immutable slen = s.length;
@@ -2626,7 +2615,7 @@ dstring toUTF32(scope const char[] s) @safe
26262615}
26272616
26282617// / ditto
2629- dstring toUTF32 (scope const wchar [] s) @safe
2618+ dstring toUTF32 (scope const wchar [] s) @safe pure
26302619{
26312620 dchar [] r;
26322621 immutable slen = s.length;
@@ -2647,15 +2636,12 @@ dstring toUTF32(scope const wchar[] s) @safe
26472636}
26482637
26492638// / ditto
2650- dstring toUTF32 (scope const dchar [] s) @safe
2639+ dstring toUTF32 (scope const dchar [] s) @safe pure
26512640{
26522641 validate(s);
26532642 return s.idup;
26542643}
26552644
2656- } // Convert functions are @safe
2657-
2658-
26592645/* =================== toUTFz ======================= */
26602646
26612647/+ +
0 commit comments