@@ -2471,23 +2471,7 @@ char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe pure
24712471 */
string toUTF8(S)(S s)
if (isInputRange!S && isSomeChar!(ElementEncodingType!S))
{
    // All of the work is done by the shared helper, instantiated for `string`.
    return toUTFImpl!(string, S)(s);
}
24922476
24932477// /
@@ -2537,58 +2521,47 @@ body
25372521 }
25382522}
25392523
/**
 * Eagerly converts a range of characters to UTF-16.
 *
 * The elements of `s` are re-encoded as UTF-16 code units and collected into a
 * newly GC-allocated `wstring`.
 *
 * Params:
 *     s = the range of characters to encode
 * Returns:
 *     A `wstring` holding the UTF-16 encoding of `s`
 * See_Also:
 *     $(LREF byUTF) for a lazy, non-allocating alternative.
 */
wstring toUTF16(S)(S s)
if (isInputRange!S && isSomeChar!(ElementEncodingType!S))
{
    // All of the work is done by the shared helper, instantiated for `wstring`.
    return toUTFImpl!(wstring, S)(s);
}
25672539
///
@safe pure unittest
{
    import std.algorithm.comparison : equal;

    // each of these characters is a single code unit in UTF-32
    // but needs two code units in UTF-16
    dstring first = "𤭢"d;
    dstring second = "𐐷"d;

    assert(first.length == 1);
    assert(second.length == 1);

    assert(equal(first.toUTF16, [0xD852, 0xDF62]));
    assert(equal(second.toUTF16, [0xD801, 0xDC37]));
}
25742552
@system pure unittest
{
    import std.algorithm.comparison : equal;
    import std.internal.test.dummyrange : ReferenceInputRange;

    // toUTF16 must also accept a non-array input range of dchar.
    alias DcharRange = ReferenceInputRange!dchar;

    auto first = new DcharRange("𤭢");
    auto second = new DcharRange("𐐷");

    assert(equal(first.toUTF16, [0xD852, 0xDF62]));
    assert(equal(second.toUTF16, [0xD801, 0xDC37]));
}
25902564
2591-
25922565/* =================== Conversion to UTF32 ======================= */
25932566
25942567/* ****
@@ -2642,6 +2615,27 @@ dstring toUTF32(scope const dchar[] s) @safe pure
26422615 return s.idup;
26432616}
26442617
/*
 * Shared implementation behind the eager `toUTF*` conversion functions.
 *
 * Params:
 *     T = the string type to produce (e.g. `string` or `wstring`)
 *     S = the type of the source, a finite input range of characters
 *     s = the characters to encode
 * Returns:
 *     A `T` holding the elements of `s`. When `S` implicitly converts to `T`
 *     the result is `s.idup`; otherwise the elements are passed through
 *     `byUTF` and collected with an appender.
 */
private T toUTFImpl(T, S)(S s)
{
    import std.range.primitives : isInfinite;

    // The result is built eagerly, so an infinite source could never finish:
    // reject it at compile time instead of appending until memory runs out.
    static assert(!isInfinite!S,
        "Cannot eagerly encode the infinite range " ~ S.stringof
        ~ " to " ~ T.stringof);

    static if (is(S : T))
    {
        // Already the requested string type: a plain copy is enough.
        return s.idup;
    }
    else
    {
        import std.array : appender;

        // Mutable code unit type of the target string (e.g. `wchar`).
        alias CodeUnit = Unqual!(ElementEncodingType!T);

        auto app = appender!T();

        // Pre-size the buffer when the source length is known. This is only a
        // hint: the number of code units may differ between encodings.
        static if (hasLength!S || isSomeString!S)
            app.reserve(s.length);

        foreach (c; s.byUTF!CodeUnit)
            app.put(c);

        return app.data;
    }
}
2638+
26452639/* =================== toUTFz ======================= */
26462640
26472641/+ +
0 commit comments