Skip to content

Commit e357050

Browse files
authored
Merge pull request #4796 from JackStouffer/toUTF16
Make std.utf.toUTF16 DRY by using byUTF internally
2 parents ace0b93 + 3498fc1 commit e357050

File tree

1 file changed

+51
-57
lines changed

1 file changed

+51
-57
lines changed

std/utf.d

Lines changed: 51 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -2471,23 +2471,7 @@ char[] toUTF8(return out char[4] buf, dchar c) nothrow @nogc @safe pure
24712471
*/
24722472
string toUTF8(S)(S s) if (isInputRange!S && isSomeChar!(ElementEncodingType!S))
24732473
{
2474-
static if (is(S : string))
2475-
{
2476-
return s.idup;
2477-
}
2478-
else
2479-
{
2480-
import std.array : appender;
2481-
auto app = appender!string();
2482-
2483-
static if (hasLength!S || isSomeString!S)
2484-
app.reserve(s.length);
2485-
2486-
foreach (c; s.byUTF!char)
2487-
app.put(c);
2488-
2489-
return app.data;
2490-
}
2474+
return toUTFImpl!string(s);
24912475
}
24922476

24932477
///
@@ -2537,58 +2521,47 @@ body
25372521
}
25382522
}
25392523

2540-
/****************
2541-
* Encodes string $(D s) into UTF-16 and returns the encoded string.
2524+
/**
2525+
* Encodes the elements of `s` to UTF-16 and returns a newly GC-allocated
2526+
* `wstring` of the elements.
2527+
*
2528+
* Params:
2529+
* s = the range to encode
2530+
* Returns:
2531+
* A UTF-16 string
2532+
* See_Also:
2533+
* For a lazy, non-allocating version of these functions, see $(LREF byUTF).
25422534
*/
2543-
wstring toUTF16(scope const char[] s) @safe pure
2535+
wstring toUTF16(S)(S s) if (isInputRange!S && isSomeChar!(ElementEncodingType!S))
25442536
{
2545-
wchar[] r;
2546-
immutable slen = s.length;
2547-
2548-
r.length = slen;
2549-
r.length = 0;
2550-
for (size_t i = 0; i < slen; )
2551-
{
2552-
dchar c = s[i];
2553-
if (c <= 0x7F)
2554-
{
2555-
i++;
2556-
r ~= cast(wchar)c;
2557-
}
2558-
else
2559-
{
2560-
c = decode(s, i);
2561-
encode(r, c);
2562-
}
2563-
}
2564-
2565-
return r;
2537+
return toUTFImpl!wstring(s);
25662538
}
25672539

2568-
/// ditto
2569-
wstring toUTF16(scope const wchar[] s) @safe pure
2540+
///
2541+
@safe pure unittest
25702542
{
2571-
validate(s);
2572-
return s.idup;
2543+
import std.algorithm.comparison : equal;
2544+
2545+
// these code points take two code units in UTF-16 and one in UTF-32
2546+
assert("𤭢"d.length == 1);
2547+
assert("𐐷"d.length == 1);
2548+
2549+
assert("𤭢"d.toUTF16.equal([0xD852, 0xDF62]));
2550+
assert("𐐷"d.toUTF16.equal([0xD801, 0xDC37]));
25732551
}
25742552

2575-
/// ditto
2576-
wstring toUTF16(scope const dchar[] s) @safe pure
2553+
@system pure unittest
25772554
{
2578-
wchar[] r;
2579-
immutable slen = s.length;
2555+
import std.internal.test.dummyrange : ReferenceInputRange;
2556+
import std.algorithm.comparison : equal;
25802557

2581-
r.length = slen;
2582-
r.length = 0;
2583-
for (size_t i = 0; i < slen; i++)
2584-
{
2585-
encode(r, s[i]);
2586-
}
2558+
auto r1 = new ReferenceInputRange!dchar("𤭢");
2559+
auto r2 = new ReferenceInputRange!dchar("𐐷");
25872560

2588-
return r;
2561+
assert(r1.toUTF16.equal([0xD852, 0xDF62]));
2562+
assert(r2.toUTF16.equal([0xD801, 0xDC37]));
25892563
}
25902564

2591-
25922565
/* =================== Conversion to UTF32 ======================= */
25932566

25942567
/*****
@@ -2642,6 +2615,27 @@ dstring toUTF32(scope const dchar[] s) @safe pure
26422615
return s.idup;
26432616
}
26442617

2618+
private T toUTFImpl(T, S)(S s)
2619+
{
2620+
static if (is(S : T))
2621+
{
2622+
return s.idup;
2623+
}
2624+
else
2625+
{
2626+
import std.array : appender;
2627+
auto app = appender!T();
2628+
2629+
static if (hasLength!S || isSomeString!S)
2630+
app.reserve(s.length);
2631+
2632+
foreach (c; s.byUTF!(Unqual!(ElementEncodingType!T)))
2633+
app.put(c);
2634+
2635+
return app.data;
2636+
}
2637+
}
2638+
26452639
/* =================== toUTFz ======================= */
26462640

26472641
/++

0 commit comments

Comments
 (0)