Skip to content

Commit 89b2f56

Browse files
authored
Merge pull request #4422 from JackStouffer/patch-11
Improve the documentation for std.utf.byUTF
2 parents 27c77d9 + 1c8c694 commit 89b2f56

File tree

1 file changed

+30
-14
lines changed

1 file changed

+30
-14
lines changed

std/utf.d

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3537,19 +3537,29 @@ int impureVariable;
35373537
}
35383538

35393539
/****************************
3540-
* Iterate an input range of characters by char type C.
3540+
* Iterate an input range of characters by char type `C` by
3541+
* encoding the elements of the range.
35413542
*
3542-
* UTF sequences that cannot be converted to UTF-8 are replaced by U+FFFD
3543-
* per "5.22 Best Practice for U+FFFD Substitution" of the Unicode Standard 6.2.
3544-
* Hence byUTF is not symmetric.
3543+
* UTF sequences that cannot be converted to the specified encoding are
3544+
* replaced by U+FFFD per "5.22 Best Practice for U+FFFD Substitution"
3545+
* of the Unicode Standard 6.2. Hence `byUTF` is not symmetric: re-encoding its output does not necessarily reproduce the original input.
35453546
* This algorithm is lazy, and does not allocate memory.
3546-
* Purity, nothrow, and safety are inferred from the r parameter.
3547+
* `@nogc`, `pure`-ity, `nothrow`, and `@safe`-ty are inferred from the
3548+
* `r` parameter.
35473549
*
35483550
* Params:
3549-
* C = char, wchar, or dchar
3551+
* C = `char`, `wchar`, or `dchar`
35503552
* r = input range of characters, or array of characters
35513553
* Returns:
3552-
* input range of type C
3554+
* A forward range if `r` is a range that is not auto-decodable, as defined by
3555+
* $(REF isAutodecodableString, std, traits), and if the base range is
3556+
* also a forward range.
3557+
*
3558+
* Or, if `r` is a range that is auto-decodable and
3559+
* `is(ElementEncodingType!(typeof(r)) == C)`, then the range is passed
3560+
* to $(LREF byCodeUnit).
3561+
*
3562+
* Otherwise, an input range of characters.
35533563
*/
35543564
template byUTF(C) if (isSomeChar!C)
35553565
{
@@ -3627,12 +3637,18 @@ template byUTF(C) if (isSomeChar!C)
36273637
}
36283638

36293639
///
3630-
@safe pure nothrow @nogc unittest
3640+
@safe pure nothrow unittest
36313641
{
3632-
foreach (c; "h".byUTF!char())
3633-
assert(c == 'h');
3634-
foreach (c; "h".byUTF!wchar())
3635-
assert(c == 'h');
3636-
foreach (c; "h".byUTF!dchar())
3637-
assert(c == 'h');
3642+
import std.algorithm.comparison : equal;
3643+
3644+
// hellö as a range of `char`s, which are UTF-8
3645+
assert("hell\u00F6".byUTF!char().equal(['h', 'e', 'l', 'l', 0xC3, 0xB6]));
3646+
3647+
// `wchar`s are able to hold the ö in a single element (UTF-16 code unit)
3648+
assert("hell\u00F6".byUTF!wchar().equal(['h', 'e', 'l', 'l', 'ö']));
3649+
3650+
// 𐐷 is four code units in UTF-8, two in UTF-16, and one in UTF-32
3651+
assert("𐐷".byUTF!char().equal([0xF0, 0x90, 0x90, 0xB7])); // four UTF-8 code units
3652+
assert("𐐷".byUTF!wchar().equal([0xD801, 0xDC37])); // UTF-16 surrogate pair
3653+
assert("𐐷".byUTF!dchar().equal([0x00010437])); // one UTF-32 code unit
36383654
}

0 commit comments

Comments
 (0)