@@ -3537,19 +3537,29 @@ int impureVariable;
35373537}
35383538
35393539/* ***************************
3540- * Iterate an input range of characters by char type C.
3540+ * Iterate an input range of characters by char type `C` by
3541+ * encoding the elements of the range.
35413542 *
3542- * UTF sequences that cannot be converted to UTF-8 are replaced by U+FFFD
3543- * per "5.22 Best Practice for U+FFFD Substitution" of the Unicode Standard 6.2.
3544- * Hence byUTF is not symmetric.
3543+ * UTF sequences that cannot be converted to the specified encoding are
3544+ * replaced by U+FFFD per "5.22 Best Practice for U+FFFD Substitution"
3545+ * of the Unicode Standard 6.2. Hence byUTF is not symmetric.
35453546 * This algorithm is lazy, and does not allocate memory.
3546- * Purity, nothrow, and safety are inferred from the r parameter.
3547+ * `@nogc`, `pure`-ity, `nothrow`, and `@safe`-ty are inferred from the
3548+ * `r` parameter.
35473549 *
35483550 * Params:
3549- * C = char, wchar, or dchar
3551+ * C = `char`, `wchar`, or `dchar`
35503552 * r = input range of characters, or array of characters
35513553 * Returns:
3552- * input range of type C
3554+ * A forward range if `r` is a range and not auto-decodable, as defined by
3555+ * $(REF isAutodecodableString, std, traits), and if the base range is
3556+ * also a forward range.
3557+ *
3558+ * Or, if `r` is a range and it is auto-decodable and
3559+ * `is(ElementEncodingType!typeof(r) == C)`, then the range is passed
3560+ * to $(LREF byCodeUnit).
3561+ *
3562+ * Otherwise, an input range of characters.
35533563 */
35543564template byUTF (C) if (isSomeChar! C)
35553565{
@@ -3627,12 +3637,18 @@ template byUTF(C) if (isSomeChar!C)
36273637}
36283638
36293639///
3630- @safe pure nothrow @nogc unittest
3640+ @safe pure nothrow unittest
36313641{
3632- foreach (c; " h" .byUTF! char ())
3633- assert (c == ' h' );
3634- foreach (c; " h" .byUTF! wchar ())
3635- assert (c == ' h' );
3636- foreach (c; " h" .byUTF! dchar ())
3637- assert (c == ' h' );
3642+ import std.algorithm.comparison : equal;
3643+
3644+ // hellö as a range of `char`s, which are UTF-8 code units (ö takes two)
3645+ assert("hell\u00F6".byUTF!char().equal(['h', 'e', 'l', 'l', 0xC3, 0xB6]));
3646+
3647+ // `wchar`s are able to hold the ö in a single element (UTF-16 code unit)
3648+ assert("hell\u00F6".byUTF!wchar().equal(['h', 'e', 'l', 'l', 'ö']));
3649+
3650+ // 𐐷 is four code units in UTF-8, two in UTF-16, and one in UTF-32
3651+ assert("𐐷".byUTF!char().equal([0xF0, 0x90, 0x90, 0xB7]));
3652+ assert("𐐷".byUTF!wchar().equal([0xD801, 0xDC37]));
3653+ assert("𐐷".byUTF!dchar().equal([0x00010437]));
36383654}
0 commit comments