@@ -3490,19 +3490,29 @@ int impureVariable;
34903490}
34913491
34923492/* ***************************
3493- * Iterate an input range of characters by char type C.
3493+ * Iterate an input range of characters by char type `C` by
3494+ * encoding the elements of the range.
34943495 *
3495- * UTF sequences that cannot be converted to UTF-8 are replaced by U+FFFD
3496- * per "5.22 Best Practice for U+FFFD Substitution" of the Unicode Standard 6.2.
3497- * Hence byUTF is not symmetric.
3496+ * UTF sequences that cannot be converted to the specified encoding are
3497+ * replaced by U+FFFD per "5.22 Best Practice for U+FFFD Substitution"
3498+ * of the Unicode Standard 6.2. Hence byUTF is not symmetric.
34983499 * This algorithm is lazy, and does not allocate memory.
3499- * Purity, nothrow, and safety are inferred from the r parameter.
3500+ * The `@nogc`, `pure`, `nothrow`, and `@safe` attributes are inferred from the
3501+ * `r` parameter.
35003502 *
35013503 * Params:
3502- * C = char, wchar, or dchar
3504+ * C = `char`, `wchar`, or `dchar`
35033505 * r = input range of characters, or array of characters
35043506 * Returns:
3505- * input range of type C
3507+ * A forward range if `r` is a range and not auto-decodable, as defined by
3508+ * $(REF isAutodecodableString, std, traits), and if the base range is
3509+ * also a forward range.
3510+ *
3511+ * Or, if `r` is a range and it is auto-decodable and
3512+ * `is(ElementEncodingType!(typeof(r)) == C)`, then the range is passed
3513+ * to $(LREF byCodeUnit).
3514+ *
3515+ * Otherwise, an input range of characters of type `C`.
35063516 */
35073517template byUTF (C) if (isSomeChar! C)
35083518{
@@ -3580,12 +3590,18 @@ template byUTF(C) if (isSomeChar!C)
35803590}
35813591
35823592///
3583- @safe pure nothrow @nogc unittest
3584- {
3585- foreach (c; " h" .byUTF! char ())
3586- assert (c == ' h' );
3587- foreach (c; " h" .byUTF! wchar ())
3588- assert (c == ' h' );
3589- foreach (c; " h" .byUTF! dchar ())
3590- assert (c == ' h' );
3593+ @safe pure nothrow unittest
3594+ {
3595+     import std.algorithm.comparison : equal;
3596+
3597+     // hellö as a range of `char`s (UTF-8): ö is encoded as the two code units 0xC3 0xB6
3598+     assert("hell\u00F6".byUTF!char().equal(['h', 'e', 'l', 'l', 0xC3, 0xB6]));
3599+
3600+     // `wchar`s are able to hold the ö in a single element (UTF-16 code unit)
3601+     assert("hell\u00F6".byUTF!wchar().equal(['h', 'e', 'l', 'l', 'ö']));
3602+
3603+     // 𐐷 is four code units in UTF-8, two in UTF-16, and one in UTF-32
3604+     assert("𐐷".byUTF!char().equal([0xF0, 0x90, 0x90, 0xB7]));
3605+     assert("𐐷".byUTF!wchar().equal([0xD801, 0xDC37]));
3606+     assert("𐐷".byUTF!dchar().equal([0x00010437]));
35913607}
0 commit comments