Skip to content

Commit 7d33a6c

Browse files
author
Hana Dusíková
committed
fix ascii case-insensitive range matching
1 parent 26b2579 commit 7d33a6c

File tree

4 files changed

+64
-9
lines changed

4 files changed

+64
-9
lines changed

include/ctre/atoms_characters.hpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,14 @@ template <typename T> constexpr CTRE_FORCE_INLINE bool is_ascii_alpha(T v) {
2020
return ((v >= static_cast<T>('a') && v <= static_cast<T>('z')) || (v >= static_cast<T>('A') && v <= static_cast<T>('Z')));
2121
}
2222

23+
// Tells whether `v` is an ASCII lowercase letter, i.e. lies in the inclusive range 'a'..'z'.
// Both bounds are converted to T first, so the comparison is carried out in the caller's character type.
template <typename T> constexpr CTRE_FORCE_INLINE bool is_ascii_alpha_lowercase(T v) {
	const T lowest = static_cast<T>('a');
	const T highest = static_cast<T>('z');
	return (v >= lowest) && (v <= highest);
}
26+
27+
// Tells whether `v` is an ASCII uppercase letter, i.e. lies in the inclusive range 'A'..'Z'.
// Both bounds are converted to T first, so the comparison is carried out in the caller's character type.
template <typename T> constexpr CTRE_FORCE_INLINE bool is_ascii_alpha_uppercase(T v) {
	const T lowest = static_cast<T>('A');
	const T highest = static_cast<T>('Z');
	return (v >= lowest) && (v <= highest);
}
30+
2331
template <auto V> struct character {
2432
template <typename CharT> CTRE_FORCE_INLINE static constexpr bool match_char(CharT value, const flags & f) noexcept {
2533
if constexpr (is_ascii_alpha(V)) {
@@ -55,17 +63,22 @@ template <typename... Content> struct negate {
5563

5664
template <auto A, auto B> struct char_range {
5765
template <typename CharT> CTRE_FORCE_INLINE static constexpr bool match_char(CharT value, const flags & f) noexcept {
58-
if constexpr (is_ascii_alpha(A) && is_ascii_alpha(B)) {
66+
if constexpr (is_ascii_alpha_lowercase(A) && is_ascii_alpha_lowercase(B)) {
5967
if (is_case_insensitive(f)) {
60-
if (value >= (A ^ static_cast<decltype(A)>(0x20)) && value <= (A ^ static_cast<decltype(B)>(0x20))) {
68+
if (value >= (A ^ static_cast<decltype(A)>(0x20)) && value <= (B ^ static_cast<decltype(B)>(0x20))) {
69+
return true;//
70+
}
71+
}
72+
} else if constexpr (is_ascii_alpha_uppercase(A) && is_ascii_alpha_uppercase(B)) {
73+
if (is_case_insensitive(f)) {
74+
if (value >= (A ^ static_cast<decltype(A)>(0x20)) && value <= (B ^ static_cast<decltype(B)>(0x20))) {
6175
return true;//
6276
}
6377
}
6478
}
6579
return (value >= A) && (value <= B);
6680
}
6781
};
68-
6982
using word_chars = set<char_range<'A','Z'>, char_range<'a','z'>, char_range<'0','9'>, character<'_'> >;
7083

7184
using space_chars = enumeration<' ', '\t', '\n', '\v', '\f', '\r'>;

single-header/ctre-unicode.hpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1479,6 +1479,14 @@ template <typename T> constexpr CTRE_FORCE_INLINE bool is_ascii_alpha(T v) {
14791479
return ((v >= static_cast<T>('a') && v <= static_cast<T>('z')) || (v >= static_cast<T>('A') && v <= static_cast<T>('Z')));
14801480
}
14811481

1482+
// Tells whether `v` is an ASCII lowercase letter, i.e. lies in the inclusive range 'a'..'z'.
// Both bounds are converted to T first, so the comparison is carried out in the caller's character type.
template <typename T> constexpr CTRE_FORCE_INLINE bool is_ascii_alpha_lowercase(T v) {
	const T lowest = static_cast<T>('a');
	const T highest = static_cast<T>('z');
	return (v >= lowest) && (v <= highest);
}
1485+
1486+
// Tells whether `v` is an ASCII uppercase letter, i.e. lies in the inclusive range 'A'..'Z'.
// Both bounds are converted to T first, so the comparison is carried out in the caller's character type.
template <typename T> constexpr CTRE_FORCE_INLINE bool is_ascii_alpha_uppercase(T v) {
	const T lowest = static_cast<T>('A');
	const T highest = static_cast<T>('Z');
	return (v >= lowest) && (v <= highest);
}
1489+
14821490
template <auto V> struct character {
14831491
template <typename CharT> CTRE_FORCE_INLINE static constexpr bool match_char(CharT value, const flags & f) noexcept {
14841492
if constexpr (is_ascii_alpha(V)) {
@@ -1514,17 +1522,22 @@ template <typename... Content> struct negate {
15141522

15151523
template <auto A, auto B> struct char_range {
15161524
template <typename CharT> CTRE_FORCE_INLINE static constexpr bool match_char(CharT value, const flags & f) noexcept {
1517-
if constexpr (is_ascii_alpha(A) && is_ascii_alpha(B)) {
1525+
if constexpr (is_ascii_alpha_lowercase(A) && is_ascii_alpha_lowercase(B)) {
15181526
if (is_case_insensitive(f)) {
1519-
if (value >= (A ^ static_cast<decltype(A)>(0x20)) && value <= (A ^ static_cast<decltype(B)>(0x20))) {
1527+
if (value >= (A ^ static_cast<decltype(A)>(0x20)) && value <= (B ^ static_cast<decltype(B)>(0x20))) {
1528+
return true;//
1529+
}
1530+
}
1531+
} else if constexpr (is_ascii_alpha_uppercase(A) && is_ascii_alpha_uppercase(B)) {
1532+
if (is_case_insensitive(f)) {
1533+
if (value >= (A ^ static_cast<decltype(A)>(0x20)) && value <= (B ^ static_cast<decltype(B)>(0x20))) {
15201534
return true;//
15211535
}
15221536
}
15231537
}
15241538
return (value >= A) && (value <= B);
15251539
}
15261540
};
1527-
15281541
using word_chars = set<char_range<'A','Z'>, char_range<'a','z'>, char_range<'0','9'>, character<'_'> >;
15291542

15301543
using space_chars = enumeration<' ', '\t', '\n', '\v', '\f', '\r'>;

single-header/ctre.hpp

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1476,6 +1476,14 @@ template <typename T> constexpr CTRE_FORCE_INLINE bool is_ascii_alpha(T v) {
14761476
return ((v >= static_cast<T>('a') && v <= static_cast<T>('z')) || (v >= static_cast<T>('A') && v <= static_cast<T>('Z')));
14771477
}
14781478

1479+
// Tells whether `v` is an ASCII lowercase letter, i.e. lies in the inclusive range 'a'..'z'.
// Both bounds are converted to T first, so the comparison is carried out in the caller's character type.
template <typename T> constexpr CTRE_FORCE_INLINE bool is_ascii_alpha_lowercase(T v) {
	const T lowest = static_cast<T>('a');
	const T highest = static_cast<T>('z');
	return (v >= lowest) && (v <= highest);
}
1482+
1483+
// Tells whether `v` is an ASCII uppercase letter, i.e. lies in the inclusive range 'A'..'Z'.
// Both bounds are converted to T first, so the comparison is carried out in the caller's character type.
template <typename T> constexpr CTRE_FORCE_INLINE bool is_ascii_alpha_uppercase(T v) {
	const T lowest = static_cast<T>('A');
	const T highest = static_cast<T>('Z');
	return (v >= lowest) && (v <= highest);
}
1486+
14791487
template <auto V> struct character {
14801488
template <typename CharT> CTRE_FORCE_INLINE static constexpr bool match_char(CharT value, const flags & f) noexcept {
14811489
if constexpr (is_ascii_alpha(V)) {
@@ -1511,17 +1519,22 @@ template <typename... Content> struct negate {
15111519

15121520
template <auto A, auto B> struct char_range {
15131521
template <typename CharT> CTRE_FORCE_INLINE static constexpr bool match_char(CharT value, const flags & f) noexcept {
1514-
if constexpr (is_ascii_alpha(A) && is_ascii_alpha(B)) {
1522+
if constexpr (is_ascii_alpha_lowercase(A) && is_ascii_alpha_lowercase(B)) {
15151523
if (is_case_insensitive(f)) {
1516-
if (value >= (A ^ static_cast<decltype(A)>(0x20)) && value <= (A ^ static_cast<decltype(B)>(0x20))) {
1524+
if (value >= (A ^ static_cast<decltype(A)>(0x20)) && value <= (B ^ static_cast<decltype(B)>(0x20))) {
1525+
return true;//
1526+
}
1527+
}
1528+
} else if constexpr (is_ascii_alpha_uppercase(A) && is_ascii_alpha_uppercase(B)) {
1529+
if (is_case_insensitive(f)) {
1530+
if (value >= (A ^ static_cast<decltype(A)>(0x20)) && value <= (B ^ static_cast<decltype(B)>(0x20))) {
15171531
return true;//
15181532
}
15191533
}
15201534
}
15211535
return (value >= A) && (value <= B);
15221536
}
15231537
};
1524-
15251538
using word_chars = set<char_range<'A','Z'>, char_range<'a','z'>, char_range<'0','9'>, character<'_'> >;
15261539

15271540
using space_chars = enumeration<' ', '\t', '\n', '\v', '\f', '\r'>;

tests/ci.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,21 @@
2020
// Case-sensitive literal matching.
TEST_MATCH("aloha","aloha", true);
TEST_MATCH("aloha","ALOHA", false);

// Case-sensitive range matching: a lowercase range must not accept uppercase input.
TEST_MATCH("[a-z]+","aloha", true);
TEST_MATCH("[a-z]+","ALOHA", false);

// Case-insensitive literal matching.
TEST_CI_MATCH("aloha", "aloha", true);
TEST_CI_MATCH("aloha", "ALOHA", true);

// Case-insensitive lowercase range accepts both cases...
TEST_CI_MATCH("[a-z]+", "aloha", true);
TEST_CI_MATCH("[a-z]+", "ALOHA", true);

// ...and an uppercase range must behave symmetrically.
TEST_CI_MATCH("[A-Z]+", "ALOHA", true);
TEST_CI_MATCH("[A-Z]+", "aloha", true);

// Non-letters never fall into a letter range, even case-insensitively.
TEST_CI_MATCH("[a-z]+", "9", false);
TEST_CI_MATCH("[a-z]+", " ", false);

// 'a' (0x61) sorts after 'Z' (0x5A), so [a-Z] is an empty range and must match nothing.
TEST_CI_MATCH("[a-Z]+", "9", false);
TEST_CI_MATCH("[a-Z]+", " ", false);

// Check both cases of the same letter against the empty range.
TEST_CI_MATCH("[a-Z]+", "z", false);
TEST_CI_MATCH("[a-Z]+", "Z", false);
40+

0 commit comments

Comments
 (0)