Skip to content

Commit 6dea360

Browse files
committed
New tests for uppercase changes
1 parent 7530731 commit 6dea360

File tree

5 files changed

+29
-23
lines changed

5 files changed

+29
-23
lines changed

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ keywords = ["Characters"]
44
license = "MIT"
55
desc = "Basic functionality for Chr type"
66
authors = ["ScottPJones <scottjones@alum.mit.edu>"]
7-
version = "0.1.6"
7+
version = "0.1.7"
88

99
[deps]
1010
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"

src/ChrBase.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@ using ModuleInterfaceTools
2222
codepoint_cse, codepoint_rng, codepoint_adj, utf8proc_error,
2323
write_utf8, write_utf16, _write_utf8_2, _write_utf8_3, _write_utf8_4, _write_ucs2,
2424
_lowercase_l, _uppercase_l, _lowercase_u, _uppercase_u, _titlecase_u,
25-
_islower_a, _islower_u, _isupper_a, _isupper_l, _isupper_al, _isupper_u,
26-
_can_upper_ch, _can_lower_ch, _can_upper, _can_upper_l
25+
_islower_a, _islower_l, _islower_u, _isupper_a, _isupper_l, _isupper_al, _isupper_u,
26+
_can_upper, _can_upper_l
2727

2828
@api develop! _isvalid_chr
2929

src/casefold.jl

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,12 @@ Copyright 2017-2018 Gandalf Software, Inc., Scott P. Jones
55
Licensed under MIT License, see LICENSE.md
66
=#
77

8-
_lowercase_l(ch) = ifelse(_isupper_al(ch), ch + 0x20, ch)
9-
_uppercase_l(ch) = ifelse(_can_upper(ch), ch - 0x20, ch)
8+
# Uppercase mapping for the code points handled here whose result does not fit in 8 bits.
#   0xb5 (micro sign) -> 0x39c
#   0xff              -> 0x178
#   0xdf              -> 0x1e9e, only when V6_COMPAT is false
# Any other value is returned unchanged, converted with `% UInt16`, so every path
# yields a UInt16.
function _wide_upper(ch)
    ch == 0xb5 && return 0x39c
    ch == 0xff && return 0x178
    (!V6_COMPAT && ch == 0xdf) && return 0x1e9e
    return ch % UInt16
end
11+
12+
# Lowercase a code point: when `_isupper_al(ch)` holds, add 0x20; otherwise return `ch` as is.
# NOTE(review): `_isupper_al` is defined elsewhere; presumably it tests for an uppercase
# ASCII/Latin-1 letter - confirm.
function _lowercase_l(ch)
    _isupper_al(ch) || return ch
    return ch + 0x20
end
13+
# Uppercase a code point: when `_can_upper(ch)` holds, subtract 0x20; every other value is
# handed to `_wide_upper`, which deals with the special cases.
function _uppercase_l(ch)
    if _can_upper(ch)
        return ch - 0x20
    end
    return _wide_upper(ch)
end
1014

1115
# Lowercase dispatcher on a code point: use `_lowercase_l` when `is_latin(ch)` is true,
# `_lowercase_u` otherwise.
function _lowercase(ch)
    if is_latin(ch)
        return _lowercase_l(ch)
    else
        return _lowercase_u(ch)
    end
end
1216
# Uppercase dispatcher on a code point: use `_uppercase_l` when `is_latin(ch)` is true,
# `_uppercase_u` otherwise.
function _uppercase(ch)
    if is_latin(ch)
        return _uppercase_l(ch)
    else
        return _uppercase_u(ch)
    end
end
@@ -16,8 +20,8 @@ lowercase(ch::T) where {T<:Chr} = T(_lowercase(codepoint(ch)))
1620
# Generic uppercase for any `Chr` subtype: map the code point with `_uppercase` and
# rebuild a character of the same type `T`.
function uppercase(ch::T) where {T<:Chr}
    cp = codepoint(ch)
    return T(_uppercase(cp))
end
1721
# Generic titlecase for any `Chr` subtype: map the code point with `_titlecase` and
# rebuild a character of the same type `T`.
function titlecase(ch::T) where {T<:Chr}
    cp = codepoint(ch)
    return T(_titlecase(cp))
end
1822

19-
lowercase(ch::ASCIIChr) = ifelse(_isupper_a(ch), ASCIIChr(ch + 0x20), ch)
20-
uppercase(ch::ASCIIChr) = ifelse(_islower_a(ch), ASCIIChr(ch - 0x20), ch)
23+
# ASCII lowercase: when `_isupper_a(ch)` holds, return `ASCIIChr(ch + 0x20)`;
# anything else is returned untouched.
function lowercase(ch::ASCIIChr)
    _isupper_a(ch) || return ch
    return ASCIIChr(ch + 0x20)
end
24+
# ASCII uppercase: when `_islower_a(ch)` holds, return `ASCIIChr(ch - 0x20)`;
# anything else is returned untouched.
function uppercase(ch::ASCIIChr)
    _islower_a(ch) || return ch
    return ASCIIChr(ch - 0x20)
end
2125
# For ASCII characters, titlecase is simply delegated to `uppercase`.
function titlecase(ch::ASCIIChr)
    return uppercase(ch)
end
2226

2327
# Lowercase for the Latin character types: apply `_lowercase_l` to the code point and
# rebuild a character of the same type `T`.
function lowercase(ch::T) where {T<:LatinChars}
    cp = codepoint(ch)
    return T(_lowercase_l(cp))
end
@@ -33,16 +37,3 @@ function uppercase(ch::_LatinChr)
3337
cb == 0xb5 ? UCS2Chr(0x39c) : cb == 0xff ? UCS2Chr(0x178) : ch
3438
end
3539
# For the Latin character types, titlecase is simply delegated to `uppercase`.
function titlecase(ch::LatinChars)
    return uppercase(ch)
end
36-
37-
@static if V6_COMPAT
38-
@inline _can_upper_ch(ch) =
39-
(ch <= 0x7f
40-
? _islower_a(ch)
41-
: (ch > 0xff ? _islower_u(ch) : ifelse(c > 0xdf, c != 0xf7, c == 0xb5)))
42-
else
43-
@inline _can_upper_ch(ch) =
44-
ch <= 0x7f ? _islower_a(ch) : (ch <= 0xff ? _is_lower_l(ch) : _islower_u(ch))
45-
end
46-
47-
@inline _can_lower_ch(ch) =
48-
ch <= 0x7f ? _isupper_a(ch) : (ch <= 0xff ? _isupper_l(ch) : _isupper_u(ch))

src/unicode.jl

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,9 @@ const _isalnum_mask = _isnumeric_mask | _isalpha_mask
7777

7878
# Alias: the ASCII "numeric" predicate is bound to the same function as `_isdigit`.
const _isnumeric_a = _isdigit
7979
# ASCII punctuation test using a 128-bit lookup mask: bit `n` of the mask is set when
# code `n` counts as punctuation (e.g. bit 0x21 for '!', bit 0x7d for '}').
# A shift of 128 or more yields zero, so such values give `false`.
@inline function _ispunct_a(ch)
    mask = 0x2800_0000_b800_0001_8c00_f7ee_0000_0000
    return (mask & (UInt128(1) << ch)) != 0
end
80-
@inline _isspace_a(ch) = (ch == 32) | (9 <= ch <= 13)
81-
@inline _islower_a(ch) = (ch - 'a'%UInt8) < 26
82-
@inline _isupper_a(ch) = (ch - 'A'%UInt8) < 26
80+
# ASCII whitespace test: true for 0x09 through 0x0d (tab, LF, VT, FF, CR) and for 0x20 (space).
# Uses the non-short-circuit `|`, so both comparisons are always evaluated.
@inline function _isspace_a(ch)
    return (0x9 <= ch <= 0xd) | (ch == 0x20)
end
81+
# ASCII lowercase-letter test ('a' through 'z').
# Only the low 8 bits of `ch` are examined (`ch % UInt8`); the subtraction wraps in UInt8,
# so values below 'a' become large and fail the `< 26` comparison.
@inline function _islower_a(ch)
    offset = ch % UInt8 - 0x61   # 0x61 == 'a' % UInt8
    return offset < 0x1a
end
82+
# ASCII uppercase-letter test ('A' through 'Z').
# Only the low 8 bits of `ch` are examined (`ch % UInt8`); the subtraction wraps in UInt8,
# so values below 'A' become large and fail the `< 26` comparison.
@inline function _isupper_a(ch)
    offset = ch % UInt8 - 0x41   # 0x41 == 'A' % UInt8
    return offset < 0x1a
end
8383
# ASCII letter test: true when either `_islower_a` or `_isupper_a` accepts `ch`.
# Combined with the non-short-circuit `|`, so both predicates are always evaluated.
@inline function _isalpha_a(ch)
    lower = _islower_a(ch)
    upper = _isupper_a(ch)
    return lower | upper
end
8484
# ASCII alphanumeric test: true when `_isdigit` or `_isalpha_a` accepts `ch`.
# Combined with the non-short-circuit `|`, so both predicates are always evaluated.
@inline function _isalnum_a(ch)
    digit = _isdigit(ch)
    alpha = _isalpha_a(ch)
    return digit | alpha
end
8585
# ASCII printable test: true for 0x20 (space) up to and including 0x7e; 0x7f is excluded.
@inline function _isprint_a(ch)
    return (0x20 <= ch) && (ch < 0x7f)
end

test/runtests.jl

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,21 @@ for C in (ASCIIChr, LatinChr, UCS2Chr, UTF32Chr, Char)
2020
@test last(rng) === C('\x7f')
2121
end
2222

23+
# Check single-character case mapping of the character type `C` against Base `Char`.
# Skipped when `C` is `Char` itself.
# NOTE(review): `C` and `maxch` come from the enclosing loop, outside this hunk;
# `maxch` is presumably the largest code point representable in `C` - confirm.
if C != Char
    @testset "Casefold character" begin
        for c in 0:UInt(maxch)
            # Skip values that are not valid for this character type.
            if !is_valid(C, c)
                continue
            end
            ch = C(c)
            cj = Char(c)
            uj = uppercase(cj)
            # Only compare uppercase results when Base's result does not exceed `maxch`.
            if uj <= maxch
                uc = uppercase(ch)
                # Print the mismatching values before the failing test is recorded.
                if !(uc == uj)
                    println(" $c: $maxch $uc $uj")
                end
                @test uc == uj
            end
            @test lowercase(ch) == lowercase(cj)
        end
    end
end
37+
2338
@testset "Edge conditions" begin
2439
for (val, pass) in (
2540
(0, true), (0xd7ff, true),

0 commit comments

Comments
 (0)