Skip to content

Commit 32e42c9

Browse files
committed
Upgrade to unicode 16
1 parent 9cb5a84 commit 32e42c9

File tree

9 files changed

+1105
-303
lines changed

9 files changed

+1105
-303
lines changed

cpp/ycm/Character.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ bool CodePointCompare( const CodePoint *left, const CodePoint *right ) {
3131

3232

3333
// Sort the code points according to the Canonical Ordering Algorithm.
34-
// See https://www.unicode.org/versions/latest/ch03.pdf#G49591
34+
// See https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49591
3535
CodePointSequence CanonicalSort( CodePointSequence code_points ) {
3636
auto code_point_start = code_points.begin();
3737
auto code_point_end = code_points.end();
@@ -64,7 +64,7 @@ CodePointSequence CanonicalSort( CodePointSequence code_points ) {
6464

6565
// Decompose a UTF-8 encoded string into a sequence of code points according to
6666
// Canonical Decomposition. See
67-
// https://www.unicode.org/versions/latest/ch03.pdf#G733
67+
// https://www.unicode.org/versions/latest/core-spec/chapter-3/#G733
6868
CodePointSequence CanonicalDecompose( std::string_view text ) {
6969
assert( NormalizeInput( text ) == text );
7070
return CanonicalSort( BreakIntoCodePoints( text ) );
@@ -78,7 +78,7 @@ Character::Character( std::string_view character )
7878
is_punctuation_( false ),
7979
is_uppercase_( false ) {
8080
// Normalize the character through NFD (Normalization Form D). See
81-
// https://www.unicode.org/versions/latest/ch03.pdf#G49621
81+
// https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49621
8282
CodePointSequence code_points = CanonicalDecompose( character );
8383

8484
for ( const auto &code_point : code_points ) {

cpp/ycm/Character.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ namespace YouCompleteMe {
2727
// This class represents a UTF-8 character. It takes a UTF-8 encoded string
2828
// corresponding to a grapheme cluster (see
2929
// https://www.unicode.org/glossary/#grapheme_cluster), normalizes it through NFD
30-
// (see https://www.unicode.org/versions/latest/ch03.pdf#G49621), and
30+
// (see https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49621), and
3131
// computes the folded and swapped case versions of the normalized character. It
3232
// also holds some properties, such as whether the character is a letter or a
3333
// punctuation mark, and whether it is uppercase.

cpp/ycm/CodePoint.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ struct RawCodePoint {
9191
// - its breaking property: used to split a word into characters.
9292
// - its combining class: used to sort a sequence of code points according to
9393
// the Canonical Ordering algorithm (see
94-
// https://www.unicode.org/versions/latest/ch03.pdf#G49591).
94+
// https://www.unicode.org/versions/latest/core-spec/chapter-3/#G49591).
9595
class CodePoint {
9696
public:
9797
YCM_EXPORT explicit CodePoint( std::string_view code_point );

cpp/ycm/UnicodeTable.inc

Lines changed: 21 additions & 21 deletions
Large diffs are not rendered by default.

cpp/ycm/tests/CodePoint_test.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,16 +87,21 @@ const TextCodePointPair tests[] = {
8787
GraphemeBreakProperty::EXTEND,
8888
IndicConjunctBreakProperty::EXTEND } },
8989
// Bengali vowel sign Aa
90-
{ "", { "", "", "", false, false, false, GraphemeBreakProperty::EXTEND } },
90+
{ "", { "", "", "", false, false, false,
91+
GraphemeBreakProperty::EXTEND,
92+
IndicConjunctBreakProperty::EXTEND } },
9193
// Zero-width non-joiner
9294
{ "", { "", "", "", false, false, false,
9395
GraphemeBreakProperty::EXTEND } },
9496
// Combining cyrillic millions sign
95-
{ "҈", { "҈", "҈", "҈", false, false, false, GraphemeBreakProperty::EXTEND } },
97+
{ "҈", { "҈", "҈", "҈", false, false, false,
98+
GraphemeBreakProperty::EXTEND,
99+
IndicConjunctBreakProperty::EXTEND } },
96100

97101
// Zero-width joiner
98102
{ "", { "", "", "", false, false, false,
99-
GraphemeBreakProperty::ZWJ, IndicConjunctBreakProperty::EXTEND } },
103+
GraphemeBreakProperty::ZWJ,
104+
IndicConjunctBreakProperty::EXTEND } },
100105

101106
// Regional indicator symbol letter b
102107
{ "🇧", { "🇧", "🇧", "🇧", false, false, false,

0 commit comments

Comments
 (0)