@@ -3,22 +3,23 @@ package de.cketti.codepoints
33/* *
44 * Returns the Unicode code point at the specified index.
55 *
6- * The `index` parameter is the regular `String` index, i.e. the number of `Char`s from the start of the string.
6+ * The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character
7+ * sequence.
78 *
89 * If the code point at the specified index is part of the Basic Multilingual Plane (BMP), its value can be represented
9- * using a single `Char` and this method will behave exactly like [String .get].
10+ * using a single `Char` and this method will behave exactly like [CharSequence.get].
1011 * Code points outside the BMP are encoded using a surrogate pair – a `Char` containing a value in the high surrogate
1112 * range followed by a `Char` containing a value in the low surrogate range. Together these two `Char`s encode a single
1213 * code point in one of the supplementary planes. This method will do the necessary decoding and return the value of
1314 * that single code point.
1415 *
1516 * In situations where surrogate characters are encountered that don't form a valid surrogate pair starting at `index`,
16- * this method will return the surrogate code point itself, behaving like [String .get].
17+ * this method will return the surrogate code point itself, behaving like [CharSequence.get].
1718 *
18- * If the `index` is out of bounds of this string , this method throws an [IndexOutOfBoundsException].
19+ * If the `index` is out of bounds of this character sequence, this method throws an [IndexOutOfBoundsException].
1920 *
20- * To iterate over all code points in a string the index has to be adjusted depending on the value of the returned code
21- * point. Use [CodePoints.charCount] for this.
21+ * To iterate over all code points in a character sequence the index has to be adjusted depending on the value of the
22+ * returned code point. Use [CodePoints.charCount] for this.
2223 *
2324 * ```kotlin
2425 * // Text containing code points outside the BMP (encoded as surrogate pairs)
@@ -33,23 +34,50 @@ package de.cketti.codepoints
3334 * }
3435 * ```
3536 */
36- expect fun String.codePointAt (index : Int ): Int
/**
 * Returns the Unicode code point that starts at [index].
 *
 * When the `Char` at [index] is a high surrogate and is immediately followed by a low surrogate, the two are
 * combined via [CodePoints.toCodePoint]. In every other case the numeric value of the single `Char` at [index] is
 * returned.
 *
 * @throws IndexOutOfBoundsException if [index] is negative or not smaller than the length of this character sequence.
 */
fun CharSequence.codePointAt(index: Int): Int {
    if (index < 0 || index >= length) throw IndexOutOfBoundsException()

    val lead = this[index]
    val trailIndex = index + 1

    // Anything that cannot start a surrogate pair (or has no successor) is returned as-is.
    if (!lead.isHighSurrogate() || trailIndex >= length) return lead.code

    val trail = this[trailIndex]
    return if (trail.isLowSurrogate()) CodePoints.toCodePoint(lead, trail) else lead.code
}
3750
3851/* *
3952 * Returns the Unicode code point before the specified index.
4053 *
41- * The `index` parameter is the regular `String` index, i.e. the number of `Char`s from the start of the string.
54+ * The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character
55+ * sequence.
4256 *
4357 * If the `Char` value at `index - 1` is in the low surrogate range and the `Char` value at `index - 2` is in the high
4458 * surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is
45- * returned. In all other cases this method behaves like [String .get] was called with an argument of `index - 1`.
59+ * returned. In all other cases this method behaves as if [CharSequence.get] was called with an argument of `index - 1`.
4660 *
47- * If the value `index - 1` is out of bounds of this string, this method throws an [IndexOutOfBoundsException].
61+ * If the value `index - 1` is out of bounds of this character sequence, this method throws an
62+ * [IndexOutOfBoundsException].
4863 */
49- expect fun String.codePointBefore (index : Int ): Int
/**
 * Returns the Unicode code point that ends right before [index].
 *
 * When the `Char` at `index - 1` is a low surrogate and the `Char` at `index - 2` is a high surrogate, the two are
 * combined via [CodePoints.toCodePoint]. In every other case the numeric value of the single `Char` at `index - 1`
 * is returned.
 *
 * @throws IndexOutOfBoundsException if `index - 1` is not a valid index of this character sequence.
 */
fun CharSequence.codePointBefore(index: Int): Int {
    val trailIndex = index - 1
    if (trailIndex < 0 || trailIndex >= length) throw IndexOutOfBoundsException()

    val trail = this[trailIndex]

    // Without a predecessor, or without a low surrogate, there is no pair to decode.
    if (trailIndex == 0 || !trail.isLowSurrogate()) return trail.code

    val lead = this[trailIndex - 1]
    return if (lead.isHighSurrogate()) CodePoints.toCodePoint(lead, trail) else trail.code
}
5078
5179/* *
52- * Returns the number of Unicode code points in the specified text range of this `String `.
80+ * Returns the number of Unicode code points in the specified text range of this `CharSequence`.
5381 *
5482 * The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the
5583 * length (in `Char`s) of the text range is `endIndex - beginIndex`. Unpaired surrogates within the text range count as
@@ -58,16 +86,70 @@ expect fun String.codePointBefore(index: Int): Int
5886 * If `beginIndex` is negative, or `endIndex` is larger than the length of this character sequence, or `beginIndex` is larger than
5987 * `endIndex`, this method throws an [IndexOutOfBoundsException].
6088 */
61- expect fun String.codePointCount (beginIndex : Int , endIndex : Int ): Int
/**
 * Counts the Unicode code points in the `Char` range from [beginIndex] (inclusive) to [endIndex] (exclusive).
 *
 * A high surrogate directly followed by a low surrogate counts as one code point, provided both lie inside the
 * range. Every other `Char`, including unpaired surrogates, counts as one code point. An empty range
 * (`beginIndex == endIndex`) contains zero code points.
 *
 * @throws IndexOutOfBoundsException if [beginIndex] is negative, [endIndex] is larger than the length of this
 * character sequence, or [beginIndex] is larger than [endIndex].
 */
fun CharSequence.codePointCount(beginIndex: Int, endIndex: Int): Int {
    if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) throw IndexOutOfBoundsException()

    var index = beginIndex
    var count = 0

    // A pre-checked loop (instead of do/while) so that an empty range reads no Char and yields 0.
    while (index < endIndex) {
        val isSurrogatePair = this[index].isHighSurrogate() &&
            index + 1 < endIndex && // the low surrogate must also be inside the range
            this[index + 1].isLowSurrogate()

        index += if (isSurrogatePair) 2 else 1
        count++
    }

    return count
}
62109
63110/* *
64- * Returns the index within this `String ` that is offset from the given `index` by `codePointOffset` code points.
111+ * Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points.
65112 *
66113 * Unpaired surrogates within the text range given by `index` and `codePointOffset` count as one code point each.
67114 *
68- * If `index` is negative or larger than the length of this string , or if `codePointOffset` is positive and the
69- * substring starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is negative
70- * and the substring before index has fewer than the absolute value of `codePointOffset` code points, this method throws
71- * an [IndexOutOfBoundsException].
115+ * If `index` is negative or larger than the length of this character sequence, or if `codePointOffset` is positive and
116+ * the subsequence starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is
117+ * negative and the subsequence before index has fewer than the absolute value of `codePointOffset` code points, this
118+ * method throws an [IndexOutOfBoundsException].
72119 */
73- expect fun String.offsetByCodePoints (index : Int , codePointOffset : Int ): Int
/**
 * Returns the `Char` index reached by moving [codePointOffset] code points away from [index].
 *
 * A positive offset moves towards the end of this character sequence, a negative offset towards the start, and an
 * offset of zero returns [index] unchanged. A high surrogate directly followed by a low surrogate is stepped over as
 * one code point; every other `Char`, including unpaired surrogates, is one code point.
 *
 * @throws IndexOutOfBoundsException if [index] is negative or larger than the length of this character sequence, or
 * if there are fewer than the absolute value of [codePointOffset] code points in the requested direction.
 */
fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
    if (index !in 0..length) throw IndexOutOfBoundsException()

    // `position` is always a boundary between Chars: the next Char is at `position`, the previous at `position - 1`.
    var position = index

    // Count the remaining steps towards zero instead of negating the offset: `-Int.MIN_VALUE` overflows back to
    // `Int.MIN_VALUE`, which would make a `repeat(-codePointOffset)` loop silently do nothing.
    var remaining = codePointOffset

    while (remaining > 0) {
        if (position >= length) throw IndexOutOfBoundsException()

        val isSurrogatePair = this[position].isHighSurrogate() &&
            position + 1 < length &&
            this[position + 1].isLowSurrogate()

        position += if (isSurrogatePair) 2 else 1
        remaining--
    }

    while (remaining < 0) {
        if (position <= 0) throw IndexOutOfBoundsException()

        val isSurrogatePair = this[position - 1].isLowSurrogate() &&
            position >= 2 &&
            this[position - 2].isHighSurrogate()

        position -= if (isSurrogatePair) 2 else 1
        remaining++
    }

    return position
}
0 commit comments