Skip to content

Commit 6aab95c

Browse files
committed
Switch String extension functions to CharSequence extension functions
1 parent 99ce70e commit 6aab95c

File tree

5 files changed

+120
-144
lines changed

5 files changed

+120
-144
lines changed

kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePointSequence.kt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,23 @@ package de.cketti.codepoints.deluxe
33
import kotlin.jvm.JvmInline
44

55
/**
6-
* Sequence of [CodePoint]s in the given [String].
6+
* Sequence of [CodePoint]s in the given [CharSequence].
77
*/
88
@JvmInline
9-
value class CodePointSequence(private val text: String) : Sequence<CodePoint> {
9+
value class CodePointSequence(private val text: CharSequence) : Sequence<CodePoint> {
    // Iteration is delegated to the `codePointIterator()` extension of the wrapped character sequence.
    override fun iterator(): CodePointIterator = text.codePointIterator()
}
1414

1515
/**
16-
* Iterator for [CodePoint]s in the given [String].
16+
* Iterator for [CodePoint]s in the given [CharSequence].
1717
*
18-
* The `startIndex` and `endIndex` parameters are the regular `String` indices, i.e. the number of `Char`s from the
19-
* start of the string.
18+
* The `startIndex` and `endIndex` parameters are the regular `CharSequence` indices, i.e. the number of `Char`s from
19+
* the start of the character sequence.
2020
*/
2121
class CodePointIterator(
22-
private val text: String,
22+
private val text: CharSequence,
2323
startIndex: Int,
2424
private val endIndex: Int
2525
) : Iterator<CodePoint> {

kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,41 +10,44 @@ import de.cketti.codepoints.codePointBefore as intCodePointBefore
1010
/**
1111
* Returns the Unicode code point at the specified index.
1212
*
13-
* The `index` parameter is the regular `String` index, i.e. the number of `Char`s from the start of the string.
13+
* The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character
14+
* sequence.
1415
*
15-
* If the `index` is out of bounds of this string, this method throws an [IndexOutOfBoundsException].
16+
* If the `index` is out of bounds of this character sequence, this method throws an [IndexOutOfBoundsException].
1617
*
1718
* See [codePointAt][intCodePointAt].
1819
*
1920
*/
2021
@kotlin.internal.HidesMembers
21-
fun String.codePointAt(index: Int): CodePoint {
22+
// Delegates to `intCodePointAt` and converts its result with `toCodePoint()`.
fun CharSequence.codePointAt(index: Int): CodePoint = intCodePointAt(index).toCodePoint()
2425

2526
/**
2627
* Returns the Unicode code point before the specified index.
2728
*
28-
* The `index` parameter is the regular `String` index, i.e. the number of `Char`s from the start of the string.
29+
* The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character
30+
* sequence.
2931
*
30-
* If the value `index - 1` is out of bounds of this string, this method throws an [IndexOutOfBoundsException].
32+
* If the value `index - 1` is out of bounds of this character sequence, this method throws an
33+
* [IndexOutOfBoundsException].
3134
*
3235
* See [codePointBefore][intCodePointBefore].
3336
*/
34-
fun String.codePointBefore(index: Int): CodePoint {
37+
// Delegates to `intCodePointBefore` and converts its result with `toCodePoint()`.
fun CharSequence.codePointBefore(index: Int): CodePoint = intCodePointBefore(index).toCodePoint()
3740

3841
/**
39-
* Sequence of [CodePoint]s in this string.
42+
* Sequence of [CodePoint]s in this character sequence.
4043
*/
41-
fun String.codePointSequence(): CodePointSequence {
44+
// Wraps this character sequence in a `CodePointSequence`.
fun CharSequence.codePointSequence(): CodePointSequence = CodePointSequence(this)
4447

4548
/**
46-
* Iterator for [CodePoint]s in this string.
49+
* Iterator for [CodePoint]s in this character sequence.
4750
*/
48-
fun String.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator {
51+
// Builds a `CodePointIterator` over this character sequence, passing the index bounds through unchanged.
fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator =
    CodePointIterator(text = this, startIndex = startIndex, endIndex = endIndex)

kotlin-codepoints/src/commonImplementation/kotlin/StringExtensions.kt

Lines changed: 0 additions & 90 deletions
This file was deleted.

kotlin-codepoints/src/commonMain/kotlin/StringExtensions.kt

Lines changed: 101 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,22 +3,23 @@ package de.cketti.codepoints
33
/**
44
* Returns the Unicode code point at the specified index.
55
*
6-
* The `index` parameter is the regular `String` index, i.e. the number of `Char`s from the start of the string.
6+
* The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character
7+
* sequence.
78
*
89
* If the code point at the specified index is part of the Basic Multilingual Plane (BMP), its value can be represented
9-
* using a single `Char` and this method will behave exactly like [String.get].
10+
* using a single `Char` and this method will behave exactly like [CharSequence.get].
1011
* Code points outside the BMP are encoded using a surrogate pair – a `Char` containing a value in the high surrogate
1112
* range followed by a `Char` containing a value in the low surrogate range. Together these two `Char`s encode a single
1213
* code point in one of the supplementary planes. This method will do the necessary decoding and return the value of
1314
* that single code point.
1415
*
1516
* In situations where surrogate characters are encountered that don't form a valid surrogate pair starting at `index`,
16-
* this method will return the surrogate code point itself, behaving like [String.get].
17+
* this method will return the surrogate code point itself, behaving like [CharSequence.get].
1718
*
18-
* If the `index` is out of bounds of this string, this method throws an [IndexOutOfBoundsException].
19+
* If the `index` is out of bounds of this character sequence, this method throws an [IndexOutOfBoundsException].
1920
*
20-
* To iterate over all code points in a string the index has to be adjusted depending on the value of the returned code
21-
* point. Use [CodePoints.charCount] for this.
21+
* To iterate over all code points in a character sequence the index has to be adjusted depending on the value of the
22+
* returned code point. Use [CodePoints.charCount] for this.
2223
*
2324
* ```kotlin
2425
* // Text containing code points outside the BMP (encoded as surrogate pairs)
@@ -33,23 +34,50 @@ package de.cketti.codepoints
3334
* }
3435
* ```
3536
*/
36-
expect fun String.codePointAt(index: Int): Int
37+
fun CharSequence.codePointAt(index: Int): Int {
    if (index < 0 || index >= length) throw IndexOutOfBoundsException()

    val lead = this[index]
    // Anything that is not a high surrogate is returned as-is.
    if (!lead.isHighSurrogate()) return lead.code

    // A high surrogate in the last position has no partner to pair with.
    val trailIndex = index + 1
    if (trailIndex >= length) return lead.code

    // Only a high surrogate directly followed by a low surrogate is decoded as a pair.
    val trail = this[trailIndex]
    return if (trail.isLowSurrogate()) CodePoints.toCodePoint(lead, trail) else lead.code
}
3750

3851
/**
3952
* Returns the Unicode code point before the specified index.
4053
*
41-
* The `index` parameter is the regular `String` index, i.e. the number of `Char`s from the start of the string.
54+
* The `index` parameter is the regular `CharSequence` index, i.e. the number of `Char`s from the start of the character
55+
* sequence.
4256
*
4357
* If the `Char` value at `index - 1` is in the low surrogate range and the `Char` value at `index - 2` is in the high
4458
* surrogate range, then the surrogate pair is decoded and the code point in one of the supplementary planes is
45-
* returned. In all other cases this method behaves like [String.get] was called with an argument of `index - 1`.
59+
* returned. In all other cases this method behaves as if [CharSequence.get] was called with an argument of `index - 1`.
4660
*
47-
* If the value `index - 1` is out of bounds of this string, this method throws an [IndexOutOfBoundsException].
61+
* If the value `index - 1` is out of bounds of this character sequence, this method throws an
62+
* [IndexOutOfBoundsException].
4863
*/
49-
expect fun String.codePointBefore(index: Int): Int
64+
fun CharSequence.codePointBefore(index: Int): Int {
    val trailIndex = index - 1
    if (trailIndex < 0 || trailIndex >= length) throw IndexOutOfBoundsException()

    val trail = this[trailIndex]
    // Anything that is not a low surrogate is returned as-is.
    if (!trail.isLowSurrogate()) return trail.code

    // A low surrogate in the first position has no partner to pair with.
    val leadIndex = trailIndex - 1
    if (leadIndex < 0) return trail.code

    // Only a low surrogate directly preceded by a high surrogate is decoded as a pair.
    val lead = this[leadIndex]
    return if (lead.isHighSurrogate()) CodePoints.toCodePoint(lead, trail) else trail.code
}
5078

5179
/**
52-
* Returns the number of Unicode code points in the specified text range of this `String`.
80+
* Returns the number of Unicode code points in the specified text range of this `CharSequence`.
5381
*
5482
* The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the
5583
* length (in `Char`s) of the text range is `endIndex - beginIndex`. Unpaired surrogates within the text range count as
@@ -58,16 +86,70 @@ expect fun String.codePointBefore(index: Int): Int
5886
* If `beginIndex` is negative, or `endIndex` is larger than the length of this character sequence, or `beginIndex` is larger than
5987
* `endIndex`, this method throws an [IndexOutOfBoundsException].
6088
*/
61-
expect fun String.codePointCount(beginIndex: Int, endIndex: Int): Int
89+
fun CharSequence.codePointCount(beginIndex: Int, endIndex: Int): Int {
    if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) throw IndexOutOfBoundsException()

    var index = beginIndex
    var count = 0
    // Pre-checked loop: an empty range (`beginIndex == endIndex`) must yield 0 without reading the `Char` at
    // `beginIndex`, which does not even exist when the range starts at `length`.
    while (index < endIndex) {
        val firstChar = this[index]
        index++
        // A high surrogate only forms a pair with a low surrogate that is still inside the range. Otherwise it is
        // an unpaired surrogate and counts as one code point on its own.
        if (firstChar.isHighSurrogate() && index < endIndex && this[index].isLowSurrogate()) {
            index++
        }

        count++
    }

    return count
}
62109

63110
/**
64-
* Returns the index within this `String` that is offset from the given `index` by `codePointOffset` code points.
111+
* Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points.
65112
*
66113
* Unpaired surrogates within the text range given by `index` and `codePointOffset` count as one code point each.
67114
*
68-
* If `index` is negative or larger than the length of this string, or if `codePointOffset` is positive and the
69-
* substring starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is negative
70-
* and the substring before index has fewer than the absolute value of `codePointOffset` code points, this method throws
71-
* an [IndexOutOfBoundsException].
115+
* If `index` is negative or larger than the length of this character sequence, or if `codePointOffset` is positive and
116+
* the subsequence starting with `index` has fewer than `codePointOffset` code points, or if `codePointOffset` is
117+
* negative and the subsequence before index has fewer than the absolute value of `codePointOffset` code points, this
118+
* method throws an [IndexOutOfBoundsException].
72119
*/
73-
expect fun String.offsetByCodePoints(index: Int, codePointOffset: Int): Int
120+
fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
    if (index !in 0..length) throw IndexOutOfBoundsException()
    if (codePointOffset == 0) return index

    if (codePointOffset > 0) {
        var currentIndex = index
        repeat(codePointOffset) {
            // Running out of `Char`s before all requested code points were skipped is an error.
            if (currentIndex > lastIndex) throw IndexOutOfBoundsException()
            val firstChar = this[currentIndex]
            currentIndex++
            // Step over the low surrogate as well when the two `Char`s form a surrogate pair.
            if (firstChar.isHighSurrogate() && currentIndex <= lastIndex && this[currentIndex].isLowSurrogate()) {
                currentIndex++
            }
        }

        return currentIndex
    } else {
        var currentIndex = index - 1
        // Count the negative offset up towards zero instead of negating it. `-Int.MIN_VALUE` overflows back to
        // `Int.MIN_VALUE`, so a `repeat(-codePointOffset)` would run zero times and return `index` rather than
        // throwing.
        var remaining = codePointOffset
        while (remaining < 0) {
            // Running out of `Char`s before all requested code points were skipped is an error.
            if (currentIndex < 0) throw IndexOutOfBoundsException()
            val firstChar = this[currentIndex]
            currentIndex--
            // Step over the high surrogate as well when the two `Char`s form a surrogate pair.
            if (firstChar.isLowSurrogate() && currentIndex >= 0 && this[currentIndex].isHighSurrogate()) {
                currentIndex--
            }
            remaining++
        }

        return currentIndex + 1
    }
}

kotlin-codepoints/src/jvmMain/kotlin/StringExtensions.kt

Lines changed: 0 additions & 19 deletions
This file was deleted.

0 commit comments

Comments
 (0)