Skip to content

Commit 8fa6142

Browse files
authored
Merge pull request #21 from cketti/charsequence_support
Change `StringExtensions` to `CharSequenceExtensions`
2 parents 99ce70e + b925950 commit 8fa6142

File tree

9 files changed

+216
-240
lines changed

9 files changed

+216
-240
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
@file:Suppress(
2+
"INVISIBLE_MEMBER", // Required to be able to use kotlin.internal.HidesMembers
3+
"INVISIBLE_REFERENCE", // Required to be able to use kotlin.internal.HidesMembers
4+
)
5+
package de.cketti.codepoints.deluxe
6+
7+
import de.cketti.codepoints.codePointAt as intCodePointAt
8+
import de.cketti.codepoints.codePointBefore as intCodePointBefore
9+
10+
/**
 * Looks up the Unicode code point located at [index] and returns it as a [CodePoint].
 *
 * [index] is an ordinary `CharSequence` index: it counts `Char`s from the beginning of the character sequence, not
 * code points.
 *
 * This is a thin wrapper that delegates to the `Int`-returning [codePointAt][intCodePointAt] and converts the result
 * with `toCodePoint()`.
 *
 * @throws IndexOutOfBoundsException if [index] is outside the bounds of this character sequence.
 */
@kotlin.internal.HidesMembers
fun CharSequence.codePointAt(index: Int): CodePoint = intCodePointAt(index).toCodePoint()
25+
26+
/**
 * Looks up the Unicode code point that ends right before [index] and returns it as a [CodePoint].
 *
 * [index] is an ordinary `CharSequence` index: it counts `Char`s from the beginning of the character sequence, not
 * code points.
 *
 * This is a thin wrapper that delegates to the `Int`-returning [codePointBefore][intCodePointBefore] and converts the
 * result with `toCodePoint()`.
 *
 * @throws IndexOutOfBoundsException if `index - 1` is outside the bounds of this character sequence.
 */
fun CharSequence.codePointBefore(index: Int): CodePoint = intCodePointBefore(index).toCodePoint()
40+
41+
/**
 * Wraps this character sequence in a [CodePointSequence] so its contents can be consumed as [CodePoint]s.
 */
fun CharSequence.codePointSequence(): CodePointSequence = CodePointSequence(this)
47+
48+
/**
 * Creates a [CodePointIterator] over the [CodePoint]s of this character sequence.
 *
 * @param startIndex `Char` index passed to the iterator as its start; defaults to `0`.
 * @param endIndex `Char` index passed to the iterator as its end; defaults to [CharSequence.length].
 */
fun CharSequence.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator =
    CodePointIterator(this, startIndex, endIndex)

kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePointSequence.kt

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,23 @@ package de.cketti.codepoints.deluxe
33
import kotlin.jvm.JvmInline
44

55
/**
6-
* Sequence of [CodePoint]s in the given [String].
6+
* Sequence of [CodePoint]s in the given [CharSequence].
77
*/
88
@JvmInline
9-
value class CodePointSequence(private val text: String) : Sequence<CodePoint> {
9+
value class CodePointSequence(private val text: CharSequence) : Sequence<CodePoint> {
1010
override fun iterator(): CodePointIterator {
1111
return text.codePointIterator()
1212
}
1313
}
1414

1515
/**
16-
* Iterator for [CodePoint]s in the given [String].
16+
* Iterator for [CodePoint]s in the given [CharSequence].
1717
*
18-
* The `startIndex` and `endIndex` parameters are the regular `String` indices, i.e. the number of `Char`s from the
19-
* start of the string.
18+
* The `startIndex` and `endIndex` parameters are the regular `CharSequence` indices, i.e. the number of `Char`s from
19+
* the start of the character sequence.
2020
*/
2121
class CodePointIterator(
22-
private val text: String,
22+
private val text: CharSequence,
2323
startIndex: Int,
2424
private val endIndex: Int
2525
) : Iterator<CodePoint> {

kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt

Lines changed: 0 additions & 50 deletions
This file was deleted.

kotlin-codepoints-deluxe/src/commonTest/kotlin/StringExtensionsTest.kt renamed to kotlin-codepoints-deluxe/src/commonTest/kotlin/CharSequenceExtensionsTest.kt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ package de.cketti.codepoints.deluxe
33
import kotlin.test.assertEquals
44
import kotlin.test.Test
55

6-
class StringExtensionsTest {
6+
class CharSequenceExtensionsTest {
77
@Test
88
fun codePointAt() {
99
assertEquals('a'.toCodePoint(), "a".codePointAt(0))

kotlin-codepoints/src/commonImplementation/kotlin/StringExtensions.kt

Lines changed: 0 additions & 90 deletions
This file was deleted.
Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
package de.cketti.codepoints
2+
3+
/**
 * Returns the Unicode code point that starts at the given `index`.
 *
 * `index` is an ordinary `CharSequence` index, i.e. it counts `Char`s from the start of the character sequence.
 *
 * - If the `Char` at `index` is a high surrogate and it is immediately followed by a low surrogate, the two `Char`s
 *   are decoded as a surrogate pair and the resulting supplementary code point is returned.
 * - In every other case (a BMP character, or a surrogate that is not part of a valid pair starting at `index`) the
 *   value of the single `Char` at `index` is returned, exactly like [CharSequence.get] would.
 *
 * To walk over all code points of a character sequence, advance the index by the number of `Char`s the returned code
 * point occupies. Use [CodePoints.charCount] for this.
 *
 * ```kotlin
 * // Text containing code points outside the BMP (encoded as surrogate pairs)
 * val text = "\uD83E\uDD95\uD83E\uDD96"
 *
 * var index = 0
 * while (index < text.length) {
 *     val codePoint = text.codePointAt(index)
 *     // Do something with codePoint
 *
 *     index += CodePoints.charCount(codePoint)
 * }
 * ```
 *
 * @throws IndexOutOfBoundsException if `index` is outside the bounds of this character sequence.
 */
fun CharSequence.codePointAt(index: Int): Int {
    if (index < 0 || index >= length) throw IndexOutOfBoundsException()

    val lead = this[index]
    val trailIndex = index + 1

    // Only a high surrogate that has a successor can be the start of a surrogate pair.
    if (!lead.isHighSurrogate() || trailIndex >= length) return lead.code

    val trail = this[trailIndex]
    return if (trail.isLowSurrogate()) CodePoints.toCodePoint(lead, trail) else lead.code
}
50+
51+
/**
 * Returns the Unicode code point that ends right before the given `index`.
 *
 * `index` is an ordinary `CharSequence` index, i.e. it counts `Char`s from the start of the character sequence.
 *
 * - If the `Char` at `index - 1` is a low surrogate and the `Char` at `index - 2` is a high surrogate, the two `Char`s
 *   are decoded as a surrogate pair and the resulting supplementary code point is returned.
 * - In every other case the value of the single `Char` at `index - 1` is returned, exactly like [CharSequence.get]
 *   called with `index - 1` would.
 *
 * @throws IndexOutOfBoundsException if `index - 1` is outside the bounds of this character sequence.
 */
fun CharSequence.codePointBefore(index: Int): Int {
    val trailIndex = index - 1
    if (trailIndex < 0 || trailIndex >= length) throw IndexOutOfBoundsException()

    val trail = this[trailIndex]

    // Only a low surrogate that has a predecessor can be the end of a surrogate pair.
    if (trailIndex == 0 || !trail.isLowSurrogate()) return trail.code

    val lead = this[trailIndex - 1]
    return if (lead.isHighSurrogate()) CodePoints.toCodePoint(lead, trail) else trail.code
}
78+
79+
/**
 * Returns the number of Unicode code points in the specified text range of this `CharSequence`.
 *
 * The text range begins at the specified `beginIndex` and extends to the `Char` at index `endIndex - 1`. Thus, the
 * length (in `Char`s) of the text range is `endIndex - beginIndex`. Unpaired surrogates within the text range count as
 * one code point each. A high surrogate at `endIndex - 1` is never combined with a low surrogate at `endIndex`,
 * because that `Char` lies outside the range.
 *
 * An empty range (`beginIndex == endIndex`) contains no code points, so `0` is returned.
 *
 * @param beginIndex index of the first `Char` of the text range (inclusive).
 * @param endIndex index after the last `Char` of the text range (exclusive).
 * @return the number of code points in the text range.
 * @throws IndexOutOfBoundsException if `beginIndex` is negative, or `endIndex` is larger than the length of this
 * character sequence, or `beginIndex` is larger than `endIndex`.
 */
fun CharSequence.codePointCount(beginIndex: Int, endIndex: Int): Int {
    if (beginIndex < 0 || endIndex > length || beginIndex > endIndex) throw IndexOutOfBoundsException()

    var index = beginIndex
    var count = 0

    // A pre-checked loop is required here: a post-checked (do/while) loop would inspect one `Char` even when the
    // range is empty, yielding 1 instead of 0 or reading past the end of the character sequence.
    while (index < endIndex) {
        val firstChar = this[index]
        index++

        // Consume the low surrogate as well if it completes a surrogate pair inside the range.
        if (firstChar.isHighSurrogate() && index < endIndex && this[index].isLowSurrogate()) {
            index++
        }

        count++
    }

    return count
}
109+
110+
/**
 * Returns the index within this `CharSequence` that is offset from the given `index` by `codePointOffset` code points.
 *
 * A positive `codePointOffset` moves forward from `index`, a negative one moves backward, and `0` returns `index`
 * unchanged. Unpaired surrogates within the text range given by `index` and `codePointOffset` count as one code point
 * each.
 *
 * @param index the `Char` index to start from; may be equal to the length of this character sequence.
 * @param codePointOffset the number of code points to move by.
 * @return the `Char` index reached after moving `codePointOffset` code points away from `index`.
 * @throws IndexOutOfBoundsException if `index` is negative or larger than the length of this character sequence, or
 * if `codePointOffset` is positive and the subsequence starting with `index` has fewer than `codePointOffset` code
 * points, or if `codePointOffset` is negative and the subsequence before `index` has fewer than the absolute value of
 * `codePointOffset` code points.
 */
fun CharSequence.offsetByCodePoints(index: Int, codePointOffset: Int): Int {
    if (index !in 0..length) throw IndexOutOfBoundsException()
    if (codePointOffset == 0) return index

    if (codePointOffset > 0) {
        var currentIndex = index
        repeat(codePointOffset) {
            if (currentIndex > lastIndex) throw IndexOutOfBoundsException()
            val firstChar = this[currentIndex]
            currentIndex++

            // Skip the low surrogate as well if it completes a surrogate pair.
            if (firstChar.isHighSurrogate() && currentIndex <= lastIndex && this[currentIndex].isLowSurrogate()) {
                currentIndex++
            }
        }

        return currentIndex
    } else {
        var currentIndex = index - 1

        // Count the (negative) offset up towards zero instead of negating it: `-Int.MIN_VALUE` overflows back to
        // `Int.MIN_VALUE`, which would make a `repeat(-codePointOffset)` loop do nothing and silently return `index`.
        var remaining = codePointOffset
        while (remaining < 0) {
            if (currentIndex < 0) throw IndexOutOfBoundsException()
            val firstChar = this[currentIndex]
            currentIndex--

            // Skip the high surrogate as well if it completes a surrogate pair.
            if (firstChar.isLowSurrogate() && currentIndex >= 0 && this[currentIndex].isHighSurrogate()) {
                currentIndex--
            }

            remaining++
        }

        return currentIndex + 1
    }
}

0 commit comments

Comments
 (0)