Skip to content

Commit 7831019

Browse files
authored
Merge pull request #20 from cketti/substring_CodePointIterator
Add support for iterating over parts of a string
2 parents d68acb8 + cdb92ee commit 7831019

File tree

3 files changed

+93
-8
lines changed

3 files changed

+93
-8
lines changed

kotlin-codepoints-deluxe/src/commonMain/kotlin/CodePointSequence.kt

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,38 @@ import kotlin.jvm.JvmInline
 @JvmInline
 value class CodePointSequence(private val text: String) : Sequence<CodePoint> {
     override fun iterator(): CodePointIterator {
-        return CodePointIterator(text)
+        return text.codePointIterator()
     }
 }
 
 /**
  * Iterator for [CodePoint]s in the given [String].
+ *
+ * The `startIndex` and `endIndex` parameters are the regular `String` indices, i.e. the number of `Char`s from the
+ * start of the string.
  */
-class CodePointIterator(private val text: String) : Iterator<CodePoint> {
-    private var index = 0
+class CodePointIterator(
+    private val text: String,
+    startIndex: Int,
+    private val endIndex: Int
+) : Iterator<CodePoint> {
+    private var index = startIndex
 
     override fun hasNext(): Boolean {
-        return index < text.length
+        return index < endIndex
     }
 
     override fun next(): CodePoint {
-        return text.codePointAt(index).also { codePoint ->
-            index += codePoint.charCount
+        return if (index + 1 == endIndex) {
+            text[index].toCodePoint().also {
+                index++
+            }
+        } else if (hasNext()) {
+            text.codePointAt(index).also { codePoint ->
+                index += codePoint.charCount
+            }
+        } else {
+            throw IndexOutOfBoundsException()
         }
     }
 }

kotlin-codepoints-deluxe/src/commonMain/kotlin/StringExtensions.kt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,6 @@ fun String.codePointSequence(): CodePointSequence {
 /**
  * Iterator for [CodePoint]s in this string.
  */
-fun String.codePointIterator(): CodePointIterator {
-    return CodePointIterator(this)
+fun String.codePointIterator(startIndex: Int = 0, endIndex: Int = length): CodePointIterator {
+    return CodePointIterator(this, startIndex, endIndex)
 }

kotlin-codepoints-deluxe/src/commonTest/kotlin/CodePointSequenceTest.kt

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,74 @@ class CodePointSequenceTest {
             iterator.next()
         }
     }
+
+    @Test
+    fun codePointIterator_with_start_index() {
+        val iterator = "a\uD83E\uDD95b".codePointIterator(startIndex = 1)
+
+        assertTrue(iterator.hasNext())
+        assertEquals("\uD83E\uDD95".codePointAt(0), iterator.next())
+        assertTrue(iterator.hasNext())
+        assertEquals('b'.toCodePoint(), iterator.next())
+        assertFalse(iterator.hasNext())
+        assertFailsWith<IndexOutOfBoundsException> {
+            iterator.next()
+        }
+    }
+
+    @Test
+    fun codePointIterator_with_end_index() {
+        val iterator = "a\uD83E\uDD95b".codePointIterator(endIndex = 3)
+
+        assertTrue(iterator.hasNext())
+        assertEquals('a'.toCodePoint(), iterator.next())
+        assertTrue(iterator.hasNext())
+        assertEquals("\uD83E\uDD95".codePointAt(0), iterator.next())
+        assertFalse(iterator.hasNext())
+        assertFailsWith<IndexOutOfBoundsException> {
+            iterator.next()
+        }
+    }
+
+    @Test
+    fun codePointIterator_with_start_and_end_index() {
+        val iterator = "ab\uD83E\uDD95c".codePointIterator(startIndex = 1, endIndex = 4)
+
+        assertTrue(iterator.hasNext())
+        assertEquals('b'.toCodePoint(), iterator.next())
+        assertTrue(iterator.hasNext())
+        assertEquals("\uD83E\uDD95".codePointAt(0), iterator.next())
+        assertFalse(iterator.hasNext())
+        assertFailsWith<IndexOutOfBoundsException> {
+            iterator.next()
+        }
+    }
+
+    @Test
+    fun codePointIterator_with_start_index_inside_surrogate_pair() {
+        val iterator = "a\uD83E\uDD95b".codePointIterator(startIndex = 2)
+
+        assertTrue(iterator.hasNext())
+        assertEquals('\uDD95'.toCodePoint(), iterator.next())
+        assertTrue(iterator.hasNext())
+        assertEquals('b'.toCodePoint(), iterator.next())
+        assertFalse(iterator.hasNext())
+        assertFailsWith<IndexOutOfBoundsException> {
+            iterator.next()
+        }
+    }
+
+    @Test
+    fun codePointIterator_with_end_index_inside_surrogate_pair() {
+        val iterator = "a\uD83E\uDD95b".codePointIterator(endIndex = 2)
+
+        assertTrue(iterator.hasNext())
+        assertEquals('a'.toCodePoint(), iterator.next())
+        assertTrue(iterator.hasNext())
+        assertEquals('\uD83E'.toCodePoint(), iterator.next())
+        assertFalse(iterator.hasNext())
+        assertFailsWith<IndexOutOfBoundsException> {
+            iterator.next()
+        }
+    }
 }

0 commit comments

Comments
 (0)