Skip to content

Commit 1aaf04a

Browse files
committed
Add kotlin-codepoints-deluxe
Uses the basic functionality in `kotlin-codepoints` to provide a nicer API to work with Unicode code points.
1 parent 39abf06 commit 1aaf04a

File tree

8 files changed

+549
-0
lines changed

8 files changed

+549
-0
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
// Build plugins for this module, resolved through the `libs` version catalog.
plugins {
    // Kotlin Multiplatform: enables the `kotlin { ... }` target configuration.
    alias(libs.plugins.kotlin.multiplatform)
    // Maven publishing support (vanniktech plugin).
    alias(libs.plugins.vanniktech.maven.publish)
}
5+
6+
// Kotlin Multiplatform configuration: the full list of targets this module is built for,
// followed by the dependencies of the shared source sets.
kotlin {
    // Android NDK
    androidNativeArm32()
    androidNativeArm64()
    androidNativeX86()
    androidNativeX64()

    // iOS
    iosArm32()
    iosArm64()
    iosX64()
    iosSimulatorArm64()

    // JavaScript (IR backend), browser environment with default settings
    js(IR) {
        browser()
    }

    // JVM, compiled for a Java 1.8 bytecode target
    jvm {
        compilations.all {
            kotlinOptions {
                jvmTarget = "1.8"
            }
        }
    }

    // Linux
    linuxArm32Hfp()
    linuxArm64()
    linuxMips32()
    linuxMipsel32()
    linuxX64()

    // macOS
    macosX64()
    macosArm64()

    // Windows (MinGW)
    mingwX64()
    mingwX86()

    // tvOS
    tvosArm64()
    tvosX64()
    tvosSimulatorArm64()

    // WebAssembly
    wasm32()

    // watchOS
    watchosArm32()
    watchosArm64()
    watchosDeviceArm64()
    watchosX86()
    watchosX64()
    watchosSimulatorArm64()

    sourceSets {
        // The base library is exposed as `api` so consumers of this module see it transitively.
        getByName("commonMain") {
            dependencies {
                api(project(":kotlin-codepoints"))
            }
        }
        getByName("commonTest") {
            dependencies {
                implementation(kotlin("test"))
            }
        }
    }
}
65+
66+
// POM metadata for the published artifact.
// The name matches the module name (`kotlin-codepoints-deluxe`) and the description refers to the
// base library by its actual name (`kotlin-codepoints`).
@Suppress("UnstableApiUsage")
mavenPublishing {
    pom {
        name.set("kotlin-codepoints-deluxe")
        description.set("Kotlin Multiplatform (KMP) library that adds a nicer API than kotlin-codepoints for dealing with Unicode code points.")
    }
}
73+
74+
// Aggregate task that publishes only the two Linux MIPS publications to Maven Central.
// Registered lazily (instead of `tasks.create`) so the task is only created and configured
// when a build actually requests it.
tasks.register("publishMips") {
    dependsOn(
        "publishLinuxMips32PublicationToMavenCentralRepository",
        "publishLinuxMipsel32PublicationToMavenCentralRepository"
    )
}
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
package de.cketti.codepoints.deluxe
2+
3+
import de.cketti.codepoints.CodePoints
4+
import kotlin.jvm.JvmInline
5+
6+
/**
 * A single Unicode code point, wrapped in its own type.
 *
 * The constructor is internal and rejects invalid values. Instances are obtained through one of
 * the following functions:
 * - [Int.toCodePoint]
 * - [Char.toCodePoint]
 * - [String.codePointSequence]
 * - [String.codePointIterator]
 *
 * @property value The numeric value of this code point.
 */
@JvmInline
value class CodePoint internal constructor(val value: Int) {
    init {
        // Validation runs for every construction path, so a CodePoint always holds a valid value.
        require(CodePoints.isValidCodePoint(value)) { "Not a valid code point" }
    }

    /**
     * Whether this code point lies in the Basic Multilingual Plane (BMP).
     *
     * A BMP code point fits into a single `Char`.
     */
    val isBasic: Boolean get() = CodePoints.isBmpCodePoint(value)

    /**
     * Whether this code point lies in the supplementary character range.
     *
     * Inside a `String` such a code point occupies two `Char` values (a surrogate pair).
     */
    val isSupplementary: Boolean get() = CodePoints.isSupplementaryCodePoint(value)

    /**
     * How many `Char` values are required to represent this code point.
     *
     * This is `1` when this code point is in the [BMP][CodePoint.isBasic] and `2` otherwise.
     */
    val charCount: Int get() = CodePoints.charCount(value)

    /**
     * Whether this code point is a surrogate code unit.
     */
    val isSurrogate: Boolean
        // Supplementary values are excluded first so that `toChar()` never truncates the value.
        get() = if (isSupplementary) false else value.toChar().isSurrogate()

    /**
     * Whether this code point is a high surrogate code unit.
     */
    val isHighSurrogate: Boolean
        get() = if (isSupplementary) false else value.toChar().isHighSurrogate()

    /**
     * Whether this code point is a low surrogate code unit.
     */
    val isLowSurrogate: Boolean
        get() = if (isSupplementary) false else value.toChar().isLowSurrogate()

    /**
     * The leading (high) surrogate of the surrogate pair that represents this supplementary code point.
     *
     * For a code point that is not supplementary, the returned `Char` is unspecified.
     */
    val highSurrogateChar: Char get() = CodePoints.highSurrogate(value)

    /**
     * The trailing (low) surrogate of the surrogate pair that represents this supplementary code point.
     *
     * For a code point that is not supplementary, the returned `Char` is unspecified.
     */
    val lowSurrogateChar: Char get() = CodePoints.lowSurrogate(value)

    /**
     * Returns the UTF-16 representation of this code point as a new char array.
     *
     * For a BMP (Basic Multilingual Plane, Plane 0) code point the array holds the same value as [value].
     * For a supplementary code point the array holds the corresponding surrogate pair.
     */
    fun toChars(): CharArray = CodePoints.toChars(value)

    /**
     * Writes the UTF-16 representation of this code point into [destination], starting at [offset].
     *
     * For a BMP (Basic Multilingual Plane, Plane 0) code point the value is stored in `destination[offset]`
     * and `1` is returned. For a supplementary code point the high surrogate is stored in
     * `destination[offset]`, the low surrogate in `destination[offset+1]`, and `2` is returned.
     *
     * @return The number of `Char` values written.
     */
    fun toChars(destination: CharArray, offset: Int): Int = CodePoints.toChars(value, destination, offset)

    /**
     * Returns the conventional textual form of this code point.
     *
     * The result is "U+" followed by the value in upper-case hexadecimal, left-padded with zeros to at
     * least four digits.
     */
    override fun toString(): String {
        val hex = value.toString(radix = 16).uppercase()
        return "U+" + hex.padStart(length = 4, padChar = '0')
    }
}
114+
115+
/**
 * Wraps this `Int` in a [CodePoint].
 *
 * @throws IllegalArgumentException if this value is outside the range of valid code points.
 */
fun Int.toCodePoint(): CodePoint = CodePoint(this)
123+
124+
/**
 * Wraps the numeric code of this `Char` in a [CodePoint].
 */
fun Char.toCodePoint(): CodePoint = CodePoint(code)
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
package de.cketti.codepoints.deluxe
2+
3+
import kotlin.jvm.JvmInline
4+
5+
/**
 * A [Sequence] view over the [CodePoint]s of a [String].
 *
 * Each call to [iterator] starts a fresh [CodePointIterator] at the beginning of the string.
 */
@JvmInline
value class CodePointSequence(private val text: String) : Sequence<CodePoint> {
    override fun iterator(): CodePointIterator = CodePointIterator(text)
}
14+
15+
/**
 * Walks over the [CodePoint]s of a [String] from start to end.
 *
 * Calling [next] once the string is exhausted fails with an [IndexOutOfBoundsException], because the
 * lookup is delegated to [String.codePointAt] with an index equal to the string length.
 */
class CodePointIterator(private val text: String) : Iterator<CodePoint> {
    // Position, in `Char` units, of the next code point to return.
    private var index = 0

    override fun hasNext(): Boolean = index < text.length

    override fun next(): CodePoint {
        val current = text.codePointAt(index)
        // Skip one `Char` for a BMP code point, two for a surrogate pair.
        index += current.charCount
        return current
    }
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
@file:Suppress(
2+
"INVISIBLE_MEMBER", // Required to be able to use kotlin.internal.HidesMembers
3+
"INVISIBLE_REFERENCE", // Required to be able to use kotlin.internal.HidesMembers
4+
)
5+
package de.cketti.codepoints.deluxe
6+
7+
import de.cketti.codepoints.codePointAt as intCodePointAt
8+
import de.cketti.codepoints.codePointBefore as intCodePointBefore
9+
10+
/**
 * Returns the Unicode code point located at [index], wrapped in a [CodePoint].
 *
 * [index] is an ordinary `String` index, i.e. it counts `Char`s from the start of the string.
 *
 * See [codePointAt][intCodePointAt].
 *
 * @throws IndexOutOfBoundsException if [index] is out of bounds of this string.
 */
@kotlin.internal.HidesMembers
fun String.codePointAt(index: Int): CodePoint = intCodePointAt(index).toCodePoint()
24+
25+
/**
 * Returns the Unicode code point that ends right before [index], wrapped in a [CodePoint].
 *
 * [index] is an ordinary `String` index, i.e. it counts `Char`s from the start of the string.
 *
 * See [codePointBefore][intCodePointBefore].
 *
 * @throws IndexOutOfBoundsException if `index - 1` is out of bounds of this string.
 */
fun String.codePointBefore(index: Int): CodePoint = intCodePointBefore(index).toCodePoint()
37+
38+
/**
 * Returns the [CodePoint]s of this string as a [CodePointSequence].
 */
fun String.codePointSequence(): CodePointSequence = CodePointSequence(this)
44+
45+
/**
 * Returns a [CodePointIterator] over the [CodePoint]s of this string.
 */
fun String.codePointIterator(): CodePointIterator = CodePointIterator(this)
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package de.cketti.codepoints.deluxe
2+
3+
import kotlin.test.Test
4+
import kotlin.test.assertEquals
5+
import kotlin.test.assertFailsWith
6+
import kotlin.test.assertFalse
7+
import kotlin.test.assertTrue
8+
9+
/**
 * Tests for [String.codePointSequence] and [String.codePointIterator], using strings that mix
 * BMP characters with supplementary characters encoded as surrogate pairs.
 */
class CodePointSequenceTest {
    @Test
    fun codePointSequence() {
        val expected = listOf(0x0061, 0x1F995, 0x0062, 0x1F996, 0x0063)

        val actual = "a\uD83E\uDD95b\uD83E\uDD96c".codePointSequence().map { it.value }.toList()

        assertEquals(expected, actual)
    }

    @Test
    fun codePointIterator() {
        val first = 'a'.toCodePoint()
        val second = "\uD83E\uDD95".codePointAt(0)
        val third = 'b'.toCodePoint()

        val iterator = "a\uD83E\uDD95b".codePointIterator()

        assertTrue(iterator.hasNext())
        assertEquals(first, iterator.next())

        assertTrue(iterator.hasNext())
        assertEquals(second, iterator.next())

        assertTrue(iterator.hasNext())
        assertEquals(third, iterator.next())

        // The string is exhausted: no further elements, and asking for one fails.
        assertFalse(iterator.hasNext())
        assertFailsWith<IndexOutOfBoundsException> { iterator.next() }
    }
}

0 commit comments

Comments
 (0)