Fixes Emoji Joining issues.

This commit is contained in:
Vitor Pamplona 2023-07-06 17:24:42 -04:00
parent 6d6720a511
commit 64d96ab1d7
2 changed files with 73 additions and 3 deletions

View File

@ -1,10 +1,11 @@
package com.vitorpamplona.amethyst.service
/**
 * Tells whether the code point that starts at UTF-16 index [pos] occupies two
 * `Char` units (i.e. is encoded as a surrogate pair).
 *
 * @param pos index, in `Char` units, of the code point to inspect.
 * @return `true` when the code point at [pos] needs two `Char` units, `false` otherwise.
 * @throws IndexOutOfBoundsException if [pos] is outside this string (propagated from `codePointAt`).
 */
fun String.isUTF16Char(pos: Int): Boolean {
    // No logging here: this is a pure predicate and must not write to stdout.
    return Character.charCount(this.codePointAt(pos)) == 2
}
fun String.firstFullChar(): String {
fun String.firstFullCharOld(): String {
return when (this.length) {
0, 1 -> return this
2, 3 -> return if (isUTF16Char(0)) this.take(2) else this.take(1)
@ -21,3 +22,57 @@ fun String.firstFullChar(): String {
}
}
}
/**
 * Returns the leading character cluster of this string.
 *
 * The cluster is the first code point plus every following code point that is
 * either attached through a zero-width joiner (U+200D) or is itself a
 * two-`Char` (surrogate pair) code point. Scanning stops at the first
 * single-`Char` code point that is not preceded by a joiner.
 *
 * Joiners at the very beginning of the string are skipped, and joiners left
 * dangling at the end of the cluster are trimmed from the result.
 *
 * @return the leading cluster, or an empty string when the receiver is empty
 * or contains only joiners.
 */
fun String.firstFullChar(): String {
    val zeroWidthJoiner = 0x200D

    var isInJoin = false
    var start = 0 // index of the first Char that belongs to the result
    var length = 0 // number of Chars accepted so far, counted from start
    var i = 0

    while (i < this.length) {
        val codePoint = codePointAt(i)
        val next = offsetByCodePoints(i, 1)

        if (codePoint == zeroWidthJoiner && length == 0) {
            // Nothing accepted yet: a leading joiner is dropped by moving the start forward.
            start = next
        } else {
            if (codePoint == zeroWidthJoiner) {
                // The next code point, whatever it is, stays glued to the cluster.
                isInJoin = true
            } else {
                // Stops when two chars are not joined together.
                if (length > 0 && !isInJoin && Character.charCount(codePoint) == 1) {
                    break
                }
                isInJoin = false
            }
            length += next - i
        }
        i = next
    }

    // If the cluster ends in a joiner, search backwards until a real char is found.
    // The scan must be relative to start, otherwise skipped leading joiners shift
    // the inspected position and the dangling joiner is kept.
    if (isInJoin) {
        var end = start + length
        while (end > start && this[end - 1].code == zeroWidthJoiner) {
            end -= 1
        }
        length = end - start
    }

    return substring(start, start + length)
}

View File

@ -11,12 +11,12 @@ class CharsetTest {
}
@Test
fun testUTF16Char() {
fun testUTF16JoinChar() {
Assert.assertEquals("\uD83C\uDF48", "\uD83C\uDF48Hi".firstFullChar())
}
@Test
fun testUTF32Char() {
fun testUTF32JoinChar() {
Assert.assertEquals("\uD83E\uDDD1\uD83C\uDFFE", "\uD83E\uDDD1\uD83C\uDFFEHi".firstFullChar())
}
@ -34,4 +34,19 @@ class CharsetTest {
fun testSpecialChars() {
    // A plain single-Char symbol followed by a letter yields just the symbol.
    val expected = "="
    val actual = "=x".firstFullChar()
    Assert.assertEquals(expected, actual)
}
@Test
fun test5CharEmoji() {
    // Two surrogate pairs glued by U+200D (five Chars) must come back as one unit.
    val expected = "\uD83D\uDC68\u200D\uD83D\uDCBB"
    val actual = "\uD83D\uDC68\u200D\uD83D\uDCBBadsfasdf".firstFullChar()
    Assert.assertEquals(expected, actual)
}
@Test
fun testFamily() {
    // Four surrogate pairs chained by three U+200D joiners must come back whole.
    val expected = "\uD83D\uDC68\u200d\uD83D\uDC69\u200d\uD83D\uDC67\u200d\uD83D\uDC67"
    val actual = "\uD83D\uDC68\u200D\uD83D\uDC69\u200D\uD83D\uDC67\u200D\uD83D\uDC67adsfasdf".firstFullChar()
    Assert.assertEquals(expected, actual)
}
@Test
fun testTeacher() {
    // Two adjacent surrogate pairs, then U+200D and a third pair, form one unit.
    val expected = "\uD83E\uDDD1\uD83C\uDFFF\u200D\uD83C\uDFEB"
    val actual = "\uD83E\uDDD1\uD83C\uDFFF\u200D\uD83C\uDFEBasdf".firstFullChar()
    Assert.assertEquals(expected, actual)
}
}