diff --git a/app/src/androidTest/java/com/vitorpamplona/amethyst/RichTextParserTest.kt b/app/src/androidTest/java/com/vitorpamplona/amethyst/RichTextParserTest.kt index 25f0fe21a..278b05ce3 100644 --- a/app/src/androidTest/java/com/vitorpamplona/amethyst/RichTextParserTest.kt +++ b/app/src/androidTest/java/com/vitorpamplona/amethyst/RichTextParserTest.kt @@ -668,7 +668,7 @@ class RichTextParserTest { fun testTextToParse() { val state = RichTextParser().parseText(textToParse, ImmutableListOfLists()) Assert.assertEquals( - "relay.shitforce.one, relayable.org, universe.nostrich.land, nos.lol, universe.nostrich.land?lang=zh, universe.nostrich.land?lang=en, relay.damus.io, relay.nostr.wirednet.jp, offchain.pub, nostr.rocks, relay.wellorder.net, nostr.oxtr.dev, universe.nostrich.land?lang=ja, relay.mostr.pub, nostr.bitcoiner.social, โšก41.7M, Nostr-Check.com, MR.Rabbit, Ancap.su, โšก๏ธsatscoinsv@getalby.com, miceliomad@miceliomad.github.io/nostr/, zapper.lol, smies.me, baller.hodl", + "relay.shitforce.one, relayable.org, universe.nostrich.land, nos.lol, universe.nostrich.land?lang=zh, universe.nostrich.land?lang=en, relay.damus.io, relay.nostr.wirednet.jp, offchain.pub, nostr.rocks, relay.wellorder.net, nostr.oxtr.dev, universe.nostrich.land?lang=ja, relay.mostr.pub, nostr.bitcoiner.social, Nostr-Check.com, MR.Rabbit, Ancap.su, zapper.lol, smies.me, baller.hodl", state.urlSet.joinToString(", ") ) @@ -806,134 +806,101 @@ class RichTextParserTest { "HashTag(#bitcoin,)", "RegularText(1.7K,)", "RegularText(109)", - "RegularText()", "HashTag(#concussion,)", "RegularText(1.1K,)", "RegularText(25)", - "RegularText()", "HashTag(#press,)", "RegularText(0.9K,)", "RegularText(65)", - "RegularText()", "HashTag(#france,)", "RegularText(492,)", "RegularText(46)", - "RegularText()", "HashTag(#presse,)", "RegularText(480,)", "RegularText(42)", - "RegularText()", "HashTag(#covid19,)", "RegularText(465,)", "RegularText(65)", - "RegularText()", "HashTag(#nostr,)", "RegularText(414,)", "RegularText(109)", - "RegularText()", "HashTag(#zapathon,)", "RegularText(386,)", "RegularText(76)", - "RegularText()", "HashTag(#rssfeed,)", "RegularText(309,)", "RegularText(53)", - "RegularText()", "HashTag(#btc,)", "RegularText(299,)", "RegularText(109)", - "RegularText()", "HashTag(#news,)", "RegularText(294,)", "RegularText(91)", - "RegularText()", "HashTag(#zap,)", "RegularText(283,)", "RegularText(109)", - "RegularText()", "HashTag(#linux,)", "RegularText(253,)", "RegularText(88)", - "RegularText()", "HashTag(#respond,)", "RegularText(246,)", "RegularText(90)", - "RegularText()", "HashTag(#kompost,)", "RegularText(240,)", "RegularText(31)", - "RegularText()", "HashTag(#plebchain,)", "RegularText(236,)", "RegularText(109)", - "RegularText()", "HashTag(#gardenaward,)", "RegularText(236,)", "RegularText(31)", - "RegularText()", "HashTag(#start,)", "RegularText(236,)", "RegularText(31)", - "RegularText()", "HashTag(#unicef,)", "RegularText(233,)", "RegularText(32)", - "RegularText()", "HashTag(#coronavirus,)", "RegularText(233,)", "RegularText(33)", - "RegularText()", "HashTag(#bew,)", "RegularText(229,)", "RegularText(31)", - "RegularText()", "HashTag(#balkon,)", "RegularText(229,)", "RegularText(31)", - "RegularText()", "HashTag(#terrasse,)", "RegularText(229,)", "RegularText(31)", - "RegularText()", "HashTag(#braininjuryawareness,)", "RegularText(229,)", "RegularText(24)", - "RegularText()", "HashTag(#garten,)", "RegularText(220,)", "RegularText(21)", - "RegularText()", "HashTag(#smart,)", "RegularText(220,)", "RegularText(21)", - "RegularText()", "HashTag(#nsfw,)", "RegularText(211,)", "RegularText(85)", - "RegularText()", "HashTag(#protoncalendar,)", "RegularText(206,)", "RegularText(31)", - "RegularText()", "HashTag(#stacksats,)", "RegularText(195,)", "RegularText(99)", - "RegularText()", "HashTag(#nokyc,)", "RegularText(179,)", "RegularText(98)", "RegularText()", - "RegularText()", "RegularText(Emoji sentiment today)", "RegularText()", "RegularText(โšก (1.6K) ๐Ÿ‘‰ (1.4K) ๐Ÿ‡ช๐Ÿ‡บ (1.2K) ๐Ÿซ‚ (1.2K) ๐Ÿ‡บ๐Ÿ‡ธ (1.1K) ๐Ÿ’œ (875) ๐Ÿง  (858) ๐Ÿ˜‚ (830) ๐Ÿ”ฅ (690) ๐Ÿคฃ (566) ๐Ÿค™ (525) โ˜• (444) ๐Ÿ‘‡ (443) ๐Ÿ™Œ๐Ÿป (425) โ˜€ (307) ๐Ÿ˜Ž (305) ๐Ÿฅณ (301) ๐Ÿค” (276) ๐ŸŒป (270) ๐Ÿงก (270) ๐Ÿฅ‡ (269) ๐Ÿ—“ (269) ๐Ÿ™ (268) ๐Ÿ† (267) ๐ŸŒฑ (264) ๐Ÿ“ฐ (230) ๐Ÿ‰ (221) ๐Ÿ˜ญ (220) ๐Ÿ’ฐ (219) ๐Ÿ”— (209) ๐Ÿ‘€ (201) ๐Ÿ˜… (199) โœจ (193) ๐Ÿ‡ท๐Ÿ‡บ (182) ๐Ÿ’ช (167) โœ… (164) ๐Ÿ’ค (163) ๐Ÿถ (151) ๐Ÿ‡จ๐Ÿ‡ญ (141) ๐Ÿ“ (137) ๐Ÿ˜ (136) ๐ŸŒž (136) ๐Ÿพ (136) โค (132) ๐Ÿ’ป (126) ๐Ÿš€ (125) ๐Ÿ‘ (125) ๐Ÿ‡ง๐Ÿ‡ท (125) ๐Ÿ˜Š (121) ๐Ÿ“š (120) โžก (120) ๐Ÿ‘ (118) ๐ŸŽ‰ (117) ๐ŸŽฎ (115) ๐Ÿคท (113) ๐Ÿ‘‹ (112) ๐Ÿ’ƒ (108) ๐Ÿ•บ๐Ÿป (106) ๐Ÿ’ก (104) ๐Ÿšจ (99) ๐Ÿ˜† (97) ๐Ÿ’ฏ (95) โš  (92) ๐Ÿ“ข (92) ๐Ÿค— (89) ๐Ÿ˜ด (87) ๐Ÿ” (83) ๐Ÿฐ (81) ๐Ÿ˜€ (79) ๐ŸŽŸ (78) โ› (78) ๐Ÿฆ (76) ๐Ÿ’ธ (76) โœŒ๐Ÿป (75) ๐Ÿค (73) ๐Ÿ‡ฌ๐Ÿ‡ง (73) ๐ŸŒฝ (70) ๐Ÿคก (69) ๐Ÿคฎ (69) โ— (66) ๐Ÿค (65) ๐Ÿ˜‰ (65) ๐Ÿ™‡ (65) ๐Ÿป (64) ๐ŸŒ (64) ๐Ÿ’• (63) ๐ŸŒธ (62) ๐Ÿ’ฌ (61) โ˜บ (61) ๐Ÿ‡ฆ๐Ÿ‡ท (59) ๐Ÿ‡ฎ๐Ÿ‡ฉ (57) ๐Ÿ˜ณ (57) ๐Ÿ˜„ (57) ๐ŸŽถ (57) ๐Ÿฅท๐Ÿป (56) ๐ŸŽต (56) ๐Ÿ˜ƒ (56) ๐Ÿ” (55) ๐Ÿ’ฅ (55) ๐ŸŽฒ (54) โœ (54) ๐Ÿ•’ (53) โฌ‡ (53) ๐Ÿ’™ (51) ๐Ÿ”’ (50) ๐Ÿ“ˆ (50) ๐Ÿช™ (50) ๐ŸŒง (50) ๐Ÿฅฐ (50) ๐Ÿ•ธ (50) ๐ŸŒ (50) ๐Ÿ’ญ (49) ๐ŸŒ™ (49) ๐Ÿ˜ (49) ๐Ÿ“ฑ (48) ๐ŸŒŸ (48) ๐Ÿคฉ (48) ๐Ÿ’” (47) ๐Ÿ”Œ (47) ๐Ÿ˜‹ (47) ๐ŸŽ– (47) ๐Ÿฃ (46) ๐Ÿ“ท (46) ๐Ÿ’ผ (45) โญ (45) ๐Ÿฅ” (45) ๐Ÿฅบ (45) ๐Ÿ‘Œ (44) ๐Ÿ‘ท๐Ÿผ (43) ๐Ÿ˜ฑ (43) ๐Ÿ“… (43) ๐Ÿค– (43) ๐Ÿ“ธ (42) ๐Ÿ“Š (42) ๐Ÿฆ‘ (40) ๐Ÿ’ต (40) ๐Ÿคฆ (39) โฃ (38) ๐Ÿ’Ž (38) ๐Ÿ–ค (38) ๐Ÿ“บ (37) ๐Ÿ‡ต๐Ÿ‡ฑ (37) ๐Ÿ‡ฏ๐Ÿ‡ต (36) ๐Ÿ”ง (36) ๐Ÿค˜ (36) ๐Ÿ’– (36) โ€ผ (35) ๐Ÿ˜ข (35) ๐Ÿ˜บ (34) ๐Ÿ”Š (34) ๐Ÿ˜ (34) ๐Ÿ‡ธ๐Ÿ‡ฐ (34) ๐Ÿƒ (34) ๐Ÿ‘ฉโ€๐Ÿ‘ง (34) โฐ (33) ๐Ÿ‘จโ€๐Ÿ’ป (33) ๐Ÿ‘‘ (33) ๐Ÿ‘ฅ (32) ๐Ÿ–ฅ (32) ๐Ÿ’จ (32) ๐Ÿ’— (31) ๐Ÿ‡ฒ๐Ÿ‡ฝ (31) ๐Ÿ“– (31) ๐Ÿšซ (31) ๐Ÿ‘Š๐Ÿป (31) ๐Ÿ˜ก (31) ๐ŸŒŽ (31) ๐Ÿ‘ (30) ๐Ÿ—ž (30) ๐Ÿ€ (30) ๐Ÿฝ (29) ๐Ÿธ (29) ๐Ÿฅš (29) ๐Ÿ’ฉ (29) โœŠ๐Ÿพ (29) ๐Ÿ˜ฎ (29) ๐ŸŒก (29) ๐Ÿ™ƒ (28) ๐Ÿ”” (28) ๐Ÿ‡ป๐Ÿ‡ช (28) ๐Ÿ’ฆ (28) ๐ŸŽฏ (28) ๐ŸŽจ (28) ๐Ÿ› (28) ๐Ÿ–ผ (27) โ˜๐Ÿป (27) ๐Ÿ›‘ (27) ๐Ÿ™„ (27) ๐Ÿง‘๐Ÿปโ€๐Ÿคโ€๐Ÿง‘๐Ÿฝ (27) ๐ŸŒˆ (27) ๐Ÿฅ‚ (26) ๐Ÿ‡ซ๐Ÿ‡ฎ (26) ๐ŸŽฅ (26) ๐Ÿ˜ฌ (26) ๐Ÿฅฒ (25) ๐Ÿฆพ (24) ๐Ÿคœ (24) ๐Ÿ™‚ (24) ๐Ÿ–• (24) ๐Ÿ˜ฉ (24) )", "RegularText()", "RegularText(Zap economy)", "RegularText()", - "Link(โšก41.7M)", - "RegularText(sats)", - "RegularText((โ‚ฟ0.417))", - "RegularText()", + "RegularText(โšก41.7M sats (โ‚ฟ0.417) )", "RegularText(1,816 zappers & 920 zapped (unique pubkeys))", "RegularText(๐ŸŒฉ๏ธ 33,248 zaps, 1,253 sats per zap (avg))", "RegularText()", @@ -2248,7 +2215,7 @@ class RichTextParserTest { "HashTag(#214)", "RegularText(2%)", "RegularText(Satscoinsv,)", - "Link(โšก๏ธsatscoinsv@getalby.com)", + "RegularText(โšก๏ธsatscoinsv@getalby.com)", "RegularText(-)", "RegularText(80db64657ea0358c5332c5cca01565eeddd4b8799688b1c46d3cb2d7c966671f)", "HashTag(#215)", @@ -2458,7 +2425,7 @@ class RichTextParserTest { "HashTag(#249)", "RegularText(2%)", "RegularText(micmad,)", - "Link(miceliomad@miceliomad.github.io/nostr/)", + "RegularText(miceliomad@miceliomad.github.io/nostr/)", "RegularText(-)", "RegularText(cd806edcf8ff40ea94fa574ea9cd97da16e5beb2b85aac6e1d648b8388504343)", "HashTag(#250)", @@ -3805,7 +3772,7 @@ class RichTextParserTest { "HashTag(#471)", "RegularText(1%)", "RegularText(leonwankum,)", - "SchemelessUrl(@leonawankum@BitcoinNostr.com)", + "RegularText(@leonawankum@BitcoinNostr.com)", "RegularText()", "RegularText(-)", "RegularText(652d58acafa105af8475c0fe8029a52e7ddbc337b2bd9c98bb17a111dc4cde60)", @@ -4098,7 +4065,6 @@ https://nostr.build/i/fd53fcf5ad950fbe45127e4bcee1b59e8301d41de6beee211f45e344db "RegularText(here:)", "Link(https://lnshort.it/live-stream-embeds/)", "RegularText()", - "RegularText()", "Image(https://nostr.build/i/fd53fcf5ad950fbe45127e4bcee1b59e8301d41de6beee211f45e344db214e8a.jpg)" ) @@ -4110,6 +4076,57 @@ https://nostr.build/i/fd53fcf5ad950fbe45127e4bcee1b59e8301d41de6beee211f45e344db } } + @Test + fun testNewLineAfterImage() { + val text = "Thatโ€™s it ! Thatโ€™s the #note https://cdn.nostr.build/i/1dc0726b6cb0f94a92bd66765ffb90f6c67e90c17bb957fc3d5d4782cbd73de7.jpg " + + val state = RichTextParser().parseText(text, ImmutableListOfLists()) + + printStateForDebug(state) + + val expectedResult = listOf( + "RegularText(Thatโ€™s)", + "RegularText(it)", + "RegularText(!)", + "RegularText(Thatโ€™s)", + "RegularText(the)", + "HashTag(#note)", + "Image(https://cdn.nostr.build/i/1dc0726b6cb0f94a92bd66765ffb90f6c67e90c17bb957fc3d5d4782cbd73de7.jpg)" + ) + + state.paragraphs.map { it.words }.flatten().forEachIndexed { index, seg -> + Assert.assertEquals( + expectedResult[index], + "${seg.javaClass.simpleName.replace("Segment", "")}(${seg.segmentText})" + ) + } + } + + @Test + fun testSapceAfterImage() { + val text = "Thatโ€™s it! https://cdn.nostr.build/i/1dc0726b6cb0f94a92bd66765ffb90f6c67e90c17bb957fc3d5d4782cbd73de7.jpg Thatโ€™s the #note" + + val state = RichTextParser().parseText(text, ImmutableListOfLists()) + + printStateForDebug(state) + + val expectedResult = listOf( + "RegularText(Thatโ€™s)", + "RegularText(it!)", + "Image(https://cdn.nostr.build/i/1dc0726b6cb0f94a92bd66765ffb90f6c67e90c17bb957fc3d5d4782cbd73de7.jpg)", + "RegularText(Thatโ€™s)", + "RegularText(the)", + "HashTag(#note)" + ) + + state.paragraphs.map { it.words }.flatten().forEachIndexed { index, seg -> + Assert.assertEquals( + expectedResult[index], + "${seg.javaClass.simpleName.replace("Segment", "")}(${seg.segmentText})" + ) + } + } + private fun printStateForDebug(state: RichTextViewerState) { state.paragraphs.forEachIndexed { index, paragraph -> paragraph.words.forEach { seg -> diff --git a/app/src/main/java/com/vitorpamplona/amethyst/service/CachedRichTextParser.kt b/app/src/main/java/com/vitorpamplona/amethyst/service/CachedRichTextParser.kt index 163ffc0ea..5b84d68f6 100644 --- a/app/src/main/java/com/vitorpamplona/amethyst/service/CachedRichTextParser.kt +++ b/app/src/main/java/com/vitorpamplona/amethyst/service/CachedRichTextParser.kt @@ -54,7 +54,7 @@ object CachedRichTextParser { // Android9 seems to have an issue starting this regex. val noProtocolUrlValidator = try { - Pattern.compile("(([\\w\\d-]+\\.)*[a-zA-Z][\\w-]+[\\.\\:]\\w+([\\/\\?\\=\\&\\#\\.]?[\\w-]*[^\\p{IsHan}\\p{IsHiragana}\\p{IsKatakana}])*\\/?)(.*)") + Pattern.compile("(([\\w\\d-]+\\.)*[a-zA-Z][\\w-]+[\\.\\:]\\w+([\\/\\?\\=\\&\\#\\.]?[\\w-]+[^\\p{IsHan}\\p{IsHiragana}\\p{IsKatakana}])*\\/?)(.*)") } catch (e: Exception) { Pattern.compile("(([\\w\\d-]+\\.)*[a-zA-Z][\\w-]+[\\.\\:]\\w+([\\/\\?\\=\\&\\#\\.]?[\\w-]+)*\\/?)(.*)") } @@ -119,7 +119,7 @@ class RichTextParser() { val isRTL = isArabic(paragraph) - val wordList = paragraph.split(' ') + val wordList = paragraph.trimEnd().split(' ') wordList.forEach { word -> val wordSegment = wordIdentifier(word, images, urls, emojis, tags) if (wordSegment !is RegularTextSegment) { @@ -182,8 +182,8 @@ class RichTextParser() { } else if (schemelessMatcher.find()) { val url = schemelessMatcher.group(1) // url val additionalChars = schemelessMatcher.group(4) // additional chars - val pattern = "^([A-Za-z0-9-]+(\\.[A-Za-z0-9]+)+)(:[0-9]+)?(/[^?#]*)?(\\?[^#]*)?(#.*)?".toRegex(RegexOption.IGNORE_CASE) - if (pattern.matches(word)) { + val pattern = "^([A-Za-z0-9-_]+(\\.[A-Za-z0-9-_]+)+)(:[0-9]+)?(/[^?#]*)?(\\?[^#]*)?(#.*)?".toRegex(RegexOption.IGNORE_CASE) + if (pattern.find(word) != null) { SchemelessUrlSegment(word, url, additionalChars) } else { RegularTextSegment(word)