Merge pull request #819 from jiftechnify/meta-parser-benchmark

Add benchmark for MetaTagsParser
This commit is contained in:
Vitor Pamplona
2024-03-29 17:46:35 -04:00
committed by GitHub
5 changed files with 7959 additions and 44 deletions

View File

@@ -20,6 +20,8 @@
*/ */
package com.vitorpamplona.amethyst.service.previews package com.vitorpamplona.amethyst.service.previews
import com.vitorpamplona.amethyst.commons.preview.MetaTag
import com.vitorpamplona.amethyst.commons.preview.MetaTagsParser
import com.vitorpamplona.amethyst.service.HttpClientManager import com.vitorpamplona.amethyst.service.HttpClientManager
import com.vitorpamplona.amethyst.service.checkNotInMainThread import com.vitorpamplona.amethyst.service.checkNotInMainThread
import kotlinx.coroutines.Dispatchers import kotlinx.coroutines.Dispatchers

View File

@@ -0,0 +1,55 @@
/**
* Copyright (c) 2024 Vitor Pamplona
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
* Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package com.vitorpamplona.amethyst.benchmark
import androidx.benchmark.junit4.BenchmarkRule
import androidx.benchmark.junit4.measureRepeated
import androidx.test.ext.junit.runners.AndroidJUnit4
import androidx.test.platform.app.InstrumentationRegistry.getInstrumentation
import com.vitorpamplona.amethyst.commons.preview.MetaTagsParser
import org.junit.Assert.assertEquals
import org.junit.Assert.assertNotNull
import org.junit.Rule
import org.junit.Test
import org.junit.runner.RunWith
import java.nio.charset.Charset
/**
 * Microbenchmark for [MetaTagsParser].
 *
 * Parses the `github_amethyst.html` fixture shipped in the instrumentation context's assets
 * and looks up its `og:title` meta tag.
 */
@RunWith(AndroidJUnit4::class)
class MetaTagsParserBenchmark {
    // Read once per test instance, outside the measured loop.
    // `use` closes the asset stream as soon as its bytes have been read.
    private val html =
        getInstrumentation().context.assets.open("github_amethyst.html")
            .use { it.readBytes() }
            .toString(Charset.forName("utf-8"))

    @get:Rule
    val benchmarkRule = BenchmarkRule()

    /**
     * Measures parsing [html] and finding the first meta tag whose `property` attribute is `og:title`.
     *
     * The assertions run inside the measured block; they guard against the benchmark silently
     * measuring a parse that did not produce the expected tag.
     */
    @Test
    fun parseMetaTags() {
        benchmarkRule.measureRepeated {
            val metaOgTitle = MetaTagsParser.parse(html).find { it.attr("property") == "og:title" }
            assertNotNull("og:title meta tag not found in github_amethyst.html", metaOgTitle)
            // Safe call instead of `!!`: a null tag makes assertEquals fail rather than throwing an NPE.
            assertEquals(
                "GitHub - vitorpamplona/amethyst: Nostr client for Android",
                metaOgTitle?.attr("content"),
            )
        }
    }
}

View File

@@ -18,7 +18,7 @@
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
package com.vitorpamplona.amethyst.service.previews package com.vitorpamplona.amethyst.commons.preview
import org.junit.Assert.assertEquals import org.junit.Assert.assertEquals
import org.junit.Test import org.junit.Test

View File

@@ -18,26 +18,30 @@
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
package com.vitorpamplona.amethyst.service.previews package com.vitorpamplona.amethyst.commons.preview
import kotlinx.collections.immutable.toImmutableMap import kotlinx.collections.immutable.toImmutableMap
import java.lang.StringBuilder
internal data class MetaTag(private val attrs: Map<String, String>) { data class MetaTag(private val attrs: Map<String, String>) {
/**
* Returns the value of the attribute with the given name (case-insensitive), or an empty string if no such attribute exists.
*/
fun attr(name: String): String = attrs[name.lowercase()] ?: "" fun attr(name: String): String = attrs[name.lowercase()] ?: ""
} }
// parse a partial HTML document and extract meta tags object MetaTagsParser {
internal object MetaTagsParser {
private val NON_ATTR_NAME_CHARS = setOf(Char(0x0), '"', '\'', '>', '/') private val NON_ATTR_NAME_CHARS = setOf(Char(0x0), '"', '\'', '>', '/')
private val NON_UNQUOTED_ATTR_VALUE_CHARS = setOf('"', '\'', '=', '>', '<', '`') private val NON_UNQUOTED_ATTR_VALUE_CHARS = setOf('"', '\'', '=', '>', '<', '`')
/**
* Lazily parse a partial HTML document and extract meta tags.
*/
fun parse(input: String): Sequence<MetaTag> = fun parse(input: String): Sequence<MetaTag> =
sequence { sequence {
val s = TagScanner(input) val s = TagScanner(input)
while (!s.exhausted()) { while (!s.exhausted()) {
val t = s.nextTag() ?: continue val t = s.nextTag() ?: continue
if (t.name == "/head") { if (t.name == "head" && t.isEnd) {
break break
} }
if (t.name == "meta") { if (t.name == "meta") {
@@ -47,61 +51,45 @@ internal object MetaTagsParser {
} }
} }
private data class RawTag(val name: String, val attrPart: String) private data class RawTag(val isEnd: Boolean, val name: String, val attrPart: String)
private class TagScanner(private val input: String) { private class TagScanner(private val input: String) {
var p = 0 private var p = 0
fun exhausted(): Boolean = p >= input.length fun exhausted(): Boolean = p >= input.length
private fun peek(): Char = input[p] private fun peek(): Char = input[p]
private fun consume(): Char { private fun consume(): Char = input[p++]
return input[p++]
}
private fun consumeChar(c: Char): Boolean { private fun skipWhile(pred: (Char) -> Boolean) {
if (this.peek() == c) { while (!this.exhausted() && pred(this.peek())) {
this.consume() this.consume()
return true
} }
return false
} }
private fun skipSpaces() { private fun skipSpaces() {
while (!this.exhausted() && this.peek().isWhitespace()) { this.skipWhile { it.isWhitespace() }
this.consume()
}
}
private fun skipUntil(c: Char) {
while (!this.exhausted() && this.peek() != c) {
this.consume()
}
}
private fun readWhile(pred: (Char) -> Boolean): String {
val sb = StringBuilder()
while (!this.exhausted() && pred(this.peek())) {
sb.append(this.consume())
}
return sb.toString()
} }
fun nextTag(): RawTag? { fun nextTag(): RawTag? {
skipUntil('<') skipWhile { it != '<' }
consume() consume()
// read tag name // read tag name
val name = StringBuilder() val isEnd = peek() == '/'
if (consumeChar('/')) { if (isEnd) {
name.append('/') consume()
} }
val n = readWhile { !it.isWhitespace() && it != '>' } val nameStart = p
skipSpaces() skipWhile { !it.isWhitespace() && it != '>' }
val nameEnd = p
// read until end of tag // seek to start of attrs part
val attrsPart = StringBuilder() skipSpaces()
val attrsStart = p
// skip until end of tag
var quote: Char? = null var quote: Char? = null
while (!exhausted()) { while (!exhausted()) {
val c = consume() val c = consume()
@@ -124,13 +112,15 @@ internal object MetaTagsParser {
quote = null quote = null
} }
} }
attrsPart.append(c)
} }
val attrsEnd = p - 1
if (!n.matches(Regex("""[0-9a-zA-Z]+"""))) { val name = input.slice(nameStart..<nameEnd)
if (!name.matches(Regex("""[0-9a-zA-Z]+"""))) {
return null return null
} }
return RawTag(name.append(n).toString().lowercase(), attrsPart.toString()) val attrsPart = input.slice(attrsStart..<attrsEnd)
return RawTag(isEnd, name.lowercase(), attrsPart)
} }
} }

File diff suppressed because one or more lines are too long