Speeding MurMur up

Adding more test cases
Mapping out memory used by filter
This commit is contained in:
Vitor Pamplona 2025-02-20 17:10:24 -05:00
parent aaf86bf53e
commit 84a52a1ce0
7 changed files with 388 additions and 126 deletions

View File

@ -24,6 +24,8 @@ import androidx.benchmark.junit4.BenchmarkRule
import androidx.benchmark.junit4.measureRepeated
import androidx.test.ext.junit.runners.AndroidJUnit4
import androidx.test.platform.app.InstrumentationRegistry.getInstrumentation
import com.vitorpamplona.quartz.nip01Core.core.HexKey
import com.vitorpamplona.quartz.nip01Core.core.toHexKey
import com.vitorpamplona.quartz.nip01Core.hints.HintIndexer
import com.vitorpamplona.quartz.utils.RandomInstance
import org.junit.Rule
@ -35,20 +37,22 @@ import java.nio.charset.Charset
class HintIndexerBenchmark {
@get:Rule val benchmarkRule = BenchmarkRule()
val keys =
mutableListOf<ByteArray>().apply {
for (seed in 0..1_000_000) {
add(RandomInstance.bytes(32))
companion object {
val keys =
mutableListOf<HexKey>().apply {
for (seed in 0..1_000_000) {
add(RandomInstance.bytes(32).toHexKey())
}
}
}
val relays =
getInstrumentation()
.context.assets
.open("relayDB.txt")
.readBytes()
.toString(Charset.forName("utf-8"))
.split("\n")
val relays =
getInstrumentation()
.context.assets
.open("relayDB.txt")
.readBytes()
.toString(Charset.forName("utf-8"))
.split("\n")
}
@Test
fun relayUriHashcode() {
@ -63,14 +67,14 @@ class HintIndexerBenchmark {
keys.forEach { key ->
(0..5).map {
indexer.index(key, relays.random())
indexer.addKey(key, relays.random())
}
}
val key = keys.random()
benchmarkRule.measureRepeated {
indexer.get(key)
indexer.getKey(key)
}
}
@ -80,7 +84,7 @@ class HintIndexerBenchmark {
val indexer = HintIndexer()
keys.forEach { key ->
(0..5).map {
indexer.index(key, relays.random())
indexer.addKey(key, relays.random())
}
}
}

View File

@ -0,0 +1,46 @@
/**
* Copyright (c) 2024 Vitor Pamplona
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
* Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package com.vitorpamplona.quartz.benchmark
import androidx.benchmark.junit4.BenchmarkRule
import androidx.benchmark.junit4.measureRepeated
import androidx.test.ext.junit.runners.AndroidJUnit4
import com.vitorpamplona.quartz.nip01Core.hints.bloom.MurmurHash3
import com.vitorpamplona.quartz.utils.RandomInstance
import org.junit.Rule
import org.junit.Test
import org.junit.runner.RunWith
/**
 * Microbenchmark for a single [MurmurHash3.hash] call.
 */
@RunWith(AndroidJUnit4::class)
class MurMurBenchmark {
    @get:Rule val benchmarkRule = BenchmarkRule()

    /**
     * Measures repeated hashing of one fixed input obtained from
     * `RandomInstance.bytes(32)`, always with the constant seed 293.
     */
    @Test
    fun hash() {
        val input = RandomInstance.bytes(32)
        val murmur = MurmurHash3()

        // Input and hasher are created once, outside the measured lambda.
        benchmarkRule.measureRepeated { murmur.hash(input, 293) }
    }
}

View File

@ -20,30 +20,84 @@
*/
package com.vitorpamplona.quartz.nip01Core.hints
import com.vitorpamplona.quartz.nip01Core.core.HexKey
import com.vitorpamplona.quartz.nip01Core.core.hexToByteArray
import com.vitorpamplona.quartz.nip01Core.hints.bloom.BloomFilterMurMur3
/**
* Instead of having one bloom filter per relay, which could create many
* large filters for very few events, this class uses only one mega bloom
* filter and uses the hashcode of the relay uri as differentiator in salt.
* Instead of having one bloom filter per relay per type, which could create
* many large bloom filters for collections of very few items, this class uses
* only one mega bloom filter per type and uses the hashcode of the relay uri
* as seed differentiator in the hash function.
*/
class HintIndexer(
size: Int = 10_000_000,
rounds: Int = 5,
) {
private val bloomFilter = BloomFilterMurMur3(size, rounds)
class HintIndexer {
private val eventHints = BloomFilterMurMur3(10_000_000, 5)
private val addressHints = BloomFilterMurMur3(1_000_000, 5)
private val pubKeyHints = BloomFilterMurMur3(10_000_000, 5)
private val relayDB = hashSetOf<String>()
fun index(
private fun add(
id: ByteArray,
relay: String,
bloom: BloomFilterMurMur3,
) {
relayDB.add(relay)
bloomFilter.add(id, relay.hashCode())
bloom.add(id, relay.hashCode())
}
fun get(id: ByteArray): List<String> =
relayDB.filter {
bloomFilter.mightContain(id, it.hashCode())
}
/**
 * Collects every relay in [relayDB] for which [bloom] reports [id] as possibly
 * present when queried with that relay's `hashCode()` as the seed.
 */
private fun get(
    id: ByteArray,
    bloom: BloomFilterMurMur3,
): List<String> = relayDB.filter { relay -> bloom.mightContain(id, relay.hashCode()) }
// --------------------
// Event Host hints
// --------------------
/**
 * Records [relay] as a hint for the event whose raw id bytes are [eventId],
 * storing the entry in the event filter.
 */
fun addEvent(
    eventId: ByteArray,
    relay: String,
) {
    add(eventId, relay, eventHints)
}
/**
 * Hex-string overload: decodes [eventId] with `hexToByteArray()` and delegates
 * to the byte-array variant.
 */
fun addEvent(
    eventId: HexKey,
    relay: String,
) {
    val rawId = eventId.hexToByteArray()
    addEvent(rawId, relay)
}
/** Lists the known relays for which the event filter reports [eventId] as possibly present. */
fun getEvent(eventId: ByteArray): List<String> {
    return get(eventId, eventHints)
}
/** Hex-string overload: decodes [eventId] with `hexToByteArray()` and queries the byte-array variant. */
fun getEvent(eventId: HexKey): List<String> {
    val rawId = eventId.hexToByteArray()
    return getEvent(rawId)
}
// --------------------
// Address hints
// --------------------
/**
 * Records [relay] as a hint for the address whose raw bytes are [addressId],
 * storing the entry in the address filter.
 */
fun addAddress(
    addressId: ByteArray,
    relay: String,
) {
    add(addressId, relay, addressHints)
}
/**
 * String overload: encodes [addressId] with `String.toByteArray()` (UTF-8 by
 * default in Kotlin) and delegates to the byte-array variant.
 */
fun addAddress(
    addressId: String,
    relay: String,
) {
    val rawId = addressId.toByteArray()
    addAddress(rawId, relay)
}
/** Lists the known relays for which the address filter reports [addressId] as possibly present. */
fun getAddress(addressId: ByteArray): List<String> {
    return get(addressId, addressHints)
}
/** String overload: encodes [addressId] with `String.toByteArray()` and queries the byte-array variant. */
fun getAddress(addressId: String): List<String> {
    val rawId = addressId.toByteArray()
    return getAddress(rawId)
}
// --------------------
// PubKeys Outbox hints
// --------------------
/**
 * Records [relay] as a hint for the public key whose raw bytes are [key],
 * storing the entry in the pubkey filter.
 */
fun addKey(
    key: ByteArray,
    relay: String,
) {
    add(key, relay, pubKeyHints)
}
/**
 * Hex-string overload: decodes [key] with `hexToByteArray()` and delegates to
 * the byte-array variant.
 */
fun addKey(
    key: HexKey,
    relay: String,
) {
    val rawKey = key.hexToByteArray()
    addKey(rawKey, relay)
}
/** Lists the known relays for which the pubkey filter reports [key] as possibly present. */
fun getKey(key: ByteArray): List<String> {
    return get(key, pubKeyHints)
}
/** Hex-string overload: decodes [key] with `hexToByteArray()` and queries the byte-array variant. */
fun getKey(key: HexKey): List<String> {
    val rawKey = key.hexToByteArray()
    return getKey(rawKey)
}
}

View File

@ -20,61 +20,54 @@
*/
package com.vitorpamplona.quartz.nip01Core.hints.bloom
import org.apache.commons.lang3.BooleanUtils.xor
class MurmurHash3 {
fun hash(
data: ByteArray,
seed: Int,
) = hash(data, 0, data.size, seed)
companion object {
val ROUND_DOWN = 0xFFFFFFFC.toInt()
val C1 = -0x3361d2af // 0xcc9e2d51
val C2 = 0x1b873593
}
/**
* Generates a 32-bit hash.
* @param data the byte array to hash
* @param offset the start offset of the data in the array (always 0)
* @param length the length of the data in the array
* @param seed the seed for the hash (int)
* @return 32 bit hash of the given array
*/
fun hash(
data: ByteArray,
offset: Int,
length: Int,
seed: Int,
): Int {
val c1 = -0x3361d2af // 0xcc9e2d51
val c2 = 0x1b873593
var h1 = seed
val roundedEnd = offset + (length and 0xFFFFFFFC.toInt()) // Round down to 4-byte blocks
val roundedEnd = data.size and ROUND_DOWN // Round down to 4-byte blocks
var i = offset
var i = 0
var k1 = 0
while (i < roundedEnd) {
var k1 =
(data[i].toInt() and 0xFF) or
((data[i + 1].toInt() and 0xFF) shl 8) or
((data[i + 2].toInt() and 0xFF) shl 16) or
((data[i + 3].toInt() and 0xFF) shl 24)
k1 =
(
data[i++].toInt() and 0xFF or
(data[i++].toInt() and 0xFF shl 8) or
(data[i++].toInt() and 0xFF shl 16) or
(data[i++].toInt() and 0xFF shl 24)
) * C1
i += 4
k1 *= c1
k1 = Integer.rotateLeft(k1, 15)
k1 *= c2
h1 = h1 xor k1
h1 = Integer.rotateLeft(h1, 13)
h1 = h1 * 5 + -0x19ab949c // 0xe6546b64
h1 = h1 xor (((k1 shl 15) or (k1 ushr -15)) * C2)
h1 = ((h1 shl 13) or (h1 ushr -13)) * 5 + -0x19ab949c // 0xe6546b64
}
// processing tail (remaining bytes)
var k1 = 0
when (length and 3) {
k1 = 0
when (data.size and 3) {
3 -> {
k1 = k1 or ((data[i + 2].toInt() and 0xFF) shl 16)
k1 = k1 or ((data[i + 1].toInt() and 0xFF) shl 8)
k1 = k1 or (data[i].toInt() and 0xFF)
k1 *= c1
k1 = Integer.rotateLeft(k1, 15)
k1 *= c2
k1 *= C1
k1 = (k1 shl 15) or (k1 ushr -15)
k1 *= C2
h1 = h1 xor k1
}
@ -83,9 +76,9 @@ class MurmurHash3 {
k1 = k1 or (data[i + 1].toInt() and 0xFF shl 8)
k1 = k1 or (data[i].toInt() and 0xFF)
k1 *= c1
k1 = Integer.rotateLeft(k1, 15)
k1 *= c2
k1 *= C1
k1 = (k1 shl 15) or (k1 ushr -15)
k1 *= C2
h1 = h1 xor k1
}
@ -93,29 +86,22 @@ class MurmurHash3 {
1 -> {
k1 = k1 or (data[i].toInt() and 0xFF)
k1 *= c1
k1 = Integer.rotateLeft(k1, 15)
k1 *= c2
k1 *= C1
k1 = (k1 shl 15) or (k1 ushr -15)
k1 *= C2
h1 = h1 xor k1
}
}
// final mix
h1 = h1 xor length
h1 = fmix32(h1)
h1 = h1 xor data.size
// fmix32
h1 = (h1 xor (h1 ushr 16)) * -0x7a143595 // 0x85ebca6b
h1 = (h1 xor (h1 ushr 13)) * -0x3d4d51cb // 0xc2b2ae35
h1 = h1 xor (h1 ushr 16)
return h1
}
private fun fmix32(h: Int): Int {
var f = h
f = f xor (f ushr 16)
f *= -0x7a143595 // 0x85ebca6b
f = f xor (f ushr 13)
f *= -0x3d4d51cb // 0xc2b2ae35
f = f xor (f ushr 16)
return f
}
}

View File

@ -21,73 +21,142 @@
package com.vitorpamplona.quartz.nip01Core.hints
import com.vitorpamplona.quartz.nip01Core.core.HexKey
import com.vitorpamplona.quartz.nip01Core.core.hexToByteArray
import com.vitorpamplona.quartz.nip01Core.core.toHexKey
import com.vitorpamplona.quartz.nip01Core.hints.types.PubKeyHint
import com.vitorpamplona.quartz.nip01Core.hints.HintIndexerTest.Companion.indexer
import com.vitorpamplona.quartz.nip01Core.tags.addressables.Address
import com.vitorpamplona.quartz.utils.RandomInstance
import com.vitorpamplona.quartz.utils.usedMemoryMb
import junit.framework.TestCase.assertTrue
import org.junit.Test
class HintIndexerTest {
val key1 = "ca29c211f1c72d5b6622268ff43d2288ea2b2cb5b9aa196ff9f1704fc914b71b".hexToByteArray()
val key2 = "460c25e682fda7832b52d1f22d3d22b3176d972f60dcdc3212ed8c92ef85065c".hexToByteArray()
val key3 = "560c25e682fda7832b52d1f22d3d22b3176d972f60dcdc3212ed8c92ef85065c".hexToByteArray()
companion object {
val VALID_CHARS: List<Char> = ('0'..'9') + ('a'..'z') + ('A'..'Z')
val keys =
mutableListOf<HexKey>().apply {
for (seed in 0..1_000_000) {
add(RandomInstance.bytes(32).toHexKey())
private fun randomChars(size: Int): String = CharArray(size) { VALID_CHARS.random() }.concatToString()
val keys =
mutableListOf<HexKey>().apply {
for (seed in 0..1_000_000) {
add(RandomInstance.bytes(32).toHexKey())
}
}
val eventIds =
mutableListOf<HexKey>().apply {
for (seed in 0..1_000_000) {
add(RandomInstance.bytes(32).toHexKey())
}
}
val addresses =
keys.take(10_000).map {
Address.assemble(
RandomInstance.int(150_000),
it,
randomChars(10),
)
}
/**
 * Relay entries used by the tests: the lines of the `relayDB.txt` resource,
 * resolved relative to this class. Fails fast when the resource is missing
 * instead of silently producing a bogus single-entry list.
 */
val relays =
    requireNotNull(this::class.java.getResourceAsStream("relayDB.txt")) {
        "Test resource relayDB.txt not found"
    }.use { stream ->
        // Decode the bytes as UTF-8. Calling plain toString() on a ByteArray
        // only yields its identity string (e.g. "[B@1b2c3d"), not the content.
        stream.readBytes().decodeToString()
    }.split("\n")
/**
 * Shared, lazily built [HintIndexer] fixture populated from `keys`,
 * `eventIds` and `addresses`. While building, it also prints how much the
 * used-memory reading changed across the construction of the empty indexer.
 */
val indexer by lazy {
    // Best-effort attempt to settle the heap before the baseline reading.
    System.gc()
    Thread.sleep(1000)

    val startingMemory = Runtime.getRuntime().usedMemoryMb()
    val result = HintIndexer()
    val endingMemory = Runtime.getRuntime().usedMemoryMb()
    println("Filter using ${endingMemory - startingMemory}MB")

    // Simulates 6 outbox relay hints for each key (random picks may repeat).
    keys.forEach { key ->
        repeat(6) {
            result.addKey(key, relays.random())
        }
    }

    // Simulates each event being hinted on 8 + RandomInstance.int(10) relays.
    eventIds.forEach { id ->
        repeat(8 + RandomInstance.int(10)) {
            result.addEvent(id, relays.random())
        }
    }

    // Simulates each address being hinted on 8 + RandomInstance.int(10) relays.
    addresses.forEach { address ->
        repeat(8 + RandomInstance.int(10)) {
            result.addAddress(address, relays.random())
        }
    }

    result
}
}
val relays =
this.javaClass
.getResourceAsStream("relayDB.txt")
?.readAllBytes()
.toString()
.split("\n")
val keyHints =
keys
.map { key ->
(0..5).map { PubKeyHint(key, relays.random()) }
}.flatten()
val key1Relays = (0..5).map { relays.random() }
val key2Relays = (0..4).map { relays.random() }
val key3Relays = (0..3).map { relays.random() }
@Test
fun runExistingKeys() {
val indexer = HintIndexer()
keyHints.forEach {
indexer.index(it.id(), it.relay!!)
}
val testSize = 1000
val testProb = 0.015f
fun assert99PercentSucess(success: () -> Boolean) {
var failureCounter = 0
repeat(1000) {
val key = keys.random()
if (indexer.get(key.hexToByteArray()).isEmpty()) {
repeat(testSize) {
if (!success()) {
failureCounter++
}
}
assertTrue("Failures $failureCounter ${failureCounter / 1_000.0}", failureCounter / 1_000.0f < 0.015)
assertTrue(
"Failure rate: $failureCounter of 1000 elements => ${(failureCounter / testSize.toFloat()) * 100}%",
failureCounter / testSize.toFloat() < testProb,
)
}
@Test
fun runProb() {
val indexer = HintIndexer()
keyHints.forEach {
indexer.index(it.id(), it.relay!!)
fun runProbExistingKeys() =
assert99PercentSucess {
indexer.getKey(keys.random()).isNotEmpty()
}
var failureCounter = 0
repeat(1_000) {
if (indexer.get(RandomInstance.bytes(32)).isNotEmpty()) {
failureCounter++
}
/** Keys that were never indexed should come back with no relay hints within the tolerated failure rate. */
@Test
fun runProbNewKeys() {
    assert99PercentSucess {
        val unseenKey = RandomInstance.bytes(32)
        indexer.getKey(unseenKey).isEmpty()
    }
}
/** Event ids that were indexed should return at least one relay hint within the tolerated failure rate. */
@Test
fun runProbExistingEventIds() {
    assert99PercentSucess {
        val knownId = eventIds.random()
        indexer.getEvent(knownId).isNotEmpty()
    }
}
/** Event ids that were never indexed should come back with no relay hints within the tolerated failure rate. */
@Test
fun runProbNewEventIds() {
    assert99PercentSucess {
        val unseenId = RandomInstance.bytes(32)
        indexer.getEvent(unseenId).isEmpty()
    }
}
/** Addresses that were indexed should return at least one relay hint within the tolerated failure rate. */
@Test
fun runProbExistingAddresses() {
    assert99PercentSucess {
        val knownAddress = addresses.random()
        indexer.getAddress(knownAddress).isNotEmpty()
    }
}
/** Freshly assembled random addresses should come back with no relay hints within the tolerated failure rate. */
@Test
fun runProbNewAddresses() {
    assert99PercentSucess {
        // Argument order matters: each RandomInstance / randomChars call is
        // evaluated in the same sequence as the Address.assemble parameters.
        val unseenAddress =
            Address.assemble(
                RandomInstance.int(65000),
                RandomInstance.bytes(32).toHexKey(),
                randomChars(10),
            )

        indexer.getAddress(unseenAddress).isEmpty()
    }
}
assertTrue("Failures $failureCounter ${failureCounter / 1_000.0}", failureCounter / 1_000.0f < 0.015)
}
}

View File

@ -0,0 +1,76 @@
/**
* Copyright (c) 2024 Vitor Pamplona
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
* Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package com.vitorpamplona.quartz.nip01Core.hints
import com.vitorpamplona.quartz.nip01Core.core.HexKey
import com.vitorpamplona.quartz.nip01Core.core.hexToByteArray
import com.vitorpamplona.quartz.nip01Core.hints.bloom.MurmurHash3
import junit.framework.TestCase.assertEquals
import org.junit.Test
/**
 * Regression vectors for [MurmurHash3.hash]: each case pins the 32-bit result
 * expected for a given hex-encoded input and seed.
 */
class MurMur3Test {
    /**
     * One test vector.
     *
     * @property bytesHex hex-encoded input bytes
     * @property seed seed passed to the hash function
     * @property result expected hash value
     */
    class Case(
        val bytesHex: HexKey,
        val seed: Int,
        val result: Int,
    )

    val testCases =
        listOf(
            Case("9fd4e9a905ca9e1a3086fa4c0a1ed829dbf18c15ec05af95c76b78d3d2f5651b", 886838366, -525456393),
            Case("e6c8f70f0d35a983bfebd00e5f29787c009c52971cfb4ac3a49b534b256b59cc", 1717487548, 1605080838),
            Case("7f7113833feb31e877f193e2fc75a64e9c70252c3ae3c73373ff34430ae40ea6", 1275582690, 225480992),
            Case("61770be6ec9df0f490743318e796e28ae34609732b61d365947871532d77d697", 514559346, 1424957638),
            Case("375f46b4687ba3cd035db303fa294d943816e64ca6b3adcda2ae40e8ac9d91a0", 1898708424, 1730418066),
            Case("c67044cd1d07a2aeb92b7bec973b6feb8abb9197840c59c101cacaa992489d49", 294602161, -1944496371),
            Case("49db4bfcc4da62e38c4076843cdde1425570806f09f121f5e7f2507c5ee1db85", 910710684, 944243368),
            Case("c5e98a30dead5ade4900b26eabae3435cfcdb64ff5e55c99641915a0c6ee73fc", 1107230285, 1550302684),
            Case("b0ed2e7568e6b4e1d5e5bab46fde01149331b824e48a281798d7216dde8f5890", 1013875681, -1265544300),
            Case("805f290e865bde094d77e82fb8b338d83347bc5449a4aed9fb08afb6a53a079b", 1674416787, -1821262025),
            // special cases
            // Seed extremes. Int.MAX_VALUE + 1 wraps around to Int.MIN_VALUE,
            // so those two cases intentionally share the same expected value.
            Case("805f290e865bde094d77e82fb8b338d83347bc5449a4aed9fb08afb6a53a079b", Int.MAX_VALUE, -422576759),
            Case("805f290e865bde094d77e82fb8b338d83347bc5449a4aed9fb08afb6a53a079b", Int.MAX_VALUE + 1, 851385048),
            Case("805f290e865bde094d77e82fb8b338d83347bc5449a4aed9fb08afb6a53a079b", Int.MIN_VALUE, 851385048),
            Case("805f290e865bde094d77e82fb8b338d83347bc5449a4aed9fb08afb6a53a079b", 0, 1615518380),
            // 1-byte inputs
            Case("fd", 1, 975430984),
            Case("00", 1, 0),
            Case("FF", 1, -797126820),
            // 2-byte inputs
            Case("3033", 1, 1435178296),
            Case("0000", 1, -2047822809),
            Case("FFFF", 1, 1459517456),
            // 3-byte inputs
            Case("3652a8", 1, 103723868),
            Case("000000", 1, 821347078),
            Case("FFFFFF", 1, -761438248),
            // 4-byte inputs
            Case("00000000", 1, 2028806445),
            Case("FFFFFFFF", 1, 919009801),
        )

    /** Hashes every vector and checks the result, naming the vector on failure. */
    @Test
    fun testMurMur() {
        val hasher = MurmurHash3()
        testCases.forEach { case ->
            assertEquals(
                // Identify the failing vector; a bare int mismatch is not enough.
                "input=${case.bytesHex} seed=${case.seed}",
                case.result,
                hasher.hash(case.bytesHex.hexToByteArray(), case.seed),
            )
        }
    }
}

View File

@ -0,0 +1,27 @@
/**
* Copyright (c) 2024 Vitor Pamplona
*
* Permission is hereby granted, free of charge, to any person obtaining a copy of
* this software and associated documentation files (the "Software"), to deal in
* the Software without restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the
* Software, and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
* AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package com.vitorpamplona.quartz.utils
/**
 * Returns the memory currently in use by this runtime, in whole mebibytes,
 * computed as `totalMemory() - freeMemory()`.
 *
 * The subtraction is done in bytes and converted once, so the result is the
 * truncated true value. Converting total and free to MB separately before
 * subtracting can over-report by 1 MB because of the two independent
 * truncations.
 *
 * @return used memory in MB, rounded down
 */
fun Runtime.usedMemoryMb(): Long {
    val usedBytes = totalMemory() - freeMemory()
    return usedBytes / (1024 * 1024)
}