rest: fetch spent transaction outputs by blockhash

Today, it is possible to fetch a block's spent prevouts in order to
build an external index by using the `/rest/block/HASH.json` endpoint.
However, its performance is low due to JSON serialization overhead.

We can significantly optimize it by adding a new REST endpoint, using
a binary response format:

```
$ BLOCKHASH=00000000000000000002a7c4c1e48d76c5a37902165a270156b7a8d72728a054

$ ab -k -c 1 -n 100 http://localhost:8332/rest/block/$BLOCKHASH.json
Document Length:        13278152 bytes
Requests per second:    3.53 [#/sec] (mean)
Time per request:       283.569 [ms] (mean)

$ ab -k -c 1 -n 10000 http://localhost:8332/rest/spenttxouts/$BLOCKHASH.bin
Document Length:        195591 bytes
Requests per second:    254.47 [#/sec] (mean)
Time per request:       3.930 [ms] (mean)
```

Currently, this PR is being used and tested by Bindex:

 * https://github.com/romanz/bindex-rs

This PR would allow improving the performance of external indexers
such as electrs, ElectrumX, Fulcrum and Blockbook:

 * https://github.com/romanz/electrs (also https://github.com/Blockstream/electrs and https://github.com/mempool/electrs)
 * https://github.com/spesmilo/electrumx
 * https://github.com/cculianu/Fulcrum
 * https://github.com/trezor/blockbook
This commit is contained in:
Roman Zeyde
2024-06-30 20:51:51 +03:00
parent c461d15287
commit d4e212e8a6
3 changed files with 144 additions and 0 deletions

View File

@ -27,6 +27,7 @@
#include <streams.h> #include <streams.h>
#include <sync.h> #include <sync.h>
#include <txmempool.h> #include <txmempool.h>
#include <undo.h>
#include <util/any.h> #include <util/any.h>
#include <util/check.h> #include <util/check.h>
#include <util/strencodings.h> #include <util/strencodings.h>
@ -281,6 +282,113 @@ static bool rest_headers(const std::any& context,
} }
} }
/**
 * Write the outputs spent by a block to `stream` in binary form.
 *
 * Layout: a compact-size count of per-transaction lists, then one list per
 * transaction. Each list is a compact-size count followed by that many
 * serialized CTxOut entries. The first list is always empty: it stands in for
 * the coinbase tx, which has no entry in block_undo.vtxundo.
 */
static void SerializeBlockUndo(DataStream& stream, const CBlockUndo& block_undo)
{
    const auto& spending_txs = block_undo.vtxundo;

    // One extra list is announced and emitted (empty) up front for the coinbase tx.
    WriteCompactSize(stream, spending_txs.size() + 1);
    WriteCompactSize(stream, 0);

    for (const CTxUndo& undo : spending_txs) {
        const auto& spent_coins = undo.vprevout;
        WriteCompactSize(stream, spent_coins.size());
        for (const Coin& spent : spent_coins) {
            spent.out.Serialize(stream);
        }
    }
}
/**
* Serialize spent outputs as a list of per-transaction CTxOut lists using JSON format.
*/
static void BlockUndoToJSON(const CBlockUndo& block_undo, UniValue& result)
{
result.push_back({UniValue::VARR}); // block_undo.vtxundo doesn't contain coinbase tx
for (const CTxUndo& tx_undo : block_undo.vtxundo) {
UniValue tx_prevouts(UniValue::VARR);
for (const Coin& coin : tx_undo.vprevout) {
UniValue prevout(UniValue::VOBJ);
prevout.pushKV("value", ValueFromAmount(coin.out.nValue));
UniValue script_pub_key(UniValue::VOBJ);
ScriptToUniv(coin.out.scriptPubKey, /*out=*/script_pub_key, /*include_hex=*/true, /*include_address=*/true);
prevout.pushKV("scriptPubKey", std::move(script_pub_key));
tx_prevouts.push_back(std::move(prevout));
}
result.push_back(std::move(tx_prevouts));
}
}
static bool rest_spent_txouts(const std::any& context, HTTPRequest* req, const std::string& strURIPart)
{
if (!CheckWarmup(req)) {
return false;
}
std::string param;
const RESTResponseFormat rf = ParseDataFormat(param, strURIPart);
std::vector<std::string> path = SplitString(param, '/');
std::string hashStr;
if (path.size() == 1) {
// path with query parameter: /rest/spenttxouts/<hash>
hashStr = path[0];
} else {
return RESTERR(req, HTTP_BAD_REQUEST, "Invalid URI format. Expected /rest/spenttxouts/<hash>.<ext>");
}
auto hash{uint256::FromHex(hashStr)};
if (!hash) {
return RESTERR(req, HTTP_BAD_REQUEST, "Invalid hash: " + hashStr);
}
ChainstateManager* chainman = GetChainman(context, req);
if (!chainman) {
return false;
}
const CBlockIndex* pblockindex = WITH_LOCK(cs_main, return chainman->m_blockman.LookupBlockIndex(*hash));
if (!pblockindex) {
return RESTERR(req, HTTP_NOT_FOUND, hashStr + " not found");
}
CBlockUndo block_undo;
if (pblockindex->nHeight > 0 && !chainman->m_blockman.ReadBlockUndo(block_undo, *pblockindex)) {
return RESTERR(req, HTTP_NOT_FOUND, hashStr + " undo not available");
}
switch (rf) {
case RESTResponseFormat::BINARY: {
DataStream ssSpentResponse{};
SerializeBlockUndo(ssSpentResponse, block_undo);
req->WriteHeader("Content-Type", "application/octet-stream");
req->WriteReply(HTTP_OK, ssSpentResponse);
return true;
}
case RESTResponseFormat::HEX: {
DataStream ssSpentResponse{};
SerializeBlockUndo(ssSpentResponse, block_undo);
const std::string strHex{HexStr(ssSpentResponse) + "\n"};
req->WriteHeader("Content-Type", "text/plain");
req->WriteReply(HTTP_OK, strHex);
return true;
}
case RESTResponseFormat::JSON: {
UniValue result(UniValue::VARR);
BlockUndoToJSON(block_undo, result);
std::string strJSON = result.write() + "\n";
req->WriteHeader("Content-Type", "application/json");
req->WriteReply(HTTP_OK, strJSON);
return true;
}
default: {
return RESTERR(req, HTTP_NOT_FOUND, "output format not found (available: " + AvailableDataFormatsString() + ")");
}
}
}
static bool rest_block(const std::any& context, static bool rest_block(const std::any& context,
HTTPRequest* req, HTTPRequest* req,
const std::string& strURIPart, const std::string& strURIPart,
@ -1021,6 +1129,7 @@ static const struct {
{"/rest/deploymentinfo/", rest_deploymentinfo}, {"/rest/deploymentinfo/", rest_deploymentinfo},
{"/rest/deploymentinfo", rest_deploymentinfo}, {"/rest/deploymentinfo", rest_deploymentinfo},
{"/rest/blockhashbyheight/", rest_blockhash_by_height}, {"/rest/blockhashbyheight/", rest_blockhash_by_height},
{"/rest/spenttxouts/", rest_spent_txouts},
}; };
void StartREST(const std::any& context) void StartREST(const std::any& context)

View File

@ -6,6 +6,7 @@
from decimal import Decimal from decimal import Decimal
from enum import Enum from enum import Enum
from io import BytesIO
import http.client import http.client
import json import json
import typing import typing
@ -15,6 +16,7 @@ import urllib.parse
from test_framework.messages import ( from test_framework.messages import (
BLOCK_HEADER_SIZE, BLOCK_HEADER_SIZE,
COIN, COIN,
deser_block_spent_outputs,
) )
from test_framework.test_framework import BitcoinTestFramework from test_framework.test_framework import BitcoinTestFramework
from test_framework.util import ( from test_framework.util import (
@ -424,6 +426,34 @@ class RESTTest (BitcoinTestFramework):
assert_equal(self.test_rest_request(f"/headers/{bb_hash}", query_params={"count": 1}), self.test_rest_request(f"/headers/1/{bb_hash}")) assert_equal(self.test_rest_request(f"/headers/{bb_hash}", query_params={"count": 1}), self.test_rest_request(f"/headers/1/{bb_hash}"))
assert_equal(self.test_rest_request(f"/blockfilterheaders/basic/{bb_hash}", query_params={"count": 1}), self.test_rest_request(f"/blockfilterheaders/basic/5/{bb_hash}")) assert_equal(self.test_rest_request(f"/blockfilterheaders/basic/{bb_hash}", query_params={"count": 1}), self.test_rest_request(f"/blockfilterheaders/basic/5/{bb_hash}"))
self.log.info("Test the /spenttxouts URI")
block_count = self.nodes[0].getblockcount()
for height in range(0, block_count + 1):
blockhash = self.nodes[0].getblockhash(height)
spent_bin = self.test_rest_request(f"/spenttxouts/{blockhash}", req_type=ReqType.BIN, ret_type=RetType.BYTES)
spent_hex = self.test_rest_request(f"/spenttxouts/{blockhash}", req_type=ReqType.HEX, ret_type=RetType.BYTES)
spent_json = self.test_rest_request(f"/spenttxouts/{blockhash}", req_type=ReqType.JSON, ret_type=RetType.JSON)
assert_equal(bytes.fromhex(spent_hex.decode()), spent_bin)
spent = deser_block_spent_outputs(BytesIO(spent_bin))
block = self.nodes[0].getblock(blockhash, 3) # return prevout for each input
assert_equal(len(spent), len(block["tx"]))
assert_equal(len(spent_json), len(block["tx"]))
for i, tx in enumerate(block["tx"]):
prevouts = [txin["prevout"] for txin in tx["vin"] if "coinbase" not in txin]
# compare with `getblock` JSON output (coinbase tx has no prevouts)
actual = [(txout.scriptPubKey.hex(), Decimal(txout.nValue) / COIN) for txout in spent[i]]
expected = [(p["scriptPubKey"]["hex"], p["value"]) for p in prevouts]
assert_equal(expected, actual)
# also compare JSON format
actual = [(prevout["scriptPubKey"], prevout["value"]) for prevout in spent_json[i]]
expected = [(p["scriptPubKey"], p["value"]) for p in prevouts]
assert_equal(expected, actual)
self.log.info("Test the /deploymentinfo URI") self.log.info("Test the /deploymentinfo URI")
deployment_info = self.nodes[0].getdeploymentinfo() deployment_info = self.nodes[0].getdeploymentinfo()

View File

@ -228,6 +228,11 @@ def ser_string_vector(l):
return r return r
def deser_block_spent_outputs(f):
    """Deserialize a block's spent outputs from the stream f.

    Reads a compact-size count, then that many CTxOut vectors, and returns
    them as a list of lists.
    """
    tx_count = deser_compact_size(f)
    spent_outputs = []
    for _ in range(tx_count):
        spent_outputs.append(deser_vector(f, CTxOut))
    return spent_outputs
def from_hex(obj, hex_string): def from_hex(obj, hex_string):
"""Deserialize from a hex string representation (e.g. from RPC) """Deserialize from a hex string representation (e.g. from RPC)