Merge bitcoin/bitcoin#32421: test: refactor: overhaul (w)txid determination for CTransaction objects

4ef6253017 test: avoid unneeded (w)txid hex -> integer conversions (Sebastian Falbesoner)
472f3770ae scripted-diff: test: rename CTransaction `.getwtxid()` -> `wtxid_hex` for consistency (Sebastian Falbesoner)
81af4334e8 test: rename CTransaction `.sha256` -> `.txid_int` for consistency (Sebastian Falbesoner)
ce83924237 test: rename CTransaction `.rehash()`/`.hash` -> `.txid_hex` for consistency (Sebastian Falbesoner)
e9cdaefb0a test: introduce and use CTransaction `.wtxid_int` property (Sebastian Falbesoner)
9b3dce24a3 test: remove bare CTransaction `.rehash()`/`.calc_sha256()` calls (Sebastian Falbesoner)
a2724e3ea3 test: remove txid caching in CTransaction class (Sebastian Falbesoner)

Pull request description:

  In the functional test framework, determining a (w)txid for a `CTransaction` instance is currently rather confusing and footgunny due to inconsistent naming/interfaces (see table below) and statefulness involved. This PR aims to improve that by:
  * removing the (w)txid caching mechanism, in order to avoid the need to call additional rehashing functions (`.rehash()`/`.calculate_sha256()`, see first two commits and https://github.com/bitcoin/bitcoin/pull/32050#discussion_r1993286997). This change in theory decreases the performance, as the involved serialization and hashing involved might be called more often than previously, but I couldn't find a functional test where this leads to a measurable run-time increase on my machine.
  * introduce consistent naming that shows the type of the returned txid, i.e. hex string vs. test-framework-internal representation [currently integers] (see remaining commits)

  Summary table showing (w)txid determaination before/after this PR:

  | Task                   | master                 | PR           |
  |:-----------------------|:-----------------------|:-------------|
  | get TXID (hex string)  | `.rehash()` / `.hash`[1] | `.txid_hex`  |
  | get TXID (integer)     | `.sha256`[1]             | `.txid_int`  |
  | get WTXID (hex string) | `.getwtxid()`          | `.wtxid_hex` |
  | get WTXID (integer)    | `.calc_sha256(True)`   | `.wtxid_int` |

  Unfortunately, most renames can't be done with a scripted-diff, as the property names (`.hash`, `.sha256`) are also used for blocks and other message types. The PR is rather invasive and touches a lot of files, but I think it's worth to do it, also to make life easier for new contributors. Future tasks like e.g. doing the same overhaul for block (header) objects or getting rid of the integer representation (see https://github.com/bitcoin/bitcoin/pull/32050) become easier should become easier after this one.

  [1] = returned value might be out-of-date, if rehashing function wasn't called after modification

ACKs for top commit:
  maflcko:
    re-ACK 4ef6253017 🏈
  achow101:
    ACK 4ef6253017
  marcofleon:
    code review ACK 4ef6253017

Tree-SHA512: 4b472c31d169966b6f6878911a8404d25bf3e503b6e8ef30f36a7415d21ad4bc1265083af2d3ead6edfcd9fac9ccb0a8be57e1b0739ad431b836413070d7d583
This commit is contained in:
Ava Chow
2025-06-11 14:21:11 -07:00
45 changed files with 333 additions and 499 deletions

View File

@@ -107,7 +107,7 @@ def create_block(hashprev=None, coinbase=None, ntime=None, *, version=None, tmpl
block.vtx.append(coinbase)
if txlist:
for tx in txlist:
if not hasattr(tx, 'calc_sha256'):
if type(tx) is str:
tx = tx_from_hex(tx)
block.vtx.append(tx)
block.hashMerkleRoot = block.calc_merkle_root()
@@ -134,7 +134,6 @@ def add_witness_commitment(block, nonce=0):
# witness commitment is the last OP_RETURN output in coinbase
block.vtx[0].vout.append(CTxOut(0, get_witness_script(witness_root, witness_nonce)))
block.vtx[0].rehash()
block.hashMerkleRoot = block.calc_merkle_root()
block.rehash()
@@ -176,7 +175,6 @@ def create_coinbase(height, pubkey=None, *, script_pubkey=None, extra_output_scr
coinbaseoutput2.nValue = 0
coinbaseoutput2.scriptPubKey = extra_output_script
coinbase.vout.append(coinbaseoutput2)
coinbase.calc_sha256()
return coinbase
def create_tx_with_script(prevtx, n, script_sig=b"", *, amount, output_script=None):
@@ -189,9 +187,8 @@ def create_tx_with_script(prevtx, n, script_sig=b"", *, amount, output_script=No
output_script = CScript()
tx = CTransaction()
assert n < len(prevtx.vout)
tx.vin.append(CTxIn(COutPoint(prevtx.sha256, n), script_sig, SEQUENCE_FINAL))
tx.vin.append(CTxIn(COutPoint(prevtx.txid_int, n), script_sig, SEQUENCE_FINAL))
tx.vout.append(CTxOut(amount, output_script))
tx.calc_sha256()
return tx
def get_legacy_sigopcount_block(block, accurate=True):

View File

@@ -34,7 +34,7 @@ def assert_mempool_contents(test_framework, node, expected=None, sync=True):
mempool = node.getrawmempool(verbose=False)
assert_equal(len(mempool), len(expected))
for tx in expected:
assert tx.rehash() in mempool
assert tx.txid_hex in mempool
def fill_mempool(test_framework, node, *, tx_sync_fun=None):
@@ -104,5 +104,5 @@ def fill_mempool(test_framework, node, *, tx_sync_fun=None):
def tx_in_orphanage(node, tx: CTransaction) -> bool:
"""Returns true if the transaction is in the orphanage."""
found = [o for o in node.getorphantxs(verbosity=1) if o["txid"] == tx.rehash() and o["wtxid"] == tx.getwtxid()]
found = [o for o in node.getorphantxs(verbosity=1) if o["txid"] == tx.txid_hex and o["wtxid"] == tx.wtxid_hex]
return len(found) == 1

View File

@@ -585,8 +585,7 @@ class CTxWitness:
class CTransaction:
__slots__ = ("hash", "nLockTime", "version", "sha256", "vin", "vout",
"wit")
__slots__ = ("nLockTime", "version", "vin", "vout", "wit")
def __init__(self, tx=None):
if tx is None:
@@ -595,15 +594,11 @@ class CTransaction:
self.vout = []
self.wit = CTxWitness()
self.nLockTime = 0
self.sha256 = None
self.hash = None
else:
self.version = tx.version
self.vin = copy.deepcopy(tx.vin)
self.vout = copy.deepcopy(tx.vout)
self.nLockTime = tx.nLockTime
self.sha256 = tx.sha256
self.hash = tx.hash
self.wit = copy.deepcopy(tx.wit)
def deserialize(self, f):
@@ -625,8 +620,6 @@ class CTransaction:
else:
self.wit = CTxWitness()
self.nLockTime = int.from_bytes(f.read(4), "little")
self.sha256 = None
self.hash = None
def serialize_without_witness(self):
r = b""
@@ -664,28 +657,27 @@ class CTransaction:
def serialize(self):
return self.serialize_with_witness()
def getwtxid(self):
@property
def wtxid_hex(self):
"""Return wtxid (transaction hash with witness) as hex string."""
return hash256(self.serialize())[::-1].hex()
# Recalculate the txid (transaction hash without witness)
def rehash(self):
self.sha256 = None
self.calc_sha256()
return self.hash
@property
def wtxid_int(self):
"""Return wtxid (transaction hash with witness) as integer."""
return uint256_from_str(hash256(self.serialize_with_witness()))
# We will only cache the serialization without witness in
# self.sha256 and self.hash -- those are expected to be the txid.
def calc_sha256(self, with_witness=False):
if with_witness:
# Don't cache the result, just return it
return uint256_from_str(hash256(self.serialize_with_witness()))
@property
def txid_hex(self):
"""Return txid (transaction hash without witness) as hex string."""
return hash256(self.serialize_without_witness())[::-1].hex()
if self.sha256 is None:
self.sha256 = uint256_from_str(hash256(self.serialize_without_witness()))
self.hash = hash256(self.serialize_without_witness())[::-1].hex()
@property
def txid_int(self):
"""Return txid (transaction hash without witness) as integer."""
return uint256_from_str(hash256(self.serialize_without_witness()))
def is_valid(self):
self.calc_sha256()
for tout in self.vout:
if tout.nValue < 0 or tout.nValue > 21000000 * COIN:
return False
@@ -813,8 +805,7 @@ class CBlock(CBlockHeader):
def calc_merkle_root(self):
hashes = []
for tx in self.vtx:
tx.calc_sha256()
hashes.append(ser_uint256(tx.sha256))
hashes.append(ser_uint256(tx.txid_int))
return self.get_merkle_root(hashes)
def calc_witness_merkle_root(self):
@@ -824,7 +815,7 @@ class CBlock(CBlockHeader):
for tx in self.vtx[1:]:
# Calculate the hashes with witness data
hashes.append(ser_uint256(tx.calc_sha256(True)))
hashes.append(ser_uint256(tx.wtxid_int))
return self.get_merkle_root(hashes)
@@ -1006,9 +997,9 @@ class HeaderAndShortIDs:
[k0, k1] = self.get_siphash_keys()
for i in range(len(block.vtx)):
if i not in prefill_list:
tx_hash = block.vtx[i].sha256
tx_hash = block.vtx[i].txid_int
if use_witness:
tx_hash = block.vtx[i].calc_sha256(with_witness=True)
tx_hash = block.vtx[i].wtxid_int
self.shortids.append(calculate_shortid(k0, k1, tx_hash))
def __repr__(self):

View File

@@ -619,7 +619,7 @@ class P2PInterface(P2PConnection):
def test_function():
if not self.last_message.get('tx'):
return False
return self.last_message['tx'].tx.rehash() == txid
return self.last_message['tx'].tx.txid_hex == txid
self.wait_until(test_function, timeout=timeout)
@@ -911,7 +911,7 @@ class P2PDataStore(P2PInterface):
with p2p_lock:
for tx in txs:
self.tx_store[tx.sha256] = tx
self.tx_store[tx.txid_int] = tx
reject_reason = [reject_reason] if reject_reason else []
with node.assert_debug_log(expected_msgs=reject_reason):
@@ -927,11 +927,11 @@ class P2PDataStore(P2PInterface):
if success:
# Check that all txs are now in the mempool
for tx in txs:
assert tx.hash in raw_mempool, "{} not found in mempool".format(tx.hash)
assert tx.txid_hex in raw_mempool, "{} not found in mempool".format(tx.txid_hex)
else:
# Check that none of the txs are now in the mempool
for tx in txs:
assert tx.hash not in raw_mempool, "{} tx found in mempool".format(tx.hash)
assert tx.txid_hex not in raw_mempool, "{} tx found in mempool".format(tx.txid_hex)
class P2PTxInvStore(P2PInterface):
"""A P2PInterface which stores a count of how many times each txid has been announced."""

View File

@@ -694,7 +694,6 @@ def sign_input_legacy(tx, input_index, input_scriptpubkey, privkey, sighash_type
assert err is None
der_sig = privkey.sign_ecdsa(sighash)
tx.vin[input_index].scriptSig = bytes(CScript([der_sig + bytes([sighash_type])])) + tx.vin[input_index].scriptSig
tx.rehash()
def sign_input_segwitv0(tx, input_index, input_scriptpubkey, input_amount, privkey, sighash_type=SIGHASH_ALL):
"""Add segwitv0 ECDSA signature for a given transaction input. Note that the signature
@@ -703,7 +702,6 @@ def sign_input_segwitv0(tx, input_index, input_scriptpubkey, input_amount, privk
sighash = SegwitV0SignatureHash(input_scriptpubkey, tx, input_index, sighash_type, input_amount)
der_sig = privkey.sign_ecdsa(sighash)
tx.wit.vtxinwit[input_index].scriptWitness.stack.insert(0, der_sig + bytes([sighash_type]))
tx.rehash()
# TODO: Allow cached hashPrevouts/hashSequence/hashOutputs to be provided.
# Performance optimization probably not necessary for python tests, however.

View File

@@ -287,7 +287,7 @@ class MiniWallet:
return {
"sent_vout": 1,
"txid": txid,
"wtxid": tx.getwtxid(),
"wtxid": tx.wtxid_hex,
"hex": tx.serialize().hex(),
"tx": tx,
}
@@ -340,7 +340,7 @@ class MiniWallet:
if target_vsize:
self._bulk_tx(tx, target_vsize)
txid = tx.rehash()
txid = tx.txid_hex
return {
"new_utxos": [self._create_utxo(
txid=txid,
@@ -352,7 +352,7 @@ class MiniWallet:
) for i in range(len(tx.vout))],
"fee": fee,
"txid": txid,
"wtxid": tx.getwtxid(),
"wtxid": tx.wtxid_hex,
"hex": tx.serialize().hex(),
"tx": tx,
}