mirror of
https://github.com/bitcoin/bitcoin.git
synced 2026-04-08 22:57:56 +02:00
refactor: Operate on bytes instead of bits in Asmap code
Co-authored-by: Hodlinator <172445034+hodlinator@users.noreply.github.com>
This commit is contained in:
@@ -13,6 +13,7 @@
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@@ -21,16 +22,28 @@ namespace {
|
||||
|
||||
constexpr uint32_t INVALID = 0xFFFFFFFF;
|
||||
|
||||
uint32_t DecodeBits(std::vector<bool>::const_iterator& bitpos, const std::vector<bool>::const_iterator& endpos, uint8_t minval, const std::vector<uint8_t> &bit_sizes)
|
||||
inline bool ConsumeBitLE(size_t& bitpos, std::span<const std::byte> bytes) noexcept
|
||||
{
|
||||
const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (bitpos % 8)) & 1;
|
||||
++bitpos;
|
||||
return bit;
|
||||
}
|
||||
|
||||
inline bool ConsumeBitBE(uint8_t& bitpos, std::span<const std::byte> bytes) noexcept
|
||||
{
|
||||
const bool bit = (std::to_integer<uint8_t>(bytes[bitpos / 8]) >> (7 - (bitpos % 8))) & 1;
|
||||
++bitpos;
|
||||
return bit;
|
||||
}
|
||||
|
||||
uint32_t DecodeBits(size_t& bitpos, const std::vector<std::byte>& data, uint8_t minval, const std::vector<uint8_t>& bit_sizes)
|
||||
{
|
||||
uint32_t val = minval;
|
||||
bool bit;
|
||||
for (std::vector<uint8_t>::const_iterator bit_sizes_it = bit_sizes.begin();
|
||||
bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
|
||||
for (auto bit_sizes_it = bit_sizes.begin(); bit_sizes_it != bit_sizes.end(); ++bit_sizes_it) {
|
||||
if (bit_sizes_it + 1 != bit_sizes.end()) {
|
||||
if (bitpos == endpos) break;
|
||||
bit = *bitpos;
|
||||
bitpos++;
|
||||
if (bitpos >= data.size() * 8) break;
|
||||
bit = ConsumeBitLE(bitpos, data);
|
||||
} else {
|
||||
bit = 0;
|
||||
}
|
||||
@@ -38,9 +51,8 @@ uint32_t DecodeBits(std::vector<bool>::const_iterator& bitpos, const std::vector
|
||||
val += (1 << *bit_sizes_it);
|
||||
} else {
|
||||
for (int b = 0; b < *bit_sizes_it; b++) {
|
||||
if (bitpos == endpos) return INVALID; // Reached EOF in mantissa
|
||||
bit = *bitpos;
|
||||
bitpos++;
|
||||
if (bitpos >= data.size() * 8) return INVALID; // Reached EOF in mantissa
|
||||
bit = ConsumeBitLE(bitpos, data);
|
||||
val += bit << (*bit_sizes_it - 1 - b);
|
||||
}
|
||||
return val;
|
||||
@@ -58,69 +70,68 @@ enum class Instruction : uint32_t
|
||||
};
|
||||
|
||||
const std::vector<uint8_t> TYPE_BIT_SIZES{0, 0, 1};
|
||||
Instruction DecodeType(std::vector<bool>::const_iterator& bitpos, const std::vector<bool>::const_iterator& endpos)
|
||||
Instruction DecodeType(size_t& bitpos, const std::vector<std::byte>& data)
|
||||
{
|
||||
return Instruction(DecodeBits(bitpos, endpos, 0, TYPE_BIT_SIZES));
|
||||
return Instruction(DecodeBits(bitpos, data, 0, TYPE_BIT_SIZES));
|
||||
}
|
||||
|
||||
const std::vector<uint8_t> ASN_BIT_SIZES{15, 16, 17, 18, 19, 20, 21, 22, 23, 24};
|
||||
uint32_t DecodeASN(std::vector<bool>::const_iterator& bitpos, const std::vector<bool>::const_iterator& endpos)
|
||||
uint32_t DecodeASN(size_t& bitpos, const std::vector<std::byte>& data)
|
||||
{
|
||||
return DecodeBits(bitpos, endpos, 1, ASN_BIT_SIZES);
|
||||
return DecodeBits(bitpos, data, 1, ASN_BIT_SIZES);
|
||||
}
|
||||
|
||||
|
||||
const std::vector<uint8_t> MATCH_BIT_SIZES{1, 2, 3, 4, 5, 6, 7, 8};
|
||||
uint32_t DecodeMatch(std::vector<bool>::const_iterator& bitpos, const std::vector<bool>::const_iterator& endpos)
|
||||
uint32_t DecodeMatch(size_t& bitpos, const std::vector<std::byte>& data)
|
||||
{
|
||||
return DecodeBits(bitpos, endpos, 2, MATCH_BIT_SIZES);
|
||||
return DecodeBits(bitpos, data, 2, MATCH_BIT_SIZES);
|
||||
}
|
||||
|
||||
|
||||
const std::vector<uint8_t> JUMP_BIT_SIZES{5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30};
|
||||
uint32_t DecodeJump(std::vector<bool>::const_iterator& bitpos, const std::vector<bool>::const_iterator& endpos)
|
||||
uint32_t DecodeJump(size_t& bitpos, const std::vector<std::byte>& data)
|
||||
{
|
||||
return DecodeBits(bitpos, endpos, 17, JUMP_BIT_SIZES);
|
||||
return DecodeBits(bitpos, data, 17, JUMP_BIT_SIZES);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
uint32_t Interpret(const std::vector<bool> &asmap, const std::vector<bool> &ip)
|
||||
uint32_t Interpret(const std::vector<std::byte>& asmap, const std::vector<std::byte>& ip)
|
||||
{
|
||||
std::vector<bool>::const_iterator pos = asmap.begin();
|
||||
const std::vector<bool>::const_iterator endpos = asmap.end();
|
||||
uint8_t bits = ip.size();
|
||||
size_t pos{0};
|
||||
const size_t endpos{asmap.size() * 8};
|
||||
uint8_t ip_bit{0};
|
||||
const uint8_t ip_bits_end = ip.size() * 8;
|
||||
uint32_t default_asn = 0;
|
||||
uint32_t jump, match, matchlen;
|
||||
Instruction opcode;
|
||||
while (pos != endpos) {
|
||||
opcode = DecodeType(pos, endpos);
|
||||
while (pos < endpos) {
|
||||
opcode = DecodeType(pos, asmap);
|
||||
if (opcode == Instruction::RETURN) {
|
||||
default_asn = DecodeASN(pos, endpos);
|
||||
default_asn = DecodeASN(pos, asmap);
|
||||
if (default_asn == INVALID) break; // ASN straddles EOF
|
||||
return default_asn;
|
||||
} else if (opcode == Instruction::JUMP) {
|
||||
jump = DecodeJump(pos, endpos);
|
||||
jump = DecodeJump(pos, asmap);
|
||||
if (jump == INVALID) break; // Jump offset straddles EOF
|
||||
if (bits == 0) break; // No input bits left
|
||||
if (int64_t{jump} >= int64_t{endpos - pos}) break; // Jumping past EOF
|
||||
if (ip[ip.size() - bits]) {
|
||||
if (ip_bit == ip_bits_end) break; // No input bits left
|
||||
if (int64_t{jump} >= static_cast<int64_t>(endpos - pos)) break; // Jumping past EOF
|
||||
if (ConsumeBitBE(ip_bit, ip)) {
|
||||
pos += jump;
|
||||
}
|
||||
bits--;
|
||||
} else if (opcode == Instruction::MATCH) {
|
||||
match = DecodeMatch(pos, endpos);
|
||||
match = DecodeMatch(pos, asmap);
|
||||
if (match == INVALID) break; // Match bits straddle EOF
|
||||
matchlen = std::bit_width(match) - 1;
|
||||
if (bits < matchlen) break; // Not enough input bits
|
||||
if ((ip_bits_end - ip_bit) < matchlen) break; // Not enough input bits
|
||||
for (uint32_t bit = 0; bit < matchlen; bit++) {
|
||||
if ((ip[ip.size() - bits]) != ((match >> (matchlen - 1 - bit)) & 1)) {
|
||||
if (ConsumeBitBE(ip_bit, ip) != ((match >> (matchlen - 1 - bit)) & 1)) {
|
||||
return default_asn;
|
||||
}
|
||||
bits--;
|
||||
}
|
||||
} else if (opcode == Instruction::DEFAULT) {
|
||||
default_asn = DecodeASN(pos, endpos);
|
||||
default_asn = DecodeASN(pos, asmap);
|
||||
if (default_asn == INVALID) break; // ASN straddles EOF
|
||||
} else {
|
||||
break; // Instruction straddles EOF
|
||||
@@ -130,50 +141,47 @@ uint32_t Interpret(const std::vector<bool> &asmap, const std::vector<bool> &ip)
|
||||
return 0; // 0 is not a valid ASN
|
||||
}
|
||||
|
||||
bool SanityCheckASMap(const std::vector<bool>& asmap, int bits)
|
||||
bool SanityCheckASMap(const std::vector<std::byte>& asmap, int bits)
|
||||
{
|
||||
const std::vector<bool>::const_iterator begin = asmap.begin(), endpos = asmap.end();
|
||||
std::vector<bool>::const_iterator pos = begin;
|
||||
size_t pos{0};
|
||||
const size_t endpos{asmap.size() * 8};
|
||||
std::vector<std::pair<uint32_t, int>> jumps; // All future positions we may jump to (bit offset in asmap -> bits to consume left)
|
||||
jumps.reserve(bits);
|
||||
Instruction prevopcode = Instruction::JUMP;
|
||||
bool had_incomplete_match = false;
|
||||
while (pos != endpos) {
|
||||
uint32_t offset = pos - begin;
|
||||
if (!jumps.empty() && offset >= jumps.back().first) return false; // There was a jump into the middle of the previous instruction
|
||||
Instruction opcode = DecodeType(pos, endpos);
|
||||
if (!jumps.empty() && pos >= jumps.back().first) return false; // There was a jump into the middle of the previous instruction
|
||||
Instruction opcode = DecodeType(pos, asmap);
|
||||
if (opcode == Instruction::RETURN) {
|
||||
if (prevopcode == Instruction::DEFAULT) return false; // There should not be any RETURN immediately after a DEFAULT (could be combined into just RETURN)
|
||||
uint32_t asn = DecodeASN(pos, endpos);
|
||||
uint32_t asn = DecodeASN(pos, asmap);
|
||||
if (asn == INVALID) return false; // ASN straddles EOF
|
||||
if (jumps.empty()) {
|
||||
// Nothing to execute anymore
|
||||
if (endpos - pos > 7) return false; // Excessive padding
|
||||
while (pos != endpos) {
|
||||
if (*pos) return false; // Nonzero padding bit
|
||||
++pos;
|
||||
if (ConsumeBitLE(pos, asmap)) return false; // Nonzero padding bit
|
||||
}
|
||||
return true; // Sanely reached EOF
|
||||
} else {
|
||||
// Continue by pretending we jumped to the next instruction
|
||||
offset = pos - begin;
|
||||
if (offset != jumps.back().first) return false; // Unreachable code
|
||||
if (pos != jumps.back().first) return false; // Unreachable code
|
||||
bits = jumps.back().second; // Restore the number of bits we would have had left after this jump
|
||||
jumps.pop_back();
|
||||
prevopcode = Instruction::JUMP;
|
||||
}
|
||||
} else if (opcode == Instruction::JUMP) {
|
||||
uint32_t jump = DecodeJump(pos, endpos);
|
||||
uint32_t jump = DecodeJump(pos, asmap);
|
||||
if (jump == INVALID) return false; // Jump offset straddles EOF
|
||||
if (int64_t{jump} > int64_t{endpos - pos}) return false; // Jump out of range
|
||||
if (int64_t{jump} > static_cast<int64_t>(endpos - pos)) return false; // Jump out of range
|
||||
if (bits == 0) return false; // Consuming bits past the end of the input
|
||||
--bits;
|
||||
uint32_t jump_offset = pos - begin + jump;
|
||||
uint32_t jump_offset = pos + jump;
|
||||
if (!jumps.empty() && jump_offset >= jumps.back().first) return false; // Intersecting jumps
|
||||
jumps.emplace_back(jump_offset, bits);
|
||||
prevopcode = Instruction::JUMP;
|
||||
} else if (opcode == Instruction::MATCH) {
|
||||
uint32_t match = DecodeMatch(pos, endpos);
|
||||
uint32_t match = DecodeMatch(pos, asmap);
|
||||
if (match == INVALID) return false; // Match bits straddle EOF
|
||||
int matchlen = std::bit_width(match) - 1;
|
||||
if (prevopcode != Instruction::MATCH) had_incomplete_match = false;
|
||||
@@ -184,7 +192,7 @@ bool SanityCheckASMap(const std::vector<bool>& asmap, int bits)
|
||||
prevopcode = Instruction::MATCH;
|
||||
} else if (opcode == Instruction::DEFAULT) {
|
||||
if (prevopcode == Instruction::DEFAULT) return false; // There should not be two successive DEFAULTs (they could be combined into one)
|
||||
uint32_t asn = DecodeASN(pos, endpos);
|
||||
uint32_t asn = DecodeASN(pos, asmap);
|
||||
if (asn == INVALID) return false; // ASN straddles EOF
|
||||
prevopcode = Instruction::DEFAULT;
|
||||
} else {
|
||||
@@ -194,27 +202,24 @@ bool SanityCheckASMap(const std::vector<bool>& asmap, int bits)
|
||||
return false; // Reached EOF without RETURN instruction
|
||||
}
|
||||
|
||||
std::vector<bool> DecodeAsmap(fs::path path)
|
||||
std::vector<std::byte> DecodeAsmap(fs::path path)
|
||||
{
|
||||
std::vector<bool> bits;
|
||||
FILE *filestr = fsbridge::fopen(path, "rb");
|
||||
AutoFile file{filestr};
|
||||
if (file.IsNull()) {
|
||||
LogWarning("Failed to open asmap file from disk");
|
||||
return bits;
|
||||
return {};
|
||||
}
|
||||
int64_t length{file.size()};
|
||||
LogInfo("Opened asmap file %s (%d bytes) from disk", fs::quoted(fs::PathToString(path)), length);
|
||||
uint8_t cur_byte;
|
||||
for (int i = 0; i < length; ++i) {
|
||||
file >> cur_byte;
|
||||
for (int bit = 0; bit < 8; ++bit) {
|
||||
bits.push_back((cur_byte >> bit) & 1);
|
||||
}
|
||||
}
|
||||
if (!SanityCheckASMap(bits, 128)) {
|
||||
|
||||
std::vector<std::byte> buffer(length);
|
||||
file.read(buffer);
|
||||
|
||||
if (!SanityCheckASMap(buffer, 128)) {
|
||||
LogWarning("Sanity check of asmap file %s failed", fs::quoted(fs::PathToString(path)));
|
||||
return {};
|
||||
}
|
||||
return bits;
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user