Merge #19335: wallet: Cleanup and separate BerkeleyDatabase and BerkeleyBatch

74507ce71e walletdb: Remove BerkeleyBatch friend class from BerkeleyDatabase (Andrew Chow)
00f0041351 No need to check for duplicate fileids in all dbenvs (Andrew Chow)
d86efab370 walletdb: Move Db->open to BerkeleyDatabase::Open (Andrew Chow)
4fe4b3bf1b walletdb: track database file use as m_refcount within BerkeleyDatabase (Andrew Chow)
65fb8807ac Combine BerkeleyEnvironment::Verify into BerkeleyDatabase::Verify (Andrew Chow)

Pull request description:

  `BerkeleyBatch` and `BerkeleyDatabase` are kind of messy. The goal of this is to clean up them up so that they are logically separated.

  `BerkeleyBatch` currently handles the creation of the `BerkeleyDatabase`'s `Db` handle. This is instead moved into `BerkeleyDatabase` and is called by `BerkeleyBatch`.

  Instead of having `BerkeleyEnvironment` track each database's usage, have `BerkeleyDatabase` track this usage itself with the `m_refcount` variable that is present in `WalletDatabase`.

  Lastly, instead of having each `BerkeleyEnvironment` store the fileids of the databases open in it, have a global `g_fileids` to track those fileids. We were already checking fileid uniqueness globally (by checking the fileids in every environment when opening a database) so it's cleaner to do this with a global variable.

  All of these changes allow us to make `BerkeleyBatch` and `BerkeleyDatabase` no longer be friend classes.

  The diff of this PR is currently the same as in ##18971

  Requires #19334

ACKs for top commit:
  laanwj:
    Code review ACK 74507ce71e
  ryanofsky:
    Code review ACK 74507ce71e. No changes since last review other than rebase

Tree-SHA512: 845d84ee1a470e2bf5d2e2e3d7738183d8ce43ddd06a0bbd57edecf5779b2f55d70728b1b57f5daab0f078650a8d60c3e19dc30b75b36e7aa952ce268399d5f6
This commit is contained in:
Wladimir J. van der Laan
2020-07-29 16:53:42 +02:00
3 changed files with 74 additions and 90 deletions

View File

@@ -32,12 +32,12 @@ void CheckUniqueFileid(const BerkeleyEnvironment& env, const std::string& filena
int ret = db.get_mpf()->get_fileid(fileid.value); int ret = db.get_mpf()->get_fileid(fileid.value);
if (ret != 0) { if (ret != 0) {
throw std::runtime_error(strprintf("BerkeleyBatch: Can't open database %s (get_fileid failed with %d)", filename, ret)); throw std::runtime_error(strprintf("BerkeleyDatabase: Can't open database %s (get_fileid failed with %d)", filename, ret));
} }
for (const auto& item : env.m_fileids) { for (const auto& item : env.m_fileids) {
if (fileid == item.second && &fileid != &item.second) { if (fileid == item.second && &fileid != &item.second) {
throw std::runtime_error(strprintf("BerkeleyBatch: Can't open database %s (duplicates fileid %s from %s)", filename, throw std::runtime_error(strprintf("BerkeleyDatabase: Can't open database %s (duplicates fileid %s from %s)", filename,
HexStr(std::begin(item.second.value), std::end(item.second.value)), item.first)); HexStr(std::begin(item.second.value), std::end(item.second.value)), item.first));
} }
} }
@@ -97,9 +97,8 @@ void BerkeleyEnvironment::Close()
fDbEnvInit = false; fDbEnvInit = false;
for (auto& db : m_databases) { for (auto& db : m_databases) {
auto count = mapFileUseCount.find(db.first);
assert(count == mapFileUseCount.end() || count->second == 0);
BerkeleyDatabase& database = db.second.get(); BerkeleyDatabase& database = db.second.get();
assert(database.m_refcount <= 0);
if (database.m_db) { if (database.m_db) {
database.m_db->close(0); database.m_db->close(0);
database.m_db.reset(); database.m_db.reset();
@@ -232,16 +231,6 @@ BerkeleyEnvironment::BerkeleyEnvironment()
fMockDb = true; fMockDb = true;
} }
bool BerkeleyEnvironment::Verify(const std::string& strFile)
{
LOCK(cs_db);
assert(mapFileUseCount.count(strFile) == 0);
Db db(dbenv.get(), 0);
int result = db.verify(strFile.c_str(), nullptr, nullptr, 0);
return result == 0;
}
BerkeleyBatch::SafeDbt::SafeDbt() BerkeleyBatch::SafeDbt::SafeDbt()
{ {
m_dbt.set_flags(DB_DBT_MALLOC); m_dbt.set_flags(DB_DBT_MALLOC);
@@ -295,7 +284,11 @@ bool BerkeleyDatabase::Verify(bilingual_str& errorStr)
if (fs::exists(file_path)) if (fs::exists(file_path))
{ {
if (!env->Verify(strFile)) { assert(m_refcount == 0);
Db db(env->dbenv.get(), 0);
int result = db.verify(strFile.c_str(), nullptr, nullptr, 0);
if (result != 0) {
errorStr = strprintf(_("%s corrupt. Try using the wallet tool bitcoin-wallet to salvage or restoring a backup."), file_path); errorStr = strprintf(_("%s corrupt. Try using the wallet tool bitcoin-wallet to salvage or restoring a backup."), file_path);
return false; return false;
} }
@@ -316,6 +309,8 @@ BerkeleyDatabase::~BerkeleyDatabase()
{ {
if (env) { if (env) {
LOCK(cs_db); LOCK(cs_db);
env->CloseDb(strFile);
assert(!m_db);
size_t erased = env->m_databases.erase(strFile); size_t erased = env->m_databases.erase(strFile);
assert(erased == 1); assert(erased == 1);
env->m_fileids.erase(strFile); env->m_fileids.erase(strFile);
@@ -324,13 +319,27 @@ BerkeleyDatabase::~BerkeleyDatabase()
BerkeleyBatch::BerkeleyBatch(BerkeleyDatabase& database, const char* pszMode, bool fFlushOnCloseIn) : pdb(nullptr), activeTxn(nullptr), m_cursor(nullptr), m_database(database) BerkeleyBatch::BerkeleyBatch(BerkeleyDatabase& database, const char* pszMode, bool fFlushOnCloseIn) : pdb(nullptr), activeTxn(nullptr), m_cursor(nullptr), m_database(database)
{ {
database.AddRef();
database.Open(pszMode);
fReadOnly = (!strchr(pszMode, '+') && !strchr(pszMode, 'w')); fReadOnly = (!strchr(pszMode, '+') && !strchr(pszMode, 'w'));
fFlushOnClose = fFlushOnCloseIn; fFlushOnClose = fFlushOnCloseIn;
env = database.env.get(); env = database.env.get();
if (database.IsDummy()) { pdb = database.m_db.get();
strFile = database.strFile;
bool fCreate = strchr(pszMode, 'c') != nullptr;
if (fCreate && !Exists(std::string("version"))) {
bool fTmp = fReadOnly;
fReadOnly = false;
Write(std::string("version"), CLIENT_VERSION);
fReadOnly = fTmp;
}
}
void BerkeleyDatabase::Open(const char* pszMode)
{
if (IsDummy()){
return; return;
} }
const std::string &strFilename = database.strFile;
bool fCreate = strchr(pszMode, 'c') != nullptr; bool fCreate = strchr(pszMode, 'c') != nullptr;
unsigned int nFlags = DB_THREAD; unsigned int nFlags = DB_THREAD;
@@ -341,10 +350,9 @@ BerkeleyBatch::BerkeleyBatch(BerkeleyDatabase& database, const char* pszMode, bo
LOCK(cs_db); LOCK(cs_db);
bilingual_str open_err; bilingual_str open_err;
if (!env->Open(open_err)) if (!env->Open(open_err))
throw std::runtime_error("BerkeleyBatch: Failed to open database environment."); throw std::runtime_error("BerkeleyDatabase: Failed to open database environment.");
pdb = database.m_db.get(); if (m_db == nullptr) {
if (pdb == nullptr) {
int ret; int ret;
std::unique_ptr<Db> pdb_temp = MakeUnique<Db>(env->dbenv.get(), 0); std::unique_ptr<Db> pdb_temp = MakeUnique<Db>(env->dbenv.get(), 0);
@@ -353,60 +361,33 @@ BerkeleyBatch::BerkeleyBatch(BerkeleyDatabase& database, const char* pszMode, bo
DbMpoolFile* mpf = pdb_temp->get_mpf(); DbMpoolFile* mpf = pdb_temp->get_mpf();
ret = mpf->set_flags(DB_MPOOL_NOFILE, 1); ret = mpf->set_flags(DB_MPOOL_NOFILE, 1);
if (ret != 0) { if (ret != 0) {
throw std::runtime_error(strprintf("BerkeleyBatch: Failed to configure for no temp file backing for database %s", strFilename)); throw std::runtime_error(strprintf("BerkeleyDatabase: Failed to configure for no temp file backing for database %s", strFile));
} }
} }
ret = pdb_temp->open(nullptr, // Txn pointer ret = pdb_temp->open(nullptr, // Txn pointer
fMockDb ? nullptr : strFilename.c_str(), // Filename fMockDb ? nullptr : strFile.c_str(), // Filename
fMockDb ? strFilename.c_str() : "main", // Logical db name fMockDb ? strFile.c_str() : "main", // Logical db name
DB_BTREE, // Database type DB_BTREE, // Database type
nFlags, // Flags nFlags, // Flags
0); 0);
if (ret != 0) { if (ret != 0) {
throw std::runtime_error(strprintf("BerkeleyBatch: Error %d, can't open database %s", ret, strFilename)); throw std::runtime_error(strprintf("BerkeleyDatabase: Error %d, can't open database %s", ret, strFile));
} }
m_file_path = (env->Directory() / strFile).string();
// Call CheckUniqueFileid on the containing BDB environment to // Call CheckUniqueFileid on the containing BDB environment to
// avoid BDB data consistency bugs that happen when different data // avoid BDB data consistency bugs that happen when different data
// files in the same environment have the same fileid. // files in the same environment have the same fileid.
// CheckUniqueFileid(*env, strFile, *pdb_temp, this->env->m_fileids[strFile]);
// Also call CheckUniqueFileid on all the other g_dbenvs to prevent
// bitcoin from opening the same data file through another
// environment when the file is referenced through equivalent but
// not obviously identical symlinked or hard linked or bind mounted
// paths. In the future a more relaxed check for equal inode and
// device ids could be done instead, which would allow opening
// different backup copies of a wallet at the same time. Maybe even
// more ideally, an exclusive lock for accessing the database could
// be implemented, so no equality checks are needed at all. (Newer
// versions of BDB have an set_lk_exclusive method for this
// purpose, but the older version we use does not.)
for (const auto& env : g_dbenvs) {
CheckUniqueFileid(*env.second.lock().get(), strFilename, *pdb_temp, this->env->m_fileids[strFilename]);
}
pdb = pdb_temp.release(); m_db.reset(pdb_temp.release());
database.m_db.reset(pdb);
if (fCreate && !Exists(std::string("version"))) {
bool fTmp = fReadOnly;
fReadOnly = false;
Write(std::string("version"), CLIENT_VERSION);
fReadOnly = fTmp;
}
} }
database.AddRef();
strFile = strFilename;
} }
} }
void BerkeleyDatabase::Open(const char* mode)
{
throw std::logic_error("BerkeleyDatabase does not implement Open. This function should not be called.");
}
void BerkeleyBatch::Flush() void BerkeleyBatch::Flush()
{ {
if (activeTxn) if (activeTxn)
@@ -427,6 +408,12 @@ void BerkeleyDatabase::IncrementUpdateCounter()
++nUpdateCounter; ++nUpdateCounter;
} }
BerkeleyBatch::~BerkeleyBatch()
{
Close();
m_database.RemoveRef();
}
void BerkeleyBatch::Close() void BerkeleyBatch::Close()
{ {
if (!pdb) if (!pdb)
@@ -439,8 +426,6 @@ void BerkeleyBatch::Close()
if (fFlushOnClose) if (fFlushOnClose)
Flush(); Flush();
m_database.RemoveRef();
} }
void BerkeleyEnvironment::CloseDb(const std::string& strFile) void BerkeleyEnvironment::CloseDb(const std::string& strFile)
@@ -464,8 +449,8 @@ void BerkeleyEnvironment::ReloadDbEnv()
AssertLockNotHeld(cs_db); AssertLockNotHeld(cs_db);
std::unique_lock<RecursiveMutex> lock(cs_db); std::unique_lock<RecursiveMutex> lock(cs_db);
m_db_in_use.wait(lock, [this](){ m_db_in_use.wait(lock, [this](){
for (auto& count : mapFileUseCount) { for (auto& db : m_databases) {
if (count.second > 0) return false; if (db.second.get().m_refcount > 0) return false;
} }
return true; return true;
}); });
@@ -493,11 +478,11 @@ bool BerkeleyDatabase::Rewrite(const char* pszSkip)
while (true) { while (true) {
{ {
LOCK(cs_db); LOCK(cs_db);
if (!env->mapFileUseCount.count(strFile) || env->mapFileUseCount[strFile] == 0) { if (m_refcount <= 0) {
// Flush log data to the dat file // Flush log data to the dat file
env->CloseDb(strFile); env->CloseDb(strFile);
env->CheckpointLSN(strFile); env->CheckpointLSN(strFile);
env->mapFileUseCount.erase(strFile); m_refcount = -1;
bool fSuccess = true; bool fSuccess = true;
LogPrintf("BerkeleyBatch::Rewrite: Rewriting %s...\n", strFile); LogPrintf("BerkeleyBatch::Rewrite: Rewriting %s...\n", strFile);
@@ -581,10 +566,11 @@ void BerkeleyEnvironment::Flush(bool fShutdown)
return; return;
{ {
LOCK(cs_db); LOCK(cs_db);
std::map<std::string, int>::iterator mi = mapFileUseCount.begin(); bool no_dbs_accessed = true;
while (mi != mapFileUseCount.end()) { for (auto& db_it : m_databases) {
std::string strFile = (*mi).first; std::string strFile = db_it.first;
int nRefCount = (*mi).second; int nRefCount = db_it.second.get().m_refcount;
if (nRefCount < 0) continue;
LogPrint(BCLog::WALLETDB, "BerkeleyEnvironment::Flush: Flushing %s (refcount = %d)...\n", strFile, nRefCount); LogPrint(BCLog::WALLETDB, "BerkeleyEnvironment::Flush: Flushing %s (refcount = %d)...\n", strFile, nRefCount);
if (nRefCount == 0) { if (nRefCount == 0) {
// Move log data to the dat file // Move log data to the dat file
@@ -595,14 +581,15 @@ void BerkeleyEnvironment::Flush(bool fShutdown)
if (!fMockDb) if (!fMockDb)
dbenv->lsn_reset(strFile.c_str(), 0); dbenv->lsn_reset(strFile.c_str(), 0);
LogPrint(BCLog::WALLETDB, "BerkeleyEnvironment::Flush: %s closed\n", strFile); LogPrint(BCLog::WALLETDB, "BerkeleyEnvironment::Flush: %s closed\n", strFile);
mapFileUseCount.erase(mi++); nRefCount = -1;
} else } else {
mi++; no_dbs_accessed = false;
}
} }
LogPrint(BCLog::WALLETDB, "BerkeleyEnvironment::Flush: Flush(%s)%s took %15dms\n", fShutdown ? "true" : "false", fDbEnvInit ? "" : " database not started", GetTimeMillis() - nStart); LogPrint(BCLog::WALLETDB, "BerkeleyEnvironment::Flush: Flush(%s)%s took %15dms\n", fShutdown ? "true" : "false", fDbEnvInit ? "" : " database not started", GetTimeMillis() - nStart);
if (fShutdown) { if (fShutdown) {
char** listp; char** listp;
if (mapFileUseCount.empty()) { if (no_dbs_accessed) {
dbenv->log_archive(&listp, DB_ARCH_REMOVE); dbenv->log_archive(&listp, DB_ARCH_REMOVE);
Close(); Close();
if (!fMockDb) { if (!fMockDb) {
@@ -623,13 +610,12 @@ bool BerkeleyDatabase::PeriodicFlush()
if (!lockDb) return false; if (!lockDb) return false;
// Don't flush if any databases are in use // Don't flush if any databases are in use
for (const auto& use_count : env->mapFileUseCount) { for (auto& it : env->m_databases) {
if (use_count.second > 0) return false; if (it.second.get().m_refcount > 0) return false;
} }
// Don't flush if there haven't been any batch writes for this database. // Don't flush if there haven't been any batch writes for this database.
auto it = env->mapFileUseCount.find(strFile); if (m_refcount < 0) return false;
if (it == env->mapFileUseCount.end()) return false;
LogPrint(BCLog::WALLETDB, "Flushing %s\n", strFile); LogPrint(BCLog::WALLETDB, "Flushing %s\n", strFile);
int64_t nStart = GetTimeMillis(); int64_t nStart = GetTimeMillis();
@@ -637,7 +623,7 @@ bool BerkeleyDatabase::PeriodicFlush()
// Flush wallet file so it's self contained // Flush wallet file so it's self contained
env->CloseDb(strFile); env->CloseDb(strFile);
env->CheckpointLSN(strFile); env->CheckpointLSN(strFile);
env->mapFileUseCount.erase(it); m_refcount = -1;
LogPrint(BCLog::WALLETDB, "Flushed %s %dms\n", strFile, GetTimeMillis() - nStart); LogPrint(BCLog::WALLETDB, "Flushed %s %dms\n", strFile, GetTimeMillis() - nStart);
@@ -653,12 +639,11 @@ bool BerkeleyDatabase::Backup(const std::string& strDest) const
{ {
{ {
LOCK(cs_db); LOCK(cs_db);
if (!env->mapFileUseCount.count(strFile) || env->mapFileUseCount[strFile] == 0) if (m_refcount <= 0)
{ {
// Flush log data to the dat file // Flush log data to the dat file
env->CloseDb(strFile); env->CloseDb(strFile);
env->CheckpointLSN(strFile); env->CheckpointLSN(strFile);
env->mapFileUseCount.erase(strFile);
// Copy wallet file // Copy wallet file
fs::path pathSrc = env->Directory() / strFile; fs::path pathSrc = env->Directory() / strFile;
@@ -840,16 +825,18 @@ bool BerkeleyBatch::HasKey(CDataStream&& key)
void BerkeleyDatabase::AddRef() void BerkeleyDatabase::AddRef()
{ {
LOCK(cs_db); LOCK(cs_db);
++env->mapFileUseCount[strFile]; if (m_refcount < 0) {
m_refcount = 1;
} else {
m_refcount++;
}
} }
void BerkeleyDatabase::RemoveRef() void BerkeleyDatabase::RemoveRef()
{ {
{ LOCK(cs_db);
LOCK(cs_db); m_refcount--;
--env->mapFileUseCount[strFile]; if (env) env->m_db_in_use.notify_all();
}
env->m_db_in_use.notify_all();
} }
std::unique_ptr<DatabaseBatch> BerkeleyDatabase::MakeBatch(const char* mode, bool flush_on_close) std::unique_ptr<DatabaseBatch> BerkeleyDatabase::MakeBatch(const char* mode, bool flush_on_close)

View File

@@ -52,7 +52,6 @@ private:
public: public:
std::unique_ptr<DbEnv> dbenv; std::unique_ptr<DbEnv> dbenv;
std::map<std::string, int> mapFileUseCount;
std::map<std::string, std::reference_wrapper<BerkeleyDatabase>> m_databases; std::map<std::string, std::reference_wrapper<BerkeleyDatabase>> m_databases;
std::unordered_map<std::string, WalletDatabaseFileId> m_fileids; std::unordered_map<std::string, WalletDatabaseFileId> m_fileids;
std::condition_variable_any m_db_in_use; std::condition_variable_any m_db_in_use;
@@ -67,8 +66,6 @@ public:
bool IsDatabaseLoaded(const std::string& db_filename) const { return m_databases.find(db_filename) != m_databases.end(); } bool IsDatabaseLoaded(const std::string& db_filename) const { return m_databases.find(db_filename) != m_databases.end(); }
fs::path Directory() const { return strPath; } fs::path Directory() const { return strPath; }
bool Verify(const std::string& strFile);
bool Open(bilingual_str& error); bool Open(bilingual_str& error);
void Close(); void Close();
void Flush(bool fShutdown); void Flush(bool fShutdown);
@@ -100,7 +97,6 @@ class BerkeleyBatch;
**/ **/
class BerkeleyDatabase : public WalletDatabase class BerkeleyDatabase : public WalletDatabase
{ {
friend class BerkeleyBatch;
public: public:
/** Create dummy DB handle */ /** Create dummy DB handle */
BerkeleyDatabase() : WalletDatabase(), env(nullptr) BerkeleyDatabase() : WalletDatabase(), env(nullptr)
@@ -166,11 +162,12 @@ public:
/** Database pointer. This is initialized lazily and reset during flushes, so it can be null. */ /** Database pointer. This is initialized lazily and reset during flushes, so it can be null. */
std::unique_ptr<Db> m_db; std::unique_ptr<Db> m_db;
std::string strFile;
/** Make a BerkeleyBatch connected to this database */ /** Make a BerkeleyBatch connected to this database */
std::unique_ptr<DatabaseBatch> MakeBatch(const char* mode = "r+", bool flush_on_close = true) override; std::unique_ptr<DatabaseBatch> MakeBatch(const char* mode = "r+", bool flush_on_close = true) override;
private: private:
std::string strFile;
/** Return whether this database handle is a dummy for testing. /** Return whether this database handle is a dummy for testing.
* Only to be used at a low level, application should ideally not care * Only to be used at a low level, application should ideally not care
@@ -220,7 +217,7 @@ protected:
public: public:
explicit BerkeleyBatch(BerkeleyDatabase& database, const char* pszMode = "r+", bool fFlushOnCloseIn=true); explicit BerkeleyBatch(BerkeleyDatabase& database, const char* pszMode = "r+", bool fFlushOnCloseIn=true);
~BerkeleyBatch() override { Close(); } ~BerkeleyBatch() override;
BerkeleyBatch(const BerkeleyBatch&) = delete; BerkeleyBatch(const BerkeleyBatch&) = delete;
BerkeleyBatch& operator=(const BerkeleyBatch&) = delete; BerkeleyBatch& operator=(const BerkeleyBatch&) = delete;

View File

@@ -120,7 +120,7 @@ class MultiWalletTest(BitcoinTestFramework):
# should not initialize if one wallet is a copy of another # should not initialize if one wallet is a copy of another
shutil.copyfile(wallet_dir('w8'), wallet_dir('w8_copy')) shutil.copyfile(wallet_dir('w8'), wallet_dir('w8_copy'))
exp_stderr = r"BerkeleyBatch: Can't open database w8_copy \(duplicates fileid \w+ from w8\)" exp_stderr = r"BerkeleyDatabase: Can't open database w8_copy \(duplicates fileid \w+ from w8\)"
self.nodes[0].assert_start_raises_init_error(['-wallet=w8', '-wallet=w8_copy'], exp_stderr, match=ErrorMatch.PARTIAL_REGEX) self.nodes[0].assert_start_raises_init_error(['-wallet=w8', '-wallet=w8_copy'], exp_stderr, match=ErrorMatch.PARTIAL_REGEX)
# should not initialize if wallet file is a symlink # should not initialize if wallet file is a symlink
@@ -258,10 +258,10 @@ class MultiWalletTest(BitcoinTestFramework):
assert_raises_rpc_error(-4, "Wallet file verification failed. Error loading wallet wallet.dat. Duplicate -wallet filename specified.", self.nodes[0].loadwallet, 'wallet.dat') assert_raises_rpc_error(-4, "Wallet file verification failed. Error loading wallet wallet.dat. Duplicate -wallet filename specified.", self.nodes[0].loadwallet, 'wallet.dat')
# Fail to load if one wallet is a copy of another # Fail to load if one wallet is a copy of another
assert_raises_rpc_error(-4, "BerkeleyBatch: Can't open database w8_copy (duplicates fileid", self.nodes[0].loadwallet, 'w8_copy') assert_raises_rpc_error(-4, "BerkeleyDatabase: Can't open database w8_copy (duplicates fileid", self.nodes[0].loadwallet, 'w8_copy')
# Fail to load if one wallet is a copy of another, test this twice to make sure that we don't re-introduce #14304 # Fail to load if one wallet is a copy of another, test this twice to make sure that we don't re-introduce #14304
assert_raises_rpc_error(-4, "BerkeleyBatch: Can't open database w8_copy (duplicates fileid", self.nodes[0].loadwallet, 'w8_copy') assert_raises_rpc_error(-4, "BerkeleyDatabase: Can't open database w8_copy (duplicates fileid", self.nodes[0].loadwallet, 'w8_copy')
# Fail to load if wallet file is a symlink # Fail to load if wallet file is a symlink