Merge pull request #5637 from yyforyongyu/fix-bitcoind-backup

itest: fix restore backup file test flake for bitcoind backend
This commit is contained in:
Olaoluwa Osuntokun 2021-09-13 20:36:06 -07:00 committed by GitHub
commit 3608d36cca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 164 additions and 72 deletions

View File

@ -70,6 +70,10 @@ proposed channel type is used.
* [Delete a specific payment, or its failed HTLCs](https://github.com/lightningnetwork/lnd/pull/5660).
* A new state, [`WalletState_SERVER_ACTIVE`](https://github.com/lightningnetwork/lnd/pull/5637),
is added to the state server. This state indicates whether the `lnd` server
and all its subservers have been fully started or not.
### Batched channel funding
[Multiple channels can now be opened in a single
@ -94,6 +98,16 @@ documentation](../psbt.md#use-the-batchopenchannel-rpc-for-safe-batch-channel-fu
* [Publish transaction is now reachable through
lncli](https://github.com/lightningnetwork/lnd/pull/5460).
* Prior to this release, when running on `simnet` or `regtest`, `lnd` would
skip the wallet synchronization check during its startup. As a result, the
integration tests could bypass the rule set by `bitcoind`, which considers the
node out of sync when the last block is older than 2 hours ([more
discussion](https://github.com/lightningnetwork/lnd/pull/4685#discussion_r503080709)).
This synchronization check has now been restored, as we want the integration
tests to be more robust in catching real-world situations. This also means it
might take longer to start an `lnd` node when running in `simnet` or `regtest`,
something developers need to watch out for starting with this release.
## Security
### Admin macaroon permissions
@ -209,6 +223,8 @@ you.
* [Integration tests save embedded etcd logs to help debugging flakes](https://github.com/lightningnetwork/lnd/pull/5702)
* [Fixed restore backup file test flake with bitcoind](https://github.com/lightningnetwork/lnd/pull/5637).
## Database
* [Ensure single writer for legacy

95
lnd.go
View File

@ -915,56 +915,52 @@ func Main(cfg *Config, lisCfg ListenerCfg, interceptor signal.Interceptor) error
return err
}
// If we're not in regtest or simnet mode, We'll wait until we're fully
// synced to continue the start up of the remainder of the daemon. This
// ensures that we don't accept any possibly invalid state transitions, or
// accept channels with spent funds.
if !(cfg.Bitcoin.RegTest || cfg.Bitcoin.SimNet ||
cfg.Litecoin.RegTest || cfg.Litecoin.SimNet) {
_, bestHeight, err := activeChainControl.ChainIO.GetBestBlock()
if err != nil {
err := fmt.Errorf("unable to determine chain tip: %v",
err)
ltndLog.Error(err)
return err
}
ltndLog.Infof("Waiting for chain backend to finish sync, "+
"start_height=%v", bestHeight)
for {
if !interceptor.Alive() {
return nil
}
synced, _, err := activeChainControl.Wallet.IsSynced()
if err != nil {
err := fmt.Errorf("unable to determine if "+
"wallet is synced: %v", err)
ltndLog.Error(err)
return err
}
if synced {
break
}
time.Sleep(time.Second * 1)
}
_, bestHeight, err = activeChainControl.ChainIO.GetBestBlock()
if err != nil {
err := fmt.Errorf("unable to determine chain tip: %v",
err)
ltndLog.Error(err)
return err
}
ltndLog.Infof("Chain backend is fully synced (end_height=%v)!",
bestHeight)
// We'll wait until we're fully synced to continue the start up of the
// remainder of the daemon. This ensures that we don't accept any
// possibly invalid state transitions, or accept channels with spent
// funds.
_, bestHeight, err := activeChainControl.ChainIO.GetBestBlock()
if err != nil {
err := fmt.Errorf("unable to determine chain tip: %v",
err)
ltndLog.Error(err)
return err
}
ltndLog.Infof("Waiting for chain backend to finish sync, "+
"start_height=%v", bestHeight)
for {
if !interceptor.Alive() {
return nil
}
synced, _, err := activeChainControl.Wallet.IsSynced()
if err != nil {
err := fmt.Errorf("unable to determine if "+
"wallet is synced: %v", err)
ltndLog.Error(err)
return err
}
if synced {
break
}
time.Sleep(time.Second * 1)
}
_, bestHeight, err = activeChainControl.ChainIO.GetBestBlock()
if err != nil {
err := fmt.Errorf("unable to determine chain tip: %v",
err)
ltndLog.Error(err)
return err
}
ltndLog.Infof("Chain backend is fully synced (end_height=%v)!",
bestHeight)
// With all the relevant chains initialized, we can finally start the
// server itself.
if err := server.Start(); err != nil {
@ -974,6 +970,9 @@ func Main(cfg *Config, lisCfg ListenerCfg, interceptor signal.Interceptor) error
}
defer server.Stop()
// We transition the server state to Active, as the server is up.
interceptorChain.SetServerActive()
// Now that the server has started, if the autopilot mode is currently
// active, then we'll start the autopilot agent immediately. It will be
// stopped together with the autopilot service.

View File

@ -23,10 +23,12 @@ const (
type WalletState int32
const (
WalletState_NON_EXISTING WalletState = 0
WalletState_LOCKED WalletState = 1
WalletState_UNLOCKED WalletState = 2
WalletState_RPC_ACTIVE WalletState = 3
WalletState_NON_EXISTING WalletState = 0
WalletState_LOCKED WalletState = 1
WalletState_UNLOCKED WalletState = 2
WalletState_RPC_ACTIVE WalletState = 3
// SERVER_ACTIVE means that the lnd server is ready to accept calls.
WalletState_SERVER_ACTIVE WalletState = 4
WalletState_WAITING_TO_START WalletState = 255
)
@ -37,6 +39,7 @@ var (
1: "LOCKED",
2: "UNLOCKED",
3: "RPC_ACTIVE",
4: "SERVER_ACTIVE",
255: "WAITING_TO_START",
}
WalletState_value = map[string]int32{
@ -44,6 +47,7 @@ var (
"LOCKED": 1,
"UNLOCKED": 2,
"RPC_ACTIVE": 3,
"SERVER_ACTIVE": 4,
"WAITING_TO_START": 255,
}
)
@ -260,26 +264,27 @@ var file_stateservice_proto_rawDesc = []byte{
0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12,
0x28, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x12,
0x2e, 0x6c, 0x6e, 0x72, 0x70, 0x63, 0x2e, 0x57, 0x61, 0x6c, 0x6c, 0x65, 0x74, 0x53, 0x74, 0x61,
0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2a, 0x60, 0x0a, 0x0b, 0x57, 0x61, 0x6c,
0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x2a, 0x73, 0x0a, 0x0b, 0x57, 0x61, 0x6c,
0x6c, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x10, 0x0a, 0x0c, 0x4e, 0x4f, 0x4e, 0x5f,
0x45, 0x58, 0x49, 0x53, 0x54, 0x49, 0x4e, 0x47, 0x10, 0x00, 0x12, 0x0a, 0x0a, 0x06, 0x4c, 0x4f,
0x43, 0x4b, 0x45, 0x44, 0x10, 0x01, 0x12, 0x0c, 0x0a, 0x08, 0x55, 0x4e, 0x4c, 0x4f, 0x43, 0x4b,
0x45, 0x44, 0x10, 0x02, 0x12, 0x0e, 0x0a, 0x0a, 0x52, 0x50, 0x43, 0x5f, 0x41, 0x43, 0x54, 0x49,
0x56, 0x45, 0x10, 0x03, 0x12, 0x15, 0x0a, 0x10, 0x57, 0x41, 0x49, 0x54, 0x49, 0x4e, 0x47, 0x5f,
0x54, 0x4f, 0x5f, 0x53, 0x54, 0x41, 0x52, 0x54, 0x10, 0xff, 0x01, 0x32, 0x95, 0x01, 0x0a, 0x05,
0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x4f, 0x0a, 0x0e, 0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69,
0x62, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x1c, 0x2e, 0x6c, 0x6e, 0x72, 0x70, 0x63, 0x2e,
0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65,
0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1d, 0x2e, 0x6c, 0x6e, 0x72, 0x70, 0x63, 0x2e, 0x53, 0x75,
0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70,
0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x12, 0x3b, 0x0a, 0x08, 0x47, 0x65, 0x74, 0x53, 0x74, 0x61,
0x74, 0x65, 0x12, 0x16, 0x2e, 0x6c, 0x6e, 0x72, 0x70, 0x63, 0x2e, 0x47, 0x65, 0x74, 0x53, 0x74,
0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x17, 0x2e, 0x6c, 0x6e, 0x72,
0x70, 0x63, 0x2e, 0x47, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f,
0x6e, 0x73, 0x65, 0x42, 0x27, 0x5a, 0x25, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f,
0x6d, 0x2f, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x6e, 0x69, 0x6e, 0x67, 0x6e, 0x65, 0x74, 0x77, 0x6f,
0x72, 0x6b, 0x2f, 0x6c, 0x6e, 0x64, 0x2f, 0x6c, 0x6e, 0x72, 0x70, 0x63, 0x62, 0x06, 0x70, 0x72,
0x6f, 0x74, 0x6f, 0x33,
0x56, 0x45, 0x10, 0x03, 0x12, 0x11, 0x0a, 0x0d, 0x53, 0x45, 0x52, 0x56, 0x45, 0x52, 0x5f, 0x41,
0x43, 0x54, 0x49, 0x56, 0x45, 0x10, 0x04, 0x12, 0x15, 0x0a, 0x10, 0x57, 0x41, 0x49, 0x54, 0x49,
0x4e, 0x47, 0x5f, 0x54, 0x4f, 0x5f, 0x53, 0x54, 0x41, 0x52, 0x54, 0x10, 0xff, 0x01, 0x32, 0x95,
0x01, 0x0a, 0x05, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x4f, 0x0a, 0x0e, 0x53, 0x75, 0x62, 0x73,
0x63, 0x72, 0x69, 0x62, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x1c, 0x2e, 0x6c, 0x6e, 0x72,
0x70, 0x63, 0x2e, 0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x53, 0x74, 0x61, 0x74,
0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1d, 0x2e, 0x6c, 0x6e, 0x72, 0x70, 0x63,
0x2e, 0x53, 0x75, 0x62, 0x73, 0x63, 0x72, 0x69, 0x62, 0x65, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52,
0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x12, 0x3b, 0x0a, 0x08, 0x47, 0x65, 0x74,
0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x16, 0x2e, 0x6c, 0x6e, 0x72, 0x70, 0x63, 0x2e, 0x47, 0x65,
0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x17, 0x2e,
0x6c, 0x6e, 0x72, 0x70, 0x63, 0x2e, 0x47, 0x65, 0x74, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x65,
0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x42, 0x27, 0x5a, 0x25, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62,
0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6c, 0x69, 0x67, 0x68, 0x74, 0x6e, 0x69, 0x6e, 0x67, 0x6e, 0x65,
0x74, 0x77, 0x6f, 0x72, 0x6b, 0x2f, 0x6c, 0x6e, 0x64, 0x2f, 0x6c, 0x6e, 0x72, 0x70, 0x63, 0x62,
0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33,
}
var (

View File

@ -41,6 +41,9 @@ enum WalletState {
UNLOCKED = 2;
RPC_ACTIVE = 3;
// SERVER_ACTIVE means that the lnd server is ready to accept calls.
SERVER_ACTIVE = 4;
WAITING_TO_START = 255;
}

View File

@ -96,9 +96,11 @@
"LOCKED",
"UNLOCKED",
"RPC_ACTIVE",
"SERVER_ACTIVE",
"WAITING_TO_START"
],
"default": "NON_EXISTING"
"default": "NON_EXISTING",
"description": " - SERVER_ACTIVE: SERVER_ACTIVE means that the lnd server is ready to accept calls."
},
"protobufAny": {
"type": "object",

View File

@ -397,6 +397,7 @@ type HarnessNode struct {
WalletKitClient walletrpc.WalletKitClient
Watchtower watchtowerrpc.WatchtowerClient
WatchtowerClient wtclientrpc.WatchtowerClientClient
StateClient lnrpc.StateClient
// backupDbDir is the path where a database backup is stored, if any.
backupDbDir string
@ -940,6 +941,34 @@ func (hn *HarnessNode) Unlock(ctx context.Context,
return hn.initClientWhenReady(DefaultTimeout)
}
// waitTillServerStarted subscribes to the node's state updates through the
// StateClient and blocks until an update carrying the SERVER_ACTIVE state is
// received. The subscription is created with a context bounded by
// NodeStartTimeout. An error is returned if the subscription cannot be created
// or if receiving from the stream fails.
func (hn *HarnessNode) waitTillServerStarted() error {
	ctx, cancel := context.WithTimeout(
		context.Background(), NodeStartTimeout,
	)
	defer cancel()

	stream, err := hn.StateClient.SubscribeState(
		ctx, &lnrpc.SubscribeStateRequest{},
	)
	if err != nil {
		return fmt.Errorf("failed to subscribe to state: %w", err)
	}

	// Keep consuming state updates until the server reports that it is
	// active or the stream returns an error.
	for {
		update, recvErr := stream.Recv()
		switch {
		case recvErr != nil:
			return fmt.Errorf("failed to receive state "+
				"client stream: %w", recvErr)

		case update.State == lnrpc.WalletState_SERVER_ACTIVE:
			return nil
		}
	}
}
// initLightningClient constructs the grpc LightningClient from the given client
// connection and subscribes the harness node to graph topology updates.
// This method also spawns a lightning network watcher for this node,
@ -955,6 +984,12 @@ func (hn *HarnessNode) initLightningClient(conn *grpc.ClientConn) error {
hn.Watchtower = watchtowerrpc.NewWatchtowerClient(conn)
hn.WatchtowerClient = wtclientrpc.NewWatchtowerClientClient(conn)
hn.SignerClient = signrpc.NewSignerClient(conn)
hn.StateClient = lnrpc.NewStateClient(conn)
// Wait until the server is fully started.
if err := hn.waitTillServerStarted(); err != nil {
return err
}
// Set the harness node's pubkey to what the node claims in GetInfo.
// Since the RPC might not be immediately active, we wrap the call in a

View File

@ -24,4 +24,8 @@ const (
// AsyncBenchmarkTimeout is the timeout used when running the async
// payments benchmark.
AsyncBenchmarkTimeout = 2 * time.Minute
// NodeStartTimeout is the timeout value when waiting for a node to
// become fully started.
NodeStartTimeout = time.Second * 60
)

View File

@ -25,4 +25,8 @@ const (
// payments benchmark. This timeout takes considerably longer on darwin
// after go1.12 corrected its use of fsync.
AsyncBenchmarkTimeout = time.Minute * 3
// NodeStartTimeout is the timeout value when waiting for a node to
// become fully started.
NodeStartTimeout = time.Second * 120
)

View File

@ -24,4 +24,8 @@ const (
// AsyncBenchmarkTimeout is the timeout used when running the async
// payments benchmark.
AsyncBenchmarkTimeout = 2 * time.Minute
// NodeStartTimeout is the timeout value when waiting for a node to
// become fully started.
NodeStartTimeout = time.Second * 60
)

View File

@ -44,6 +44,9 @@ const (
// rpcActive means that the RPC server is ready to accept calls.
rpcActive
// serverActive means that the lnd server is ready to accept calls.
serverActive
)
var (
@ -198,6 +201,15 @@ func (r *InterceptorChain) SetRPCActive() {
_ = r.ntfnServer.SendUpdate(r.state)
}
// SetServerActive sets the RPC state to serverActive and sends the updated
// state to the notification server.
func (r *InterceptorChain) SetServerActive() {
// Hold the lock for the whole state change and notification.
r.Lock()
defer r.Unlock()
r.state = serverActive
// The error returned by SendUpdate is deliberately discarded.
_ = r.ntfnServer.SendUpdate(r.state)
}
// rpcStateToWalletState converts rpcState to lnrpc.WalletState. Returns
// WAITING_TO_START and an error on conversion error.
func rpcStateToWalletState(state rpcState) (lnrpc.WalletState, error) {
@ -215,6 +227,8 @@ func rpcStateToWalletState(state rpcState) (lnrpc.WalletState, error) {
walletState = lnrpc.WalletState_UNLOCKED
case rpcActive:
walletState = lnrpc.WalletState_RPC_ACTIVE
case serverActive:
walletState = lnrpc.WalletState_SERVER_ACTIVE
default:
return defaultState, fmt.Errorf("unknown wallet state %v", state)
@ -558,9 +572,9 @@ func (r *InterceptorChain) checkRPCState(srv interface{}) error {
return ErrRPCStarting
// If the RPC is active, we allow calls to any service except the
// WalletUnlocker.
case rpcActive:
// If the RPC server or lnd server is active, we allow calls to any
// service except the WalletUnlocker.
case rpcActive, serverActive:
_, ok := srv.(lnrpc.WalletUnlockerServer)
if ok {
return ErrWalletUnlocked

View File

@ -6622,6 +6622,12 @@ func (r *rpcServer) ExportAllChannelBackups(ctx context.Context,
func (r *rpcServer) RestoreChannelBackups(ctx context.Context,
in *lnrpc.RestoreChanBackupRequest) (*lnrpc.RestoreBackupResponse, error) {
// The server hasn't yet started, so it won't be able to service any of
// our requests, so we'll bail early here.
if !r.server.Started() {
return nil, ErrServerNotActive
}
// First, we'll make our implementation of the
// chanbackup.ChannelRestorer interface which we'll use to properly
// restore either a set of chanbackup.Single or chanbackup.Multi