lntest: properly handle shutdown error

This commit removes the panic used in checking the shutdown log.
Instead, the error is returned and asserted in `shutdownAllNodes` so
it's easier to check which node failed in which test. We also catch all
the errors returned from the `StopDaemon` call to properly assess the
shutdown behavior.
Author: yyforyongyu
Date:   2024-12-05 10:25:23 +08:00
Parent: 73574d919d
Commit: 31b66962d8

3 changed files with 58 additions and 51 deletions
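`shutdownAllNodes` itself is not part of this diff, but the message above describes the calling pattern: collect the error from each node's `Stop` and assert on it with the node's name attached. A minimal sketch of that pattern, assuming a hypothetical harness struct holding a `*testing.T` and its active nodes (lnd's actual `HarnessTest` differs in detail):

    package lntest

    import (
        "testing"

        "github.com/stretchr/testify/require"
    )

    // testHarness is a stand-in for lnd's HarnessTest; only the pieces
    // needed for this sketch are included.
    type testHarness struct {
        t     *testing.T
        nodes []*HarnessNode
    }

    // shutdownAllNodes stops each active node and asserts on the
    // returned error, so a failure reports which node could not shut
    // down instead of panicking deep inside the log check.
    func (h *testHarness) shutdownAllNodes() {
        for _, node := range h.nodes {
            err := node.Stop()
            require.NoErrorf(h.t, err, "unable to stop node %s",
                node.Name())
        }
    }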


@@ -636,12 +636,11 @@ func (hn *HarnessNode) cleanup() error {
 // WaitForProcessExit launches a new goroutine that bubbles up any
 // potential fatal process errors to the goroutine running the tests.
 func (hn *HarnessNode) WaitForProcessExit() error {
-	var err error
+	var errReturned error
 
 	errChan := make(chan error, 1)
 	go func() {
-		err = hn.cmd.Wait()
-		errChan <- err
+		errChan <- hn.cmd.Wait()
 	}()
 
 	select {
@@ -656,24 +655,36 @@ func (hn *HarnessNode) WaitForProcessExit() error {
 			return nil
 		}
 
 		// The process may have already been killed in the test, in
 		// that case we will skip the error and continue processing
 		// the logs.
 		if strings.Contains(err.Error(), "signal: killed") {
 			break
 		}
 
 		// Otherwise, we print the error, break the select and save
 		// logs.
 		hn.printErrf("wait process exit got err: %v", err)
-		break
+
+		errReturned = err
 
 	case <-time.After(wait.DefaultTimeout):
 		hn.printErrf("timeout waiting for process to exit")
 	}
 
 	// Make sure log file is closed and renamed if necessary.
-	finalizeLogfile(hn)
+	filename := finalizeLogfile(hn)
 
-	// Rename the etcd.log file if the node was running on embedded
-	// etcd.
+	// Assert the node has shut down from the log file.
+	err1 := assertNodeShutdown(filename)
+	if err1 != nil {
+		return fmt.Errorf("[%s]: assert shutdown failed in log[%s]: %w",
+			hn.Name(), filename, err1)
+	}
+
+	// Rename the etcd.log file if the node was running on embedded etcd.
 	finalizeEtcdLog(hn)
 
-	return err
+	return errReturned
 }
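One detail worth noting in the hunk above: `errChan` is buffered with capacity one, so the goroutine running `hn.cmd.Wait()` can always deliver its result and exit even when the timeout branch of the select wins. A standalone sketch of the same pattern, with a hypothetical `waitForExit` helper (not lnd code):

    package procwait

    import (
        "fmt"
        "os/exec"
        "time"
    )

    // waitForExit waits for cmd to exit, giving up after timeout. The
    // buffer of one on errChan lets the goroutine send its result and
    // finish even if nobody is receiving anymore, avoiding a leak.
    func waitForExit(cmd *exec.Cmd, timeout time.Duration) error {
        errChan := make(chan error, 1)
        go func() {
            errChan <- cmd.Wait()
        }()

        select {
        case err := <-errChan:
            return err
        case <-time.After(timeout):
            return fmt.Errorf("timeout waiting for process to exit")
        }
    }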
// Stop attempts to stop the active lnd process.
@@ -700,23 +711,21 @@ func (hn *HarnessNode) Stop() error {
 	err := wait.NoError(func() error {
 		_, err := hn.RPC.LN.StopDaemon(ctxt, &req)
-		switch {
-		case err == nil:
-			return nil
-
-		// Try again if a recovery/rescan is in progress.
-		case strings.Contains(
-			err.Error(), "recovery in progress",
-		):
-			return err
-
-		default:
+		if err == nil {
 			return nil
 		}
+
+		// If the connection is already closed, we can exit
+		// early as the node has already been shut down in the
+		// test, e.g., in etcd leader health check test.
+		if strings.Contains(err.Error(), "connection refused") {
+			return nil
+		}
+
+		return err
 	}, wait.DefaultTimeout)
 	if err != nil {
-		return err
+		return fmt.Errorf("shutdown timeout: %w", err)
 	}
 
 	// Wait for goroutines to be finished.
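`wait.NoError` polls the closure until it returns nil or the timeout expires, which is why returning nil for "connection refused" turns an already-stopped node into an immediate success rather than a retry-until-timeout failure. A rough stand-in for that helper, assuming a fixed poll interval (lnd's real `wait` package differs in detail):

    package waitx

    import (
        "fmt"
        "time"
    )

    // noError polls f until it returns nil or the timeout elapses,
    // mirroring the retry behavior Stop relies on. The 200ms interval
    // is an assumption for illustration.
    func noError(f func() error, timeout time.Duration) error {
        deadline := time.After(timeout)
        ticker := time.NewTicker(200 * time.Millisecond)
        defer ticker.Stop()

        for {
            err := f()
            if err == nil {
                return nil
            }

            select {
            case <-deadline:
                return fmt.Errorf("timeout: %w", err)
            case <-ticker.C:
            }
        }
    }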
@@ -724,6 +733,7 @@ func (hn *HarnessNode) Stop() error {
 	go func() {
 		hn.Watcher.wg.Wait()
 		close(done)
+		hn.Watcher = nil
 	}()
 
 	// If the goroutines fail to finish before timeout, we'll print
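The hunk above sits inside the same timeout idiom applied to a `sync.WaitGroup`, which has no deadline of its own: a helper goroutine closes `done` once `Wait` returns, and the caller selects between `done` and a timer. A generic sketch of that pattern (names are illustrative):

    package waitx

    import (
        "sync"
        "time"
    )

    // wgWait reports whether wg finished before the timeout. Closing
    // done after Wait returns lets the caller select between completion
    // and a deadline. Note: if the timeout fires first, the helper
    // goroutine lingers until Wait returns, just as in the original.
    func wgWait(wg *sync.WaitGroup, timeout time.Duration) bool {
        done := make(chan struct{})
        go func() {
            wg.Wait()
            close(done)
        }()

        select {
        case <-done:
            return true
        case <-time.After(timeout):
            return false
        }
    }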
@@ -966,31 +976,23 @@ func getFinalizedLogFilePrefix(hn *HarnessNode) string {
 // finalizeLogfile makes sure the log file cleanup function is initialized,
 // even if no log file is created.
-func finalizeLogfile(hn *HarnessNode) {
+func finalizeLogfile(hn *HarnessNode) string {
 	// Exit early if there's no log file.
 	if hn.logFile == nil {
-		return
+		return ""
 	}
 
 	hn.logFile.Close()
 
 	// If logoutput flag is not set, return early.
 	if !*logOutput {
-		return
+		return ""
 	}
 
-	newFileName := fmt.Sprintf("%v.log",
-		getFinalizedLogFilePrefix(hn),
-	)
+	newFileName := fmt.Sprintf("%v.log", getFinalizedLogFilePrefix(hn))
 	renameFile(hn.filename, newFileName)
 
-	// Assert the node has shut down from the log file.
-	err := assertNodeShutdown(newFileName)
-	if err != nil {
-		err := fmt.Errorf("[%s]: assert shutdown failed in log[%s]: %w",
-			hn.Name(), newFileName, err)
-
-		panic(err)
-	}
+	return newFileName
 }
 
 // assertNodeShutdown asserts that the node has shut down properly by checking