discovery: fix race access to syncer's state

This commit fixes the following race:
1. The syncer (state=syncingChans) sends QueryChannelRange.
2. The remote peer replies with ReplyChannelRange.
3. ProcessQueryMsg fails to process the remote peer's msg because the syncer's state is neither waitingQueryChanReply nor waitingQueryRangeReply.
4. The syncer marks its new state as waitingQueryRangeReply, but too late.

The historical sync then fails, and the syncer is stuck in this state. Worse, it can no longer forward channel announcements to other connected peers, because broadcasting is skipped during the initial graph sync.

This is now fixed by making the following two steps atomic:
1. The syncer (state=syncingChans) sends QueryChannelRange.
2. The syncer marks its new state as waitingQueryRangeReply.
2025-08-28 06:32:18 +02:00 · 2025-01-16 22:49:25 +08:00
parent 4b30b09d1c
commit 9fecfed3b5
2 changed files with 20 additions and 4 deletions
--- a/discovery/syncer.go
+++ b/discovery/syncer.go
@@ -486,6 +486,15 @@ func (g *GossipSyncer) handleSyncingChans() {
 		return
 	}

+	// Acquire a lock so the following state transition is atomic.
+	//
+	// NOTE: We must lock the following steps as it's possible we get an
+	// immediate response (ReplyChannelRange) after sending the query msg.
+	// The response is handled in ProcessQueryMsg, which requires the
+	// current state to be waitingQueryRangeReply.
+	g.Lock()
+	defer g.Unlock()
+
 	err = g.cfg.sendToPeer(queryRangeMsg)
 	if err != nil {
 		log.Errorf("Unable to send chan range query: %v", err)
@@ -1517,12 +1526,15 @@ func (g *GossipSyncer) ProcessQueryMsg(msg lnwire.Message, peerQuit <-chan struc
 	// Reply messages should only be expected in states where we're waiting
 	// for a reply.
 	case *lnwire.ReplyChannelRange, *lnwire.ReplyShortChanIDsEnd:
+		g.Lock()
 		syncState := g.syncState()
+		g.Unlock()
+
 		if syncState != waitingQueryRangeReply &&
 			syncState != waitingQueryChanReply {

-			return fmt.Errorf("received unexpected query reply "+
-				"message %T", msg)
+			return fmt.Errorf("unexpected msg %T received in "+
+				"state %v", msg, syncState)
 		}
 		msgChan = g.gossipMsgs