multi: add leader check to the healthcheck monitor

This commit extends our healthcheck with an optional leader check. This
is to ensure that, given a network partition or other cluster-wide failure,
we act as soon as possible to avoid a split-brain situation where a new
leader is elected but we still hold onto our etcd client.
This commit is contained in:
Andras Banki-Horvath
2024-07-25 18:21:47 +02:00
parent 7784d6abf6
commit 8e0534f756
8 changed files with 121 additions and 12 deletions

View File

@@ -99,9 +99,27 @@ func (e *etcdLeaderElector) Leader(ctx context.Context) (string, error) {
return "", err
}
if resp == nil || len(resp.Kvs) == 0 {
return "", nil
}
return string(resp.Kvs[0].Value), nil
}
// IsLeader reports whether the current leader value of the election matches
// this elector's id. Any error from the leader query is returned as-is. When
// the query yields no response or no key-value entries, it reports false with
// a nil error.
func (e *etcdLeaderElector) IsLeader(ctx context.Context) (bool, error) {
	leaderResp, err := e.election.Leader(ctx)
	switch {
	case err != nil:
		return false, err

	// Nothing to compare against: treat as "not the leader".
	case leaderResp == nil || len(leaderResp.Kvs) == 0:
		return false, nil
	}

	// The first key-value entry carries the leader value.
	current := string(leaderResp.Kvs[0].Value)

	return current == e.id, nil
}
// Campaign will start a new leader election campaign. Campaign will block until
// the elector context is canceled or the caller is elected as the leader.
func (e *etcdLeaderElector) Campaign(ctx context.Context) error {
@@ -110,6 +128,6 @@ func (e *etcdLeaderElector) Campaign(ctx context.Context) error {
// Resign resigns the leader role allowing other election members to take
// the place.
func (e *etcdLeaderElector) Resign() error {
return e.election.Resign(context.Background())
func (e *etcdLeaderElector) Resign(ctx context.Context) error {
return e.election.Resign(ctx)
}

View File

@@ -87,12 +87,12 @@ func TestEtcdElector(t *testing.T) {
tmp := <-ch
first, err := tmp.Leader(ctxb)
require.NoError(t, err)
require.NoError(t, tmp.Resign())
require.NoError(t, tmp.Resign(ctxb))
tmp = <-ch
second, err := tmp.Leader(ctxb)
require.NoError(t, err)
require.NoError(t, tmp.Resign())
require.NoError(t, tmp.Resign(ctxb))
require.Contains(t, []string{id1, id2}, first)
require.Contains(t, []string{id1, id2}, second)

View File

@@ -19,8 +19,11 @@ type LeaderElector interface {
// Resign resigns from the leader role, allowing other election members
// to take on leadership.
Resign() error
Resign(ctx context.Context) error
// Leader returns the leader value for the current election.
Leader(ctx context.Context) (string, error)
// IsLeader returns true if the caller is the leader.
IsLeader(ctx context.Context) (bool, error)
}