From 23c94ccb99820b685b4e1f514bdf14d028f0d607 Mon Sep 17 00:00:00 2001 From: Andrew Lavery Date: Mon, 7 Apr 2025 13:18:51 -0400 Subject: [PATCH 1/3] add retries to leaveEtcdcluster --- cmd/installer/cli/reset.go | 52 ++++++++++++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/cmd/installer/cli/reset.go b/cmd/installer/cli/reset.go index 740d45222..835d63e39 100644 --- a/cmd/installer/cli/reset.go +++ b/cmd/installer/cli/reset.go @@ -330,25 +330,55 @@ type etcdMembers struct { // leaveEtcdcluster uses k0s to attempt to leave the etcd cluster func (h *hostInfo) leaveEtcdcluster() error { - - // if we're the only etcd member we don't need to leave the cluster - out, err := helpers.RunCommand(k0sBinPath, "etcd", "member-list") - if err != nil { - return err + // Try to list members with retries + var memberlist etcdMembers + var out string + var err error + + // Retry member list up to 3 times + for i := 0; i < 3; i++ { + out, err = helpers.RunCommand(k0sBinPath, "etcd", "member-list") + if err == nil { + err = json.Unmarshal([]byte(out), &memberlist) + if err == nil { + break + } + } + if i < 2 { // Don't sleep on last attempt + time.Sleep(2 * time.Second) + } } - memberlist := etcdMembers{} - err = json.Unmarshal([]byte(out), &memberlist) + if err != nil { - return err + logrus.Warnf("Unable to list etcd members, continuing with reset: %v", err) + return nil } + + // If we're the only member, no need to leave if len(memberlist.Members) == 1 && memberlist.Members[h.Hostname] != "" { return nil } - out, err = helpers.RunCommand(k0sBinPath, "etcd", "leave") - if err != nil { - return fmt.Errorf("unable to leave etcd cluster: %w, %s", err, out) + // Attempt to leave the cluster with retries + for i := 0; i < 3; i++ { + out, err = helpers.RunCommand(k0sBinPath, "etcd", "leave") + if err == nil { + return nil + } + + // Check if the error is due to etcd being stopped + if strings.Contains(err.Error(), "etcdserver: server stopped") { + logrus.Warnf("Etcd server is stopped, continuing with reset") + return nil + } + + if i < 2 { // Don't sleep on last attempt + time.Sleep(2 * time.Second) + } } + + // If we get here, we failed to leave after retries + logrus.Warnf("Unable to leave etcd cluster after retries (this is often normal during reset): %v, %s", err, out) return nil } From f2f1665575100b441187cf2403cc9cca740caf99 Mon Sep 17 00:00:00 2001 From: Andrew Lavery Date: Mon, 7 Apr 2025 15:00:38 -0400 Subject: [PATCH 2/3] remove always-nil error --- cmd/installer/cli/reset.go | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/cmd/installer/cli/reset.go b/cmd/installer/cli/reset.go index 835d63e39..a00029fe5 100644 --- a/cmd/installer/cli/reset.go +++ b/cmd/installer/cli/reset.go @@ -127,11 +127,7 @@ func ResetCmd(ctx context.Context, name string) *cobra.Command { } // try and leave etcd cluster - err = currentHost.leaveEtcdcluster() - if !checkErrPrompt(assumeYes, force, err) { - return err - } - + currentHost.leaveEtcdcluster() } } @@ -329,7 +325,7 @@ type etcdMembers struct { } // leaveEtcdcluster uses k0s to attempt to leave the etcd cluster -func (h *hostInfo) leaveEtcdcluster() error { +func (h *hostInfo) leaveEtcdcluster() { // Try to list members with retries var memberlist etcdMembers var out string @@ -351,25 +347,25 @@ func (h *hostInfo) leaveEtcdcluster() error { if err != nil { logrus.Warnf("Unable to list etcd members, continuing with reset: %v", err) - return nil + return } // If we're the only member, no need to leave if len(memberlist.Members) == 1 && memberlist.Members[h.Hostname] != "" { - return nil + return } // Attempt to leave the cluster with retries for i := 0; i < 3; i++ { out, err = helpers.RunCommand(k0sBinPath, "etcd", "leave") if err == nil { - return nil + return } // Check if the error is due to etcd being stopped if strings.Contains(err.Error(), "etcdserver: server stopped") { logrus.Warnf("Etcd server is stopped, continuing with reset") - return nil + return } if i < 2 { // Don't sleep on last attempt @@ -379,7 +375,7 @@ func (h *hostInfo) leaveEtcdcluster() error { // If we get here, we failed to leave after retries logrus.Warnf("Unable to leave etcd cluster after retries (this is often normal during reset): %v, %s", err, out) - return nil + return } var ( From 7ffa6746a52fa84a252138ab9b675d1ca2fb9020 Mon Sep 17 00:00:00 2001 From: Andrew Lavery Date: Mon, 7 Apr 2025 15:14:02 -0400 Subject: [PATCH 3/3] f --- cmd/installer/cli/reset.go | 1 - 1 file changed, 1 deletion(-) diff --git a/cmd/installer/cli/reset.go b/cmd/installer/cli/reset.go index a00029fe5..487cfddd1 100644 --- a/cmd/installer/cli/reset.go +++ b/cmd/installer/cli/reset.go @@ -375,7 +375,6 @@ func (h *hostInfo) leaveEtcdcluster() { // If we get here, we failed to leave after retries logrus.Warnf("Unable to leave etcd cluster after retries (this is often normal during reset): %v, %s", err, out) - return } var (