Skip to content

Commit 0a7ce40

Browse files
authored
wait for rqlite to be synced when enabling HA (#2067)
* wait for rqlite to be synced when enabling HA * maybe fix integration test * add timestamps to HA kind test logs, add logs to HA migration * log ss replicas * f * remove rqlite sync check from go code * use helm chart that waits for rqlite sync state * use staging chart * use real chart version * t.Logf
1 parent c215b5b commit 0a7ce40

File tree

8 files changed

+97
-23
lines changed

8 files changed

+97
-23
lines changed

cmd/buildtools/adminconsole.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ var updateAdminConsoleAddonCommand = &cli.Command{
4444
defer hcli.Close()
4545

4646
logrus.Infof("getting admin console latest tag")
47-
latest, err := GetLatestGitHubTag(c.Context, "replicatedhq", "kots-helm")
47+
latest, err := GetLatestKotsHelmTag(c.Context)
4848
if err != nil {
4949
return fmt.Errorf("failed to get admin console latest tag: %w", err)
5050
}

cmd/buildtools/utils.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,37 @@ func GetLatestGitHubTag(ctx context.Context, owner, repo string) (string, error)
211211
return tags[0].GetName(), nil
212212
}
213213

214+
// GetLatestKotsHelmTag returns the correct tag from the kots-helm repository.
215+
// this is not quite the same as the latest tag from the kots-helm repository, as github
216+
// will list "v1.124.12" as being newer than "v1.124.12-build.0" and it is not in our usage.
217+
func GetLatestKotsHelmTag(ctx context.Context) (string, error) {
218+
client := github.NewClient(nil)
219+
tags, _, err := client.Repositories.ListTags(ctx, "replicatedhq", "kots-helm", &github.ListOptions{PerPage: 100})
220+
if err != nil {
221+
return "", fmt.Errorf("list tags: %w", err)
222+
}
223+
if len(tags) == 0 {
224+
return "", fmt.Errorf("no tags found")
225+
}
226+
latestTag := tags[0].GetName()
227+
logrus.Infof("latest tag: %s", latestTag)
228+
229+
// check to see if there is a 'build.x' tag - if so, return that
230+
for _, tag := range tags {
231+
logrus.Infof("checkingtag: %s", tag.GetName())
232+
if !strings.HasPrefix(tag.GetName(), latestTag) {
233+
// tags are sorted, so once we find a tag that doesn't have the same prefix, we can break
234+
logrus.Infof("tag does not have same prefix: %s", tag.GetName())
235+
break
236+
}
237+
if strings.Contains(tag.GetName(), "-build.") {
238+
logrus.Infof("tag is a build tag, returning: %s", tag.GetName())
239+
return tag.GetName(), nil
240+
}
241+
}
242+
return latestTag, nil
243+
}
244+
214245
// GetGreatestGitHubTag returns the greatest non-prerelease semver tag from a GitHub repository
215246
// that matches the provided constraints.
216247
func GetGreatestGitHubTag(ctx context.Context, owner, repo string, constrants *semver.Constraints) (string, error) {

pkg/addons/adminconsole/static/metadata.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# $ make buildtools
66
# $ output/bin/buildtools update addon <addon name>
77
#
8-
version: 1.124.12
8+
version: 1.124.12-build.0
99
location: oci://proxy.replicated.com/anonymous/registry.replicated.com/library/admin-console
1010
images:
1111
kotsadm:

pkg/addons/highavailability.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@ package addons
33
import (
44
"context"
55
"fmt"
6-
"strings"
7-
86
"github.com/pkg/errors"
97
ecv1beta1 "github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1"
108
"github.com/replicatedhq/embedded-cluster/pkg/addons/adminconsole"
@@ -23,6 +21,7 @@ import (
2321
"k8s.io/client-go/kubernetes"
2422
"k8s.io/utils/ptr"
2523
"sigs.k8s.io/controller-runtime/pkg/client"
24+
"strings"
2625
)
2726

2827
// CanEnableHA checks if high availability can be enabled in the cluster.
@@ -254,6 +253,10 @@ func EnableAdminConsoleHA(ctx context.Context, kcli client.Client, hcli helm.Cli
254253
return errors.Wrap(err, "upgrade admin console")
255254
}
256255

256+
if err := kubeutils.WaitForStatefulset(ctx, kcli, runtimeconfig.KotsadmNamespace, "kotsadm-rqlite", nil); err != nil {
257+
return errors.Wrap(err, "wait for rqlite to be ready")
258+
}
259+
257260
return nil
258261
}
259262

pkg/dryrun/kubeutils.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ func (k *KubeUtils) WaitForDeployment(ctx context.Context, cli client.Client, ns
2222
return nil
2323
}
2424

25+
func (k *KubeUtils) WaitForStatefulset(ctx context.Context, cli client.Client, ns, name string, opts *kubeutils.WaitOptions) error {
26+
return nil
27+
}
28+
2529
func (k *KubeUtils) WaitForDaemonset(ctx context.Context, cli client.Client, ns, name string, opts *kubeutils.WaitOptions) error {
2630
return nil
2731
}

pkg/kubeutils/interface.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ func Set(_kb KubeUtilsInterface) {
2323
type KubeUtilsInterface interface {
2424
WaitForNamespace(ctx context.Context, cli client.Client, ns string, opts *WaitOptions) error
2525
WaitForDeployment(ctx context.Context, cli client.Client, ns, name string, opts *WaitOptions) error
26+
WaitForStatefulset(ctx context.Context, cli client.Client, ns, name string, opts *WaitOptions) error
2627
WaitForDaemonset(ctx context.Context, cli client.Client, ns, name string, opts *WaitOptions) error
2728
WaitForService(ctx context.Context, cli client.Client, ns, name string, opts *WaitOptions) error
2829
WaitForJob(ctx context.Context, cli client.Client, ns, name string, completions int32, opts *WaitOptions) error
@@ -62,6 +63,10 @@ func WaitForDeployment(ctx context.Context, cli client.Client, ns, name string,
6263
return kb.WaitForDeployment(ctx, cli, ns, name, opts)
6364
}
6465

66+
func WaitForStatefulset(ctx context.Context, cli client.Client, ns, name string, opts *WaitOptions) error {
67+
return kb.WaitForStatefulset(ctx, cli, ns, name, opts)
68+
}
69+
6570
func WaitForDaemonset(ctx context.Context, cli client.Client, ns, name string, opts *WaitOptions) error {
6671
return kb.WaitForDaemonset(ctx, cli, ns, name, opts)
6772
}

pkg/kubeutils/kubeutils.go

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,34 @@ func (k *KubeUtils) WaitForDeployment(ctx context.Context, cli client.Client, ns
7575
return nil
7676
}
7777

78+
// WaitForStatefulset waits for the provided statefulset to be ready.
79+
func (k *KubeUtils) WaitForStatefulset(ctx context.Context, cli client.Client, ns, name string, opts *WaitOptions) error {
80+
backoff := opts.GetBackoff()
81+
var lasterr error
82+
if err := wait.ExponentialBackoffWithContext(
83+
ctx, backoff, func(ctx context.Context) (bool, error) {
84+
ready, err := k.IsStatefulSetReady(ctx, cli, ns, name)
85+
if err != nil {
86+
lasterr = fmt.Errorf("unable to get statefulset %s status: %v", name, err)
87+
return false, nil
88+
}
89+
return ready, nil
90+
},
91+
); err != nil {
92+
if errors.Is(err, context.Canceled) {
93+
if lasterr != nil {
94+
err = errors.Join(err, lasterr)
95+
}
96+
return err
97+
} else if lasterr != nil {
98+
return fmt.Errorf("timed out waiting for %s to statefulset: %w", name, lasterr)
99+
} else {
100+
return fmt.Errorf("timed out waiting for %s to statefulset", name)
101+
}
102+
}
103+
return nil
104+
}
105+
78106
// WaitForDaemonset waits for the provided daemonset to be ready.
79107
func (k *KubeUtils) WaitForDaemonset(ctx context.Context, cli client.Client, ns, name string, opts *WaitOptions) error {
80108
backoff := opts.GetBackoff()

tests/integration/kind/registry/ha_test.go

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -55,29 +55,29 @@ func TestRegistry_EnableHAAirgap(t *testing.T) {
5555
kclient := util.KubeClient(t, kubeconfig)
5656
hcli := util.HelmClient(t, kubeconfig)
5757

58-
t.Log("installing openebs")
58+
t.Logf("%s installing openebs", formattedTime())
5959
addon := &openebs.OpenEBS{
6060
ProxyRegistryDomain: "proxy.replicated.com",
6161
}
6262
if err := addon.Install(ctx, kcli, hcli, nil, nil); err != nil {
6363
t.Fatalf("failed to install openebs: %v", err)
6464
}
6565

66-
t.Log("waiting for storageclass")
66+
t.Logf("%s waiting for storageclass", formattedTime())
6767
util.WaitForStorageClass(t, kubeconfig, "openebs-hostpath", 30*time.Second)
6868

69-
t.Log("installing registry")
69+
t.Logf("%s installing registry", formattedTime())
7070
registryAddon := &registry.Registry{
7171
ServiceCIDR: "10.96.0.0/12",
7272
ProxyRegistryDomain: "proxy.replicated.com",
7373
IsHA: false,
7474
}
7575
require.NoError(t, registryAddon.Install(ctx, kcli, hcli, nil, nil))
7676

77-
t.Log("creating hostport service")
77+
t.Logf("%s creating hostport service", formattedTime())
7878
registryAddr := createHostPortService(t, clusterName, kubeconfig)
7979

80-
t.Log("installing admin console")
80+
t.Logf("%s installing admin console", formattedTime())
8181
adminConsoleAddon := &adminconsole.AdminConsole{
8282
IsAirgap: true,
8383
ServiceCIDR: "10.96.0.0/12",
@@ -86,13 +86,13 @@ func TestRegistry_EnableHAAirgap(t *testing.T) {
8686
}
8787
require.NoError(t, adminConsoleAddon.Install(ctx, kcli, hcli, nil, nil))
8888

89-
t.Log("pushing image to registry")
89+
t.Logf("%s pushing image to registry", formattedTime())
9090
copyImageToRegistry(t, registryAddr, "docker.io/library/busybox:1.36.1")
9191

92-
t.Log("running pod to validate image pull")
92+
t.Logf("%s running pod to validate image pull", formattedTime())
9393
runPodAndValidateImagePull(t, kubeconfig, "pod-1", "pod1.yaml")
9494

95-
t.Log("creating installation with HA disabled")
95+
t.Logf("%s creating installation with HA disabled", formattedTime())
9696
util.EnsureInstallation(t, kcli, ecv1beta1.InstallationSpec{
9797
HighAvailability: false,
9898
})
@@ -119,21 +119,21 @@ func TestRegistry_EnableHAAirgap(t *testing.T) {
119119
require.NoError(t, err)
120120
require.True(t, canEnable, "should be able to enable HA: %s", reason)
121121

122-
t.Log("enabling HA")
122+
t.Logf("%s enabling HA", formattedTime())
123123
loading := newTestingSpinner(t)
124124
func() {
125125
defer loading.Close()
126126
err = addons.EnableHA(ctx, kcli, kclient, hcli, true, "10.96.0.0/12", nil, cfgSpec, loading)
127127
require.NoError(t, err)
128128
}()
129129

130-
t.Log("pushing a second image to registry")
130+
t.Logf("%s pushing a second image to registry", formattedTime())
131131
copyImageToRegistry(t, registryAddr, "docker.io/library/busybox:1.37.0")
132132

133-
t.Log("running pod to validate image pull")
133+
t.Logf("%s running pod to validate image pull", formattedTime())
134134
runPodAndValidateImagePull(t, kubeconfig, "pod-1", "pod1.yaml")
135135

136-
t.Log("running second pod to validate image pull")
136+
t.Logf("%s running second pod to validate image pull", formattedTime())
137137
runPodAndValidateImagePull(t, kubeconfig, "pod-2", "pod2.yaml")
138138
}
139139

@@ -175,7 +175,7 @@ func enableHAAndCancelContextOnMessage(
175175
defer pr.Close()
176176
got := waitForMatchingMessage(t, pr, re)
177177
if got {
178-
t.Log("cancelling context")
178+
t.Logf("%s cancelling context", formattedTime())
179179
cancel()
180180
}
181181
io.Copy(io.Discard, pr) // discard the rest of the output
@@ -184,19 +184,18 @@ func enableHAAndCancelContextOnMessage(
184184
loading := newTestingSpinner(t)
185185
defer loading.Close()
186186

187-
t.Log("enabling HA and cancelling context on message")
187+
t.Logf("%s enabling HA and cancelling context on message", formattedTime())
188188
err = addons.EnableHA(ctx, kcli, kclient, hcli, true, "10.96.0.0/12", nil, cfgSpec, loading)
189189
require.ErrorIs(t, err, context.Canceled, "expected context to be cancelled")
190-
t.Logf("cancelled context and got error: %v", err)
190+
t.Logf("%s cancelled context and got error: %v", formattedTime(), err)
191191
}
192192

193193
func waitForMatchingMessage(t *testing.T, r io.Reader, re *regexp.Regexp) bool {
194194
scanner := bufio.NewScanner(r)
195195
for scanner.Scan() {
196196
b := scanner.Bytes()
197-
// t.Logf("got message: %s", string(b))
198197
if re.Match(b) {
199-
t.Logf("got matching message: %s", string(b))
198+
t.Logf("%s got matching message: %s", formattedTime(), string(b))
200199
return true
201200
}
202201
}
@@ -212,7 +211,7 @@ func buildOperatorImage(t *testing.T) string {
212211
operatorDir := filepath.Join(workspaceRoot, "operator")
213212

214213
if os.Getenv("SKIP_OPERATOR_IMAGE_BUILD") == "" {
215-
t.Log("building operator image")
214+
t.Logf("%s building operator image", formattedTime())
216215

217216
cmd := exec.CommandContext(
218217
t.Context(), "make", "-C", operatorDir, "build-ttl.sh", "USE_CHAINGUARD=0",
@@ -253,7 +252,7 @@ func newTestingSpinner(t *testing.T) *spinner.MessageWriter {
253252
spinner.WithWriter(func(format string, args ...any) (int, error) {
254253
// discard the output
255254
out := fmt.Sprintf(format, args...)
256-
t.Log("[spinner]", strings.TrimSpace(out))
255+
t.Logf("%s [spinner] %s", formattedTime(), strings.TrimSpace(out))
257256
return len(out), nil
258257
}),
259258
spinner.WithTTY(false),
@@ -272,3 +271,7 @@ func (h *logrusHook) Fire(entry *logrus.Entry) error {
272271
h.writer.Write([]byte(entry.Message + "\n"))
273272
return nil
274273
}
274+
275+
func formattedTime() string {
276+
return time.Now().Format("2006-01-02 15:04:05")
277+
}

0 commit comments

Comments
 (0)