Skip to content

Commit 2417b58

Browse files
authored
chore(ec): send upgrade events instead of the operator (#5017)
* chore(ec): send upgrade events instead of the operator * f * f * f * increase preflight timeout * feedback * explain
1 parent 7eacbb7 commit 2417b58

File tree

6 files changed

+232
-8
lines changed

6 files changed

+232
-8
lines changed

e2e/playwright/tests/@smoke-test/test.spec.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ test("smoke test", async ({ page }) => {
2727
);
2828
await page.getByRole("button", { name: "Continue" }).click();
2929
await expect(page.locator("#app")).toContainText("Results", {
30-
timeout: 30000,
30+
timeout: 60 * 1000,
3131
});
3232
await expect(page.locator("#app")).toContainText("Sequence is 0");
3333
await page.getByRole("button", { name: "Deploy" }).click();

pkg/embeddedcluster/metrics.go

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
package embeddedcluster
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"encoding/json"
7+
"fmt"
8+
"net/http"
9+
"time"
10+
11+
embeddedclusterv1beta1 "github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1"
12+
"github.com/replicatedhq/kots/pkg/logger"
13+
)
14+
15+
// UpgradeStartedEvent is sent back home when the upgrade starts.
16+
type UpgradeStartedEvent struct {
17+
ClusterID string `json:"clusterID"`
18+
TargetVersion string `json:"targetVersion"`
19+
InitialVersion string `json:"initialVersion"`
20+
AppVersion string `json:"appVersion"`
21+
}
22+
23+
// UpgradeFailedEvent is sent back home when the upgrade fails.
24+
type UpgradeFailedEvent struct {
25+
ClusterID string `json:"clusterID"`
26+
TargetVersion string `json:"targetVersion"`
27+
InitialVersion string `json:"initialVersion"`
28+
Reason string `json:"reason"`
29+
}
30+
31+
// UpgradeSucceededEvent is sent back home when the upgrade succeeds.
32+
type UpgradeSucceededEvent struct {
33+
ClusterID string `json:"clusterID"`
34+
TargetVersion string `json:"targetVersion"`
35+
InitialVersion string `json:"initialVersion"`
36+
}
37+
38+
// NotifyUpgradeStarted notifies the metrics server that an upgrade has started.
39+
func NotifyUpgradeStarted(ctx context.Context, baseURL string, ins, prev *embeddedclusterv1beta1.Installation, versionLabel string) error {
40+
if ins.Spec.AirGap {
41+
return nil
42+
}
43+
return sendEvent(ctx, "UpgradeStarted", baseURL, UpgradeStartedEvent{
44+
ClusterID: ins.Spec.ClusterID,
45+
TargetVersion: ins.Spec.Config.Version,
46+
InitialVersion: prev.Spec.Config.Version,
47+
AppVersion: versionLabel,
48+
})
49+
}
50+
51+
// NotifyUpgradeFailed notifies the metrics server that an upgrade has failed.
52+
func NotifyUpgradeFailed(ctx context.Context, baseURL string, ins, prev *embeddedclusterv1beta1.Installation, reason string) error {
53+
if ins.Spec.AirGap {
54+
return nil
55+
}
56+
return sendEvent(ctx, "UpgradeFailed", baseURL, UpgradeFailedEvent{
57+
ClusterID: ins.Spec.ClusterID,
58+
TargetVersion: ins.Spec.Config.Version,
59+
InitialVersion: prev.Spec.Config.Version,
60+
Reason: reason,
61+
})
62+
}
63+
64+
// NotifyUpgradeSucceeded notifies the metrics server that an upgrade has succeeded.
65+
func NotifyUpgradeSucceeded(ctx context.Context, baseURL string, ins, prev *embeddedclusterv1beta1.Installation) error {
66+
if ins.Spec.AirGap {
67+
return nil
68+
}
69+
return sendEvent(ctx, "UpgradeSucceeded", baseURL, UpgradeSucceededEvent{
70+
ClusterID: ins.Spec.ClusterID,
71+
TargetVersion: ins.Spec.Config.Version,
72+
InitialVersion: prev.Spec.Config.Version,
73+
})
74+
}
75+
76+
// sendEvent sends the received event to the metrics server through a post request.
77+
func sendEvent(ctx context.Context, evname, baseURL string, ev interface{}) error {
78+
url := fmt.Sprintf("%s/embedded_cluster_metrics/%s", baseURL, evname)
79+
80+
logger.Infof("Sending event %s to %s", evname, url)
81+
82+
body := map[string]interface{}{"event": ev}
83+
buf := bytes.NewBuffer(nil)
84+
if err := json.NewEncoder(buf).Encode(body); err != nil {
85+
return err
86+
}
87+
88+
req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, buf)
89+
if err != nil {
90+
return err
91+
}
92+
req.Header.Set("Content-Type", "application/json")
93+
94+
client := &http.Client{
95+
Timeout: 5 * time.Second,
96+
}
97+
resp, err := client.Do(req)
98+
if err != nil {
99+
return fmt.Errorf("failed to send event: %w", err)
100+
}
101+
defer resp.Body.Close()
102+
103+
if resp.StatusCode != http.StatusOK {
104+
return fmt.Errorf("failed to send event: %s", resp.Status)
105+
}
106+
return nil
107+
}

pkg/embeddedcluster/upgrade.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
dockerregistrytypes "github.com/replicatedhq/kots/pkg/docker/registry/types"
2323
"github.com/replicatedhq/kots/pkg/imageutil"
2424
"github.com/replicatedhq/kots/pkg/k8sutil"
25+
"github.com/replicatedhq/kots/pkg/logger"
2526
registrytypes "github.com/replicatedhq/kots/pkg/registry/types"
2627
"github.com/replicatedhq/kots/pkg/util"
2728
kotsv1beta1 "github.com/replicatedhq/kotskinds/apis/kots/v1beta1"
@@ -75,8 +76,16 @@ func startClusterUpgrade(
7576

7677
log.Printf("Starting cluster upgrade to version %s...", newcfg.Version)
7778

79+
// We cannot notify the upgrade started until the new install is available
80+
if err := NotifyUpgradeStarted(ctx, license.Spec.Endpoint, newInstall, current, versionLabel); err != nil {
81+
logger.Errorf("Failed to notify upgrade started: %v", err)
82+
}
83+
7884
err = runClusterUpgrade(ctx, k8sClient, newInstall, registrySettings, license, versionLabel)
7985
if err != nil {
86+
if err := NotifyUpgradeFailed(ctx, license.Spec.Endpoint, newInstall, current, err.Error()); err != nil {
87+
logger.Errorf("Failed to notify upgrade failed: %v", err)
88+
}
8089
return fmt.Errorf("run cluster upgrade: %w", err)
8190
}
8291

pkg/embeddedcluster/util.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,21 @@ func GetCurrentInstallation(ctx context.Context, kbClient kbclient.Client) (*emb
6868
return &installations[0], nil
6969
}
7070

71+
// GetCurrentInstallation returns the second most recent installation object from the cluster.
72+
func GetPreviousInstallation(ctx context.Context, kbClient kbclient.Client) (*embeddedclusterv1beta1.Installation, error) {
73+
installations, err := ListInstallations(ctx, kbClient)
74+
if err != nil {
75+
return nil, fmt.Errorf("failed to list installations: %w", err)
76+
}
77+
if len(installations) < 2 {
78+
return nil, nil
79+
}
80+
sort.SliceStable(installations, func(i, j int) bool {
81+
return installations[j].Name < installations[i].Name
82+
})
83+
return &installations[1], nil
84+
}
85+
7186
func ListInstallations(ctx context.Context, kbClient kbclient.Client) ([]embeddedclusterv1beta1.Installation, error) {
7287
var installationList embeddedclusterv1beta1.InstallationList
7388
if err := kbClient.List(ctx, &installationList, &kbclient.ListOptions{}); err != nil {

pkg/operator/operator.go

Lines changed: 69 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"time"
1313

1414
"github.com/pkg/errors"
15+
embeddedclusterv1beta1 "github.com/replicatedhq/embedded-cluster/kinds/apis/v1beta1"
1516
downstreamtypes "github.com/replicatedhq/kots/pkg/api/downstream/types"
1617
"github.com/replicatedhq/kots/pkg/app"
1718
apptypes "github.com/replicatedhq/kots/pkg/app/types"
@@ -53,6 +54,7 @@ import (
5354
"k8s.io/client-go/kubernetes"
5455
"k8s.io/client-go/kubernetes/scheme"
5556
"k8s.io/client-go/tools/cache"
57+
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
5658
)
5759

5860
var (
@@ -933,7 +935,7 @@ func (o *Operator) reconcileDeployment(cm *corev1.ConfigMap) (finalError error)
933935
if cm.Data["requires-cluster-upgrade"] == "true" {
934936
// wait for cluster upgrade even if the embedded cluster version doesn't match yet
935937
// in order to continuously report progress to the user
936-
if err := o.waitForClusterUpgrade(cm.Data["app-slug"]); err != nil {
938+
if err := o.waitForClusterUpgrade(cm.Data["app-id"], cm.Data["app-slug"]); err != nil {
937939
return errors.Wrap(err, "failed to wait for cluster upgrade")
938940
}
939941
}
@@ -1035,21 +1037,31 @@ func (o *Operator) reconcileDeployment(cm *corev1.ConfigMap) (finalError error)
10351037
return nil
10361038
}
10371039

1038-
func (o *Operator) waitForClusterUpgrade(appSlug string) error {
1039-
kbClient, err := k8sutil.GetKubeClient(context.Background())
1040+
func (o *Operator) waitForClusterUpgrade(appID string, appSlug string) error {
1041+
ctx := context.Background()
1042+
1043+
kbClient, err := k8sutil.GetKubeClient(ctx)
10401044
if err != nil {
10411045
return errors.Wrap(err, "failed to get kube client")
10421046
}
1043-
logger.Infof("waiting for cluster upgrade to finish")
1047+
logger.Infof("Waiting for cluster upgrade to finish")
10441048
for {
1045-
ins, err := embeddedcluster.GetCurrentInstallation(context.Background(), kbClient)
1049+
ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient)
10461050
if err != nil {
10471051
return errors.Wrap(err, "failed to wait for embedded cluster installation")
10481052
}
1049-
if embeddedcluster.InstallationSucceeded(context.Background(), ins) {
1053+
if embeddedcluster.InstallationSucceeded(ctx, ins) {
1054+
logger.Infof("Cluster upgrade succeeded")
1055+
if err := o.notifyClusterUpgradeSucceeded(ctx, kbClient, ins, appID); err != nil {
1056+
logger.Errorf("Failed to notify upgrade succeeded: %v", err)
1057+
}
10501058
return nil
10511059
}
1052-
if embeddedcluster.InstallationFailed(context.Background(), ins) {
1060+
if embeddedcluster.InstallationFailed(ctx, ins) {
1061+
logger.Infof("Cluster upgrade failed")
1062+
if err := o.notifyClusterUpgradeFailed(ctx, kbClient, ins, appID); err != nil {
1063+
logger.Errorf("Failed to notify upgrade failed: %v", err)
1064+
}
10531065
if err := upgradeservicetask.SetStatusUpgradeFailed(appSlug, ins.Status.Reason); err != nil {
10541066
return errors.Wrap(err, "failed to set task status to failed")
10551067
}
@@ -1061,3 +1073,53 @@ func (o *Operator) waitForClusterUpgrade(appSlug string) error {
10611073
time.Sleep(5 * time.Second)
10621074
}
10631075
}
1076+
1077+
// notifyClusterUpgradeSucceeded sends a metrics event to the api that the upgrade succeeded.
1078+
func (o *Operator) notifyClusterUpgradeSucceeded(ctx context.Context, kbClient kbclient.Client, ins *embeddedclusterv1beta1.Installation, appID string) error {
1079+
if ins.Spec.AirGap {
1080+
return nil
1081+
}
1082+
1083+
license, err := o.store.GetLatestLicenseForApp(appID)
1084+
if err != nil {
1085+
return errors.Wrapf(err, "failed to get latest license for app %s", appID)
1086+
}
1087+
1088+
prev, err := embeddedcluster.GetPreviousInstallation(ctx, kbClient)
1089+
if err != nil {
1090+
return errors.Wrap(err, "failed to get previous installation")
1091+
} else if prev == nil {
1092+
return errors.New("previous installation not found")
1093+
}
1094+
1095+
err = embeddedcluster.NotifyUpgradeSucceeded(ctx, license.Spec.Endpoint, ins, prev)
1096+
if err != nil {
1097+
return errors.Wrap(err, "failed to send event")
1098+
}
1099+
return nil
1100+
}
1101+
1102+
// notifyClusterUpgradeFailed sends a metrics event to the api that the upgrade failed.
1103+
func (o *Operator) notifyClusterUpgradeFailed(ctx context.Context, kbClient kbclient.Client, ins *embeddedclusterv1beta1.Installation, appID string) error {
1104+
if ins.Spec.AirGap {
1105+
return nil
1106+
}
1107+
1108+
license, err := o.store.GetLatestLicenseForApp(appID)
1109+
if err != nil {
1110+
return errors.Wrapf(err, "failed to get latest license for app %s", appID)
1111+
}
1112+
1113+
prev, err := embeddedcluster.GetPreviousInstallation(ctx, kbClient)
1114+
if err != nil {
1115+
return errors.Wrap(err, "failed to get previous installation")
1116+
} else if prev == nil {
1117+
return errors.New("previous installation not found")
1118+
}
1119+
1120+
err = embeddedcluster.NotifyUpgradeFailed(ctx, license.Spec.Endpoint, ins, prev, ins.Status.Reason)
1121+
if err != nil {
1122+
return errors.Wrap(err, "failed to send event")
1123+
}
1124+
return nil
1125+
}

pkg/upgradeservice/deploy/deploy.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
corev1 "k8s.io/api/core/v1"
2424
kuberneteserrors "k8s.io/apimachinery/pkg/api/errors"
2525
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
26+
kbclient "sigs.k8s.io/controller-runtime/pkg/client"
2627
)
2728

2829
type CanDeployOptions struct {
@@ -141,6 +142,11 @@ func Deploy(opts DeployOptions) error {
141142
tgzArchiveKey: tgzArchiveKey,
142143
requiresClusterUpgrade: true,
143144
}); err != nil {
145+
// The operator is responsible for notifying of upgrade success/failure using the deployment.
146+
// If we cannot create the deployment, the operator cannot take over and we need to notify of failure here.
147+
if err := notifyClusterUpgradeFailed(context.Background(), kbClient, opts, finalError.Error()); err != nil {
148+
logger.Errorf("Failed to notify upgrade failed: %v", err)
149+
}
144150
return errors.Wrap(err, "failed to create deployment")
145151
}
146152

@@ -150,6 +156,31 @@ func Deploy(opts DeployOptions) error {
150156
return nil
151157
}
152158

159+
// notifyClusterUpgradeFailed sends a metrics event to the api that the upgrade failed.
160+
func notifyClusterUpgradeFailed(ctx context.Context, kbClient kbclient.Client, opts DeployOptions, reason string) error {
161+
ins, err := embeddedcluster.GetCurrentInstallation(ctx, kbClient)
162+
if err != nil {
163+
return fmt.Errorf("failed to get current installation: %w", err)
164+
}
165+
166+
if ins.Spec.AirGap {
167+
return nil
168+
}
169+
170+
prev, err := embeddedcluster.GetPreviousInstallation(ctx, kbClient)
171+
if err != nil {
172+
return errors.Wrap(err, "failed to get previous installation")
173+
} else if prev == nil {
174+
return errors.New("previous installation not found")
175+
}
176+
177+
err = embeddedcluster.NotifyUpgradeFailed(ctx, opts.KotsKinds.License.Spec.Endpoint, ins, prev, reason)
178+
if err != nil {
179+
return errors.Wrap(err, "failed to send event")
180+
}
181+
return nil
182+
}
183+
153184
type createDeploymentOptions struct {
154185
ctx context.Context
155186
isSkipPreflights bool

0 commit comments

Comments
 (0)