@@ -388,16 +388,30 @@ func analyzeBackups(backups []*velerov1.Backup, count int) []*AnalyzeResult {
388
388
velerov1 .BackupPhaseFailedValidation : true ,
389
389
}
390
390
391
- for _ , backup := range backups {
391
+ // knownFailureReasons is a map of known failure messages to their resolutions
392
+ knownFailureReasons := map [string ]string {
393
+ "some known error message" : "Resolution for the known error." ,
394
+ }
392
395
396
+ for _ , backup := range backups {
393
397
if failedPhases [backup .Status .Phase ] {
394
398
result := & AnalyzeResult {
395
399
Title : fmt .Sprintf ("Backup %s" , backup .Name ),
396
400
}
401
+
402
+ // Check if the backup has a failure reason and it's in the map
403
+ if backup .Status .FailureReason != "" {
404
+ if resolution , found := knownFailureReasons [backup .Status .FailureReason ]; found {
405
+ result .Message = fmt .Sprintf ("Backup %s phase is %s. Reason: %s. Resolution: %s" , backup .Name , backup .Status .Phase , backup .Status .FailureReason , resolution )
406
+ } else {
407
+ result .Message = fmt .Sprintf ("Backup %s phase is %s. Reason: %s" , backup .Name , backup .Status .Phase , backup .Status .FailureReason )
408
+ }
409
+ } else {
410
+ result .Message = fmt .Sprintf ("Backup %s phase is %s" , backup .Name , backup .Status .Phase )
411
+ }
412
+
397
413
result .IsFail = true
398
- result .Message = fmt .Sprintf ("Backup %s phase is %s" , backup .Name , backup .Status .Phase )
399
414
results = append (results , result )
400
-
401
415
}
402
416
}
403
417
if len (backups ) > 0 {
@@ -471,14 +485,31 @@ func analyzeDeleteBackupRequests(deleteBackupRequests []*velerov1.DeleteBackupRe
471
485
func analyzePodVolumeBackups (podVolumeBackups []* velerov1.PodVolumeBackup ) []* AnalyzeResult {
472
486
results := []* AnalyzeResult {}
473
487
failures := 0
488
+
489
+ // knownFailureMessages is a map of known failure messages to their resolutions
490
+ knownFailureMessages := map [string ]string {
491
+ "example known error message" : "Resolution for the known pod volume backup error." ,
492
+ }
493
+
474
494
if len (podVolumeBackups ) > 0 {
475
495
for _ , podVolumeBackup := range podVolumeBackups {
476
496
if podVolumeBackup .Status .Phase == velerov1 .PodVolumeBackupPhaseFailed {
477
497
result := & AnalyzeResult {
478
498
Title : fmt .Sprintf ("Pod Volume Backup %s" , podVolumeBackup .Name ),
479
499
}
500
+
501
+ // Check if the pod volume backup has a status message and it's in the map
502
+ if podVolumeBackup .Status .Message != "" {
503
+ if resolution , found := knownFailureMessages [podVolumeBackup .Status .Message ]; found {
504
+ result .Message = fmt .Sprintf ("Pod Volume Backup %s phase is %s. Message: %s. Resolution: %s" , podVolumeBackup .Name , podVolumeBackup .Status .Phase , podVolumeBackup .Status .Message , resolution )
505
+ } else {
506
+ result .Message = fmt .Sprintf ("Pod Volume Backup %s phase is %s. Message: %s" , podVolumeBackup .Name , podVolumeBackup .Status .Phase , podVolumeBackup .Status .Message )
507
+ }
508
+ } else {
509
+ result .Message = fmt .Sprintf ("Pod Volume Backup %s phase is %s" , podVolumeBackup .Name , podVolumeBackup .Status .Phase )
510
+ }
511
+
480
512
result .IsFail = true
481
- result .Message = fmt .Sprintf ("Pod Volume Backup %s phase is %s" , podVolumeBackup .Name , podVolumeBackup .Status .Phase )
482
513
results = append (results , result )
483
514
failures ++
484
515
}
@@ -545,17 +576,29 @@ func analyzeRestores(restores []*velerov1.Restore, count int) []*AnalyzeResult {
545
576
velerov1 .RestorePhaseFailedValidation : true ,
546
577
}
547
578
548
- // failureReasons := []string{
549
- // "found a restore with status \"InProgress\" during the server starting, mark it as \"Failed\"",
550
- // }
579
+ // knownFailureReasons maps known restore failure messages to suggested resolutions
580
+ knownFailureReasons := map [string ]string {
581
+ "found a restore with status \"InProgress\" during the server starting, mark it as \"Failed\"" : "The Velero pod exited or restarted while a restore was already in progress, most likely due to running out of memory. Check the resource allocation of the velero pod and increase it or remove the memory limit." ,
582
+ }
551
583
552
584
for _ , restore := range restores {
553
- if failedPhases [restore .Status .Phase ] {
585
+ if failedPhases [restore .Status .Phase ] || restore . Status . FailureReason != "" {
554
586
result := & AnalyzeResult {
555
587
Title : fmt .Sprintf ("Restore %s" , restore .Name ),
556
588
}
589
+
590
+ // Check if the restore has a failure reason and it's in the map
591
+ if restore .Status .FailureReason != "" {
592
+ if resolution , found := knownFailureReasons [restore .Status .FailureReason ]; found {
593
+ result .Message = fmt .Sprintf ("Restore %s reported a FailureReason: %s. Resolution: %s" , restore .Name , restore .Status .FailureReason , resolution )
594
+ } else {
595
+ result .Message = fmt .Sprintf ("Restore %s phase is %s. Reason: %s" , restore .Name , restore .Status .Phase , restore .Status .FailureReason )
596
+ }
597
+ } else {
598
+ result .Message = fmt .Sprintf ("Restore %s phase is %s" , restore .Name , restore .Status .Phase )
599
+ }
600
+
557
601
result .IsFail = true
558
- result .Message = fmt .Sprintf ("Restore %s phase is %s" , restore .Name , restore .Status .Phase )
559
602
results = append (results , result )
560
603
failures ++
561
604
}
0 commit comments