Skip to content

Commit 7484b10

Browse files
authored
feat: [sc-110727] troubleshoot: collector/analyzer for wildcard dns (#1606)
* store DNS collector output as JSON for later analysis * fix incorrect path * make the DNS image configurable * make the non-resolvable domain configurable * nit: update address field * update DNS utils image * add unit test
1 parent f662161 commit 7484b10

9 files changed

+176
-14
lines changed

config/crds/troubleshoot.sh_collectors.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,10 @@ spec:
298298
type: string
299299
exclude:
300300
type: BoolString
301+
image:
302+
type: string
303+
nonResolvable:
304+
type: string
301305
timeout:
302306
type: string
303307
type: object

config/crds/troubleshoot.sh_preflights.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2027,6 +2027,10 @@ spec:
20272027
type: string
20282028
exclude:
20292029
type: BoolString
2030+
image:
2031+
type: string
2032+
nonResolvable:
2033+
type: string
20302034
timeout:
20312035
type: string
20322036
type: object

config/crds/troubleshoot.sh_supportbundles.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2058,6 +2058,10 @@ spec:
20582058
type: string
20592059
exclude:
20602060
type: BoolString
2061+
image:
2062+
type: string
2063+
nonResolvable:
2064+
type: string
20612065
timeout:
20622066
type: string
20632067
type: object

pkg/apis/troubleshoot/v1beta2/collector_shared.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,8 @@ type Sonobuoy struct {
297297
// DNS configures the DNS troubleshooting collector.
type DNS struct {
298298
CollectorMeta `json:",inline" yaml:",inline"`
299299
// Timeout is an optional string setting for the collector.
// NOTE(review): presumably a duration bounding the collection; its use is
// not visible here — confirm against the collector implementation.
Timeout string `json:"timeout,omitempty" yaml:"timeout,omitempty"`
300+
// Image is the container image used for the DNS troubleshooting pod.
// When empty, the collector falls back to its built-in dnsUtilsImage.
Image string `json:"image,omitempty" yaml:"image,omitempty"`
301+
// NonResolvable is the domain name the troubleshooting pod looks up with
// dig in addition to "kubernetes". When empty, the collector falls back to
// its built-in nonResolvableDomain ("non-existent-domain").
NonResolvable string `json:"nonResolvable,omitempty" yaml:"nonResolvable,omitempty"`
300302
}
301303

302304
type Etcd struct {

pkg/collect/dns.go

Lines changed: 103 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
package collect
22

33
import (
4+
"bufio"
45
"bytes"
56
"context"
67
"encoding/json"
78
"fmt"
89
"io"
9-
"path/filepath"
1010
"strings"
1111
"time"
1212

@@ -21,7 +21,8 @@ import (
2121
)
2222

2323
const (
24-
dnsUtilsImage = "registry.k8s.io/e2e-test-images/jessie-dnsutils:1.3"
24+
dnsUtilsImage = "registry.k8s.io/e2e-test-images/agnhost:2.39"
25+
nonResolvableDomain = "non-existent-domain"
2526
)
2627

2728
type CollectDNS struct {
@@ -34,6 +35,25 @@ type CollectDNS struct {
3435
RBACErrors
3536
}
3637

38+
// DNSTroubleshootResult is the structured form of the DNS troubleshooting
// data gathered by the DNS collector. It is marshalled to JSON and saved as
// dns/debug.json in the bundle, alongside the raw text in dns/debug.txt.
39+
type DNSTroubleshootResult struct {
40+
// KubernetesClusterIP is the cluster IP returned by getKubernetesClusterIP;
// it stays empty when that lookup fails.
KubernetesClusterIP string `json:"kubernetesClusterIP"`
41+
// PodResolvConf holds the lines of the "=== /etc/resolv.conf ===" section
// of the troubleshooting pod log, each terminated by a newline.
PodResolvConf string `json:"podResolvConf"`
42+
// Query holds the results of the dig lookups run inside the pod.
Query struct {
43+
// Kubernetes is the lookup of the "kubernetes" name; Address is taken
// from the "=== dig kubernetes ===" section of the pod log.
Kubernetes struct {
44+
Name string `json:"name"`
45+
Address string `json:"address"`
46+
} `json:"kubernetes"`
47+
// NonResolvableDomain is the lookup of the configured test domain.
// Name is set by the collector before the pod runs; Address is taken
// from the "=== dig non-existent-domain ===" section of the pod log.
NonResolvableDomain struct {
48+
Name string `json:"name"`
49+
Address string `json:"address"`
50+
} `json:"nonResolvableDomain"`
51+
} `json:"query"`
52+
// KubeDNSPods is the output of getRunningKubeDNSPodNames split on ", ".
KubeDNSPods []string `json:"kubeDNSPods"`
53+
// KubeDNSService is the value returned by getKubeDNSServiceClusterIP.
KubeDNSService string `json:"kubeDNSService"`
54+
// KubeDNSEndpoints is the ", "-joined endpoint list returned by
// getKubeDNSEndpoints.
KubeDNSEndpoints string `json:"kubeDNSEndpoints"`
55+
}
56+
3757
// Title returns the name of this collector as computed by getCollectorName.
func (c *CollectDNS) Title() string {
3858
return getCollectorName(c)
3959
}
@@ -48,32 +68,57 @@ func (c *CollectDNS) Collect(progressChan chan<- interface{}) (CollectorResult,
4868
defer cancel()
4969

5070
sb := strings.Builder{}
71+
dnsDebug := DNSTroubleshootResult{}
5172

5273
// get kubernetes Cluster IP
5374
clusterIP, err := getKubernetesClusterIP(c.Client, ctx)
5475
if err == nil {
5576
sb.WriteString(fmt.Sprintf("=== Kubernetes Cluster IP from API Server: %s\n", clusterIP))
77+
dnsDebug.KubernetesClusterIP = clusterIP
5678
} else {
5779
sb.WriteString(fmt.Sprintf("=== Failed to detect Kubernetes Cluster IP: %v\n", err))
5880
}
5981

6082
// run a pod and perform DNS lookup
61-
podLog, err := troubleshootDNSFromPod(c.Client, ctx)
83+
testDomain := c.Collector.NonResolvable
84+
if testDomain == "" {
85+
testDomain = nonResolvableDomain
86+
}
87+
dnsDebug.Query.NonResolvableDomain.Name = testDomain
88+
89+
image := c.Collector.Image
90+
if image == "" {
91+
image = dnsUtilsImage
92+
}
93+
94+
podLog, err := troubleshootDNSFromPod(c.Client, ctx, testDomain, image)
6295
if err == nil {
63-
sb.WriteString(fmt.Sprintf("=== Test DNS resolution in pod %s: \n", dnsUtilsImage))
96+
sb.WriteString(fmt.Sprintf("=== Test DNS resolution in pod %s: \n", image))
6497
sb.WriteString(podLog)
6598
} else {
6699
sb.WriteString(fmt.Sprintf("=== Failed to run commands from pod: %v\n", err))
67100
}
68101

102+
// extract DNS queries from pod log
103+
err = extractDNSQueriesFromPodLog(podLog, &dnsDebug)
104+
if err != nil {
105+
sb.WriteString(fmt.Sprintf("=== Failed to extract DNS queries from pod log: %v\n", err))
106+
}
107+
69108
// is DNS pods running?
70-
sb.WriteString(fmt.Sprintf("=== Running kube-dns pods: %s\n", getRunningKubeDNSPodNames(c.Client, ctx)))
109+
kubeDNSPods := getRunningKubeDNSPodNames(c.Client, ctx)
110+
sb.WriteString(fmt.Sprintf("=== Running kube-dns pods: %s\n", kubeDNSPods))
111+
dnsDebug.KubeDNSPods = strings.Split(kubeDNSPods, ", ")
71112

72113
// is DNS service up?
73-
sb.WriteString(fmt.Sprintf("=== Running kube-dns service: %s\n", getKubeDNSServiceClusterIP(c.Client, ctx)))
114+
kubeDNSService := getKubeDNSServiceClusterIP(c.Client, ctx)
115+
sb.WriteString(fmt.Sprintf("=== Running kube-dns service: %s\n", kubeDNSService))
116+
dnsDebug.KubeDNSService = kubeDNSService
74117

75118
// are DNS endpoints exposed?
76-
sb.WriteString(fmt.Sprintf("=== kube-dns endpoints: %s\n", getKubeDNSEndpoints(c.Client, ctx)))
119+
kubeDNSEndpoints := getKubeDNSEndpoints(c.Client, ctx)
120+
sb.WriteString(fmt.Sprintf("=== kube-dns endpoints: %s\n", kubeDNSEndpoints))
121+
dnsDebug.KubeDNSEndpoints = kubeDNSEndpoints
77122

78123
// get DNS server config
79124
coreDNSConfig, err := getCoreDNSConfig(c.Client, ctx)
@@ -89,7 +134,16 @@ func (c *CollectDNS) Collect(progressChan chan<- interface{}) (CollectorResult,
89134

90135
data := sb.String()
91136
output := NewResult()
92-
output.SaveResult(c.BundlePath, filepath.Join("dns", c.Collector.CollectorName), bytes.NewBuffer([]byte(data)))
137+
138+
// save raw debug output
139+
output.SaveResult(c.BundlePath, "dns/debug.txt", bytes.NewBuffer([]byte(data)))
140+
141+
// save structured debug output as JSON file
142+
jsonData, err := json.Marshal(dnsDebug)
143+
if err != nil {
144+
return output, errors.Wrap(err, "failed to marshal DNS troubleshooting data")
145+
}
146+
output.SaveResult(c.BundlePath, "dns/debug.json", bytes.NewBuffer(jsonData))
93147

94148
return output, nil
95149
}
@@ -104,14 +158,17 @@ func getKubernetesClusterIP(client kubernetes.Interface, ctx context.Context) (s
104158
return service.Spec.ClusterIP, nil
105159
}
106160

107-
func troubleshootDNSFromPod(client kubernetes.Interface, ctx context.Context) (string, error) {
161+
func troubleshootDNSFromPod(client kubernetes.Interface, ctx context.Context, nonResolvableDomain string, image string) (string, error) {
108162
namespace := "default"
109-
command := []string{"/bin/sh", "-c", `
110-
set -x
163+
command := []string{"/bin/sh", "-c", fmt.Sprintf(`
164+
echo "=== /etc/resolv.conf ==="
111165
cat /etc/resolv.conf
112-
nslookup -debug kubernetes
166+
echo "=== dig kubernetes ==="
167+
dig +search +short kubernetes
168+
echo "=== dig non-existent-domain ==="
169+
dig +short %s
113170
exit 0
114-
`}
171+
`, nonResolvableDomain)}
115172

116173
// TODO: image pull secret?
117174
podLabels := map[string]string{
@@ -127,7 +184,7 @@ func troubleshootDNSFromPod(client kubernetes.Interface, ctx context.Context) (s
127184
Containers: []corev1.Container{
128185
{
129186
Name: "troubleshoot-dns",
130-
Image: dnsUtilsImage,
187+
Image: image,
131188
Command: command,
132189
},
133190
},
@@ -271,3 +328,35 @@ func getKubeDNSEndpoints(client kubernetes.Interface, ctx context.Context) strin
271328

272329
return strings.Join(endpointStrings, ", ")
273330
}
331+
332+
func extractDNSQueriesFromPodLog(podLog string, dnsDebug *DNSTroubleshootResult) error {
333+
scanner := bufio.NewScanner(strings.NewReader(podLog))
334+
335+
var currentSection string
336+
337+
for scanner.Scan() {
338+
line := scanner.Text()
339+
340+
switch {
341+
case strings.Contains(line, "=== /etc/resolv.conf ==="):
342+
currentSection = "podResolvConf"
343+
case strings.Contains(line, "=== dig kubernetes ==="):
344+
currentSection = "kubernetes"
345+
case strings.Contains(line, "=== dig non-existent-domain ==="):
346+
currentSection = "nonResolvableDomain"
347+
default:
348+
switch currentSection {
349+
case "podResolvConf":
350+
dnsDebug.PodResolvConf += line + "\n"
351+
case "kubernetes":
352+
dnsDebug.Query.Kubernetes.Name = "kubernetes"
353+
dnsDebug.Query.Kubernetes.Address = line
354+
case "nonResolvableDomain":
355+
dnsDebug.Query.NonResolvableDomain.Address = line
356+
}
357+
}
358+
}
359+
360+
return nil
361+
362+
}

pkg/collect/dns_test.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"testing"
66

7+
"github.com/stretchr/testify/assert"
78
corev1 "k8s.io/api/core/v1"
89
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
910
"k8s.io/client-go/kubernetes/fake"
@@ -39,3 +40,43 @@ func TestGetKubernetesClusterIP(t *testing.T) {
3940
t.Errorf("expected %s, got %s", k8sSvcIp, clusterIP)
4041
}
4142
}
43+
44+
func TestExtractDNSQueriesFromPodLog(t *testing.T) {
45+
podLog := `
46+
=== /etc/resolv.conf ===
47+
search default.svc.cluster.local svc.cluster.local cluster.local
48+
nameserver 10.43.0.10
49+
options ndots:5
50+
=== dig kubernetes ===
51+
10.43.0.1
52+
=== dig non-existent-domain ===`
53+
54+
expectedResolvConf := `search default.svc.cluster.local svc.cluster.local cluster.local
55+
nameserver 10.43.0.10
56+
options ndots:5
57+
`
58+
59+
expectedKubernetesQuery := struct {
60+
Name string `json:"name"`
61+
Address string `json:"address"`
62+
}{
63+
Name: "kubernetes",
64+
Address: "10.43.0.1",
65+
}
66+
67+
expectedNonResolvableDomainQuery := struct {
68+
Name string `json:"name"`
69+
Address string `json:"address"`
70+
}{
71+
Name: "",
72+
Address: "",
73+
}
74+
75+
dnsDebug := &DNSTroubleshootResult{}
76+
err := extractDNSQueriesFromPodLog(podLog, dnsDebug)
77+
assert.NoError(t, err)
78+
79+
assert.Equal(t, expectedResolvConf, dnsDebug.PodResolvConf)
80+
assert.Equal(t, expectedKubernetesQuery, dnsDebug.Query.Kubernetes)
81+
assert.Equal(t, expectedNonResolvableDomainQuery, dnsDebug.Query.NonResolvableDomain)
82+
}

schemas/collector-troubleshoot-v1beta2.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,12 @@
399399
"exclude": {
400400
"oneOf": [{"type": "string"},{"type": "boolean"}]
401401
},
402+
"image": {
403+
"type": "string"
404+
},
405+
"nonResolvable": {
406+
"type": "string"
407+
},
402408
"timeout": {
403409
"type": "string"
404410
}

schemas/preflight-troubleshoot-v1beta2.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3063,6 +3063,12 @@
30633063
"exclude": {
30643064
"oneOf": [{"type": "string"},{"type": "boolean"}]
30653065
},
3066+
"image": {
3067+
"type": "string"
3068+
},
3069+
"nonResolvable": {
3070+
"type": "string"
3071+
},
30663072
"timeout": {
30673073
"type": "string"
30683074
}

schemas/supportbundle-troubleshoot-v1beta2.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3109,6 +3109,12 @@
31093109
"exclude": {
31103110
"oneOf": [{"type": "string"},{"type": "boolean"}]
31113111
},
3112+
"image": {
3113+
"type": "string"
3114+
},
3115+
"nonResolvable": {
3116+
"type": "string"
3117+
},
31123118
"timeout": {
31133119
"type": "string"
31143120
}

0 commit comments

Comments
 (0)