From dc4884bb49a6d1e35388e4777c1c619b6cbcf579 Mon Sep 17 00:00:00 2001 From: David Kydd Date: Thu, 22 Apr 2021 14:01:23 +1200 Subject: [PATCH 01/31] support for collecting from CRDs using default deployment yaml --- deployment/aks-periscope.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/aks-periscope.yaml b/deployment/aks-periscope.yaml index 03ef1167..500b4025 100644 --- a/deployment/aks-periscope.yaml +++ b/deployment/aks-periscope.yaml @@ -14,8 +14,8 @@ kind: ClusterRole metadata: name: aks-periscope-role rules: -- apiGroups: ["","metrics.k8s.io"] - resources: ["pods", "nodes"] +- apiGroups: ["","metrics.k8s.io", "apiextensions.k8s.io"] + resources: ["pods", "nodes", "customresourcedefinitions"] verbs: ["get", "watch", "list"] - apiGroups: ["aks-periscope.azure.github.com"] resources: ["diagnostics"] From f2683a6ca01fea2031a8b202819920f270178858 Mon Sep 17 00:00:00 2001 From: david kydd Date: Thu, 6 May 2021 15:07:20 +1200 Subject: [PATCH 02/31] revert additional resource permissions --- deployment/aks-periscope.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/aks-periscope.yaml b/deployment/aks-periscope.yaml index 500b4025..6abef2d0 100644 --- a/deployment/aks-periscope.yaml +++ b/deployment/aks-periscope.yaml @@ -14,8 +14,8 @@ kind: ClusterRole metadata: name: aks-periscope-role rules: -- apiGroups: ["","metrics.k8s.io", "apiextensions.k8s.io"] - resources: ["pods", "nodes", "customresourcedefinitions"] +- apiGroups: ["","metrics.k8s.io"] + resources: ["pods", "nodes""] verbs: ["get", "watch", "list"] - apiGroups: ["aks-periscope.azure.github.com"] resources: ["diagnostics"] From 9a8e367db516e70bc5f7a25adb7d252ebde5300b Mon Sep 17 00:00:00 2001 From: david kydd Date: Thu, 6 May 2021 15:19:05 +1200 Subject: [PATCH 03/31] expose a feature flag for collectors and diagnosers --- cmd/aks-periscope/aks-periscope.go | 45 ++++++++++++++++++++++-------- deployment/aks-periscope.yaml | 20 +++++++++++++ 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index aa36bb39..00a80576 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -2,6 +2,7 @@ package main import ( "log" + "os" "strings" "sync" @@ -22,25 +23,35 @@ func main() { log.Printf("Failed to create CRD: %+v", err) } - collectors := []interfaces.Collector{} + //create map of all possible collectors by name, discrete vars for each collector as some are diagnoser dependecies + allCollectorsByName := make(map[string]interfaces.Collector) containerLogsCollector := collector.NewContainerLogsCollector(exporter) - collectors = append(collectors, containerLogsCollector) + allCollectorsByName["containerlogs"] = containerLogsCollector systemLogsCollector := collector.NewSystemLogsCollector(exporter) - collectors = append(collectors, systemLogsCollector) + allCollectorsByName["systemlogs"] = systemLogsCollector networkOutboundCollector := collector.NewNetworkOutboundCollector(5, exporter) - collectors = append(collectors, networkOutboundCollector) + allCollectorsByName["networkoutbound"] = networkOutboundCollector ipTablesCollector := collector.NewIPTablesCollector(exporter) - collectors = append(collectors, ipTablesCollector) + allCollectorsByName["iptables"] = ipTablesCollector nodeLogsCollector := collector.NewNodeLogsCollector(exporter) - collectors = append(collectors, nodeLogsCollector) + allCollectorsByName["nodelogs"] = nodeLogsCollector dnsCollector := collector.NewDNSCollector(exporter) - collectors = append(collectors, dnsCollector) + allCollectorsByName["dns"] = dnsCollector kubeObjectsCollector := collector.NewKubeObjectsCollector(exporter) - collectors = append(collectors, kubeObjectsCollector) + allCollectorsByName["kubeobjects"] = kubeObjectsCollector kubeletCmdCollector := collector.NewKubeletCmdCollector(exporter) - collectors = append(collectors, kubeletCmdCollector) + allCollectorsByName["kubeletcmd"] = kubeletCmdCollector systemPerfCollector := collector.NewSystemPerfCollector(exporter) - collectors = append(collectors, systemPerfCollector) + allCollectorsByName["systemperf"] = systemPerfCollector + + //read list of collectors that are enabled + enabledCollectorNames := strings.Fields(os.Getenv("collectors-config")) + + //gather those collectors which are enabled by selecting from allCollectorsByName + collectors := []interfaces.Collector{} + for _, collector := range enabledCollectorNames { + collectors = append(collectors, allCollectorsByName[collector]) + } for _, c := range collectors { waitgroup.Add(1) @@ -62,9 +73,19 @@ func main() { waitgroup.Wait() + //create map of all possible diagnosers by name + allDiagnosersByName := make(map[string]interfaces.Diagnoser) + allDiagnosersByName["networkconfig"] = diagnoser.NewNetworkConfigDiagnoser(dnsCollector, kubeletCmdCollector, exporter) + allDiagnosersByName["networkoutbound"] = diagnoser.NewNetworkOutboundDiagnoser(networkOutboundCollector, exporter) + + //read list of diagnosers that are enabled + enabledDiagnoserNames := strings.Fields(os.Getenv("diagnosers-config")) + + //gather those diagnosers which are enabled by selecting from allDiagnosersByName diagnosers := []interfaces.Diagnoser{} - diagnosers = append(diagnosers, diagnoser.NewNetworkConfigDiagnoser(dnsCollector, kubeletCmdCollector, exporter)) - diagnosers = append(diagnosers, diagnoser.NewNetworkOutboundDiagnoser(networkOutboundCollector, exporter)) + for _, diagnoser := range enabledDiagnoserNames { + diagnosers = append(diagnosers, allDiagnosersByName[diagnoser]) + } for _, d := range diagnosers { waitgroup.Add(1) diff --git a/deployment/aks-periscope.yaml b/deployment/aks-periscope.yaml index 6abef2d0..ffff4127 100644 --- a/deployment/aks-periscope.yaml +++ b/deployment/aks-periscope.yaml @@ -80,6 +80,10 @@ spec: name: kubeobjects-config - configMapRef: name: nodelogs-config + - configMapRef: + name: collectors-config + - configMapRef: + name: diagnosers-config - secretRef: name: azureblob-secret volumeMounts: @@ -132,6 +136,22 @@ metadata: data: DIAGNOSTIC_NODELOGS_LIST: /var/log/azure/cluster-provision.log /var/log/cloud-init.log --- +apiVersion: v1 +kind: ConfigMap +metadata: + name: collectors-config + namespace: aks-periscope +data: + ENABLED_COLLECTORS: dns containerlogs iptables kubeletcmd kubeobjects networkoutbound nodelogs systemlogs systemperf +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: diagnosers-config + namespace: aks-periscope +data: + ENABLED_DIAGNOSERS: networkconfig networkoutbound +--- apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: From 5d344b9292a79e4c307acdacab21f4f8f0277082 Mon Sep 17 00:00:00 2001 From: david kydd Date: Thu, 6 May 2021 15:20:15 +1200 Subject: [PATCH 04/31] typo --- deployment/aks-periscope.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/aks-periscope.yaml b/deployment/aks-periscope.yaml index ffff4127..ba034f97 100644 --- a/deployment/aks-periscope.yaml +++ b/deployment/aks-periscope.yaml @@ -15,7 +15,7 @@ metadata: name: aks-periscope-role rules: - apiGroups: ["","metrics.k8s.io"] - resources: ["pods", "nodes""] + resources: ["pods", "nodes"] verbs: ["get", "watch", "list"] - apiGroups: ["aks-periscope.azure.github.com"] resources: ["diagnostics"] From 5104ef79f6fabeb5f73b1b7e85d92573f562ee9e Mon Sep 17 00:00:00 2001 From: david kydd Date: Thu, 6 May 2021 15:34:12 +1200 Subject: [PATCH 05/31] fix targeting configmap rather than config itself --- cmd/aks-periscope/aks-periscope.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index 00a80576..ce5a69e2 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -45,7 +45,7 @@ func main() { allCollectorsByName["systemperf"] = systemPerfCollector //read list of collectors that are enabled - enabledCollectorNames := strings.Fields(os.Getenv("collectors-config")) + enabledCollectorNames := strings.Fields(os.Getenv("ENABLED_COLLECTORS")) //gather those collectors which are enabled by selecting from allCollectorsByName collectors := []interfaces.Collector{} @@ -79,7 +79,7 @@ func main() { allDiagnosersByName["networkoutbound"] = diagnoser.NewNetworkOutboundDiagnoser(networkOutboundCollector, exporter) //read list of diagnosers that are enabled - enabledDiagnoserNames := strings.Fields(os.Getenv("diagnosers-config")) + enabledDiagnoserNames := strings.Fields(os.Getenv("ENABLED_DIAGNOSERS")) //gather those diagnosers which are enabled by selecting from allDiagnosersByName diagnosers := []interfaces.Diagnoser{} From 178b618f3b2e26fa63e1d0e3bb17a986278c52eb Mon Sep 17 00:00:00 2001 From: david kydd Date: Fri, 7 May 2021 12:33:30 +1200 Subject: [PATCH 06/31] - treat exporters the same as collectors + diagnosers - support multiple exporters - refactor main() to better encapsulate logic in sub-funcs - add multierror package, currently only used when collector.export errors; to prevent one failing exporter preventing the others from running - removed zip and export mode flag for now --- cmd/aks-periscope/aks-periscope.go | 181 ++++++++++++++------- deployment/aks-periscope.yaml | 10 ++ go.mod | 5 +- go.sum | 5 + pkg/collector/collector.go | 19 ++- pkg/collector/containerlogs_collector.go | 4 +- pkg/collector/dns_collector.go | 4 +- pkg/collector/iptables_collector.go | 4 +- pkg/collector/kubeletcmd_collector.go | 4 +- pkg/collector/kubeobjects_collector.go | 4 +- pkg/collector/networkoutbound_collector.go | 4 +- pkg/collector/nodelogs_collector.go | 4 +- pkg/collector/systemlogs_collector.go | 4 +- pkg/collector/systemperf_collector.go | 4 +- pkg/diagnoser/diagnoser.go | 21 ++- pkg/diagnoser/networkconfig_diagnoser.go | 4 +- pkg/diagnoser/networkoutbound_diagnoser.go | 4 +- pkg/exporter/azureblob_exporter.go | 13 +- pkg/exporter/exporter.go | 24 +++ 19 files changed, 223 insertions(+), 99 deletions(-) create mode 100644 pkg/exporter/exporter.go diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index ce5a69e2..6f95d427 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -1,6 +1,7 @@ package main import ( + "github.com/hashicorp/go-multierror" "log" "os" "strings" @@ -14,45 +15,124 @@ import ( ) func main() { - zipAndExportMode := true - exporter := &exporter.AzureBlobExporter{} var waitgroup sync.WaitGroup err := utils.CreateCRD() if err != nil { log.Printf("Failed to create CRD: %+v", err) } + collectors, diagnosers, exporters := initializeComponents() - //create map of all possible collectors by name, discrete vars for each collector as some are diagnoser dependecies - allCollectorsByName := make(map[string]interfaces.Collector) - containerLogsCollector := collector.NewContainerLogsCollector(exporter) - allCollectorsByName["containerlogs"] = containerLogsCollector - systemLogsCollector := collector.NewSystemLogsCollector(exporter) - allCollectorsByName["systemlogs"] = systemLogsCollector - networkOutboundCollector := collector.NewNetworkOutboundCollector(5, exporter) - allCollectorsByName["networkoutbound"] = networkOutboundCollector - ipTablesCollector := collector.NewIPTablesCollector(exporter) - allCollectorsByName["iptables"] = ipTablesCollector - nodeLogsCollector := collector.NewNodeLogsCollector(exporter) - allCollectorsByName["nodelogs"] = nodeLogsCollector - dnsCollector := collector.NewDNSCollector(exporter) - allCollectorsByName["dns"] = dnsCollector - kubeObjectsCollector := collector.NewKubeObjectsCollector(exporter) - allCollectorsByName["kubeobjects"] = kubeObjectsCollector - kubeletCmdCollector := collector.NewKubeletCmdCollector(exporter) - allCollectorsByName["kubeletcmd"] = kubeletCmdCollector - systemPerfCollector := collector.NewSystemPerfCollector(exporter) - allCollectorsByName["systemperf"] = systemPerfCollector + runCollectors(collectors, &waitgroup) + waitgroup.Wait() + + runDiagnosers(diagnosers, &waitgroup) + waitgroup.Wait() + + log.Print("Zip and export result files") + outputs, err := zipOutputDirectory() + if err != nil { + log.Printf("Failed to zip result files: %+v", err) + } + + err = runExporters(exporters, outputs) + if err != nil { + log.Printf("Failed to export result files: %+v", err) + } + + select {} +} + +// initializeComponents initializes and returns collectors, diagnosers and exporters +func initializeComponents()([]interfaces.Collector, []interfaces.Diagnoser, []interfaces.Exporter){ + + //exporters + azureBlobExporter := exporter.NewAzureBlobExporter() + selectedExporters := selectExporters( + map[string]interfaces.Exporter{ + azureBlobExporter.GetName(): azureBlobExporter, + }) + + //collectors + containerLogsCollector := collector.NewContainerLogsCollector(selectedExporters) + systemLogsCollector := collector.NewSystemLogsCollector(selectedExporters) + networkOutboundCollector := collector.NewNetworkOutboundCollector(5, selectedExporters) + ipTablesCollector := collector.NewIPTablesCollector(selectedExporters) + dnsCollector := collector.NewDNSCollector(selectedExporters) + nodeLogsCollector := collector.NewNodeLogsCollector(selectedExporters) + kubeObjectsCollector := collector.NewKubeObjectsCollector(selectedExporters) + kubeletCmdCollector := collector.NewKubeletCmdCollector(selectedExporters) + systemPerfCollector := collector.NewSystemPerfCollector(selectedExporters) + + selectedCollectors := selectCollectors( + map[string]interfaces.Collector { + containerLogsCollector.GetName(): containerLogsCollector, + systemLogsCollector.GetName(): systemLogsCollector, + networkOutboundCollector.GetName(): networkOutboundCollector, + ipTablesCollector.GetName(): ipTablesCollector, + nodeLogsCollector.GetName(): nodeLogsCollector, + dnsCollector.GetName(): dnsCollector, + kubeObjectsCollector.GetName(): kubeObjectsCollector, + kubeletCmdCollector.GetName(): kubeletCmdCollector, + systemPerfCollector.GetName(): systemPerfCollector, + }) + + //diagnosers + networkConfigDiagnoser := diagnoser.NewNetworkConfigDiagnoser(dnsCollector, kubeletCmdCollector, selectedExporters) + networkOutboundDiagnoser := diagnoser.NewNetworkOutboundDiagnoser(networkOutboundCollector, selectedExporters) + selectedDiagnosers := selectDiagnosers( + map[string]interfaces.Diagnoser{ + networkConfigDiagnoser.GetName(): networkConfigDiagnoser, + networkOutboundDiagnoser.GetName(): networkOutboundDiagnoser, + }) + + return selectedCollectors, selectedDiagnosers, selectedExporters +} + +// selectCollectors select the collectors to run +func selectCollectors(allCollectorsByName map[string]interfaces.Collector) []interfaces.Collector { + collectors := []interfaces.Collector{} //read list of collectors that are enabled enabledCollectorNames := strings.Fields(os.Getenv("ENABLED_COLLECTORS")) - //gather those collectors which are enabled by selecting from allCollectorsByName - collectors := []interfaces.Collector{} for _, collector := range enabledCollectorNames { collectors = append(collectors, allCollectorsByName[collector]) } + return collectors +} + +// selectDiagnosers select the diagnosers to run +func selectDiagnosers(allDiagnosersByName map[string]interfaces.Diagnoser) []interfaces.Diagnoser { + diagnosers := []interfaces.Diagnoser{} + + //read list of diagnosers that are enabled + enabledDiagnoserNames := strings.Fields(os.Getenv("ENABLED_DIAGNOSERS")) + + for _, diagnoser := range enabledDiagnoserNames { + diagnosers = append(diagnosers, allDiagnosersByName[diagnoser]) + } + + return diagnosers +} + +// selectedExporters select the exporters to run +func selectExporters(allExporters map[string]interfaces.Exporter) []interfaces.Exporter { + exporters := []interfaces.Exporter{} + + //read list of collectors that are enabled + enabledExporterNames := strings.Fields(os.Getenv("ENABLED_EXPORTERS")) + + for _, exporter := range enabledExporterNames { + exporters = append(exporters, allExporters[exporter]) + } + + return exporters +} + +// runCollectors run the collectors +func runCollectors(collectors []interfaces.Collector, waitgroup *sync.WaitGroup){ for _, c := range collectors { waitgroup.Add(1) go func(c interfaces.Collector) { @@ -70,23 +150,10 @@ func main() { waitgroup.Done() }(c) } +} - waitgroup.Wait() - - //create map of all possible diagnosers by name - allDiagnosersByName := make(map[string]interfaces.Diagnoser) - allDiagnosersByName["networkconfig"] = diagnoser.NewNetworkConfigDiagnoser(dnsCollector, kubeletCmdCollector, exporter) - allDiagnosersByName["networkoutbound"] = diagnoser.NewNetworkOutboundDiagnoser(networkOutboundCollector, exporter) - - //read list of diagnosers that are enabled - enabledDiagnoserNames := strings.Fields(os.Getenv("ENABLED_DIAGNOSERS")) - - //gather those diagnosers which are enabled by selecting from allDiagnosersByName - diagnosers := []interfaces.Diagnoser{} - for _, diagnoser := range enabledDiagnoserNames { - diagnosers = append(diagnosers, allDiagnosersByName[diagnoser]) - } - +// runDiagnosers run the diagnosers +func runDiagnosers(diagnosers []interfaces.Diagnoser, waitgroup *sync.WaitGroup){ for _, d := range diagnosers { waitgroup.Add(1) go func(d interfaces.Diagnoser) { @@ -104,30 +171,29 @@ func main() { waitgroup.Done() }(d) } +} - waitgroup.Wait() - - if zipAndExportMode { - log.Print("Zip and export result files") - err := zipAndExport(exporter) - if err != nil { - log.Printf("Failed to zip and export result files: %+v", err) +// runExporters run the exporters +func runExporters(exporters []interfaces.Exporter, filesToExport []string) error { + var result error + for _, exporter := range exporters { + if err := exporter.Export(filesToExport); err != nil { + result = multierror.Append(result, err) } } - - select {} + return result } -// zipAndExport zip the results and export -func zipAndExport(exporter interfaces.Exporter) error { +// zipAndExport zip the results +func zipOutputDirectory() (zipFiles []string, error error) { hostName, err := utils.GetHostName() if err != nil { - return err + return nil, err } creationTimeStamp, err := utils.GetCreationTimeStamp() if err != nil { - return err + return nil, err } sourcePathOnHost := "/var/log/aks-periscope/" + strings.Replace(creationTimeStamp, ":", "-", -1) + "/" + hostName @@ -136,13 +202,8 @@ func zipAndExport(exporter interfaces.Exporter) error { _, err = utils.RunCommandOnHost("zip", "-r", zipFileOnHost, sourcePathOnHost) if err != nil { - return err - } - - err = exporter.Export([]string{zipFileOnContainer}) - if err != nil { - return err + return nil, err } - return nil + return []string{zipFileOnContainer}, nil } diff --git a/deployment/aks-periscope.yaml b/deployment/aks-periscope.yaml index ba034f97..64a4cc5b 100644 --- a/deployment/aks-periscope.yaml +++ b/deployment/aks-periscope.yaml @@ -84,6 +84,8 @@ spec: name: collectors-config - configMapRef: name: diagnosers-config + - configMapRef: + name: exporters-config - secretRef: name: azureblob-secret volumeMounts: @@ -152,6 +154,14 @@ metadata: data: ENABLED_DIAGNOSERS: networkconfig networkoutbound --- +apiVersion: v1 +kind: ConfigMap +metadata: + name: exporters-config + namespace: aks-periscope +data: + ENABLED_EXPORTERS: azureblob +--- apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: diff --git a/go.mod b/go.mod index 2df633de..96a14363 100644 --- a/go.mod +++ b/go.mod @@ -9,8 +9,7 @@ require ( github.com/docker/docker v1.13.1 // indirect github.com/docker/go-connections v0.4.0 // indirect github.com/docker/go-units v0.4.0 // indirect - github.com/onsi/gomega v1.11.0 // indirect + github.com/hashicorp/go-multierror v1.1.1 + github.com/onsi/gomega v1.11.0 github.com/opencontainers/go-digest v1.0.0-rc1 // indirect - golang.org/x/net v0.0.0-20190724013045-ca1201d0de80 // indirect - golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3 // indirect ) diff --git a/go.sum b/go.sum index 43308314..1fca6f19 100644 --- a/go.sum +++ b/go.sum @@ -28,6 +28,10 @@ github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA= +github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= +github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo= +github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/mattn/go-ieproxy v0.0.0-20190610004146-91bb50d98149 h1:HfxbT6/JcvIljmERptWhwa8XzP7H3T+Z2N26gTsaDaA= @@ -68,6 +72,7 @@ golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3 h1:4y9KwBHBgBNwDbtu44R5o1fdO golang.org/x/sys v0.0.0-20190801041406-cbf593c0f2f3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f h1:+Nyd8tzPX9R7BWHguqsrbFdRx3WQ/1ib8I44HXV5yTA= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/pkg/collector/collector.go b/pkg/collector/collector.go index 1c342617..8e85ddf8 100644 --- a/pkg/collector/collector.go +++ b/pkg/collector/collector.go @@ -1,6 +1,9 @@ package collector -import "github.com/Azure/aks-periscope/pkg/interfaces" +import ( + "github.com/Azure/aks-periscope/pkg/interfaces" + "github.com/hashicorp/go-multierror" +) // Type defines Collector Type type Type int @@ -36,7 +39,7 @@ type BaseCollector struct { collectorType Type collectIntervalInSeconds int collectorFiles []string - exporter interfaces.Exporter + exporters []interfaces.Exporter } // GetName gets collector name @@ -61,9 +64,13 @@ func (b *BaseCollector) AddToCollectorFiles(file string) { // Export implements the interface method func (b *BaseCollector) Export() error { - if b.exporter != nil { - return b.exporter.Export(b.collectorFiles) + var result error + for _, exporter := range b.exporters { + if exporter != nil { + if err := exporter.Export(b.collectorFiles); err != nil { + result = multierror.Append(result, err) + } + } } - - return nil + return result } diff --git a/pkg/collector/containerlogs_collector.go b/pkg/collector/containerlogs_collector.go index 6a163374..e9e200c4 100644 --- a/pkg/collector/containerlogs_collector.go +++ b/pkg/collector/containerlogs_collector.go @@ -17,11 +17,11 @@ type ContainerLogsCollector struct { var _ interfaces.Collector = &ContainerLogsCollector{} // NewContainerLogsCollector is a constructor -func NewContainerLogsCollector(exporter interfaces.Exporter) *ContainerLogsCollector { +func NewContainerLogsCollector(exporters []interfaces.Exporter) *ContainerLogsCollector { return &ContainerLogsCollector{ BaseCollector: BaseCollector{ collectorType: ContainerLogs, - exporter: exporter, + exporters: exporters, }, } } diff --git a/pkg/collector/dns_collector.go b/pkg/collector/dns_collector.go index 23cedd0f..ba69fbfd 100644 --- a/pkg/collector/dns_collector.go +++ b/pkg/collector/dns_collector.go @@ -15,11 +15,11 @@ type DNSCollector struct { var _ interfaces.Collector = &DNSCollector{} // NewDNSCollector is a constructor -func NewDNSCollector(exporter interfaces.Exporter) *DNSCollector { +func NewDNSCollector(exporters []interfaces.Exporter) *DNSCollector { return &DNSCollector{ BaseCollector: BaseCollector{ collectorType: DNS, - exporter: exporter, + exporters: exporters, }, } } diff --git a/pkg/collector/iptables_collector.go b/pkg/collector/iptables_collector.go index a5f28ee0..9e942243 100644 --- a/pkg/collector/iptables_collector.go +++ b/pkg/collector/iptables_collector.go @@ -15,11 +15,11 @@ type IPTablesCollector struct { var _ interfaces.Collector = &IPTablesCollector{} // NewIPTablesCollector is a constructor -func NewIPTablesCollector(exporter interfaces.Exporter) *IPTablesCollector { +func NewIPTablesCollector(exporters []interfaces.Exporter) *IPTablesCollector { return &IPTablesCollector{ BaseCollector: BaseCollector{ collectorType: IPTables, - exporter: exporter, + exporters: exporters, }, } } diff --git a/pkg/collector/kubeletcmd_collector.go b/pkg/collector/kubeletcmd_collector.go index 72a86e95..8e48faa3 100644 --- a/pkg/collector/kubeletcmd_collector.go +++ b/pkg/collector/kubeletcmd_collector.go @@ -15,11 +15,11 @@ type KubeletCmdCollector struct { var _ interfaces.Collector = &KubeletCmdCollector{} // NewKubeletCmdCollector is a constructor -func NewKubeletCmdCollector(exporter interfaces.Exporter) *KubeletCmdCollector { +func NewKubeletCmdCollector(exporters []interfaces.Exporter) *KubeletCmdCollector { return &KubeletCmdCollector{ BaseCollector: BaseCollector{ collectorType: KubeletCmd, - exporter: exporter, + exporters: exporters, }, } } diff --git a/pkg/collector/kubeobjects_collector.go b/pkg/collector/kubeobjects_collector.go index f77b6f5e..cc82adc1 100644 --- a/pkg/collector/kubeobjects_collector.go +++ b/pkg/collector/kubeobjects_collector.go @@ -17,11 +17,11 @@ type KubeObjectsCollector struct { var _ interfaces.Collector = &KubeObjectsCollector{} // NewKubeObjectsCollector is a constructor -func NewKubeObjectsCollector(exporter interfaces.Exporter) *KubeObjectsCollector { +func NewKubeObjectsCollector(exporters []interfaces.Exporter) *KubeObjectsCollector { return &KubeObjectsCollector{ BaseCollector: BaseCollector{ collectorType: KubeObjects, - exporter: exporter, + exporters: exporters, }, } } diff --git a/pkg/collector/networkoutbound_collector.go b/pkg/collector/networkoutbound_collector.go index 07f624bc..e462e885 100644 --- a/pkg/collector/networkoutbound_collector.go +++ b/pkg/collector/networkoutbound_collector.go @@ -32,12 +32,12 @@ type NetworkOutboundCollector struct { var _ interfaces.Collector = &NetworkOutboundCollector{} // NewNetworkOutboundCollector is a constructor -func NewNetworkOutboundCollector(collectIntervalInSeconds int, exporter interfaces.Exporter) *NetworkOutboundCollector { +func NewNetworkOutboundCollector(collectIntervalInSeconds int, exporters []interfaces.Exporter) *NetworkOutboundCollector { return &NetworkOutboundCollector{ BaseCollector: BaseCollector{ collectorType: NetworkOutbound, collectIntervalInSeconds: collectIntervalInSeconds, - exporter: exporter, + exporters: exporters, }, } } diff --git a/pkg/collector/nodelogs_collector.go b/pkg/collector/nodelogs_collector.go index 84185f5b..bf129a92 100644 --- a/pkg/collector/nodelogs_collector.go +++ b/pkg/collector/nodelogs_collector.go @@ -17,11 +17,11 @@ type NodeLogsCollector struct { var _ interfaces.Collector = &NodeLogsCollector{} // NewNodeLogsCollector is a constructor -func NewNodeLogsCollector(exporter interfaces.Exporter) *NodeLogsCollector { +func NewNodeLogsCollector(exporters []interfaces.Exporter) *NodeLogsCollector { return &NodeLogsCollector{ BaseCollector: BaseCollector{ collectorType: NodeLogs, - exporter: exporter, + exporters: exporters, }, } } diff --git a/pkg/collector/systemlogs_collector.go b/pkg/collector/systemlogs_collector.go index af6c4ae5..0d4e5692 100644 --- a/pkg/collector/systemlogs_collector.go +++ b/pkg/collector/systemlogs_collector.go @@ -15,11 +15,11 @@ type SystemLogsCollector struct { var _ interfaces.Collector = &SystemLogsCollector{} // NewSystemLogsCollector is a constructor -func NewSystemLogsCollector(exporter interfaces.Exporter) *SystemLogsCollector { +func NewSystemLogsCollector(exporters []interfaces.Exporter) *SystemLogsCollector { return &SystemLogsCollector{ BaseCollector: BaseCollector{ collectorType: SystemLogs, - exporter: exporter, + exporters: exporters, }, } } diff --git a/pkg/collector/systemperf_collector.go b/pkg/collector/systemperf_collector.go index 86254910..0777f8f2 100644 --- a/pkg/collector/systemperf_collector.go +++ b/pkg/collector/systemperf_collector.go @@ -15,11 +15,11 @@ type SystemPerfCollector struct { var _ interfaces.Collector = &SystemPerfCollector{} // NewSystemPerfCollector is a constructor -func NewSystemPerfCollector(exporter interfaces.Exporter) *SystemPerfCollector { +func NewSystemPerfCollector(exporters []interfaces.Exporter) *SystemPerfCollector { return &SystemPerfCollector{ BaseCollector: BaseCollector{ collectorType: SystemPerf, - exporter: exporter, + exporters: exporters, }, } } diff --git a/pkg/diagnoser/diagnoser.go b/pkg/diagnoser/diagnoser.go index e0c4d8af..8002e198 100644 --- a/pkg/diagnoser/diagnoser.go +++ b/pkg/diagnoser/diagnoser.go @@ -1,6 +1,9 @@ package diagnoser -import "github.com/Azure/aks-periscope/pkg/interfaces" +import ( + "github.com/Azure/aks-periscope/pkg/interfaces" + "github.com/hashicorp/go-multierror" +) // Type defines Diagnoser Type type Type int @@ -9,7 +12,7 @@ const ( // NetworkConfig defines NetworkConfig Diagnoser Type NetworkConfig Type = iota // NetworkOutbound defines NetworkOutbound Diagnoser Type - NetworkOutbound + NetworkOutbound Type = iota ) // Name returns type name @@ -21,7 +24,7 @@ func (t Type) name() string { type BaseDiagnoser struct { diagnoserType Type diagnoserFiles []string - exporter interfaces.Exporter + exporters []interfaces.Exporter } // GetName gets diagnoser name @@ -36,9 +39,13 @@ func (b *BaseDiagnoser) AddToDiagnoserFiles(file string) { // Export implements the interface method func (b *BaseDiagnoser) Export() error { - if b.exporter != nil { - return b.exporter.Export(b.diagnoserFiles) + var result error + for _, exporter := range b.exporters { + if exporter != nil { + if err := exporter.Export(b.diagnoserFiles); err != nil { + result = multierror.Append(result, err) + } + } } - - return nil + return result } diff --git a/pkg/diagnoser/networkconfig_diagnoser.go b/pkg/diagnoser/networkconfig_diagnoser.go index 22652f42..90cabdba 100644 --- a/pkg/diagnoser/networkconfig_diagnoser.go +++ b/pkg/diagnoser/networkconfig_diagnoser.go @@ -33,11 +33,11 @@ type NetworkConfigDiagnoser struct { var _ interfaces.Diagnoser = &NetworkConfigDiagnoser{} // NewNetworkConfigDiagnoser is a constructor -func NewNetworkConfigDiagnoser(dnsCollector *collector.DNSCollector, kubeletCmdCollector *collector.KubeletCmdCollector, exporter interfaces.Exporter) *NetworkConfigDiagnoser { +func NewNetworkConfigDiagnoser(dnsCollector *collector.DNSCollector, kubeletCmdCollector *collector.KubeletCmdCollector, exporters []interfaces.Exporter) *NetworkConfigDiagnoser { return &NetworkConfigDiagnoser{ BaseDiagnoser: BaseDiagnoser{ diagnoserType: NetworkConfig, - exporter: exporter, + exporters: exporters, }, dnsCollector: dnsCollector, kubeletCmdCollector: kubeletCmdCollector, diff --git a/pkg/diagnoser/networkoutbound_diagnoser.go b/pkg/diagnoser/networkoutbound_diagnoser.go index 585dcdff..eff9cf51 100644 --- a/pkg/diagnoser/networkoutbound_diagnoser.go +++ b/pkg/diagnoser/networkoutbound_diagnoser.go @@ -30,11 +30,11 @@ type NetworkOutboundDiagnoser struct { var _ interfaces.Diagnoser = &NetworkOutboundDiagnoser{} // NewNetworkOutboundDiagnoser is a constructor -func NewNetworkOutboundDiagnoser(networkOutboundCollector *collector.NetworkOutboundCollector, exporter interfaces.Exporter) *NetworkOutboundDiagnoser { +func NewNetworkOutboundDiagnoser(networkOutboundCollector *collector.NetworkOutboundCollector, exporters []interfaces.Exporter) *NetworkOutboundDiagnoser { return &NetworkOutboundDiagnoser{ BaseDiagnoser: BaseDiagnoser{ diagnoserType: NetworkOutbound, - exporter: exporter, + exporters: exporters, }, networkOutboundCollector: networkOutboundCollector, } diff --git a/pkg/exporter/azureblob_exporter.go b/pkg/exporter/azureblob_exporter.go index f74230fa..ac2ea752 100644 --- a/pkg/exporter/azureblob_exporter.go +++ b/pkg/exporter/azureblob_exporter.go @@ -19,10 +19,21 @@ const ( ) // AzureBlobExporter defines an Azure Blob Exporter -type AzureBlobExporter struct{} +type AzureBlobExporter struct{ + BaseExporter +} var _ interfaces.Exporter = &AzureBlobExporter{} +// NewAzureBlobExporter is a constructor +func NewAzureBlobExporter() *AzureBlobExporter { + return &AzureBlobExporter{ + BaseExporter: BaseExporter{ + exporterType: AzureBlob, + }, + } +} + // Export implements the interface method func (exporter *AzureBlobExporter) Export(files []string) error { APIServerFQDN, err := utils.GetAPIServerFQDN() diff --git a/pkg/exporter/exporter.go b/pkg/exporter/exporter.go new file mode 100644 index 00000000..f4660ff5 --- /dev/null +++ b/pkg/exporter/exporter.go @@ -0,0 +1,24 @@ +package exporter + +// Type defines Exporter Type +type Type int + +const ( + // AzureBlob defines AzureBlob exporter Type + AzureBlob Type = iota +) + +// Name returns type name +func (t Type) name() string { + return [...]string{"azureblob"}[t] +} + +// BaseExporter defines Base Exporter +type BaseExporter struct { + exporterType Type +} + +// GetName gets exporter name +func (b *BaseExporter) GetName() string { + return b.exporterType.name() +} \ No newline at end of file From ecfe3ff2a9402d636f107a973a3edcce0a38e123 Mon Sep 17 00:00:00 2001 From: david kydd Date: Fri, 7 May 2021 15:50:08 +1200 Subject: [PATCH 07/31] minor tidy --- cmd/aks-periscope/aks-periscope.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index 6f95d427..22c308d5 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -15,26 +15,29 @@ import ( ) func main() { - var waitgroup sync.WaitGroup err := utils.CreateCRD() if err != nil { log.Printf("Failed to create CRD: %+v", err) } + collectors, diagnosers, exporters := initializeComponents() + var waitgroup sync.WaitGroup + runCollectors(collectors, &waitgroup) waitgroup.Wait() runDiagnosers(diagnosers, &waitgroup) waitgroup.Wait() - log.Print("Zip and export result files") + log.Print("Zip result files") outputs, err := zipOutputDirectory() if err != nil { log.Printf("Failed to zip result files: %+v", err) } + log.Print("Run exporters for result files") err = runExporters(exporters, outputs) if err != nil { log.Printf("Failed to export result files: %+v", err) @@ -78,6 +81,7 @@ func initializeComponents()([]interfaces.Collector, []interfaces.Diagnoser, []in }) //diagnosers + //NOTE currently the collector instances are shared between the collector itself and things which use it as a dependency networkConfigDiagnoser := diagnoser.NewNetworkConfigDiagnoser(dnsCollector, kubeletCmdCollector, selectedExporters) networkOutboundDiagnoser := diagnoser.NewNetworkOutboundDiagnoser(networkOutboundCollector, selectedExporters) selectedDiagnosers := selectDiagnosers( From 864297aaecad2605db71753496278ebb8513ea1e Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 8 May 2021 09:28:15 +1200 Subject: [PATCH 08/31] create dev deployment yaml in its own subdir and revert changes to top level deployment yaml - to avoid changing the one used by downstream tools (vscode / cli) --- deployment/aks-periscope.yaml | 30 ----- deployment/dev/aks-periscope.yaml | 192 ++++++++++++++++++++++++++++++ 2 files changed, 192 insertions(+), 30 deletions(-) create mode 100644 deployment/dev/aks-periscope.yaml diff --git a/deployment/aks-periscope.yaml b/deployment/aks-periscope.yaml index 64a4cc5b..03ef1167 100644 --- a/deployment/aks-periscope.yaml +++ b/deployment/aks-periscope.yaml @@ -80,12 +80,6 @@ spec: name: kubeobjects-config - configMapRef: name: nodelogs-config - - configMapRef: - name: collectors-config - - configMapRef: - name: diagnosers-config - - configMapRef: - name: exporters-config - secretRef: name: azureblob-secret volumeMounts: @@ -138,30 +132,6 @@ metadata: data: DIAGNOSTIC_NODELOGS_LIST: /var/log/azure/cluster-provision.log /var/log/cloud-init.log --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: collectors-config - namespace: aks-periscope -data: - ENABLED_COLLECTORS: dns containerlogs iptables kubeletcmd kubeobjects networkoutbound nodelogs systemlogs systemperf ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: diagnosers-config - namespace: aks-periscope -data: - ENABLED_DIAGNOSERS: networkconfig networkoutbound ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: exporters-config - namespace: aks-periscope -data: - ENABLED_EXPORTERS: azureblob ---- apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition metadata: diff --git a/deployment/dev/aks-periscope.yaml b/deployment/dev/aks-periscope.yaml new file mode 100644 index 00000000..5f8a2d16 --- /dev/null +++ b/deployment/dev/aks-periscope.yaml @@ -0,0 +1,192 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: aks-periscope +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: aks-periscope-service-account + namespace: aks-periscope +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: aks-periscope-role +rules: + - apiGroups: ["","metrics.k8s.io"] + resources: ["pods", "nodes"] + verbs: ["get", "watch", "list"] + - apiGroups: ["aks-periscope.azure.github.com"] + resources: ["diagnostics"] + verbs: ["get", "watch", "list", "create", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: aks-periscope-role-binding +subjects: + - kind: ServiceAccount + name: aks-periscope-service-account + namespace: aks-periscope +roleRef: + kind: ClusterRole + name: aks-periscope-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: aks-periscope-role-binding-view +subjects: + - kind: ServiceAccount + name: aks-periscope-service-account + namespace: aks-periscope +roleRef: + kind: ClusterRole + name: view + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: aks-periscope + namespace: aks-periscope + labels: + app: aks-periscope +spec: + selector: + matchLabels: + app: aks-periscope + template: + metadata: + labels: + app: aks-periscope + spec: + serviceAccountName: aks-periscope-service-account + hostPID: true + nodeSelector: + beta.kubernetes.io/os: linux + containers: + - name: aks-periscope + image: aksrepos.azurecr.io/staging/aks-periscope:v0.3 + securityContext: + privileged: true + imagePullPolicy: Always + envFrom: + - configMapRef: + name: containerlogs-config + - configMapRef: + name: kubeobjects-config + - configMapRef: + name: nodelogs-config + - configMapRef: + name: collectors-config + - configMapRef: + name: diagnosers-config + - configMapRef: + name: exporters-config + - secretRef: + name: azureblob-secret + volumeMounts: + - mountPath: /aks-periscope + name: aks-periscope-storage + resources: + requests: + memory: "500Mi" + cpu: "250m" + limits: + memory: "2000Mi" + cpu: "1000m" + volumes: + - name: aks-periscope-storage + hostPath: + path: /var/log/aks-periscope + type: DirectoryOrCreate +--- +apiVersion: v1 +kind: Secret +metadata: + name: azureblob-secret + namespace: aks-periscope +type: Opaque +data: + AZURE_BLOB_ACCOUNT_NAME: # + AZURE_BLOB_SAS_KEY: # +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: containerlogs-config + namespace: aks-periscope +data: + DIAGNOSTIC_CONTAINERLOGS_LIST: kube-system +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kubeobjects-config + namespace: aks-periscope +data: + DIAGNOSTIC_KUBEOBJECTS_LIST: kube-system/pod kube-system/service kube-system/deployment +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: nodelogs-config + namespace: aks-periscope +data: + DIAGNOSTIC_NODELOGS_LIST: /var/log/azure/cluster-provision.log /var/log/cloud-init.log +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: collectors-config + namespace: aks-periscope +data: + ENABLED_COLLECTORS: dns containerlogs iptables kubeletcmd kubeobjects networkoutbound nodelogs systemlogs systemperf +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: diagnosers-config + namespace: aks-periscope +data: + ENABLED_DIAGNOSERS: networkconfig networkoutbound +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: exporters-config + namespace: aks-periscope +data: + ENABLED_EXPORTERS: azureblob +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: diagnostics.aks-periscope.azure.github.com +spec: + group: aks-periscope.azure.github.com + versions: + - name: v1 + served: true + storage: true + validation: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + dns: + type: string + networkoutbound: + type: string + scope: Namespaced + names: + plural: diagnostics + singular: diagnostic + kind: Diagnostic + shortNames: + - apd \ No newline at end of file From 790744a939ddbcc1aee9f061e9adee7a7454ad2b Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 8 May 2021 20:20:46 +1200 Subject: [PATCH 09/31] go fmt --- cmd/aks-periscope/aks-periscope.go | 34 +- pkg/collector/collector.go | 2 +- pkg/collector/containerlogs_collector.go | 2 +- pkg/collector/dns_collector.go | 2 +- pkg/collector/iptables_collector.go | 2 +- pkg/collector/kubeletcmd_collector.go | 2 +- pkg/collector/kubeobjects_collector.go | 2 +- pkg/collector/networkoutbound_collector.go | 2 +- pkg/collector/nodelogs_collector.go | 2 +- pkg/collector/systemlogs_collector.go | 2 +- pkg/collector/systemperf_collector.go | 2 +- pkg/diagnoser/diagnoser.go | 2 +- pkg/diagnoser/networkconfig_diagnoser.go | 2 +- pkg/diagnoser/networkoutbound_diagnoser.go | 2 +- pkg/exporter/azureblob_exporter.go | 2 +- pkg/exporter/exporter.go | 4 +- pkg/utils/helper.go | 534 ++++++++++----------- 17 files changed, 300 insertions(+), 300 deletions(-) diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index 22c308d5..da95bbc4 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -47,14 +47,14 @@ func main() { } // initializeComponents initializes and returns collectors, diagnosers and exporters -func initializeComponents()([]interfaces.Collector, []interfaces.Diagnoser, []interfaces.Exporter){ +func initializeComponents() ([]interfaces.Collector, []interfaces.Diagnoser, []interfaces.Exporter) { //exporters - azureBlobExporter := exporter.NewAzureBlobExporter() + azureBlobExporter := exporter.NewAzureBlobExporter() selectedExporters := selectExporters( map[string]interfaces.Exporter{ azureBlobExporter.GetName(): azureBlobExporter, - }) + }) //collectors containerLogsCollector := collector.NewContainerLogsCollector(selectedExporters) @@ -68,17 +68,17 @@ func initializeComponents()([]interfaces.Collector, []interfaces.Diagnoser, []in systemPerfCollector := collector.NewSystemPerfCollector(selectedExporters) selectedCollectors := selectCollectors( - map[string]interfaces.Collector { - containerLogsCollector.GetName(): containerLogsCollector, - systemLogsCollector.GetName(): systemLogsCollector, + map[string]interfaces.Collector{ + containerLogsCollector.GetName(): containerLogsCollector, + systemLogsCollector.GetName(): systemLogsCollector, networkOutboundCollector.GetName(): networkOutboundCollector, - ipTablesCollector.GetName(): ipTablesCollector, - nodeLogsCollector.GetName(): nodeLogsCollector, - dnsCollector.GetName(): dnsCollector, - kubeObjectsCollector.GetName(): kubeObjectsCollector, - kubeletCmdCollector.GetName(): kubeletCmdCollector, - systemPerfCollector.GetName(): systemPerfCollector, - }) + ipTablesCollector.GetName(): ipTablesCollector, + nodeLogsCollector.GetName(): nodeLogsCollector, + dnsCollector.GetName(): dnsCollector, + kubeObjectsCollector.GetName(): kubeObjectsCollector, + kubeletCmdCollector.GetName(): kubeletCmdCollector, + systemPerfCollector.GetName(): systemPerfCollector, + }) //diagnosers //NOTE currently the collector instances are shared between the collector itself and things which use it as a dependency @@ -86,9 +86,9 @@ func initializeComponents()([]interfaces.Collector, []interfaces.Diagnoser, []in networkOutboundDiagnoser := diagnoser.NewNetworkOutboundDiagnoser(networkOutboundCollector, selectedExporters) selectedDiagnosers := selectDiagnosers( map[string]interfaces.Diagnoser{ - networkConfigDiagnoser.GetName(): networkConfigDiagnoser, + networkConfigDiagnoser.GetName(): networkConfigDiagnoser, networkOutboundDiagnoser.GetName(): networkOutboundDiagnoser, - }) + }) return selectedCollectors, selectedDiagnosers, selectedExporters } @@ -136,7 +136,7 @@ func selectExporters(allExporters map[string]interfaces.Exporter) []interfaces.E } // runCollectors run the collectors -func runCollectors(collectors []interfaces.Collector, waitgroup *sync.WaitGroup){ +func runCollectors(collectors []interfaces.Collector, waitgroup *sync.WaitGroup) { for _, c := range collectors { waitgroup.Add(1) go func(c interfaces.Collector) { @@ -157,7 +157,7 @@ func runCollectors(collectors []interfaces.Collector, waitgroup *sync.WaitGroup) } // runDiagnosers run the diagnosers -func runDiagnosers(diagnosers []interfaces.Diagnoser, waitgroup *sync.WaitGroup){ +func runDiagnosers(diagnosers []interfaces.Diagnoser, waitgroup *sync.WaitGroup) { for _, d := range diagnosers { waitgroup.Add(1) go func(d interfaces.Diagnoser) { diff --git a/pkg/collector/collector.go b/pkg/collector/collector.go index 8e85ddf8..4089289e 100644 --- a/pkg/collector/collector.go +++ b/pkg/collector/collector.go @@ -39,7 +39,7 @@ type BaseCollector struct { collectorType Type collectIntervalInSeconds int collectorFiles []string - exporters []interfaces.Exporter + exporters []interfaces.Exporter } // GetName gets collector name diff --git a/pkg/collector/containerlogs_collector.go b/pkg/collector/containerlogs_collector.go index e9e200c4..6e5354e1 100644 --- a/pkg/collector/containerlogs_collector.go +++ b/pkg/collector/containerlogs_collector.go @@ -21,7 +21,7 @@ func NewContainerLogsCollector(exporters []interfaces.Exporter) *ContainerLogsCo return &ContainerLogsCollector{ BaseCollector: BaseCollector{ collectorType: ContainerLogs, - exporters: exporters, + exporters: exporters, }, } } diff --git a/pkg/collector/dns_collector.go b/pkg/collector/dns_collector.go index ba69fbfd..632bcc23 100644 --- a/pkg/collector/dns_collector.go +++ b/pkg/collector/dns_collector.go @@ -19,7 +19,7 @@ func NewDNSCollector(exporters []interfaces.Exporter) *DNSCollector { return &DNSCollector{ BaseCollector: BaseCollector{ collectorType: DNS, - exporters: exporters, + exporters: exporters, }, } } diff --git a/pkg/collector/iptables_collector.go b/pkg/collector/iptables_collector.go index 9e942243..a7573771 100644 --- a/pkg/collector/iptables_collector.go +++ b/pkg/collector/iptables_collector.go @@ -19,7 +19,7 @@ func NewIPTablesCollector(exporters []interfaces.Exporter) *IPTablesCollector { return &IPTablesCollector{ BaseCollector: BaseCollector{ collectorType: IPTables, - exporters: exporters, + exporters: exporters, }, } } diff --git a/pkg/collector/kubeletcmd_collector.go b/pkg/collector/kubeletcmd_collector.go index 8e48faa3..9d950583 100644 --- a/pkg/collector/kubeletcmd_collector.go +++ b/pkg/collector/kubeletcmd_collector.go @@ -19,7 +19,7 @@ func NewKubeletCmdCollector(exporters []interfaces.Exporter) *KubeletCmdCollecto return &KubeletCmdCollector{ BaseCollector: BaseCollector{ collectorType: KubeletCmd, - exporters: exporters, + exporters: exporters, }, } } diff --git a/pkg/collector/kubeobjects_collector.go b/pkg/collector/kubeobjects_collector.go index cc82adc1..f54c9d49 100644 --- a/pkg/collector/kubeobjects_collector.go +++ b/pkg/collector/kubeobjects_collector.go @@ -21,7 +21,7 @@ func NewKubeObjectsCollector(exporters []interfaces.Exporter) *KubeObjectsCollec return &KubeObjectsCollector{ BaseCollector: BaseCollector{ collectorType: KubeObjects, - exporters: exporters, + exporters: exporters, }, } } diff --git a/pkg/collector/networkoutbound_collector.go b/pkg/collector/networkoutbound_collector.go index e462e885..fbf314a1 100644 --- a/pkg/collector/networkoutbound_collector.go +++ b/pkg/collector/networkoutbound_collector.go @@ -37,7 +37,7 @@ func NewNetworkOutboundCollector(collectIntervalInSeconds int, exporters []inter BaseCollector: BaseCollector{ collectorType: NetworkOutbound, collectIntervalInSeconds: collectIntervalInSeconds, - exporters: exporters, + exporters: exporters, }, } } diff --git a/pkg/collector/nodelogs_collector.go b/pkg/collector/nodelogs_collector.go index bf129a92..5cac9782 100644 --- a/pkg/collector/nodelogs_collector.go +++ b/pkg/collector/nodelogs_collector.go @@ -21,7 +21,7 @@ func NewNodeLogsCollector(exporters []interfaces.Exporter) *NodeLogsCollector { return &NodeLogsCollector{ BaseCollector: BaseCollector{ collectorType: NodeLogs, - exporters: exporters, + exporters: exporters, }, } } diff --git a/pkg/collector/systemlogs_collector.go b/pkg/collector/systemlogs_collector.go index 0d4e5692..9e2548ee 100644 --- a/pkg/collector/systemlogs_collector.go +++ b/pkg/collector/systemlogs_collector.go @@ -19,7 +19,7 @@ func NewSystemLogsCollector(exporters []interfaces.Exporter) *SystemLogsCollecto return &SystemLogsCollector{ BaseCollector: BaseCollector{ collectorType: SystemLogs, - exporters: exporters, + exporters: exporters, }, } } diff --git a/pkg/collector/systemperf_collector.go b/pkg/collector/systemperf_collector.go index 0777f8f2..62b2af22 100644 --- a/pkg/collector/systemperf_collector.go +++ b/pkg/collector/systemperf_collector.go @@ -19,7 +19,7 @@ func NewSystemPerfCollector(exporters []interfaces.Exporter) *SystemPerfCollecto return &SystemPerfCollector{ BaseCollector: BaseCollector{ collectorType: SystemPerf, - exporters: exporters, + exporters: exporters, }, } } diff --git a/pkg/diagnoser/diagnoser.go b/pkg/diagnoser/diagnoser.go index 8002e198..05dd77f4 100644 --- a/pkg/diagnoser/diagnoser.go +++ b/pkg/diagnoser/diagnoser.go @@ -24,7 +24,7 @@ func (t Type) name() string { type BaseDiagnoser struct { diagnoserType Type diagnoserFiles []string - exporters []interfaces.Exporter + exporters []interfaces.Exporter } // GetName gets diagnoser name diff --git a/pkg/diagnoser/networkconfig_diagnoser.go b/pkg/diagnoser/networkconfig_diagnoser.go index 90cabdba..b8d63ea2 100644 --- a/pkg/diagnoser/networkconfig_diagnoser.go +++ b/pkg/diagnoser/networkconfig_diagnoser.go @@ -37,7 +37,7 @@ func NewNetworkConfigDiagnoser(dnsCollector *collector.DNSCollector, kubeletCmdC return &NetworkConfigDiagnoser{ BaseDiagnoser: BaseDiagnoser{ diagnoserType: NetworkConfig, - exporters: exporters, + exporters: exporters, }, dnsCollector: dnsCollector, kubeletCmdCollector: kubeletCmdCollector, diff --git a/pkg/diagnoser/networkoutbound_diagnoser.go b/pkg/diagnoser/networkoutbound_diagnoser.go index eff9cf51..c80abe60 100644 --- a/pkg/diagnoser/networkoutbound_diagnoser.go +++ b/pkg/diagnoser/networkoutbound_diagnoser.go @@ -34,7 +34,7 @@ func NewNetworkOutboundDiagnoser(networkOutboundCollector *collector.NetworkOutb return &NetworkOutboundDiagnoser{ BaseDiagnoser: BaseDiagnoser{ diagnoserType: NetworkOutbound, - exporters: exporters, + exporters: exporters, }, networkOutboundCollector: networkOutboundCollector, } diff --git a/pkg/exporter/azureblob_exporter.go b/pkg/exporter/azureblob_exporter.go index ac2ea752..1aef790b 100644 --- a/pkg/exporter/azureblob_exporter.go +++ b/pkg/exporter/azureblob_exporter.go @@ -19,7 +19,7 @@ const ( ) // AzureBlobExporter defines an Azure Blob Exporter -type AzureBlobExporter struct{ +type AzureBlobExporter struct { BaseExporter } diff --git a/pkg/exporter/exporter.go b/pkg/exporter/exporter.go index f4660ff5..6868e1bf 100644 --- a/pkg/exporter/exporter.go +++ b/pkg/exporter/exporter.go @@ -15,10 +15,10 @@ func (t Type) name() string { // BaseExporter defines Base Exporter type BaseExporter struct { - exporterType Type + exporterType Type } // GetName gets exporter name func (b *BaseExporter) GetName() string { return b.exporterType.name() -} \ No newline at end of file +} diff --git a/pkg/utils/helper.go b/pkg/utils/helper.go index ce973430..9f3f97e9 100644 --- a/pkg/utils/helper.go +++ b/pkg/utils/helper.go @@ -1,267 +1,267 @@ -package utils - -import ( - "bytes" - "errors" - "fmt" - "io/ioutil" - "os" - "os/exec" - "path/filepath" - "strings" -) - -// GetHostName get host name -func GetHostName() (string, error) { - hostname, err := RunCommandOnHost("cat", "/etc/hostname") - if err != nil { - return "", fmt.Errorf("Fail to get host name: %+v", err) - } - - return strings.TrimSuffix(string(hostname), "\n"), nil -} - -// GetAPIServerFQDN gets the API Server FQDN from the kubeconfig file -func GetAPIServerFQDN() (string, error) { - output, err := RunCommandOnHost("cat", "/var/lib/kubelet/kubeconfig") - - if err != nil { - return "", fmt.Errorf("Can't open kubeconfig file: %+v", err) - } - - lines := strings.Split(output, "\n") - for _, line := range lines { - index := strings.Index(line, "server: ") - if index >= 0 { - fqdn := line[index+len("server: "):] - fqdn = strings.Replace(fqdn, "https://", "", -1) - fqdn = strings.Replace(fqdn, ":443", "", -1) - return fqdn, nil - } - } - - return "", errors.New("Could not find server definitions in kubeconfig") -} - -// RunCommandOnHost runs a command on host system -func RunCommandOnHost(command string, arg ...string) (string, error) { - args := []string{"--target", "1", "--mount", "--uts", "--ipc", "--net", "--pid"} - args = append(args, "--") - args = append(args, command) - args = append(args, arg...) - - cmd := exec.Command("nsenter", args...) - out, err := cmd.CombinedOutput() - if err != nil { - return "", fmt.Errorf("Fail to run command on host: %+v", err) - } - - return string(out), nil -} - -// RunCommandOnContainer runs a command on container system -func RunCommandOnContainer(command string, arg ...string) (string, error) { - cmd := exec.Command(command, arg...) - - var out bytes.Buffer - var stderr bytes.Buffer - cmd.Stdout = &out - cmd.Stderr = &stderr - err := cmd.Run() - if err != nil { - return "", fmt.Errorf("Fail to run command in container: %s", fmt.Sprint(err)+": "+stderr.String()) - } - - return out.String(), nil -} - -// WriteToFile writes data to a file -func WriteToFile(fileName string, data string) error { - f, err := os.Create(fileName) - defer f.Close() - if err != nil { - return fmt.Errorf("Fail to create file %s: %+v", fileName, err) - } - - _, err = f.Write([]byte(data)) - if err != nil { - return fmt.Errorf("Fail to write data to file %s: %+v", fileName, err) - } - - return nil -} - -// CreateCollectorDir creates a working dir for a collector -func CreateCollectorDir(name string) (string, error) { - hostName, err := GetHostName() - if err != nil { - return "", err - } - - creationTimeStamp, err := GetCreationTimeStamp() - if err != nil { - return "", err - } - - rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "collector", name) - err = os.MkdirAll(rootPath, os.ModePerm) - if err != nil { - return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) - } - - return rootPath, nil -} - -// CreateDiagnosticDir creates a working dir for diagnostic -func CreateDiagnosticDir() (string, error) { - hostName, err := GetHostName() - if err != nil { - return "", err - } - - creationTimeStamp, err := GetCreationTimeStamp() - if err != nil { - return "", err - } - - rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "diagnoser") - err = os.MkdirAll(rootPath, os.ModePerm) - if err != nil { - return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) - } - - return rootPath, nil -} - -// CreateKubeConfigFromServiceAccount creates kubeconfig based on creds in service account -func CreateKubeConfigFromServiceAccount() error { - token, err := RunCommandOnContainer("cat", "/var/run/secrets/kubernetes.io/serviceaccount/token") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-credentials", "aks-periscope-service-account", "--token="+token) - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-cluster", "aks-periscope-cluster", "--server=https://kubernetes.default.svc.cluster.local:443", "--certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-context", "aks-periscope-context", "--user=aks-periscope-service-account", "--cluster=aks-periscope-cluster") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "use-context", "aks-periscope-context") - if err != nil { - return err - } - - return nil -} - -// GetCreationTimeStamp returns a create timestamp -func GetCreationTimeStamp() (string, error) { - creationTimeStamp, err := RunCommandOnContainer("kubectl", "get", "pods", "--all-namespaces", "-l", "app=aks-periscope", "-o", "jsonpath=\"{.items[0].metadata.creationTimestamp}\"") - if err != nil { - return "", err - } - - return creationTimeStamp[1 : len(creationTimeStamp)-1], nil -} - -// WriteToCRD writes diagnostic data to CRD -func WriteToCRD(fileName string, key string) error { - hostName, err := GetHostName() - if err != nil { - return err - } - - crdName := "aks-periscope-diagnostic" + "-" + hostName - - jsonBytes, err := ioutil.ReadFile(fileName) - if err != nil { - return err - } - - patchContent := fmt.Sprintf("{\"spec\":{%q:%q}}", key, string(jsonBytes)) - - _, err = RunCommandOnContainer("kubectl", "-n", "aks-periscope", "patch", "apd", crdName, "-p", patchContent, "--type=merge") - if err != nil { - return err - } - - return nil -} - -// CreateCRD creates a CRD object -func CreateCRD() error { - hostName, err := GetHostName() - if err != nil { - return err - } - - crdName := "aks-periscope-diagnostic" + "-" + hostName - - writeDiagnosticCRD(crdName) - - _, err = RunCommandOnContainer("kubectl", "apply", "-f", "aks-periscope-diagnostic-crd.yaml") - if err != nil { - return err - } - - return nil -} - -func writeDiagnosticCRD(crdName string) error { - f, err := os.Create("aks-periscope-diagnostic-crd.yaml") - if err != nil { - return err - } - defer f.Close() - - _, err = f.WriteString("apiVersion: \"aks-periscope.azure.github.com/v1\"\n") - if err != nil { - return err - } - - _, err = f.WriteString("kind: Diagnostic\n") - if err != nil { - return err - } - - _, err = f.WriteString("metadata:\n") - if err != nil { - return err - } - - _, err = f.WriteString(" name: " + crdName + "\n") - if err != nil { - return err - } - - _, err = f.WriteString(" namespace: aks-periscope\n") - if err != nil { - return err - } - - _, err = f.WriteString("spec:\n") - if err != nil { - return err - } - - _, err = f.WriteString(" networkconfig: \"\"\n") - if err != nil { - return err - } - - _, err = f.WriteString(" networkoutbound: \"\"\n") - if err != nil { - return err - } - - return nil -} +package utils + +import ( + "bytes" + "errors" + "fmt" + "io/ioutil" + "os" + "os/exec" + "path/filepath" + "strings" +) + +// GetHostName get host name +func GetHostName() (string, error) { + hostname, err := RunCommandOnHost("cat", "/etc/hostname") + if err != nil { + return "", fmt.Errorf("Fail to get host name: %+v", err) + } + + return strings.TrimSuffix(string(hostname), "\n"), nil +} + +// GetAPIServerFQDN gets the API Server FQDN from the kubeconfig file +func GetAPIServerFQDN() (string, error) { + output, err := RunCommandOnHost("cat", "/var/lib/kubelet/kubeconfig") + + if err != nil { + return "", fmt.Errorf("Can't open kubeconfig file: %+v", err) + } + + lines := strings.Split(output, "\n") + for _, line := range lines { + index := strings.Index(line, "server: ") + if index >= 0 { + fqdn := line[index+len("server: "):] + fqdn = strings.Replace(fqdn, "https://", "", -1) + fqdn = strings.Replace(fqdn, ":443", "", -1) + return fqdn, nil + } + } + + return "", errors.New("Could not find server definitions in kubeconfig") +} + +// RunCommandOnHost runs a command on host system +func RunCommandOnHost(command string, arg ...string) (string, error) { + args := []string{"--target", "1", "--mount", "--uts", "--ipc", "--net", "--pid"} + args = append(args, "--") + args = append(args, command) + args = append(args, arg...) + + cmd := exec.Command("nsenter", args...) + out, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("Fail to run command on host: %+v", err) + } + + return string(out), nil +} + +// RunCommandOnContainer runs a command on container system +func RunCommandOnContainer(command string, arg ...string) (string, error) { + cmd := exec.Command(command, arg...) + + var out bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = &stderr + err := cmd.Run() + if err != nil { + return "", fmt.Errorf("Fail to run command in container: %s", fmt.Sprint(err)+": "+stderr.String()) + } + + return out.String(), nil +} + +// WriteToFile writes data to a file +func WriteToFile(fileName string, data string) error { + f, err := os.Create(fileName) + defer f.Close() + if err != nil { + return fmt.Errorf("Fail to create file %s: %+v", fileName, err) + } + + _, err = f.Write([]byte(data)) + if err != nil { + return fmt.Errorf("Fail to write data to file %s: %+v", fileName, err) + } + + return nil +} + +// CreateCollectorDir creates a working dir for a collector +func CreateCollectorDir(name string) (string, error) { + hostName, err := GetHostName() + if err != nil { + return "", err + } + + creationTimeStamp, err := GetCreationTimeStamp() + if err != nil { + return "", err + } + + rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "collector", name) + err = os.MkdirAll(rootPath, os.ModePerm) + if err != nil { + return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) + } + + return rootPath, nil +} + +// CreateDiagnosticDir creates a working dir for diagnostic +func CreateDiagnosticDir() (string, error) { + hostName, err := GetHostName() + if err != nil { + return "", err + } + + creationTimeStamp, err := GetCreationTimeStamp() + if err != nil { + return "", err + } + + rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "diagnoser") + err = os.MkdirAll(rootPath, os.ModePerm) + if err != nil { + return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) + } + + return rootPath, nil +} + +// CreateKubeConfigFromServiceAccount creates kubeconfig based on creds in service account +func CreateKubeConfigFromServiceAccount() error { + token, err := RunCommandOnContainer("cat", "/var/run/secrets/kubernetes.io/serviceaccount/token") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-credentials", "aks-periscope-service-account", "--token="+token) + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-cluster", "aks-periscope-cluster", "--server=https://kubernetes.default.svc.cluster.local:443", "--certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-context", "aks-periscope-context", "--user=aks-periscope-service-account", "--cluster=aks-periscope-cluster") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "use-context", "aks-periscope-context") + if err != nil { + return err + } + + return nil +} + +// GetCreationTimeStamp returns a create timestamp +func GetCreationTimeStamp() (string, error) { + creationTimeStamp, err := RunCommandOnContainer("kubectl", "get", "pods", "--all-namespaces", "-l", "app=aks-periscope", "-o", "jsonpath=\"{.items[0].metadata.creationTimestamp}\"") + if err != nil { + return "", err + } + + return creationTimeStamp[1 : len(creationTimeStamp)-1], nil +} + +// WriteToCRD writes diagnostic data to CRD +func WriteToCRD(fileName string, key string) error { + hostName, err := GetHostName() + if err != nil { + return err + } + + crdName := "aks-periscope-diagnostic" + "-" + hostName + + jsonBytes, err := ioutil.ReadFile(fileName) + if err != nil { + return err + } + + patchContent := fmt.Sprintf("{\"spec\":{%q:%q}}", key, string(jsonBytes)) + + _, err = RunCommandOnContainer("kubectl", "-n", "aks-periscope", "patch", "apd", crdName, "-p", patchContent, "--type=merge") + if err != nil { + return err + } + + return nil +} + +// CreateCRD creates a CRD object +func CreateCRD() error { + hostName, err := GetHostName() + if err != nil { + return err + } + + crdName := "aks-periscope-diagnostic" + "-" + hostName + + writeDiagnosticCRD(crdName) + + _, err = RunCommandOnContainer("kubectl", "apply", "-f", "aks-periscope-diagnostic-crd.yaml") + if err != nil { + return err + } + + return nil +} + +func writeDiagnosticCRD(crdName string) error { + f, err := os.Create("aks-periscope-diagnostic-crd.yaml") + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString("apiVersion: \"aks-periscope.azure.github.com/v1\"\n") + if err != nil { + return err + } + + _, err = f.WriteString("kind: Diagnostic\n") + if err != nil { + return err + } + + _, err = f.WriteString("metadata:\n") + if err != nil { + return err + } + + _, err = f.WriteString(" name: " + crdName + "\n") + if err != nil { + return err + } + + _, err = f.WriteString(" namespace: aks-periscope\n") + if err != nil { + return err + } + + _, err = f.WriteString("spec:\n") + if err != nil { + return err + } + + _, err = f.WriteString(" networkconfig: \"\"\n") + if err != nil { + return err + } + + _, err = f.WriteString(" networkoutbound: \"\"\n") + if err != nil { + return err + } + + return nil +} From 2b66c39903f21a4950edcf35b9f11b8bf0221ac2 Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 8 May 2021 21:07:22 +1200 Subject: [PATCH 10/31] rename for clarity --- cmd/aks-periscope/aks-periscope.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index da95bbc4..9a9b72a5 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -32,13 +32,13 @@ func main() { waitgroup.Wait() log.Print("Zip result files") - outputs, err := zipOutputDirectory() + zippedOutputs, err := zipOutputDirectory() if err != nil { log.Printf("Failed to zip result files: %+v", err) } log.Print("Run exporters for result files") - err = runExporters(exporters, outputs) + err = runExporters(exporters, zippedOutputs) if err != nil { log.Printf("Failed to export result files: %+v", err) } From 308e35051b7023c0822e86c4585187ed2e087345 Mon Sep 17 00:00:00 2001 From: Safeer Mohammed Date: Tue, 27 Apr 2021 17:20:46 -0700 Subject: [PATCH 11/31] Helm chart for azure-k8s-periscope (cherry picked from commit 910ed972104c01890757da50148bebda424de0f4) --- charts/azure-k8s-periscope/.helmignore | 22 +++ charts/azure-k8s-periscope/Chart.yaml | 8 + .../templates/azure-k8s-periscope.yaml | 186 ++++++++++++++++++ charts/azure-k8s-periscope/values.yaml | 8 + 4 files changed, 224 insertions(+) create mode 100644 charts/azure-k8s-periscope/.helmignore create mode 100644 charts/azure-k8s-periscope/Chart.yaml create mode 100644 charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml create mode 100644 charts/azure-k8s-periscope/values.yaml diff --git a/charts/azure-k8s-periscope/.helmignore b/charts/azure-k8s-periscope/.helmignore new file mode 100644 index 00000000..50af0317 --- /dev/null +++ b/charts/azure-k8s-periscope/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/charts/azure-k8s-periscope/Chart.yaml b/charts/azure-k8s-periscope/Chart.yaml new file mode 100644 index 00000000..19108732 --- /dev/null +++ b/charts/azure-k8s-periscope/Chart.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +appVersion: "1.0" +description: A Helm chart for azure-k8s-periscope +home: https://github.com/Azure/aks-periscope/charts/azure-k8s-periscope +name: azure-k8s-periscope +sources: +- https:https://github.com/Azure/aks-periscope/charts/azure-k8s-periscope +version: 0.3.0 diff --git a/charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml b/charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml new file mode 100644 index 00000000..566b76dc --- /dev/null +++ b/charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml @@ -0,0 +1,186 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: aks-periscope +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: aks-periscope-service-account + namespace: aks-periscope +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: aks-periscope-role +rules: +- apiGroups: ["","metrics.k8s.io"] + resources: ["pods", "nodes"] + verbs: ["get", "watch", "list"] +- apiGroups: ["aks-periscope.azure.github.com"] + resources: ["diagnostics"] + verbs: ["get", "watch", "list", "create", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: aks-periscope-role-binding +subjects: +- kind: ServiceAccount + name: aks-periscope-service-account + namespace: aks-periscope +roleRef: + kind: ClusterRole + name: aks-periscope-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: aks-periscope-role-binding-view +subjects: +- kind: ServiceAccount + name: aks-periscope-service-account + namespace: aks-periscope +roleRef: + kind: ClusterRole + name: view + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: aks-periscope + namespace: aks-periscope + labels: + app: aks-periscope +spec: + selector: + matchLabels: + app: aks-periscope + template: + metadata: + labels: + app: aks-periscope + spec: + serviceAccountName: aks-periscope-service-account + hostPID: true + nodeSelector: + beta.kubernetes.io/os: linux + containers: + - name: aks-periscope + image: aksrepos.azurecr.io/staging/aks-periscope:v0.3 + securityContext: + privileged: true + imagePullPolicy: Always + envFrom: + - configMapRef: + name: containerlogs-config + - configMapRef: + name: kubeobjects-config + - configMapRef: + name: nodelogs-config + - configMapRef: + name: clustertypes-config + - secretRef: + name: azureblob-secret + volumeMounts: + - mountPath: /aks-periscope + name: aks-periscope-storage + resources: + requests: + memory: "500Mi" + cpu: "250m" + limits: + memory: "2000Mi" + cpu: "1000m" + volumes: + - name: aks-periscope-storage + hostPath: + path: /var/log/aks-periscope + type: DirectoryOrCreate +--- +apiVersion: v1 +kind: Secret +metadata: + name: azureblob-secret + namespace: aks-periscope +type: Opaque +data: + {{- if ne .Values.global.accountName "" }} + AZURE_BLOB_ACCOUNT_NAME: {{ .Values.global.accountName | b64enc }} + AZURE_BLOB_SAS_KEY: {{ .Values.global.saskey | b64enc }} + {{- end }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: containerlogs-config + namespace: aks-periscope +data: + {{- if eq .Values.global.clusterType "connectedClusters" }} + DIAGNOSTIC_CONTAINERLOGS_LIST: azure-arc + {{- else }} + DIAGNOSTIC_CONTAINERLOGS_LIST: kube-system + {{- end }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kubeobjects-config + namespace: aks-periscope +data: + {{- if eq .Values.global.clusterType "connectedClusters" }} + DIAGNOSTIC_KUBEOBJECTS_LIST: azure-arc/pod azure-arc/service azure-arc/deployment azure-arc/crd + {{- else }} + DIAGNOSTIC_KUBEOBJECTS_LIST: kube-system/pod kube-system/service kube-system/deployment + {{- end }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: nodelogs-config + namespace: aks-periscope +data: + {{- if eq .Values.global.clusterType "connectedClusters" }} + DIAGNOSTIC_NODELOGS_LIST: "" + {{- else }} + DIAGNOSTIC_KUBEOBJECTS_LIST: kube-system/pod kube-system/service kube-system/deployment + {{- end }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: clustertypes-config + namespace: aks-periscope +data: + CLUSTER_TYPE: {{ .Values.global.clusterType }} +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: diagnostics.aks-periscope.azure.github.com +spec: + group: aks-periscope.azure.github.com + versions: + - name: v1 + served: true + storage: true + validation: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + dns: + type: string + networkoutbound: + type: string + scope: Namespaced + names: + plural: diagnostics + singular: diagnostic + kind: Diagnostic + shortNames: + - apd \ No newline at end of file diff --git a/charts/azure-k8s-periscope/values.yaml b/charts/azure-k8s-periscope/values.yaml new file mode 100644 index 00000000..afc16a63 --- /dev/null +++ b/charts/azure-k8s-periscope/values.yaml @@ -0,0 +1,8 @@ +# Default values for Azure k8s periscope +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +global: + clusterType: "managedClusters" + accountName: "" + saskey: "" \ No newline at end of file From d5697726101bfb3206e874c87206f1116d691d12 Mon Sep 17 00:00:00 2001 From: david kydd Date: Mon, 10 May 2021 13:45:35 +1200 Subject: [PATCH 12/31] add new configmaps to chart --- .../templates/azure-k8s-periscope.yaml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml b/charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml index 566b76dc..0f7246e9 100644 --- a/charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml +++ b/charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml @@ -136,6 +136,30 @@ data: DIAGNOSTIC_KUBEOBJECTS_LIST: kube-system/pod kube-system/service kube-system/deployment {{- end }} --- +apiVersion: v1 +kind: ConfigMap +metadata: + name: collectors-config + namespace: aks-periscope +data: + ENABLED_COLLECTORS: dns containerlogs iptables kubeletcmd kubeobjects networkoutbound nodelogs systemlogs systemperf +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: diagnosers-config + namespace: aks-periscope +data: + ENABLED_DIAGNOSERS: networkconfig networkoutbound +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: exporters-config + namespace: aks-periscope +data: + ENABLED_EXPORTERS: azureblob +--- apiVersion: v1 kind: ConfigMap metadata: From 4e69de1d62e26f31bf10476874ae7140d9b3ea43 Mon Sep 17 00:00:00 2001 From: david kydd Date: Sun, 20 Jun 2021 22:24:33 +1200 Subject: [PATCH 13/31] WIP - support for KubernetesInDocker (kind) clusters --- pkg/exporter/azureblob_exporter.go | 38 +- pkg/exporter/azureblob_exporter_test.go | 27 + pkg/utils/helper.go | 873 +++++++++++++----------- pkg/utils/helper_test.go | 69 ++ 4 files changed, 591 insertions(+), 416 deletions(-) create mode 100644 pkg/exporter/azureblob_exporter_test.go create mode 100644 pkg/utils/helper_test.go diff --git a/pkg/exporter/azureblob_exporter.go b/pkg/exporter/azureblob_exporter.go index 525fe434..0ed422f6 100644 --- a/pkg/exporter/azureblob_exporter.go +++ b/pkg/exporter/azureblob_exporter.go @@ -23,19 +23,47 @@ type AzureBlobExporter struct{} var _ interfaces.Exporter = &AzureBlobExporter{} -// Export implements the interface method -func (exporter *AzureBlobExporter) Export(files []string) error { - APIServerFQDN, err := utils.GetAPIServerFQDN() +// GetStorageContainerName get storage container name +func (exporter *AzureBlobExporter) GetStorageContainerName(APIServerFQDN string) (string, error) { + var containerName string + var err error + if utils.IsKubernetesInDocker() { + containerName, err = exporter.GetKubernetesInDockerStorageContainerName(APIServerFQDN) + } else { + containerName, err = exporter.GetNonKINDStorageContainerName(APIServerFQDN) + } if err != nil { - return err + return "", fmt.Errorf("Fail to build blob container url: %+v", err) } + //TODO run a sanitizer over the final chars in the containerName + return containerName, err +} +func (exporter *AzureBlobExporter) GetKubernetesInDockerStorageContainerName(APIServerFQDN string) (string, error) { + return APIServerFQDN, nil +} + +func (exporter *AzureBlobExporter) GetNonKINDStorageContainerName(APIServerFQDN string) (string, error) { containerName := strings.Replace(APIServerFQDN, ".", "-", -1) len := strings.Index(containerName, "-hcp-") if len == -1 { len = maxContainerNameLength } containerName = strings.TrimRight(containerName[:len], "-") + return containerName, nil +} + +// Export implements the interface method +func (exporter *AzureBlobExporter) Export(files []string) error { + APIServerFQDN, err := utils.GetAPIServerFQDN() + if err != nil { + return fmt.Errorf("Failed to get APIServerFQDN: %+v", err) + } + + containerName, err := exporter.GetStorageContainerName(APIServerFQDN) + if err != nil { + return fmt.Errorf("Failed to get StorageContainerName: %+v", err) + } ctx := context.Background() @@ -46,7 +74,7 @@ func (exporter *AzureBlobExporter) Export(files []string) error { ses := utils.GetStorageEndpointSuffix() url, err := url.Parse(fmt.Sprintf("https://%s.blob.%s/%s%s", accountName, ses, containerName, sasKey)) if err != nil { - return fmt.Errorf("Fail to build blob container url: %+v", err) + return fmt.Errorf("Failed to build blob container url: %+v", err) } containerURL := azblob.NewContainerURL(*url, pipeline) diff --git a/pkg/exporter/azureblob_exporter_test.go b/pkg/exporter/azureblob_exporter_test.go new file mode 100644 index 00000000..5bcfa30c --- /dev/null +++ b/pkg/exporter/azureblob_exporter_test.go @@ -0,0 +1,27 @@ +package exporter + +import ( + "testing" +) + +var getStorageContainerNameTests = []struct { + apiServerFqdn string + containerName string +}{ + {"dakydd-test-eastus-dns-d0daedb9.hcp.eastus.azmk8s.io", "dakydd-test-eastus-dns-d0daedb9"}, +} + +// TestGetNonKINDStorageContainerName get storage container name for non kind cluster +func TestGetNonKINDStorageContainerName(t *testing.T) { + for _, tt := range getStorageContainerNameTests { + t.Run(tt.apiServerFqdn, func(t *testing.T) { + var blobExporter = &AzureBlobExporter{} + containerName, _ := blobExporter.GetNonKINDStorageContainerName(tt.apiServerFqdn) + + if containerName != tt.containerName { + t.Errorf("Sprintf(%q, &blobExporter) => %q, want %q", + tt.apiServerFqdn, containerName, tt.containerName) + } + }) + } +} diff --git a/pkg/utils/helper.go b/pkg/utils/helper.go index 7456aaf0..24ebcd69 100644 --- a/pkg/utils/helper.go +++ b/pkg/utils/helper.go @@ -1,411 +1,462 @@ -package utils - -import ( - "bytes" - "encoding/json" - "errors" - "fmt" - "io/ioutil" - "log" - "net/http" - "os" - "os/exec" - "path/filepath" - "strings" - "time" -) - -const ( - // PublicAzureStorageEndpointSuffix defines default Storage Endpoint Suffix - PublicAzureStorageEndpointSuffix = "core.windows.net" - // AzureStackCloudName references the value that will be under the key "cloud" in azure.json if the application is running on Azure Stack Cloud - // https://kubernetes-sigs.github.io/cloud-provider-azure/install/configs/#azure-stack-configuration -- See this documentation for the well-known cloud name. - AzureStackCloudName = "AzureStackCloud" -) - -// Azure defines Azure configuration -type Azure struct { - Cloud string `json:"cloud"` -} - -// AzureStackCloud defines Azure Stack Cloud configuration -type AzureStackCloud struct { - StorageEndpointSuffix string `json:"storageEndpointSuffix"` -} - -type CommandOutputStreams struct { - Stdout string - Stderr string -} - -// IsAzureStackCloud returns true if the application is running on Azure Stack Cloud -func IsAzureStackCloud() bool { - azureFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azure.json") - if err != nil { - return false - } - var azure Azure - if err = json.Unmarshal([]byte(azureFile), &azure); err != nil { - return false - } - cloud := azure.Cloud - return strings.EqualFold(cloud, AzureStackCloudName) -} - -// CopyFileFromHost saves the specified source file to the destination -func CopyFileFromHost(source, destination string) error { - sourceFile, err := RunCommandOnHost("cat", source) - if err != nil { - return fmt.Errorf("unable to retrieve source content: %w", err) - } - if err = WriteToFile(destination, sourceFile); err != nil { - return fmt.Errorf("unable to write source file to destination: %w", err) - } - return nil -} - -// GetStorageEndpointSuffix returns the SES url from the JSON file as a string -func GetStorageEndpointSuffix() string { - if IsAzureStackCloud() { - ascFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azurestackcloud.json") - if err != nil { - log.Fatalf("unable to locate azurestackcloud.json to extract storage endpoint suffix: %v", err) - } - var azurestackcloud AzureStackCloud - if err = json.Unmarshal([]byte(ascFile), &azurestackcloud); err != nil { - log.Fatalf("unable to read azurestackcloud.json file: %v", err) - } - return azurestackcloud.StorageEndpointSuffix - } - return PublicAzureStorageEndpointSuffix -} - -// GetHostName get host name -func GetHostName() (string, error) { - hostname, err := RunCommandOnHost("cat", "/etc/hostname") - if err != nil { - return "", fmt.Errorf("Fail to get host name: %+v", err) - } - - return strings.TrimSuffix(string(hostname), "\n"), nil -} - -// GetAPIServerFQDN gets the API Server FQDN from the kubeconfig file -func GetAPIServerFQDN() (string, error) { - output, err := RunCommandOnHost("cat", "/var/lib/kubelet/kubeconfig") - - if err != nil { - return "", fmt.Errorf("Can't open kubeconfig file: %+v", err) - } - - lines := strings.Split(output, "\n") - for _, line := range lines { - index := strings.Index(line, "server: ") - if index >= 0 { - fqdn := line[index+len("server: "):] - fqdn = strings.Replace(fqdn, "https://", "", -1) - fqdn = strings.Replace(fqdn, ":443", "", -1) - return fqdn, nil - } - } - - return "", errors.New("Could not find server definitions in kubeconfig") -} - -// RunCommandOnHost runs a command on host system -func RunCommandOnHost(command string, arg ...string) (string, error) { - args := []string{"--target", "1", "--mount", "--uts", "--ipc", "--net", "--pid"} - args = append(args, "--") - args = append(args, command) - args = append(args, arg...) - - cmd := exec.Command("nsenter", args...) - out, err := cmd.CombinedOutput() - if err != nil { - return "", fmt.Errorf("Fail to run command on host: %+v", err) - } - - return string(out), nil -} - -// RunCommandOnContainerWithOutputStreams runs a command on container system and returns both the stdout and stderr output streams -func RunCommandOnContainerWithOutputStreams(command string, arg ...string) (CommandOutputStreams, error) { - cmd := exec.Command(command, arg...) - - var stdout bytes.Buffer - var stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err := cmd.Run() - outputStreams := CommandOutputStreams{stdout.String(), stderr.String()} - - if err != nil { - return outputStreams, fmt.Errorf("Fail to run command in container: %s", fmt.Sprint(err)+": "+stderr.String()) - } - - return outputStreams, nil -} - -// RunCommandOnContainer runs a command on container system and returns the stdout output stream -func RunCommandOnContainer(command string, arg ...string) (string, error) { - outputStreams, err := RunCommandOnContainerWithOutputStreams(command, arg...) - return outputStreams.Stdout, err -} - -// RunBackgroundCommand starts running a command on a container system in the background and returns its process ID -func RunBackgroundCommand(command string, arg ...string) (int, error) { - cmd := exec.Command(command, arg...) - var out bytes.Buffer - var stderr bytes.Buffer - cmd.Stdout = &out - cmd.Stderr = &stderr - err := cmd.Start() - if err != nil { - return 0, fmt.Errorf("Start background command in container exited with message %s: %w", stderr.String(), err) - } - return cmd.Process.Pid, nil -} - -// Finds and kills a process with a given process ID -func KillProcess(pid int) error { - process, err := os.FindProcess(pid) - if err != nil { - return fmt.Errorf("Find process with pid %d to kill: %w", pid, err) - } - if err := process.Kill(); err != nil { - return err - } - return nil -} - -// Tries to issue an HTTP GET request up to maxRetries times -func GetUrlWithRetries(url string, maxRetries int) ([]byte, error) { - retry := 1 - for { - resp, err := http.Get(url) - if err != nil { - if retry == maxRetries { - return nil, fmt.Errorf("Max retries reached for request HTTP Get %s: %w", url, err) - } - retry++ - time.Sleep(5 * time.Second) - } else { - defer resp.Body.Close() - return ioutil.ReadAll(resp.Body) - } - } -} - -// WriteToFile writes data to a file -func WriteToFile(fileName string, data string) error { - if err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm); err != nil { - return fmt.Errorf("Fail to create path directories for file %s: %w", fileName, err) - } - f, err := os.Create(fileName) - if err != nil { - return fmt.Errorf("Fail to create file %s: %+v", fileName, err) - } - defer f.Close() - - _, err = f.Write([]byte(data)) - if err != nil { - return fmt.Errorf("Fail to write data to file %s: %+v", fileName, err) - } - - return nil -} - -// CreateCollectorDir creates a working dir for a collector -func CreateCollectorDir(name string) (string, error) { - hostName, err := GetHostName() - if err != nil { - return "", err - } - - creationTimeStamp, err := GetCreationTimeStamp() - if err != nil { - return "", err - } - - rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "collector", name) - err = os.MkdirAll(rootPath, os.ModePerm) - if err != nil { - return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) - } - - return rootPath, nil -} - -// CreateDiagnosticDir creates a working dir for diagnostic -func CreateDiagnosticDir() (string, error) { - hostName, err := GetHostName() - if err != nil { - return "", err - } - - creationTimeStamp, err := GetCreationTimeStamp() - if err != nil { - return "", err - } - - rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "diagnoser") - err = os.MkdirAll(rootPath, os.ModePerm) - if err != nil { - return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) - } - - return rootPath, nil -} - -// CreateKubeConfigFromServiceAccount creates kubeconfig based on creds in service account -func CreateKubeConfigFromServiceAccount() error { - token, err := RunCommandOnContainer("cat", "/var/run/secrets/kubernetes.io/serviceaccount/token") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-credentials", "aks-periscope-service-account", "--token="+token) - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-cluster", "aks-periscope-cluster", "--server=https://kubernetes.default.svc.cluster.local:443", "--certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-context", "aks-periscope-context", "--user=aks-periscope-service-account", "--cluster=aks-periscope-cluster") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "use-context", "aks-periscope-context") - if err != nil { - return err - } - - return nil -} - -// GetCreationTimeStamp returns a create timestamp -func GetCreationTimeStamp() (string, error) { - creationTimeStamp, err := RunCommandOnContainer("kubectl", "get", "pods", "--all-namespaces", "-l", "app=aks-periscope", "-o", "jsonpath=\"{.items[0].metadata.creationTimestamp}\"") - if err != nil { - return "", err - } - - return creationTimeStamp[1 : len(creationTimeStamp)-1], nil -} - -// WriteToCRD writes diagnostic data to CRD -func WriteToCRD(fileName string, key string) error { - hostName, err := GetHostName() - if err != nil { - return err - } - - crdName := "aks-periscope-diagnostic" + "-" + hostName - - jsonBytes, err := ioutil.ReadFile(fileName) - if err != nil { - return err - } - - patchContent := fmt.Sprintf("{\"spec\":{%q:%q}}", key, string(jsonBytes)) - - _, err = RunCommandOnContainer("kubectl", "-n", "aks-periscope", "patch", "apd", crdName, "-p", patchContent, "--type=merge") - if err != nil { - return err - } - - return nil -} - -// CreateCRD creates a CRD object -func CreateCRD() error { - hostName, err := GetHostName() - if err != nil { - return err - } - - crdName := "aks-periscope-diagnostic" + "-" + hostName - - if err = writeDiagnosticCRD(crdName); err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "apply", "-f", "aks-periscope-diagnostic-crd.yaml") - if err != nil { - return err - } - - return nil -} - -// GetResourceList gets a list of all resources of given type in a specified namespace -func GetResourceList(kubeCmds []string, separator string) ([]string, error) { - outputStreams, err := RunCommandOnContainerWithOutputStreams("kubectl", kubeCmds...) - - if err != nil { - return nil, err - } - - resourceList := outputStreams.Stdout - // If the resource is not found within the cluster, then log a message and do not return any resources. - if len(resourceList) == 0 { - return nil, fmt.Errorf("No '%s' resource found in the cluster for given kubectl command", kubeCmds[1]) - } - - return strings.Split(strings.Trim(resourceList, "\""), separator), nil -} - -func writeDiagnosticCRD(crdName string) error { - f, err := os.Create("aks-periscope-diagnostic-crd.yaml") - if err != nil { - return err - } - defer f.Close() - - _, err = f.WriteString("apiVersion: \"aks-periscope.azure.github.com/v1\"\n") - if err != nil { - return err - } - - _, err = f.WriteString("kind: Diagnostic\n") - if err != nil { - return err - } - - _, err = f.WriteString("metadata:\n") - if err != nil { - return err - } - - _, err = f.WriteString(" name: " + crdName + "\n") - if err != nil { - return err - } - - _, err = f.WriteString(" namespace: aks-periscope\n") - if err != nil { - return err - } - - _, err = f.WriteString("spec:\n") - if err != nil { - return err - } - - _, err = f.WriteString(" networkconfig: \"\"\n") - if err != nil { - return err - } - - _, err = f.WriteString(" networkoutbound: \"\"\n") - if err != nil { - return err - } - - return nil -} +package utils + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "log" + "net" + "net/http" + "net/url" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +const ( + // PublicAzureStorageEndpointSuffix defines default Storage Endpoint Suffix + PublicAzureStorageEndpointSuffix = "core.windows.net" + // AzureStackCloudName references the value that will be under the key "cloud" in azure.json if the application is running on Azure Stack Cloud + // https://kubernetes-sigs.github.io/cloud-provider-azure/install/configs/#azure-stack-configuration -- See this documentation for the well-known cloud name. + AzureStackCloudName = "AzureStackCloud" +) + +// Azure defines Azure configuration +type Azure struct { + Cloud string `json:"cloud"` +} + +// AzureStackCloud defines Azure Stack Cloud configuration +type AzureStackCloud struct { + StorageEndpointSuffix string `json:"storageEndpointSuffix"` +} + +type CommandOutputStreams struct { + Stdout string + Stderr string +} + +// IsAzureStackCloud returns true if the application is running on Azure Stack Cloud +func IsAzureStackCloud() bool { + azureFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azure.json") + if err != nil { + return false + } + var azure Azure + if err = json.Unmarshal([]byte(azureFile), &azure); err != nil { + return false + } + cloud := azure.Cloud + return strings.EqualFold(cloud, AzureStackCloudName) +} + +// IsKubernetesInDocker returns true if the application is running on KubernetesInDocker (kind) +func IsKubernetesInDocker() bool { + //TODO refactor the conditional logic this check guards into a new "KindClusterOperations" type behind an interface + //test the AKS kubeconfig location, if we find something then this isn't a KIND cluster + _, err := RunCommandOnHost("ls", "/var/lib/kubelet/kubeconfig") + if err == nil { + return false + } + + //test the KIND kubeconfig location + _, err = RunCommandOnHost("ls", "/etc/kubernetes/kubelet.conf") + if err == nil { + return true + } + + return false +} + +// CopyFileFromHost saves the specified source file to the destination +func CopyFileFromHost(source, destination string) error { + sourceFile, err := RunCommandOnHost("cat", source) + if err != nil { + return fmt.Errorf("unable to retrieve source content: %w", err) + } + if err = WriteToFile(destination, sourceFile); err != nil { + return fmt.Errorf("unable to write source file to destination: %w", err) + } + return nil +} + +// GetStorageEndpointSuffix returns the SES url from the JSON file as a string +func GetStorageEndpointSuffix() string { + if IsAzureStackCloud() { + ascFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azurestackcloud.json") + if err != nil { + log.Fatalf("unable to locate azurestackcloud.json to extract storage endpoint suffix: %v", err) + } + var azurestackcloud AzureStackCloud + if err = json.Unmarshal([]byte(ascFile), &azurestackcloud); err != nil { + log.Fatalf("unable to read azurestackcloud.json file: %v", err) + } + return azurestackcloud.StorageEndpointSuffix + } + return PublicAzureStorageEndpointSuffix +} + +// GetHostName get host name +func GetHostName() (string, error) { + hostname, err := RunCommandOnHost("cat", "/etc/hostname") + if err != nil { + return "", fmt.Errorf("Fail to get host name: %+v", err) + } + + return strings.TrimSuffix(string(hostname), "\n"), nil +} + +//ParseAPIServerFQDNFromKubeConfig parses a kubeConfig and returns the APIServerFQDN +func ParseAPIServerFQDNFromKubeConfig(output string) (string, error) { + lines := strings.Split(output, "\n") + for _, line := range lines { + index := strings.Index(line, "server: ") + if index >= 0 { + fqdn := line[index+len("server: "):] + fqdnurl, err := url.Parse(fqdn) + if err != nil { + return "", fmt.Errorf("Fail to parse url from fqdn: %s", fmt.Sprint(err)+": "+fqdn) + } + + host, _, err := net.SplitHostPort(fqdnurl.Host) + if err != nil { + return "", fmt.Errorf("Fail to split host port from fqdnurl: %s", fmt.Sprint(err)+": "+fqdnurl.String()) + } + + return host, nil + } + } + return "", errors.New("Could not find server definitions in kubeconfig") +} + +//ReadKubeletConfig reads the kubeletConfig from the node +func ReadKubeletConfig() (string, error) { + if IsKubernetesInDocker() { + output, err := RunCommandOnHost("cat", "/etc/kubernetes/kubelet.conf") + if err != nil { + return "", fmt.Errorf("Can't open kubeconfig file at /etc/kubernetes/kubelet.conf\": %+v", err) + } + return output, nil + } else { + output, err := RunCommandOnHost("cat", "/var/lib/kubelet/kubeconfig") + if err != nil { + return "", fmt.Errorf("Can't open kubeconfig file at /var/lib/kubelet/kubeconfig\": %+v", err) + } + return output, nil + } +} + +// GetAPIServerFQDN gets the API Server FQDN from the kubeconfig file +func GetAPIServerFQDN() (string, error) { + output, err := ReadKubeletConfig() + if err != nil { + return "", err + } + fqdn, err := ParseAPIServerFQDNFromKubeConfig(output) + if err != nil { + return "", err + } + return fqdn, nil +} + +// RunCommandOnHost runs a command on host system +func RunCommandOnHost(command string, arg ...string) (string, error) { + args := []string{"--target", "1", "--mount", "--uts", "--ipc", "--net", "--pid"} + args = append(args, "--") + args = append(args, command) + args = append(args, arg...) + + cmd := exec.Command("nsenter", args...) + out, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("Fail to run command on host: %+v", err) + } + + return string(out), nil +} + +// RunCommandOnContainerWithOutputStreams runs a command on container system and returns both the stdout and stderr output streams +func RunCommandOnContainerWithOutputStreams(command string, arg ...string) (CommandOutputStreams, error) { + cmd := exec.Command(command, arg...) + + var stdout bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err := cmd.Run() + outputStreams := CommandOutputStreams{stdout.String(), stderr.String()} + + if err != nil { + return outputStreams, fmt.Errorf("Fail to run command in container: %s", fmt.Sprint(err)+": "+stderr.String()) + } + + return outputStreams, nil +} + +// RunCommandOnContainer runs a command on container system and returns the stdout output stream +func RunCommandOnContainer(command string, arg ...string) (string, error) { + outputStreams, err := RunCommandOnContainerWithOutputStreams(command, arg...) + return outputStreams.Stdout, err +} + +// RunBackgroundCommand starts running a command on a container system in the background and returns its process ID +func RunBackgroundCommand(command string, arg ...string) (int, error) { + cmd := exec.Command(command, arg...) + var out bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = &stderr + err := cmd.Start() + if err != nil { + return 0, fmt.Errorf("Start background command in container exited with message %s: %w", stderr.String(), err) + } + return cmd.Process.Pid, nil +} + +// Finds and kills a process with a given process ID +func KillProcess(pid int) error { + process, err := os.FindProcess(pid) + if err != nil { + return fmt.Errorf("Find process with pid %d to kill: %w", pid, err) + } + if err := process.Kill(); err != nil { + return err + } + return nil +} + +// Tries to issue an HTTP GET request up to maxRetries times +func GetUrlWithRetries(url string, maxRetries int) ([]byte, error) { + retry := 1 + for { + resp, err := http.Get(url) + if err != nil { + if retry == maxRetries { + return nil, fmt.Errorf("Max retries reached for request HTTP Get %s: %w", url, err) + } + retry++ + time.Sleep(5 * time.Second) + } else { + defer resp.Body.Close() + return ioutil.ReadAll(resp.Body) + } + } +} + +// WriteToFile writes data to a file +func WriteToFile(fileName string, data string) error { + if err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm); err != nil { + return fmt.Errorf("Fail to create path directories for file %s: %w", fileName, err) + } + f, err := os.Create(fileName) + if err != nil { + return fmt.Errorf("Fail to create file %s: %+v", fileName, err) + } + defer f.Close() + + _, err = f.Write([]byte(data)) + if err != nil { + return fmt.Errorf("Fail to write data to file %s: %+v", fileName, err) + } + + return nil +} + +// CreateCollectorDir creates a working dir for a collector +func CreateCollectorDir(name string) (string, error) { + hostName, err := GetHostName() + if err != nil { + return "", err + } + + creationTimeStamp, err := GetCreationTimeStamp() + if err != nil { + return "", err + } + + rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "collector", name) + err = os.MkdirAll(rootPath, os.ModePerm) + if err != nil { + return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) + } + + return rootPath, nil +} + +// CreateDiagnosticDir creates a working dir for diagnostic +func CreateDiagnosticDir() (string, error) { + hostName, err := GetHostName() + if err != nil { + return "", err + } + + creationTimeStamp, err := GetCreationTimeStamp() + if err != nil { + return "", err + } + + rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "diagnoser") + err = os.MkdirAll(rootPath, os.ModePerm) + if err != nil { + return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) + } + + return rootPath, nil +} + +// CreateKubeConfigFromServiceAccount creates kubeconfig based on creds in service account +func CreateKubeConfigFromServiceAccount() error { + token, err := RunCommandOnContainer("cat", "/var/run/secrets/kubernetes.io/serviceaccount/token") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-credentials", "aks-periscope-service-account", "--token="+token) + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-cluster", "aks-periscope-cluster", "--server=https://kubernetes.default.svc.cluster.local:443", "--certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-context", "aks-periscope-context", "--user=aks-periscope-service-account", "--cluster=aks-periscope-cluster") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "use-context", "aks-periscope-context") + if err != nil { + return err + } + + return nil +} + +// GetCreationTimeStamp returns a create timestamp +func GetCreationTimeStamp() (string, error) { + creationTimeStamp, err := RunCommandOnContainer("kubectl", "get", "pods", "--all-namespaces", "-l", "app=aks-periscope", "-o", "jsonpath=\"{.items[0].metadata.creationTimestamp}\"") + if err != nil { + return "", err + } + + return creationTimeStamp[1 : len(creationTimeStamp)-1], nil +} + +// WriteToCRD writes diagnostic data to CRD +func WriteToCRD(fileName string, key string) error { + hostName, err := GetHostName() + if err != nil { + return err + } + + crdName := "aks-periscope-diagnostic" + "-" + hostName + + jsonBytes, err := ioutil.ReadFile(fileName) + if err != nil { + return err + } + + patchContent := fmt.Sprintf("{\"spec\":{%q:%q}}", key, string(jsonBytes)) + + _, err = RunCommandOnContainer("kubectl", "-n", "aks-periscope", "patch", "apd", crdName, "-p", patchContent, "--type=merge") + if err != nil { + return err + } + + return nil +} + +// CreateCRD creates a CRD object +func CreateCRD() error { + hostName, err := GetHostName() + if err != nil { + return err + } + + crdName := "aks-periscope-diagnostic" + "-" + hostName + + if err = writeDiagnosticCRD(crdName); err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "apply", "-f", "aks-periscope-diagnostic-crd.yaml") + if err != nil { + return err + } + + return nil +} + +// GetResourceList gets a list of all resources of given type in a specified namespace +func GetResourceList(kubeCmds []string, separator string) ([]string, error) { + outputStreams, err := RunCommandOnContainerWithOutputStreams("kubectl", kubeCmds...) + + if err != nil { + return nil, err + } + + resourceList := outputStreams.Stdout + // If the resource is not found within the cluster, then log a message and do not return any resources. + if len(resourceList) == 0 { + return nil, fmt.Errorf("No '%s' resource found in the cluster for given kubectl command", kubeCmds[1]) + } + + return strings.Split(strings.Trim(resourceList, "\""), separator), nil +} + +func writeDiagnosticCRD(crdName string) error { + f, err := os.Create("aks-periscope-diagnostic-crd.yaml") + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString("apiVersion: \"aks-periscope.azure.github.com/v1\"\n") + if err != nil { + return err + } + + _, err = f.WriteString("kind: Diagnostic\n") + if err != nil { + return err + } + + _, err = f.WriteString("metadata:\n") + if err != nil { + return err + } + + _, err = f.WriteString(" name: " + crdName + "\n") + if err != nil { + return err + } + + _, err = f.WriteString(" namespace: aks-periscope\n") + if err != nil { + return err + } + + _, err = f.WriteString("spec:\n") + if err != nil { + return err + } + + _, err = f.WriteString(" networkconfig: \"\"\n") + if err != nil { + return err + } + + _, err = f.WriteString(" networkoutbound: \"\"\n") + if err != nil { + return err + } + + return nil +} diff --git a/pkg/utils/helper_test.go b/pkg/utils/helper_test.go new file mode 100644 index 00000000..1264e5c2 --- /dev/null +++ b/pkg/utils/helper_test.go @@ -0,0 +1,69 @@ +package utils + +import ( + "testing" +) + +var parseAPIServerFQDNFromKubeConfigTests = []struct { + kubeConfig string + APIServerFQDN string +}{ + {`apiVersion: v1 +clusters: +- cluster: + certificate-authority-data: dummyData== + server: https://kind-control-plane:6443 + name: kind +contexts: +- context: + cluster: kind + user: system:node:kind-control-plane + name: system:node:kind-control-plane@kind +current-context: system:node:kind-control-plane@kind +kind: Config +preferences: {} +users: +- name: system:node:kind-control-plane + user: + client-certificate: /var/lib/kubelet/pki/kubelet-client-current.pem + client-key: /var/lib/kubelet/pki/kubelet-client-current.pem`, + "kind-control-plane"}, + + {`apiVersion: v1 +kind: Config +clusters: +- name: localcluster + cluster: + certificate-authority: /etc/kubernetes/certs/ca.crt + server: https://dakydd-test-eastus-dns-d0daedb9.hcp.eastus.azmk8s.io:443 +users: +- name: client + user: + client-certificate: /etc/kubernetes/certs/client.crt + client-key: /etc/kubernetes/certs/client.key +contexts: +- context: + cluster: localcluster + user: client + name: localclustercontext +current-context: localclustercontext`, +"dakydd-test-eastus-dns-d0daedb9.hcp.eastus.azmk8s.io"}, +} + +// GetStorageContainerName get storage container name +func TestParseAPIServerFQDNFromKubeConfig(t *testing.T) { + for _, tt := range parseAPIServerFQDNFromKubeConfigTests { + t.Run(tt.kubeConfig, func(t *testing.T) { + APIServerFQDN, err := ParseAPIServerFQDNFromKubeConfig(tt.kubeConfig) + if err != nil { + t.Errorf("Sprintf(%q, utils.TestParseAPIServerFQDNFromKubeConfig) Error: %q, expected %q", + tt.kubeConfig, err, tt.APIServerFQDN) + } + + if APIServerFQDN != tt.APIServerFQDN { + t.Errorf("Sprintf(%q, utils.TestParseAPIServerFQDNFromKubeConfig) => %q, want %q", + tt.kubeConfig, APIServerFQDN, tt.APIServerFQDN) + } + }) + } +} From a377636c01bda029365f1b6644da04943c4a609a Mon Sep 17 00:00:00 2001 From: david kydd Date: Mon, 21 Jun 2021 09:41:36 +1200 Subject: [PATCH 14/31] simplify err => boolean --- pkg/utils/helper.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pkg/utils/helper.go b/pkg/utils/helper.go index 24ebcd69..6d2f3c88 100644 --- a/pkg/utils/helper.go +++ b/pkg/utils/helper.go @@ -65,11 +65,7 @@ func IsKubernetesInDocker() bool { //test the KIND kubeconfig location _, err = RunCommandOnHost("ls", "/etc/kubernetes/kubelet.conf") - if err == nil { - return true - } - - return false + return err == nil } // CopyFileFromHost saves the specified source file to the destination From 228cf6b197b9f5c2df428a6a271206ca409b3c19 Mon Sep 17 00:00:00 2001 From: david kydd Date: Mon, 21 Jun 2021 10:15:08 +1200 Subject: [PATCH 15/31] reset line endings to crlf --- pkg/utils/helper.go | 916 ++++++++++++++++++++++---------------------- 1 file changed, 458 insertions(+), 458 deletions(-) diff --git a/pkg/utils/helper.go b/pkg/utils/helper.go index 6d2f3c88..2b3fdce2 100644 --- a/pkg/utils/helper.go +++ b/pkg/utils/helper.go @@ -1,458 +1,458 @@ -package utils - -import ( - "bytes" - "encoding/json" - "errors" - "fmt" - "io/ioutil" - "log" - "net" - "net/http" - "net/url" - "os" - "os/exec" - "path/filepath" - "strings" - "time" -) - -const ( - // PublicAzureStorageEndpointSuffix defines default Storage Endpoint Suffix - PublicAzureStorageEndpointSuffix = "core.windows.net" - // AzureStackCloudName references the value that will be under the key "cloud" in azure.json if the application is running on Azure Stack Cloud - // https://kubernetes-sigs.github.io/cloud-provider-azure/install/configs/#azure-stack-configuration -- See this documentation for the well-known cloud name. - AzureStackCloudName = "AzureStackCloud" -) - -// Azure defines Azure configuration -type Azure struct { - Cloud string `json:"cloud"` -} - -// AzureStackCloud defines Azure Stack Cloud configuration -type AzureStackCloud struct { - StorageEndpointSuffix string `json:"storageEndpointSuffix"` -} - -type CommandOutputStreams struct { - Stdout string - Stderr string -} - -// IsAzureStackCloud returns true if the application is running on Azure Stack Cloud -func IsAzureStackCloud() bool { - azureFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azure.json") - if err != nil { - return false - } - var azure Azure - if err = json.Unmarshal([]byte(azureFile), &azure); err != nil { - return false - } - cloud := azure.Cloud - return strings.EqualFold(cloud, AzureStackCloudName) -} - -// IsKubernetesInDocker returns true if the application is running on KubernetesInDocker (kind) -func IsKubernetesInDocker() bool { - //TODO refactor the conditional logic this check guards into a new "KindClusterOperations" type behind an interface - //test the AKS kubeconfig location, if we find something then this isn't a KIND cluster - _, err := RunCommandOnHost("ls", "/var/lib/kubelet/kubeconfig") - if err == nil { - return false - } - - //test the KIND kubeconfig location - _, err = RunCommandOnHost("ls", "/etc/kubernetes/kubelet.conf") - return err == nil -} - -// CopyFileFromHost saves the specified source file to the destination -func CopyFileFromHost(source, destination string) error { - sourceFile, err := RunCommandOnHost("cat", source) - if err != nil { - return fmt.Errorf("unable to retrieve source content: %w", err) - } - if err = WriteToFile(destination, sourceFile); err != nil { - return fmt.Errorf("unable to write source file to destination: %w", err) - } - return nil -} - -// GetStorageEndpointSuffix returns the SES url from the JSON file as a string -func GetStorageEndpointSuffix() string { - if IsAzureStackCloud() { - ascFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azurestackcloud.json") - if err != nil { - log.Fatalf("unable to locate azurestackcloud.json to extract storage endpoint suffix: %v", err) - } - var azurestackcloud AzureStackCloud - if err = json.Unmarshal([]byte(ascFile), &azurestackcloud); err != nil { - log.Fatalf("unable to read azurestackcloud.json file: %v", err) - } - return azurestackcloud.StorageEndpointSuffix - } - return PublicAzureStorageEndpointSuffix -} - -// GetHostName get host name -func GetHostName() (string, error) { - hostname, err := RunCommandOnHost("cat", "/etc/hostname") - if err != nil { - return "", fmt.Errorf("Fail to get host name: %+v", err) - } - - return strings.TrimSuffix(string(hostname), "\n"), nil -} - -//ParseAPIServerFQDNFromKubeConfig parses a kubeConfig and returns the APIServerFQDN -func ParseAPIServerFQDNFromKubeConfig(output string) (string, error) { - lines := strings.Split(output, "\n") - for _, line := range lines { - index := strings.Index(line, "server: ") - if index >= 0 { - fqdn := line[index+len("server: "):] - fqdnurl, err := url.Parse(fqdn) - if err != nil { - return "", fmt.Errorf("Fail to parse url from fqdn: %s", fmt.Sprint(err)+": "+fqdn) - } - - host, _, err := net.SplitHostPort(fqdnurl.Host) - if err != nil { - return "", fmt.Errorf("Fail to split host port from fqdnurl: %s", fmt.Sprint(err)+": "+fqdnurl.String()) - } - - return host, nil - } - } - return "", errors.New("Could not find server definitions in kubeconfig") -} - -//ReadKubeletConfig reads the kubeletConfig from the node -func ReadKubeletConfig() (string, error) { - if IsKubernetesInDocker() { - output, err := RunCommandOnHost("cat", "/etc/kubernetes/kubelet.conf") - if err != nil { - return "", fmt.Errorf("Can't open kubeconfig file at /etc/kubernetes/kubelet.conf\": %+v", err) - } - return output, nil - } else { - output, err := RunCommandOnHost("cat", "/var/lib/kubelet/kubeconfig") - if err != nil { - return "", fmt.Errorf("Can't open kubeconfig file at /var/lib/kubelet/kubeconfig\": %+v", err) - } - return output, nil - } -} - -// GetAPIServerFQDN gets the API Server FQDN from the kubeconfig file -func GetAPIServerFQDN() (string, error) { - output, err := ReadKubeletConfig() - if err != nil { - return "", err - } - fqdn, err := ParseAPIServerFQDNFromKubeConfig(output) - if err != nil { - return "", err - } - return fqdn, nil -} - -// RunCommandOnHost runs a command on host system -func RunCommandOnHost(command string, arg ...string) (string, error) { - args := []string{"--target", "1", "--mount", "--uts", "--ipc", "--net", "--pid"} - args = append(args, "--") - args = append(args, command) - args = append(args, arg...) - - cmd := exec.Command("nsenter", args...) - out, err := cmd.CombinedOutput() - if err != nil { - return "", fmt.Errorf("Fail to run command on host: %+v", err) - } - - return string(out), nil -} - -// RunCommandOnContainerWithOutputStreams runs a command on container system and returns both the stdout and stderr output streams -func RunCommandOnContainerWithOutputStreams(command string, arg ...string) (CommandOutputStreams, error) { - cmd := exec.Command(command, arg...) - - var stdout bytes.Buffer - var stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err := cmd.Run() - outputStreams := CommandOutputStreams{stdout.String(), stderr.String()} - - if err != nil { - return outputStreams, fmt.Errorf("Fail to run command in container: %s", fmt.Sprint(err)+": "+stderr.String()) - } - - return outputStreams, nil -} - -// RunCommandOnContainer runs a command on container system and returns the stdout output stream -func RunCommandOnContainer(command string, arg ...string) (string, error) { - outputStreams, err := RunCommandOnContainerWithOutputStreams(command, arg...) - return outputStreams.Stdout, err -} - -// RunBackgroundCommand starts running a command on a container system in the background and returns its process ID -func RunBackgroundCommand(command string, arg ...string) (int, error) { - cmd := exec.Command(command, arg...) - var out bytes.Buffer - var stderr bytes.Buffer - cmd.Stdout = &out - cmd.Stderr = &stderr - err := cmd.Start() - if err != nil { - return 0, fmt.Errorf("Start background command in container exited with message %s: %w", stderr.String(), err) - } - return cmd.Process.Pid, nil -} - -// Finds and kills a process with a given process ID -func KillProcess(pid int) error { - process, err := os.FindProcess(pid) - if err != nil { - return fmt.Errorf("Find process with pid %d to kill: %w", pid, err) - } - if err := process.Kill(); err != nil { - return err - } - return nil -} - -// Tries to issue an HTTP GET request up to maxRetries times -func GetUrlWithRetries(url string, maxRetries int) ([]byte, error) { - retry := 1 - for { - resp, err := http.Get(url) - if err != nil { - if retry == maxRetries { - return nil, fmt.Errorf("Max retries reached for request HTTP Get %s: %w", url, err) - } - retry++ - time.Sleep(5 * time.Second) - } else { - defer resp.Body.Close() - return ioutil.ReadAll(resp.Body) - } - } -} - -// WriteToFile writes data to a file -func WriteToFile(fileName string, data string) error { - if err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm); err != nil { - return fmt.Errorf("Fail to create path directories for file %s: %w", fileName, err) - } - f, err := os.Create(fileName) - if err != nil { - return fmt.Errorf("Fail to create file %s: %+v", fileName, err) - } - defer f.Close() - - _, err = f.Write([]byte(data)) - if err != nil { - return fmt.Errorf("Fail to write data to file %s: %+v", fileName, err) - } - - return nil -} - -// CreateCollectorDir creates a working dir for a collector -func CreateCollectorDir(name string) (string, error) { - hostName, err := GetHostName() - if err != nil { - return "", err - } - - creationTimeStamp, err := GetCreationTimeStamp() - if err != nil { - return "", err - } - - rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "collector", name) - err = os.MkdirAll(rootPath, os.ModePerm) - if err != nil { - return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) - } - - return rootPath, nil -} - -// CreateDiagnosticDir creates a working dir for diagnostic -func CreateDiagnosticDir() (string, error) { - hostName, err := GetHostName() - if err != nil { - return "", err - } - - creationTimeStamp, err := GetCreationTimeStamp() - if err != nil { - return "", err - } - - rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "diagnoser") - err = os.MkdirAll(rootPath, os.ModePerm) - if err != nil { - return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) - } - - return rootPath, nil -} - -// CreateKubeConfigFromServiceAccount creates kubeconfig based on creds in service account -func CreateKubeConfigFromServiceAccount() error { - token, err := RunCommandOnContainer("cat", "/var/run/secrets/kubernetes.io/serviceaccount/token") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-credentials", "aks-periscope-service-account", "--token="+token) - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-cluster", "aks-periscope-cluster", "--server=https://kubernetes.default.svc.cluster.local:443", "--certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-context", "aks-periscope-context", "--user=aks-periscope-service-account", "--cluster=aks-periscope-cluster") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "use-context", "aks-periscope-context") - if err != nil { - return err - } - - return nil -} - -// GetCreationTimeStamp returns a create timestamp -func GetCreationTimeStamp() (string, error) { - creationTimeStamp, err := RunCommandOnContainer("kubectl", "get", "pods", "--all-namespaces", "-l", "app=aks-periscope", "-o", "jsonpath=\"{.items[0].metadata.creationTimestamp}\"") - if err != nil { - return "", err - } - - return creationTimeStamp[1 : len(creationTimeStamp)-1], nil -} - -// WriteToCRD writes diagnostic data to CRD -func WriteToCRD(fileName string, key string) error { - hostName, err := GetHostName() - if err != nil { - return err - } - - crdName := "aks-periscope-diagnostic" + "-" + hostName - - jsonBytes, err := ioutil.ReadFile(fileName) - if err != nil { - return err - } - - patchContent := fmt.Sprintf("{\"spec\":{%q:%q}}", key, string(jsonBytes)) - - _, err = RunCommandOnContainer("kubectl", "-n", "aks-periscope", "patch", "apd", crdName, "-p", patchContent, "--type=merge") - if err != nil { - return err - } - - return nil -} - -// CreateCRD creates a CRD object -func CreateCRD() error { - hostName, err := GetHostName() - if err != nil { - return err - } - - crdName := "aks-periscope-diagnostic" + "-" + hostName - - if err = writeDiagnosticCRD(crdName); err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "apply", "-f", "aks-periscope-diagnostic-crd.yaml") - if err != nil { - return err - } - - return nil -} - -// GetResourceList gets a list of all resources of given type in a specified namespace -func GetResourceList(kubeCmds []string, separator string) ([]string, error) { - outputStreams, err := RunCommandOnContainerWithOutputStreams("kubectl", kubeCmds...) - - if err != nil { - return nil, err - } - - resourceList := outputStreams.Stdout - // If the resource is not found within the cluster, then log a message and do not return any resources. - if len(resourceList) == 0 { - return nil, fmt.Errorf("No '%s' resource found in the cluster for given kubectl command", kubeCmds[1]) - } - - return strings.Split(strings.Trim(resourceList, "\""), separator), nil -} - -func writeDiagnosticCRD(crdName string) error { - f, err := os.Create("aks-periscope-diagnostic-crd.yaml") - if err != nil { - return err - } - defer f.Close() - - _, err = f.WriteString("apiVersion: \"aks-periscope.azure.github.com/v1\"\n") - if err != nil { - return err - } - - _, err = f.WriteString("kind: Diagnostic\n") - if err != nil { - return err - } - - _, err = f.WriteString("metadata:\n") - if err != nil { - return err - } - - _, err = f.WriteString(" name: " + crdName + "\n") - if err != nil { - return err - } - - _, err = f.WriteString(" namespace: aks-periscope\n") - if err != nil { - return err - } - - _, err = f.WriteString("spec:\n") - if err != nil { - return err - } - - _, err = f.WriteString(" networkconfig: \"\"\n") - if err != nil { - return err - } - - _, err = f.WriteString(" networkoutbound: \"\"\n") - if err != nil { - return err - } - - return nil -} +package utils + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "log" + "net" + "net/http" + "net/url" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +const ( + // PublicAzureStorageEndpointSuffix defines default Storage Endpoint Suffix + PublicAzureStorageEndpointSuffix = "core.windows.net" + // AzureStackCloudName references the value that will be under the key "cloud" in azure.json if the application is running on Azure Stack Cloud + // https://kubernetes-sigs.github.io/cloud-provider-azure/install/configs/#azure-stack-configuration -- See this documentation for the well-known cloud name. + AzureStackCloudName = "AzureStackCloud" +) + +// Azure defines Azure configuration +type Azure struct { + Cloud string `json:"cloud"` +} + +// AzureStackCloud defines Azure Stack Cloud configuration +type AzureStackCloud struct { + StorageEndpointSuffix string `json:"storageEndpointSuffix"` +} + +type CommandOutputStreams struct { + Stdout string + Stderr string +} + +// IsAzureStackCloud returns true if the application is running on Azure Stack Cloud +func IsAzureStackCloud() bool { + azureFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azure.json") + if err != nil { + return false + } + var azure Azure + if err = json.Unmarshal([]byte(azureFile), &azure); err != nil { + return false + } + cloud := azure.Cloud + return strings.EqualFold(cloud, AzureStackCloudName) +} + +// IsKubernetesInDocker returns true if the application is running on KubernetesInDocker (kind) +func IsKubernetesInDocker() bool { + //TODO refactor the conditional logic this check guards into a new "KindClusterOperations" type behind an interface + //test the AKS kubeconfig location, if we find something then this isn't a KIND cluster + _, err := RunCommandOnHost("ls", "/var/lib/kubelet/kubeconfig") + if err == nil { + return false + } + + //test the KIND kubeconfig location + _, err = RunCommandOnHost("ls", "/etc/kubernetes/kubelet.conf") + return err == nil +} + +// CopyFileFromHost saves the specified source file to the destination +func CopyFileFromHost(source, destination string) error { + sourceFile, err := RunCommandOnHost("cat", source) + if err != nil { + return fmt.Errorf("unable to retrieve source content: %w", err) + } + if err = WriteToFile(destination, sourceFile); err != nil { + return fmt.Errorf("unable to write source file to destination: %w", err) + } + return nil +} + +// GetStorageEndpointSuffix returns the SES url from the JSON file as a string +func GetStorageEndpointSuffix() string { + if IsAzureStackCloud() { + ascFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azurestackcloud.json") + if err != nil { + log.Fatalf("unable to locate azurestackcloud.json to extract storage endpoint suffix: %v", err) + } + var azurestackcloud AzureStackCloud + if err = json.Unmarshal([]byte(ascFile), &azurestackcloud); err != nil { + log.Fatalf("unable to read azurestackcloud.json file: %v", err) + } + return azurestackcloud.StorageEndpointSuffix + } + return PublicAzureStorageEndpointSuffix +} + +// GetHostName get host name +func GetHostName() (string, error) { + hostname, err := RunCommandOnHost("cat", "/etc/hostname") + if err != nil { + return "", fmt.Errorf("Fail to get host name: %+v", err) + } + + return strings.TrimSuffix(string(hostname), "\n"), nil +} + +//ParseAPIServerFQDNFromKubeConfig parses a kubeConfig and returns the APIServerFQDN +func ParseAPIServerFQDNFromKubeConfig(output string) (string, error) { + lines := strings.Split(output, "\n") + for _, line := range lines { + index := strings.Index(line, "server: ") + if index >= 0 { + fqdn := line[index+len("server: "):] + fqdnurl, err := url.Parse(fqdn) + if err != nil { + return "", fmt.Errorf("Fail to parse url from fqdn: %s", fmt.Sprint(err)+": "+fqdn) + } + + host, _, err := net.SplitHostPort(fqdnurl.Host) + if err != nil { + return "", fmt.Errorf("Fail to split host port from fqdnurl: %s", fmt.Sprint(err)+": "+fqdnurl.String()) + } + + return host, nil + } + } + return "", errors.New("Could not find server definitions in kubeconfig") +} + +//ReadKubeletConfig reads the kubeletConfig from the node +func ReadKubeletConfig() (string, error) { + if IsKubernetesInDocker() { + output, err := RunCommandOnHost("cat", "/etc/kubernetes/kubelet.conf") + if err != nil { + return "", fmt.Errorf("Can't open kubeconfig file at /etc/kubernetes/kubelet.conf\": %+v", err) + } + return output, nil + } else { + output, err := RunCommandOnHost("cat", "/var/lib/kubelet/kubeconfig") + if err != nil { + return "", fmt.Errorf("Can't open kubeconfig file at /var/lib/kubelet/kubeconfig\": %+v", err) + } + return output, nil + } +} + +// GetAPIServerFQDN gets the API Server FQDN from the kubeconfig file +func GetAPIServerFQDN() (string, error) { + output, err := ReadKubeletConfig() + if err != nil { + return "", err + } + fqdn, err := ParseAPIServerFQDNFromKubeConfig(output) + if err != nil { + return "", err + } + return fqdn, nil +} + +// RunCommandOnHost runs a command on host system +func RunCommandOnHost(command string, arg ...string) (string, error) { + args := []string{"--target", "1", "--mount", "--uts", "--ipc", "--net", "--pid"} + args = append(args, "--") + args = append(args, command) + args = append(args, arg...) + + cmd := exec.Command("nsenter", args...) + out, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("Fail to run command on host: %+v", err) + } + + return string(out), nil +} + +// RunCommandOnContainerWithOutputStreams runs a command on container system and returns both the stdout and stderr output streams +func RunCommandOnContainerWithOutputStreams(command string, arg ...string) (CommandOutputStreams, error) { + cmd := exec.Command(command, arg...) + + var stdout bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err := cmd.Run() + outputStreams := CommandOutputStreams{stdout.String(), stderr.String()} + + if err != nil { + return outputStreams, fmt.Errorf("Fail to run command in container: %s", fmt.Sprint(err)+": "+stderr.String()) + } + + return outputStreams, nil +} + +// RunCommandOnContainer runs a command on container system and returns the stdout output stream +func RunCommandOnContainer(command string, arg ...string) (string, error) { + outputStreams, err := RunCommandOnContainerWithOutputStreams(command, arg...) + return outputStreams.Stdout, err +} + +// RunBackgroundCommand starts running a command on a container system in the background and returns its process ID +func RunBackgroundCommand(command string, arg ...string) (int, error) { + cmd := exec.Command(command, arg...) + var out bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = &stderr + err := cmd.Start() + if err != nil { + return 0, fmt.Errorf("Start background command in container exited with message %s: %w", stderr.String(), err) + } + return cmd.Process.Pid, nil +} + +// Finds and kills a process with a given process ID +func KillProcess(pid int) error { + process, err := os.FindProcess(pid) + if err != nil { + return fmt.Errorf("Find process with pid %d to kill: %w", pid, err) + } + if err := process.Kill(); err != nil { + return err + } + return nil +} + +// Tries to issue an HTTP GET request up to maxRetries times +func GetUrlWithRetries(url string, maxRetries int) ([]byte, error) { + retry := 1 + for { + resp, err := http.Get(url) + if err != nil { + if retry == maxRetries { + return nil, fmt.Errorf("Max retries reached for request HTTP Get %s: %w", url, err) + } + retry++ + time.Sleep(5 * time.Second) + } else { + defer resp.Body.Close() + return ioutil.ReadAll(resp.Body) + } + } +} + +// WriteToFile writes data to a file +func WriteToFile(fileName string, data string) error { + if err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm); err != nil { + return fmt.Errorf("Fail to create path directories for file %s: %w", fileName, err) + } + f, err := os.Create(fileName) + if err != nil { + return fmt.Errorf("Fail to create file %s: %+v", fileName, err) + } + defer f.Close() + + _, err = f.Write([]byte(data)) + if err != nil { + return fmt.Errorf("Fail to write data to file %s: %+v", fileName, err) + } + + return nil +} + +// CreateCollectorDir creates a working dir for a collector +func CreateCollectorDir(name string) (string, error) { + hostName, err := GetHostName() + if err != nil { + return "", err + } + + creationTimeStamp, err := GetCreationTimeStamp() + if err != nil { + return "", err + } + + rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "collector", name) + err = os.MkdirAll(rootPath, os.ModePerm) + if err != nil { + return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) + } + + return rootPath, nil +} + +// CreateDiagnosticDir creates a working dir for diagnostic +func CreateDiagnosticDir() (string, error) { + hostName, err := GetHostName() + if err != nil { + return "", err + } + + creationTimeStamp, err := GetCreationTimeStamp() + if err != nil { + return "", err + } + + rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "diagnoser") + err = os.MkdirAll(rootPath, os.ModePerm) + if err != nil { + return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) + } + + return rootPath, nil +} + +// CreateKubeConfigFromServiceAccount creates kubeconfig based on creds in service account +func CreateKubeConfigFromServiceAccount() error { + token, err := RunCommandOnContainer("cat", "/var/run/secrets/kubernetes.io/serviceaccount/token") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-credentials", "aks-periscope-service-account", "--token="+token) + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-cluster", "aks-periscope-cluster", "--server=https://kubernetes.default.svc.cluster.local:443", "--certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-context", "aks-periscope-context", "--user=aks-periscope-service-account", "--cluster=aks-periscope-cluster") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "use-context", "aks-periscope-context") + if err != nil { + return err + } + + return nil +} + +// GetCreationTimeStamp returns a create timestamp +func GetCreationTimeStamp() (string, error) { + creationTimeStamp, err := RunCommandOnContainer("kubectl", "get", "pods", "--all-namespaces", "-l", "app=aks-periscope", "-o", "jsonpath=\"{.items[0].metadata.creationTimestamp}\"") + if err != nil { + return "", err + } + + return creationTimeStamp[1 : len(creationTimeStamp)-1], nil +} + +// WriteToCRD writes diagnostic data to CRD +func WriteToCRD(fileName string, key string) error { + hostName, err := GetHostName() + if err != nil { + return err + } + + crdName := "aks-periscope-diagnostic" + "-" + hostName + + jsonBytes, err := ioutil.ReadFile(fileName) + if err != nil { + return err + } + + patchContent := fmt.Sprintf("{\"spec\":{%q:%q}}", key, string(jsonBytes)) + + _, err = RunCommandOnContainer("kubectl", "-n", "aks-periscope", "patch", "apd", crdName, "-p", patchContent, "--type=merge") + if err != nil { + return err + } + + return nil +} + +// CreateCRD creates a CRD object +func CreateCRD() error { + hostName, err := GetHostName() + if err != nil { + return err + } + + crdName := "aks-periscope-diagnostic" + "-" + hostName + + if err = writeDiagnosticCRD(crdName); err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "apply", "-f", "aks-periscope-diagnostic-crd.yaml") + if err != nil { + return err + } + + return nil +} + +// GetResourceList gets a list of all resources of given type in a specified namespace +func GetResourceList(kubeCmds []string, separator string) ([]string, error) { + outputStreams, err := RunCommandOnContainerWithOutputStreams("kubectl", kubeCmds...) + + if err != nil { + return nil, err + } + + resourceList := outputStreams.Stdout + // If the resource is not found within the cluster, then log a message and do not return any resources. + if len(resourceList) == 0 { + return nil, fmt.Errorf("No '%s' resource found in the cluster for given kubectl command", kubeCmds[1]) + } + + return strings.Split(strings.Trim(resourceList, "\""), separator), nil +} + +func writeDiagnosticCRD(crdName string) error { + f, err := os.Create("aks-periscope-diagnostic-crd.yaml") + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString("apiVersion: \"aks-periscope.azure.github.com/v1\"\n") + if err != nil { + return err + } + + _, err = f.WriteString("kind: Diagnostic\n") + if err != nil { + return err + } + + _, err = f.WriteString("metadata:\n") + if err != nil { + return err + } + + _, err = f.WriteString(" name: " + crdName + "\n") + if err != nil { + return err + } + + _, err = f.WriteString(" namespace: aks-periscope\n") + if err != nil { + return err + } + + _, err = f.WriteString("spec:\n") + if err != nil { + return err + } + + _, err = f.WriteString(" networkconfig: \"\"\n") + if err != nil { + return err + } + + _, err = f.WriteString(" networkoutbound: \"\"\n") + if err != nil { + return err + } + + return nil +} From 7c5c80dcbd34239ef08a3a598307ec6a075756af Mon Sep 17 00:00:00 2001 From: david kydd Date: Wed, 23 Jun 2021 13:46:42 +1200 Subject: [PATCH 16/31] Fix additional merge issues --- cmd/aks-periscope/aks-periscope.go | 2 +- pkg/exporter/azureblob_exporter.go | 7 ------- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index 2732886c..fae3a4bd 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -176,7 +176,7 @@ func selectExporters(allExporters map[string]interfaces.Exporter) []interfaces.E } // runCollectors run the collectors -func runCollectors(collectors []interfaces.Collector, waitgroup *sync.WaitGroup) { +func runCollectors(collectors []interfaces.Collector, collectorGrp *sync.WaitGroup) { for _, c := range collectors { collectorGrp.Add(1) go func(c interfaces.Collector) { diff --git a/pkg/exporter/azureblob_exporter.go b/pkg/exporter/azureblob_exporter.go index 72cace19..8f0797e6 100644 --- a/pkg/exporter/azureblob_exporter.go +++ b/pkg/exporter/azureblob_exporter.go @@ -43,14 +43,7 @@ func (exporter *AzureBlobExporter) GetStorageContainerName(APIServerFQDN string) } else { containerName, err = exporter.GetNonKINDStorageContainerName(APIServerFQDN) } -} -// Export implements the interface method -func (exporter *AzureBlobExporter) Export(files []string) error { - APIServerFQDN, err := utils.GetAPIServerFQDN() - if err != nil { - return "", fmt.Errorf("Fail to build blob container url: %+v", err) - } //TODO run a sanitizer over the final chars in the containerName return containerName, err } From 9b9a5822761baf3f06c08c940ce3f396cc627023 Mon Sep 17 00:00:00 2001 From: david kydd Date: Wed, 23 Jun 2021 15:04:02 +1200 Subject: [PATCH 17/31] overload collectors-config to support both ENABLED_COLLECTORS and COLLECTOR_LIST. New temp deployment config for kind, to be soon replaced with Kustomize version --- cmd/aks-periscope/aks-periscope.go | 65 ++++++---- deployment/aks-periscope-kind.yaml | 200 +++++++++++++++++++++++++++++ 2 files changed, 239 insertions(+), 26 deletions(-) create mode 100644 deployment/aks-periscope-kind.yaml diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index fae3a4bd..fa518e5f 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -22,8 +22,6 @@ func main() { log.Fatalf("Failed to create CRD: %v", err) } - collectorList := strings.Fields(os.Getenv("COLLECTOR_LIST")) - // Copies self-signed cert information to container if application is running on Azure Stack Cloud. // We need the cert in order to communicate with the storage account. if utils.IsAzureStackCloud() { @@ -118,33 +116,48 @@ func initializeComponents() ([]interfaces.Collector, []interfaces.Diagnoser, []i func selectCollectors(allCollectorsByName map[string]interfaces.Collector) []interfaces.Collector { collectors := []interfaces.Collector{} - collectors = append(collectors, containerLogsCollector) - collectors = append(collectors, dnsCollector) - collectors = append(collectors, kubeObjectsCollector) - collectors = append(collectors, networkOutboundCollector) + //read list of collectors that are enabled + var enabledCollectorNames []string - if contains(collectorList, "connectedCluster") { - collectors = append(collectors, helmCollector) + //TODO try get partners to move from COLLECTOR_LIST to use ENABLED_COLLECTORS instead, for now COLLECTOR_LIST takes precedence if defined + collectorList := strings.Fields(os.Getenv("COLLECTOR_LIST")) + if collectorList != nil { + enabledCollectorNames = selectCollectorsUsingCollectorList(collectorList) } else { - collectors = append(collectors, systemLogsCollector) - collectors = append(collectors, ipTablesCollector) - collectors = append(collectors, nodeLogsCollector) - collectors = append(collectors, kubeletCmdCollector) - collectors = append(collectors, systemPerfCollector) + enabledCollectorNames = strings.Fields(os.Getenv("ENABLED_COLLECTORS")) } - if contains(collectorList, "OSM") { - collectors = append(collectors, osmCollector) + enabledCollectorNames = strings.Fields(os.Getenv("ENABLED_COLLECTORS")) + + for _, collectorName := range enabledCollectorNames { + collectors = append(collectors, allCollectorsByName[collectorName]) } - //read list of collectors that are enabled - enabledCollectorNames := strings.Fields(os.Getenv("ENABLED_COLLECTORS")) + return collectors +} + +//selectCollectorsUsingCollectorList use clusterType +func selectCollectorsUsingCollectorList(collectorList []string) []string { + var enabledCollectorNames []string - for _, collector := range enabledCollectorNames { - collectors = append(collectors, allCollectorsByName[collector]) + //select default collectors + enabledCollectorNames = append(enabledCollectorNames, + "dns", "containerlogs", "kubeobjects", "networkoutbound") + + if contains(collectorList, "connectedCluster") { + //select connectedCluster colelctors + enabledCollectorNames = append(enabledCollectorNames, "helm") + } else { + //select non-connectedCluster collectors + enabledCollectorNames = append(enabledCollectorNames, + "iptables", "kubeletcmd", "nodelogs", "systemlogs", "systemperf") + } + if contains(collectorList, "OSM") { + //select OSM collectors + enabledCollectorNames = append(enabledCollectorNames, "osm") } - return collectors + return enabledCollectorNames } // selectDiagnosers select the diagnosers to run @@ -154,8 +167,8 @@ func selectDiagnosers(allDiagnosersByName map[string]interfaces.Diagnoser) []int //read list of diagnosers that are enabled enabledDiagnoserNames := strings.Fields(os.Getenv("ENABLED_DIAGNOSERS")) - for _, diagnoser := range enabledDiagnoserNames { - diagnosers = append(diagnosers, allDiagnosersByName[diagnoser]) + for _, diagnoserName := range enabledDiagnoserNames { + diagnosers = append(diagnosers, allDiagnosersByName[diagnoserName]) } return diagnosers @@ -168,8 +181,8 @@ func selectExporters(allExporters map[string]interfaces.Exporter) []interfaces.E //read list of collectors that are enabled enabledExporterNames := strings.Fields(os.Getenv("ENABLED_EXPORTERS")) - for _, exporter := range enabledExporterNames { - exporters = append(exporters, allExporters[exporter]) + for _, exporterName := range enabledExporterNames { + exporters = append(exporters, allExporters[exporterName]) } return exporters @@ -224,8 +237,8 @@ func runDiagnosers(diagnosers []interfaces.Diagnoser, diagnoserGrp *sync.WaitGro // runExporters run the exporters func runExporters(exporters []interfaces.Exporter, filesToExport []string) error { var result error - for _, exporter := range exporters { - if err := exporter.Export(filesToExport); err != nil { + for _, e := range exporters { + if err := e.Export(filesToExport); err != nil { result = multierror.Append(result, err) } } diff --git a/deployment/aks-periscope-kind.yaml b/deployment/aks-periscope-kind.yaml new file mode 100644 index 00000000..5f261457 --- /dev/null +++ b/deployment/aks-periscope-kind.yaml @@ -0,0 +1,200 @@ +# Note: this file is deprecated and will be removed in a future release +# Use Kustomize to deploy the project +apiVersion: v1 +kind: Namespace +metadata: + name: aks-periscope +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: aks-periscope-service-account + namespace: aks-periscope +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: aks-periscope-role +rules: +- apiGroups: ["","metrics.k8s.io"] + resources: ["pods", "pods/portforward", "nodes", "secrets"] + verbs: ["get", "watch", "list", "create"] +- apiGroups: ["aks-periscope.azure.github.com"] + resources: ["diagnostics"] + verbs: ["get", "watch", "list", "create", "patch"] +- apiGroups: ["admissionregistration.k8s.io"] + resources: ["mutatingwebhookconfigurations", "validatingwebhookconfigurations"] + verbs: ["get", "list", "watch"] +- apiGroups: ["apiextensions.k8s.io"] + resources: ["customresourcedefinitions"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: aks-periscope-role-binding +subjects: +- kind: ServiceAccount + name: aks-periscope-service-account + namespace: aks-periscope +roleRef: + kind: ClusterRole + name: aks-periscope-role + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: aks-periscope-role-binding-view +subjects: +- kind: ServiceAccount + name: aks-periscope-service-account + namespace: aks-periscope +roleRef: + kind: ClusterRole + name: view + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: aks-periscope + namespace: aks-periscope + labels: + app: aks-periscope +spec: + selector: + matchLabels: + app: aks-periscope + template: + metadata: + labels: + app: aks-periscope + spec: + serviceAccountName: aks-periscope-service-account + hostPID: true + nodeSelector: + beta.kubernetes.io/os: linux + containers: + - name: aks-periscope + image: aksrepos.azurecr.io/staging/aks-periscope:v0.3 + securityContext: + privileged: true + imagePullPolicy: Always + envFrom: + - configMapRef: + name: containerlogs-config + - configMapRef: + name: kubeobjects-config + - configMapRef: + name: nodelogs-config + - configMapRef: + name: collectors-config + - configMapRef: + name: diagnosers-config + - configMapRef: + name: exporters-config + - secretRef: + name: azureblob-secret + volumeMounts: + - mountPath: /aks-periscope + name: aks-periscope-storage + resources: + requests: + memory: "500Mi" + cpu: "250m" + limits: + memory: "2000Mi" + cpu: "1000m" + volumes: + - name: aks-periscope-storage + hostPath: + path: /var/log/aks-periscope + type: DirectoryOrCreate +--- +apiVersion: v1 +kind: Secret +metadata: + name: azureblob-secret + namespace: aks-periscope +type: Opaque +data: + AZURE_BLOB_ACCOUNT_NAME: # + AZURE_BLOB_SAS_KEY: # +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: containerlogs-config + namespace: aks-periscope +data: + DIAGNOSTIC_CONTAINERLOGS_LIST: kube-system +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: kubeobjects-config + namespace: aks-periscope +data: + DIAGNOSTIC_KUBEOBJECTS_LIST: kube-system/pod kube-system/service kube-system/deployment +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: nodelogs-config + namespace: aks-periscope +data: + DIAGNOSTIC_NODELOGS_LIST: /var/log/azure/cluster-provision.log /var/log/cloud-init.log +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: collectors-config + namespace: aks-periscope +data: + ENABLED_COLLECTORS: dns containerlogs iptables kubeletcmd kubeobjects networkoutbound systemlogs +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: diagnosers-config + namespace: aks-periscope +data: + ENABLED_DIAGNOSERS: networkconfig networkoutbound +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: exporters-config + namespace: aks-periscope +data: + ENABLED_EXPORTERS: azureblob +--- +apiVersion: apiextensions.k8s.io/v1beta1 +kind: CustomResourceDefinition +metadata: + name: diagnostics.aks-periscope.azure.github.com +spec: + group: aks-periscope.azure.github.com + versions: + - name: v1 + served: true + storage: true + validation: + openAPIV3Schema: + type: object + properties: + spec: + type: object + properties: + dns: + type: string + networkoutbound: + type: string + scope: Namespaced + names: + plural: diagnostics + singular: diagnostic + kind: Diagnostic + shortNames: + - apd From 915b745deea6cc09b80eba150a3521a0790955b7 Mon Sep 17 00:00:00 2001 From: david kydd Date: Wed, 23 Jun 2021 15:05:18 +1200 Subject: [PATCH 18/31] Remove helm --- charts/azure-k8s-periscope/.helmignore | 22 -- charts/azure-k8s-periscope/Chart.yaml | 8 - .../templates/azure-k8s-periscope.yaml | 210 ------------------ charts/azure-k8s-periscope/values.yaml | 8 - 4 files changed, 248 deletions(-) delete mode 100644 charts/azure-k8s-periscope/.helmignore delete mode 100644 charts/azure-k8s-periscope/Chart.yaml delete mode 100644 charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml delete mode 100644 charts/azure-k8s-periscope/values.yaml diff --git a/charts/azure-k8s-periscope/.helmignore b/charts/azure-k8s-periscope/.helmignore deleted file mode 100644 index 50af0317..00000000 --- a/charts/azure-k8s-periscope/.helmignore +++ /dev/null @@ -1,22 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/charts/azure-k8s-periscope/Chart.yaml b/charts/azure-k8s-periscope/Chart.yaml deleted file mode 100644 index 19108732..00000000 --- a/charts/azure-k8s-periscope/Chart.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: v1 -appVersion: "1.0" -description: A Helm chart for azure-k8s-periscope -home: https://github.com/Azure/aks-periscope/charts/azure-k8s-periscope -name: azure-k8s-periscope -sources: -- https:https://github.com/Azure/aks-periscope/charts/azure-k8s-periscope -version: 0.3.0 diff --git a/charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml b/charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml deleted file mode 100644 index 0f7246e9..00000000 --- a/charts/azure-k8s-periscope/templates/azure-k8s-periscope.yaml +++ /dev/null @@ -1,210 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: aks-periscope ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: aks-periscope-service-account - namespace: aks-periscope ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: aks-periscope-role -rules: -- apiGroups: ["","metrics.k8s.io"] - resources: ["pods", "nodes"] - verbs: ["get", "watch", "list"] -- apiGroups: ["aks-periscope.azure.github.com"] - resources: ["diagnostics"] - verbs: ["get", "watch", "list", "create", "patch"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: aks-periscope-role-binding -subjects: -- kind: ServiceAccount - name: aks-periscope-service-account - namespace: aks-periscope -roleRef: - kind: ClusterRole - name: aks-periscope-role - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: aks-periscope-role-binding-view -subjects: -- kind: ServiceAccount - name: aks-periscope-service-account - namespace: aks-periscope -roleRef: - kind: ClusterRole - name: view - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: aks-periscope - namespace: aks-periscope - labels: - app: aks-periscope -spec: - selector: - matchLabels: - app: aks-periscope - template: - metadata: - labels: - app: aks-periscope - spec: - serviceAccountName: aks-periscope-service-account - hostPID: true - nodeSelector: - beta.kubernetes.io/os: linux - containers: - - name: aks-periscope - image: aksrepos.azurecr.io/staging/aks-periscope:v0.3 - securityContext: - privileged: true - imagePullPolicy: Always - envFrom: - - configMapRef: - name: containerlogs-config - - configMapRef: - name: kubeobjects-config - - configMapRef: - name: nodelogs-config - - configMapRef: - name: clustertypes-config - - secretRef: - name: azureblob-secret - volumeMounts: - - mountPath: /aks-periscope - name: aks-periscope-storage - resources: - requests: - memory: "500Mi" - cpu: "250m" - limits: - memory: "2000Mi" - cpu: "1000m" - volumes: - - name: aks-periscope-storage - hostPath: - path: /var/log/aks-periscope - type: DirectoryOrCreate ---- -apiVersion: v1 -kind: Secret -metadata: - name: azureblob-secret - namespace: aks-periscope -type: Opaque -data: - {{- if ne .Values.global.accountName "" }} - AZURE_BLOB_ACCOUNT_NAME: {{ .Values.global.accountName | b64enc }} - AZURE_BLOB_SAS_KEY: {{ .Values.global.saskey | b64enc }} - {{- end }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: containerlogs-config - namespace: aks-periscope -data: - {{- if eq .Values.global.clusterType "connectedClusters" }} - DIAGNOSTIC_CONTAINERLOGS_LIST: azure-arc - {{- else }} - DIAGNOSTIC_CONTAINERLOGS_LIST: kube-system - {{- end }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: kubeobjects-config - namespace: aks-periscope -data: - {{- if eq .Values.global.clusterType "connectedClusters" }} - DIAGNOSTIC_KUBEOBJECTS_LIST: azure-arc/pod azure-arc/service azure-arc/deployment azure-arc/crd - {{- else }} - DIAGNOSTIC_KUBEOBJECTS_LIST: kube-system/pod kube-system/service kube-system/deployment - {{- end }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: collectors-config - namespace: aks-periscope -data: - ENABLED_COLLECTORS: dns containerlogs iptables kubeletcmd kubeobjects networkoutbound nodelogs systemlogs systemperf ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: diagnosers-config - namespace: aks-periscope -data: - ENABLED_DIAGNOSERS: networkconfig networkoutbound ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: exporters-config - namespace: aks-periscope -data: - ENABLED_EXPORTERS: azureblob ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodelogs-config - namespace: aks-periscope -data: - {{- if eq .Values.global.clusterType "connectedClusters" }} - DIAGNOSTIC_NODELOGS_LIST: "" - {{- else }} - DIAGNOSTIC_KUBEOBJECTS_LIST: kube-system/pod kube-system/service kube-system/deployment - {{- end }} ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: clustertypes-config - namespace: aks-periscope -data: - CLUSTER_TYPE: {{ .Values.global.clusterType }} ---- -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - name: diagnostics.aks-periscope.azure.github.com -spec: - group: aks-periscope.azure.github.com - versions: - - name: v1 - served: true - storage: true - validation: - openAPIV3Schema: - type: object - properties: - spec: - type: object - properties: - dns: - type: string - networkoutbound: - type: string - scope: Namespaced - names: - plural: diagnostics - singular: diagnostic - kind: Diagnostic - shortNames: - - apd \ No newline at end of file diff --git a/charts/azure-k8s-periscope/values.yaml b/charts/azure-k8s-periscope/values.yaml deleted file mode 100644 index afc16a63..00000000 --- a/charts/azure-k8s-periscope/values.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Default values for Azure k8s periscope -# This is a YAML-formatted file. -# Declare variables to be passed into your templates. - -global: - clusterType: "managedClusters" - accountName: "" - saskey: "" \ No newline at end of file From 70329762d20364f6098988f0f5a203b3dcd44482 Mon Sep 17 00:00:00 2001 From: david kydd Date: Wed, 23 Jun 2021 15:06:13 +1200 Subject: [PATCH 19/31] Remove dev periscope yaml --- deployment/dev/aks-periscope.yaml | 192 ------------------------------ 1 file changed, 192 deletions(-) delete mode 100644 deployment/dev/aks-periscope.yaml diff --git a/deployment/dev/aks-periscope.yaml b/deployment/dev/aks-periscope.yaml deleted file mode 100644 index 5f8a2d16..00000000 --- a/deployment/dev/aks-periscope.yaml +++ /dev/null @@ -1,192 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: aks-periscope ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: aks-periscope-service-account - namespace: aks-periscope ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: aks-periscope-role -rules: - - apiGroups: ["","metrics.k8s.io"] - resources: ["pods", "nodes"] - verbs: ["get", "watch", "list"] - - apiGroups: ["aks-periscope.azure.github.com"] - resources: ["diagnostics"] - verbs: ["get", "watch", "list", "create", "patch"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: aks-periscope-role-binding -subjects: - - kind: ServiceAccount - name: aks-periscope-service-account - namespace: aks-periscope -roleRef: - kind: ClusterRole - name: aks-periscope-role - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: aks-periscope-role-binding-view -subjects: - - kind: ServiceAccount - name: aks-periscope-service-account - namespace: aks-periscope -roleRef: - kind: ClusterRole - name: view - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: aks-periscope - namespace: aks-periscope - labels: - app: aks-periscope -spec: - selector: - matchLabels: - app: aks-periscope - template: - metadata: - labels: - app: aks-periscope - spec: - serviceAccountName: aks-periscope-service-account - hostPID: true - nodeSelector: - beta.kubernetes.io/os: linux - containers: - - name: aks-periscope - image: aksrepos.azurecr.io/staging/aks-periscope:v0.3 - securityContext: - privileged: true - imagePullPolicy: Always - envFrom: - - configMapRef: - name: containerlogs-config - - configMapRef: - name: kubeobjects-config - - configMapRef: - name: nodelogs-config - - configMapRef: - name: collectors-config - - configMapRef: - name: diagnosers-config - - configMapRef: - name: exporters-config - - secretRef: - name: azureblob-secret - volumeMounts: - - mountPath: /aks-periscope - name: aks-periscope-storage - resources: - requests: - memory: "500Mi" - cpu: "250m" - limits: - memory: "2000Mi" - cpu: "1000m" - volumes: - - name: aks-periscope-storage - hostPath: - path: /var/log/aks-periscope - type: DirectoryOrCreate ---- -apiVersion: v1 -kind: Secret -metadata: - name: azureblob-secret - namespace: aks-periscope -type: Opaque -data: - AZURE_BLOB_ACCOUNT_NAME: # - AZURE_BLOB_SAS_KEY: # ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: containerlogs-config - namespace: aks-periscope -data: - DIAGNOSTIC_CONTAINERLOGS_LIST: kube-system ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: kubeobjects-config - namespace: aks-periscope -data: - DIAGNOSTIC_KUBEOBJECTS_LIST: kube-system/pod kube-system/service kube-system/deployment ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodelogs-config - namespace: aks-periscope -data: - DIAGNOSTIC_NODELOGS_LIST: /var/log/azure/cluster-provision.log /var/log/cloud-init.log ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: collectors-config - namespace: aks-periscope -data: - ENABLED_COLLECTORS: dns containerlogs iptables kubeletcmd kubeobjects networkoutbound nodelogs systemlogs systemperf ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: diagnosers-config - namespace: aks-periscope -data: - ENABLED_DIAGNOSERS: networkconfig networkoutbound ---- -apiVersion: v1 -kind: ConfigMap -metadata: - name: exporters-config - namespace: aks-periscope -data: - ENABLED_EXPORTERS: azureblob ---- -apiVersion: apiextensions.k8s.io/v1beta1 -kind: CustomResourceDefinition -metadata: - name: diagnostics.aks-periscope.azure.github.com -spec: - group: aks-periscope.azure.github.com - versions: - - name: v1 - served: true - storage: true - validation: - openAPIV3Schema: - type: object - properties: - spec: - type: object - properties: - dns: - type: string - networkoutbound: - type: string - scope: Namespaced - names: - plural: diagnostics - singular: diagnostic - kind: Diagnostic - shortNames: - - apd \ No newline at end of file From 5a1df462be1ac150c3a21086dc103599793f0d9d Mon Sep 17 00:00:00 2001 From: david kydd Date: Thu, 24 Jun 2021 10:31:24 +1200 Subject: [PATCH 20/31] Handle kubeadm (perhaps poorly) and aks-engine fqdn -> containerName conversion --- pkg/exporter/azureblob_exporter.go | 19 ++++++++++++++----- pkg/exporter/azureblob_exporter_test.go | 4 +++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/pkg/exporter/azureblob_exporter.go b/pkg/exporter/azureblob_exporter.go index 8f0797e6..91c3563a 100644 --- a/pkg/exporter/azureblob_exporter.go +++ b/pkg/exporter/azureblob_exporter.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "log" + "math" "net/url" "os" "strings" @@ -49,16 +50,24 @@ func (exporter *AzureBlobExporter) GetStorageContainerName(APIServerFQDN string) } func (exporter *AzureBlobExporter) GetKubernetesInDockerStorageContainerName(APIServerFQDN string) (string, error) { - return APIServerFQDN, nil + containerName := strings.Replace(APIServerFQDN, ".", "-", -1) + + return containerName, nil } func (exporter *AzureBlobExporter) GetNonKINDStorageContainerName(APIServerFQDN string) (string, error) { containerName := strings.Replace(APIServerFQDN, ".", "-", -1) - len := strings.Index(containerName, "-hcp-") - if len == -1 { - len = maxContainerNameLength + + //TODO I really dont like the line below, it makes for weird behaviour if e.g. .hcp. or -hcp- is in the fqdn for some reason other than being auto-added by AKS + length := strings.Index(containerName, "-hcp-") + + if length == -1 { + maxLength := len(containerName) + length = int(math.Min(float64(maxLength), float64(maxContainerNameLength))) } - containerName = strings.TrimRight(containerName[:len], "-") + + containerName = containerName[:length] + containerName = strings.TrimRight(containerName, "-") return containerName, nil } diff --git a/pkg/exporter/azureblob_exporter_test.go b/pkg/exporter/azureblob_exporter_test.go index 5bcfa30c..022c1e23 100644 --- a/pkg/exporter/azureblob_exporter_test.go +++ b/pkg/exporter/azureblob_exporter_test.go @@ -8,7 +8,9 @@ var getStorageContainerNameTests = []struct { apiServerFqdn string containerName string }{ - {"dakydd-test-eastus-dns-d0daedb9.hcp.eastus.azmk8s.io", "dakydd-test-eastus-dns-d0daedb9"}, + {"standard-aks-fqdn-dns-d0daedb9.hcp.eastus.azmk8s.io", "standard-aks-fqdn-dns-d0daedb9"}, + {"aks-engine-fqdn.westeurope.cloudapp.azure.com", "aks-engine-fqdn-westeurope-cloudapp-azure-com"}, + {"extra.super.duper.long.apiserverfqdn.that.has.more.than.63.characters", "extra-super-duper-long-apiserverfqdn-that-has-more-than-63-char"}, } // TestGetNonKINDStorageContainerName get storage container name for non kind cluster From 92a767ce26a0df2dea32cd58b4c35c35abeb6e7c Mon Sep 17 00:00:00 2001 From: david kydd Date: Thu, 24 Jun 2021 11:11:22 +1200 Subject: [PATCH 21/31] additional test cases --- pkg/exporter/azureblob_exporter_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pkg/exporter/azureblob_exporter_test.go b/pkg/exporter/azureblob_exporter_test.go index 022c1e23..e68eddb6 100644 --- a/pkg/exporter/azureblob_exporter_test.go +++ b/pkg/exporter/azureblob_exporter_test.go @@ -10,6 +10,8 @@ var getStorageContainerNameTests = []struct { }{ {"standard-aks-fqdn-dns-d0daedb9.hcp.eastus.azmk8s.io", "standard-aks-fqdn-dns-d0daedb9"}, {"aks-engine-fqdn.westeurope.cloudapp.azure.com", "aks-engine-fqdn-westeurope-cloudapp-azure-com"}, + {"additional.aks-engine-fqdn.db839748.eastus.cloudapp.azure.com", "additional-aks-engine-fqdn-db839748-eastus-cloudapp-azure-com"}, + {"10.255.255.5", "10-255-255-5"}, // aks-engine clusters will currently return an IPv4 address as what Periscope is calling the APIServerFQDN {"extra.super.duper.long.apiserverfqdn.that.has.more.than.63.characters", "extra-super-duper-long-apiserverfqdn-that-has-more-than-63-char"}, } From 12bbfd370e7bebad9e4222c4c74302f2769ebdac Mon Sep 17 00:00:00 2001 From: david kydd Date: Fri, 25 Jun 2021 16:19:52 +1200 Subject: [PATCH 22/31] new containerlogs collector for containerd clusters - still needs to be plumbed into main app --- .../containerlogs_collector_containerd.go | 150 ++++++++++++++++++ ...containerlogs_collector_containerd_test.go | 88 ++++++++++ 2 files changed, 238 insertions(+) create mode 100644 pkg/collector/containerlogs_collector_containerd.go create mode 100644 pkg/collector/containerlogs_collector_containerd_test.go diff --git a/pkg/collector/containerlogs_collector_containerd.go b/pkg/collector/containerlogs_collector_containerd.go new file mode 100644 index 00000000..79cfa7ac --- /dev/null +++ b/pkg/collector/containerlogs_collector_containerd.go @@ -0,0 +1,150 @@ +package collector + +import ( + "os" + "path" + "strings" + + "github.com/Azure/aks-periscope/pkg/interfaces" + "github.com/Azure/aks-periscope/pkg/utils" +) + +// ContainerLogsCollector defines a ContainerLogs Collector struct for containerd clusters +type ContainerLogsCollectorContainerD struct { + BaseCollector +} + +var _ interfaces.Collector = &ContainerLogsCollectorContainerD{} + +// NewContainerLogsCollector is a constructor +func NewContainerLogsCollectorContainerD(exporters []interfaces.Exporter) *ContainerLogsCollector { + return &ContainerLogsCollector{ + BaseCollector: BaseCollector{ + collectorType: ContainerLogs, + exporters: exporters, + }, + } +} + +type ContainerLog struct { + podname string + namespace string + containerName string + containeruid string + filepath string +} + +type ContainerLogSelector struct { + namespace string + containerNamePrefix string +} + +// NOTE pod log files not currently used are in sub-directories of /var/log/pods and have format NAMESPACE_PODNAME_SOMEGUIDMEANINGTBD/CONTAINERNAME/#.log, +// where I assume # is incremented and a new log created for each container restart (starts at # == 0) +const containerLogDirectory = "/var/log/containers" + +//var/log/pods and /var/logs/containers +// Collect implements the interface method +func (collector *ContainerLogsCollectorContainerD) Collect() error { + selectorStrings := strings.Fields(os.Getenv("DIAGNOSTIC_CONTAINERLOGS_LIST")) + + containerLogSelectors := collector.ParseContainerLogSelectors(selectorStrings) + + allContainersEverRun, err := collector.GetAllContainerLogFilesThatHaveEverRunOnHost() + if err != nil { + return err + } + + containerLogs := collector.ParseContainerLogFilenames(containerLogDirectory, allContainersEverRun) + + containerLogsToCollect := collector.DetermineContainerLogsToCollect(containerLogs, containerLogSelectors) + + for _, containerLog := range containerLogsToCollect { + collector.AddToCollectorFiles(containerLog.filepath) + } + + return nil +} + +//DetermineContainerLogsToCollect applies the containerLogSelectors to filter the list of containerLogs to be collected +func (collector *ContainerLogsCollectorContainerD) DetermineContainerLogsToCollect(allContainers []ContainerLog, selectors []ContainerLogSelector) []ContainerLog { + var selectedContainerLogs []ContainerLog + for _, containerLog := range allContainers { + for _, selector := range selectors { + if collector.DoesSelectorSelectContainerLog(containerLog, selector){ + selectedContainerLogs = append(selectedContainerLogs, containerLog) + } + } + } + return selectedContainerLogs +} + +//DoesSelectorSelectContainerLog contains the logic for determining if a selector selects a containerLog for collecting +func (collector *ContainerLogsCollectorContainerD) DoesSelectorSelectContainerLog(containerLog ContainerLog, selector ContainerLogSelector) bool { + return containerLog.namespace == selector.namespace && strings.HasPrefix(containerLog.containerName, selector.containerNamePrefix) +} + +//ParseContainerLogSelectors parses selectorStrings into component struct +//TODO allow the raw struct objects to be defined directly in the deployment yaml and add additional required logic to DoesSelectorSelectContainerLog +func (collector *ContainerLogsCollectorContainerD) ParseContainerLogSelectors(selectorStrings []string) []ContainerLogSelector { + var containerLogSelectors []ContainerLogSelector + + for _, selectorString := range selectorStrings { + selectorStringParts := strings.Split(selectorString, "/") + + if len(selectorStringParts) == 1{ + containerLogSelectors = append(containerLogSelectors, ContainerLogSelector{ + namespace: selectorStringParts[0], + })} + if len(selectorStringParts) == 2{ + containerLogSelectors = append(containerLogSelectors, ContainerLogSelector{ + namespace: selectorStringParts[0], + containerNamePrefix: selectorStringParts[1], + })} + } + + return containerLogSelectors +} + +//ParseContainerLogFilenames parses container log filenames into component struct +func (collector *ContainerLogsCollectorContainerD) ParseContainerLogFilenames(directoryPath string, containerLogFilenames []string) []ContainerLog { + + var containerLogs []ContainerLog + + //container log files are in format PODNAME_NAMESPACE_CONTAINERNAME-CONTAINERUID.log + //TODO check that CONTAINERNAME and CONTAINERUID are correct terminology + for _, logFile := range containerLogFilenames { + logfileSplitOnDot := strings.Split(logFile, ".") + logFileSplitOnUnderscore := strings.Split(logfileSplitOnDot[0], "_") + containerNameWithIDSplitOnDash := strings.Split(logFileSplitOnUnderscore[2], "-") + + //uid is the last value + indexOfUid := len(containerNameWithIDSplitOnDash)-1 + + //containerName is everything except the last value, joined + containerName := strings.Join(containerNameWithIDSplitOnDash[0:indexOfUid], "") + + containerLogs = append(containerLogs, ContainerLog{ + podname: logFileSplitOnUnderscore[0], + namespace: logFileSplitOnUnderscore[1], + containerName: containerName, + containeruid: containerNameWithIDSplitOnDash[indexOfUid], + filepath: path.Join(directoryPath, logFile), + }) + } + + return containerLogs +} + +//GetAllContainerLogFilesThatHaveEverRunOnHost gets the list of log files for all containers that have ever run on the host +func (collector *ContainerLogsCollectorContainerD) GetAllContainerLogFilesThatHaveEverRunOnHost() ([]string, error){ + output, err := utils.RunCommandOnHost("ls", containerLogDirectory) + if err != nil { + return nil, err + } + + containers := strings.Split(output, "\n") + containers = containers[:len(containers)-1] + + return containers, nil +} diff --git a/pkg/collector/containerlogs_collector_containerd_test.go b/pkg/collector/containerlogs_collector_containerd_test.go new file mode 100644 index 00000000..a394eca2 --- /dev/null +++ b/pkg/collector/containerlogs_collector_containerd_test.go @@ -0,0 +1,88 @@ +package collector + +import ( + "strings" + "testing" +) + +var parseContainerLogSelectorTests = []struct { + selectorString []string + containerLogSelector []ContainerLogSelector +}{ + {[]string{"kube-system"}, []ContainerLogSelector{{namespace: "kube-system", containerNamePrefix: ""}}}, + {[]string{"kube-system/metrics-agent"}, []ContainerLogSelector{{namespace: "kube-system", containerNamePrefix: "metrics-agent"}}}, + {[]string{ + "kube-system/metrics-agent", + "azure-arc/fluent-bit"}, + []ContainerLogSelector{ + {namespace: "kube-system", containerNamePrefix: "metrics-agent"}, + {namespace: "azure-arc", containerNamePrefix: "fluent-bit"}}}, +} + +// TestParseContainerLogSelectors test container log selector parser +func TestParseContainerLogSelectors(t *testing.T) { + for _, tt := range parseContainerLogSelectorTests { + testName := strings.Join(tt.selectorString, " ") + t.Run(testName, func(t *testing.T) { + var collector = &ContainerLogsCollectorContainerD{} + selectors := collector.ParseContainerLogSelectors(tt.selectorString) + + for i, selector := range selectors { + if selector != tt.containerLogSelector[i] { + t.Errorf("Sprintf(%q, &collector) => %q, want %q", + testName, selector, tt.containerLogSelector[i]) + } + } + }) + } +} + +var ParseContainerLogFilenamesTests = []struct { + directoryPath string + containerLogFilenames []string + containerLogs []ContainerLog +}{ + {containerLogDirectory, []string{"kube-apiserver-kind-control-plane_kube-system_kube-apiserver-341694b9f16b51d7afc7c0a68d2ea44f31f6c2dad550d56d8d8dd9304a27b01f.log"}, + []ContainerLog{{ + podname: "kube-apiserver-kind-control-plane", + namespace: "kube-system", + containerName: "kube-apiserver", + containeruid: "341694b9f16b51d7afc7c0a68d2ea44f31f6c2dad550d56d8d8dd9304a27b01f", + filepath: "/var/logs/containers/kube-apiserver-kind-control-plane_kube-system_kube-apiserver-341694b9f16b51d7afc7c0a68d2ea44f31f6c2dad550d56d8d8dd9304a27b01f.log"}}}, + {containerLogDirectory, []string{ + "metrics-agent-5b9b94754f-lv4gm_azure-arc_metrics-agent-b2aa15b3f9c7c395539281bd94ee0725706c7228439a8870494804e042a54d7d.log", + "resource-sync-agent-f8c7c6b6b-zdqkc_azure-arc_fluent-bit-46d805fc1d05986cce94f845401949c241fa577e4312df45e5bfb39b27dc226c.log"}, + + []ContainerLog{{ + podname: "metrics-agent-5b9b94754f-lv4gm", + namespace: "azure-arc", + containerName: "metrics-agent", + containeruid: "b2aa15b3f9c7c395539281bd94ee0725706c7228439a8870494804e042a54d7d", + filepath: "/var/logs/containers/kube-apiserver-kind-control-plane_kube-system_kube-apiserver-341694b9f16b51d7afc7c0a68d2ea44f31f6c2dad550d56d8d8dd9304a27b01f.log"}, { + + podname: "resource-sync-agent-f8c7c6b6b-zdqkc", + namespace: "azure-arc", + containerName: "fluent-bit", + containeruid: "46d805fc1d05986cce94f845401949c241fa577e4312df45e5bfb39b27dc226c", + filepath: "/var/logs/containers/kube-apiserver-kind-control-plane_kube-system_kube-apiserver-341694b9f16b51d7afc7c0a68d2ea44f31f6c2dad550d56d8d8dd9304a27b01f.log"}}}, +} + +// TestParseContainerLogSelectors test container log selector parser +func TestParseContainerLogFilenames(t *testing.T) { + for _, tt := range parseContainerLogSelectorTests { + testName := strings.Join(tt.selectorString, " ") + t.Run(testName, func(t *testing.T) { + var collector = &ContainerLogsCollectorContainerD{} + selectors := collector.ParseContainerLogSelectors(tt.selectorString) + + for i, selector := range selectors { + if selector != tt.containerLogSelector[i] { + t.Errorf("Sprintf(%q, &collector) => %q, want %q", + testName, selector, tt.containerLogSelector[i]) + } + } + }) + } +} + +//TODO tests for DetermineContainerLogsToCollect and DoesSelectorSelectContainerLog From 0a5aa64f67ce2dc80cd789f4c5a8456acc75e2a8 Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 26 Jun 2021 09:53:08 +1200 Subject: [PATCH 23/31] register containerD collector --- pkg/collector/collector.go | 4 +++- pkg/collector/containerlogs_collector_containerd.go | 7 +++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pkg/collector/collector.go b/pkg/collector/collector.go index a7624dfc..bafb572e 100644 --- a/pkg/collector/collector.go +++ b/pkg/collector/collector.go @@ -17,6 +17,8 @@ const ( DNS Type = iota // ContainerLogs defines ContainerLogs Collector Type ContainerLogs + // ContainerLogs defines ContainerLogs Collector Type for clusters using ContainerD + ContainerLogsContainerD //Helm defines Helm Collector Type Helm // IPTables defines IPTables Collector Type @@ -39,7 +41,7 @@ const ( // Name returns type name func (t Type) name() string { - return [...]string{"dns", "containerlogs", "helm", "iptables", "kubeletcmd", "kubeobjects", "networkoutbound", "nodelogs", "osm", "systemlogs", "systemperf"}[t] + return [...]string{"dns", "containerlogs", "containerlogscontainerd", "helm", "iptables", "kubeletcmd", "kubeobjects", "networkoutbound", "nodelogs", "osm", "systemlogs", "systemperf"}[t] } // BaseCollector defines Base Collector diff --git a/pkg/collector/containerlogs_collector_containerd.go b/pkg/collector/containerlogs_collector_containerd.go index 79cfa7ac..4ac66cd8 100644 --- a/pkg/collector/containerlogs_collector_containerd.go +++ b/pkg/collector/containerlogs_collector_containerd.go @@ -9,18 +9,18 @@ import ( "github.com/Azure/aks-periscope/pkg/utils" ) -// ContainerLogsCollector defines a ContainerLogs Collector struct for containerd clusters +// ContainerLogsCollectorContainerD defines a ContainerLogs Collector struct for containerd clusters type ContainerLogsCollectorContainerD struct { BaseCollector } var _ interfaces.Collector = &ContainerLogsCollectorContainerD{} -// NewContainerLogsCollector is a constructor +// NewContainerLogsCollectorContainerD is a constructor func NewContainerLogsCollectorContainerD(exporters []interfaces.Exporter) *ContainerLogsCollector { return &ContainerLogsCollector{ BaseCollector: BaseCollector{ - collectorType: ContainerLogs, + collectorType: ContainerLogsContainerD, exporters: exporters, }, } @@ -43,7 +43,6 @@ type ContainerLogSelector struct { // where I assume # is incremented and a new log created for each container restart (starts at # == 0) const containerLogDirectory = "/var/log/containers" -//var/log/pods and /var/logs/containers // Collect implements the interface method func (collector *ContainerLogsCollectorContainerD) Collect() error { selectorStrings := strings.Fields(os.Getenv("DIAGNOSTIC_CONTAINERLOGS_LIST")) From 2bb4a370c1255be2da159f3a1c868dcb3b474efb Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 26 Jun 2021 09:59:54 +1200 Subject: [PATCH 24/31] reverse the kind vs. non-kind check to be AKS vs. non-AKS --- pkg/exporter/azureblob_exporter.go | 14 +- pkg/exporter/azureblob_exporter_test.go | 6 +- pkg/utils/helper.go | 917 ++++++++++++------------ 3 files changed, 470 insertions(+), 467 deletions(-) diff --git a/pkg/exporter/azureblob_exporter.go b/pkg/exporter/azureblob_exporter.go index 91c3563a..752ff85a 100644 --- a/pkg/exporter/azureblob_exporter.go +++ b/pkg/exporter/azureblob_exporter.go @@ -39,26 +39,28 @@ func NewAzureBlobExporter() *AzureBlobExporter { func (exporter *AzureBlobExporter) GetStorageContainerName(APIServerFQDN string) (string, error) { var containerName string var err error - if utils.IsKubernetesInDocker() { - containerName, err = exporter.GetKubernetesInDockerStorageContainerName(APIServerFQDN) + if utils.IsRunningInAks() { + containerName, err = exporter.GetAKSStorageContainerName(APIServerFQDN) } else { - containerName, err = exporter.GetNonKINDStorageContainerName(APIServerFQDN) + containerName, err = exporter.GetNonAKSStorageContainerName(APIServerFQDN) } //TODO run a sanitizer over the final chars in the containerName return containerName, err } -func (exporter *AzureBlobExporter) GetKubernetesInDockerStorageContainerName(APIServerFQDN string) (string, error) { +//GetNonAKSStorageContainerName get the storage container name for non AKS cluster +func (exporter *AzureBlobExporter) GetNonAKSStorageContainerName(APIServerFQDN string) (string, error) { containerName := strings.Replace(APIServerFQDN, ".", "-", -1) return containerName, nil } -func (exporter *AzureBlobExporter) GetNonKINDStorageContainerName(APIServerFQDN string) (string, error) { +//GetAKSStorageContainerName get the storage container name when running on an AKS cluster +func (exporter *AzureBlobExporter) GetAKSStorageContainerName(APIServerFQDN string) (string, error) { containerName := strings.Replace(APIServerFQDN, ".", "-", -1) - //TODO I really dont like the line below, it makes for weird behaviour if e.g. .hcp. or -hcp- is in the fqdn for some reason other than being auto-added by AKS + //TODO DK: I really dont like the line below, it makes for weird behaviour if e.g. .hcp. or -hcp- is in the fqdn for some reason other than being auto-added by AKS length := strings.Index(containerName, "-hcp-") if length == -1 { diff --git a/pkg/exporter/azureblob_exporter_test.go b/pkg/exporter/azureblob_exporter_test.go index e68eddb6..75c2b51f 100644 --- a/pkg/exporter/azureblob_exporter_test.go +++ b/pkg/exporter/azureblob_exporter_test.go @@ -15,12 +15,12 @@ var getStorageContainerNameTests = []struct { {"extra.super.duper.long.apiserverfqdn.that.has.more.than.63.characters", "extra-super-duper-long-apiserverfqdn-that-has-more-than-63-char"}, } -// TestGetNonKINDStorageContainerName get storage container name for non kind cluster -func TestGetNonKINDStorageContainerName(t *testing.T) { +// TestGetAKSStorageContainerName get storage container name for non kind cluster +func TestGetAKSStorageContainerName(t *testing.T) { for _, tt := range getStorageContainerNameTests { t.Run(tt.apiServerFqdn, func(t *testing.T) { var blobExporter = &AzureBlobExporter{} - containerName, _ := blobExporter.GetNonKINDStorageContainerName(tt.apiServerFqdn) + containerName, _ := blobExporter.GetAKSStorageContainerName(tt.apiServerFqdn) if containerName != tt.containerName { t.Errorf("Sprintf(%q, &blobExporter) => %q, want %q", diff --git a/pkg/utils/helper.go b/pkg/utils/helper.go index 2b3fdce2..620add9c 100644 --- a/pkg/utils/helper.go +++ b/pkg/utils/helper.go @@ -1,458 +1,459 @@ -package utils - -import ( - "bytes" - "encoding/json" - "errors" - "fmt" - "io/ioutil" - "log" - "net" - "net/http" - "net/url" - "os" - "os/exec" - "path/filepath" - "strings" - "time" -) - -const ( - // PublicAzureStorageEndpointSuffix defines default Storage Endpoint Suffix - PublicAzureStorageEndpointSuffix = "core.windows.net" - // AzureStackCloudName references the value that will be under the key "cloud" in azure.json if the application is running on Azure Stack Cloud - // https://kubernetes-sigs.github.io/cloud-provider-azure/install/configs/#azure-stack-configuration -- See this documentation for the well-known cloud name. - AzureStackCloudName = "AzureStackCloud" -) - -// Azure defines Azure configuration -type Azure struct { - Cloud string `json:"cloud"` -} - -// AzureStackCloud defines Azure Stack Cloud configuration -type AzureStackCloud struct { - StorageEndpointSuffix string `json:"storageEndpointSuffix"` -} - -type CommandOutputStreams struct { - Stdout string - Stderr string -} - -// IsAzureStackCloud returns true if the application is running on Azure Stack Cloud -func IsAzureStackCloud() bool { - azureFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azure.json") - if err != nil { - return false - } - var azure Azure - if err = json.Unmarshal([]byte(azureFile), &azure); err != nil { - return false - } - cloud := azure.Cloud - return strings.EqualFold(cloud, AzureStackCloudName) -} - -// IsKubernetesInDocker returns true if the application is running on KubernetesInDocker (kind) -func IsKubernetesInDocker() bool { - //TODO refactor the conditional logic this check guards into a new "KindClusterOperations" type behind an interface - //test the AKS kubeconfig location, if we find something then this isn't a KIND cluster - _, err := RunCommandOnHost("ls", "/var/lib/kubelet/kubeconfig") - if err == nil { - return false - } - - //test the KIND kubeconfig location - _, err = RunCommandOnHost("ls", "/etc/kubernetes/kubelet.conf") - return err == nil -} - -// CopyFileFromHost saves the specified source file to the destination -func CopyFileFromHost(source, destination string) error { - sourceFile, err := RunCommandOnHost("cat", source) - if err != nil { - return fmt.Errorf("unable to retrieve source content: %w", err) - } - if err = WriteToFile(destination, sourceFile); err != nil { - return fmt.Errorf("unable to write source file to destination: %w", err) - } - return nil -} - -// GetStorageEndpointSuffix returns the SES url from the JSON file as a string -func GetStorageEndpointSuffix() string { - if IsAzureStackCloud() { - ascFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azurestackcloud.json") - if err != nil { - log.Fatalf("unable to locate azurestackcloud.json to extract storage endpoint suffix: %v", err) - } - var azurestackcloud AzureStackCloud - if err = json.Unmarshal([]byte(ascFile), &azurestackcloud); err != nil { - log.Fatalf("unable to read azurestackcloud.json file: %v", err) - } - return azurestackcloud.StorageEndpointSuffix - } - return PublicAzureStorageEndpointSuffix -} - -// GetHostName get host name -func GetHostName() (string, error) { - hostname, err := RunCommandOnHost("cat", "/etc/hostname") - if err != nil { - return "", fmt.Errorf("Fail to get host name: %+v", err) - } - - return strings.TrimSuffix(string(hostname), "\n"), nil -} - -//ParseAPIServerFQDNFromKubeConfig parses a kubeConfig and returns the APIServerFQDN -func ParseAPIServerFQDNFromKubeConfig(output string) (string, error) { - lines := strings.Split(output, "\n") - for _, line := range lines { - index := strings.Index(line, "server: ") - if index >= 0 { - fqdn := line[index+len("server: "):] - fqdnurl, err := url.Parse(fqdn) - if err != nil { - return "", fmt.Errorf("Fail to parse url from fqdn: %s", fmt.Sprint(err)+": "+fqdn) - } - - host, _, err := net.SplitHostPort(fqdnurl.Host) - if err != nil { - return "", fmt.Errorf("Fail to split host port from fqdnurl: %s", fmt.Sprint(err)+": "+fqdnurl.String()) - } - - return host, nil - } - } - return "", errors.New("Could not find server definitions in kubeconfig") -} - -//ReadKubeletConfig reads the kubeletConfig from the node -func ReadKubeletConfig() (string, error) { - if IsKubernetesInDocker() { - output, err := RunCommandOnHost("cat", "/etc/kubernetes/kubelet.conf") - if err != nil { - return "", fmt.Errorf("Can't open kubeconfig file at /etc/kubernetes/kubelet.conf\": %+v", err) - } - return output, nil - } else { - output, err := RunCommandOnHost("cat", "/var/lib/kubelet/kubeconfig") - if err != nil { - return "", fmt.Errorf("Can't open kubeconfig file at /var/lib/kubelet/kubeconfig\": %+v", err) - } - return output, nil - } -} - -// GetAPIServerFQDN gets the API Server FQDN from the kubeconfig file -func GetAPIServerFQDN() (string, error) { - output, err := ReadKubeletConfig() - if err != nil { - return "", err - } - fqdn, err := ParseAPIServerFQDNFromKubeConfig(output) - if err != nil { - return "", err - } - return fqdn, nil -} - -// RunCommandOnHost runs a command on host system -func RunCommandOnHost(command string, arg ...string) (string, error) { - args := []string{"--target", "1", "--mount", "--uts", "--ipc", "--net", "--pid"} - args = append(args, "--") - args = append(args, command) - args = append(args, arg...) - - cmd := exec.Command("nsenter", args...) - out, err := cmd.CombinedOutput() - if err != nil { - return "", fmt.Errorf("Fail to run command on host: %+v", err) - } - - return string(out), nil -} - -// RunCommandOnContainerWithOutputStreams runs a command on container system and returns both the stdout and stderr output streams -func RunCommandOnContainerWithOutputStreams(command string, arg ...string) (CommandOutputStreams, error) { - cmd := exec.Command(command, arg...) - - var stdout bytes.Buffer - var stderr bytes.Buffer - cmd.Stdout = &stdout - cmd.Stderr = &stderr - - err := cmd.Run() - outputStreams := CommandOutputStreams{stdout.String(), stderr.String()} - - if err != nil { - return outputStreams, fmt.Errorf("Fail to run command in container: %s", fmt.Sprint(err)+": "+stderr.String()) - } - - return outputStreams, nil -} - -// RunCommandOnContainer runs a command on container system and returns the stdout output stream -func RunCommandOnContainer(command string, arg ...string) (string, error) { - outputStreams, err := RunCommandOnContainerWithOutputStreams(command, arg...) - return outputStreams.Stdout, err -} - -// RunBackgroundCommand starts running a command on a container system in the background and returns its process ID -func RunBackgroundCommand(command string, arg ...string) (int, error) { - cmd := exec.Command(command, arg...) - var out bytes.Buffer - var stderr bytes.Buffer - cmd.Stdout = &out - cmd.Stderr = &stderr - err := cmd.Start() - if err != nil { - return 0, fmt.Errorf("Start background command in container exited with message %s: %w", stderr.String(), err) - } - return cmd.Process.Pid, nil -} - -// Finds and kills a process with a given process ID -func KillProcess(pid int) error { - process, err := os.FindProcess(pid) - if err != nil { - return fmt.Errorf("Find process with pid %d to kill: %w", pid, err) - } - if err := process.Kill(); err != nil { - return err - } - return nil -} - -// Tries to issue an HTTP GET request up to maxRetries times -func GetUrlWithRetries(url string, maxRetries int) ([]byte, error) { - retry := 1 - for { - resp, err := http.Get(url) - if err != nil { - if retry == maxRetries { - return nil, fmt.Errorf("Max retries reached for request HTTP Get %s: %w", url, err) - } - retry++ - time.Sleep(5 * time.Second) - } else { - defer resp.Body.Close() - return ioutil.ReadAll(resp.Body) - } - } -} - -// WriteToFile writes data to a file -func WriteToFile(fileName string, data string) error { - if err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm); err != nil { - return fmt.Errorf("Fail to create path directories for file %s: %w", fileName, err) - } - f, err := os.Create(fileName) - if err != nil { - return fmt.Errorf("Fail to create file %s: %+v", fileName, err) - } - defer f.Close() - - _, err = f.Write([]byte(data)) - if err != nil { - return fmt.Errorf("Fail to write data to file %s: %+v", fileName, err) - } - - return nil -} - -// CreateCollectorDir creates a working dir for a collector -func CreateCollectorDir(name string) (string, error) { - hostName, err := GetHostName() - if err != nil { - return "", err - } - - creationTimeStamp, err := GetCreationTimeStamp() - if err != nil { - return "", err - } - - rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "collector", name) - err = os.MkdirAll(rootPath, os.ModePerm) - if err != nil { - return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) - } - - return rootPath, nil -} - -// CreateDiagnosticDir creates a working dir for diagnostic -func CreateDiagnosticDir() (string, error) { - hostName, err := GetHostName() - if err != nil { - return "", err - } - - creationTimeStamp, err := GetCreationTimeStamp() - if err != nil { - return "", err - } - - rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "diagnoser") - err = os.MkdirAll(rootPath, os.ModePerm) - if err != nil { - return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) - } - - return rootPath, nil -} - -// CreateKubeConfigFromServiceAccount creates kubeconfig based on creds in service account -func CreateKubeConfigFromServiceAccount() error { - token, err := RunCommandOnContainer("cat", "/var/run/secrets/kubernetes.io/serviceaccount/token") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-credentials", "aks-periscope-service-account", "--token="+token) - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-cluster", "aks-periscope-cluster", "--server=https://kubernetes.default.svc.cluster.local:443", "--certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "set-context", "aks-periscope-context", "--user=aks-periscope-service-account", "--cluster=aks-periscope-cluster") - if err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "config", "use-context", "aks-periscope-context") - if err != nil { - return err - } - - return nil -} - -// GetCreationTimeStamp returns a create timestamp -func GetCreationTimeStamp() (string, error) { - creationTimeStamp, err := RunCommandOnContainer("kubectl", "get", "pods", "--all-namespaces", "-l", "app=aks-periscope", "-o", "jsonpath=\"{.items[0].metadata.creationTimestamp}\"") - if err != nil { - return "", err - } - - return creationTimeStamp[1 : len(creationTimeStamp)-1], nil -} - -// WriteToCRD writes diagnostic data to CRD -func WriteToCRD(fileName string, key string) error { - hostName, err := GetHostName() - if err != nil { - return err - } - - crdName := "aks-periscope-diagnostic" + "-" + hostName - - jsonBytes, err := ioutil.ReadFile(fileName) - if err != nil { - return err - } - - patchContent := fmt.Sprintf("{\"spec\":{%q:%q}}", key, string(jsonBytes)) - - _, err = RunCommandOnContainer("kubectl", "-n", "aks-periscope", "patch", "apd", crdName, "-p", patchContent, "--type=merge") - if err != nil { - return err - } - - return nil -} - -// CreateCRD creates a CRD object -func CreateCRD() error { - hostName, err := GetHostName() - if err != nil { - return err - } - - crdName := "aks-periscope-diagnostic" + "-" + hostName - - if err = writeDiagnosticCRD(crdName); err != nil { - return err - } - - _, err = RunCommandOnContainer("kubectl", "apply", "-f", "aks-periscope-diagnostic-crd.yaml") - if err != nil { - return err - } - - return nil -} - -// GetResourceList gets a list of all resources of given type in a specified namespace -func GetResourceList(kubeCmds []string, separator string) ([]string, error) { - outputStreams, err := RunCommandOnContainerWithOutputStreams("kubectl", kubeCmds...) - - if err != nil { - return nil, err - } - - resourceList := outputStreams.Stdout - // If the resource is not found within the cluster, then log a message and do not return any resources. - if len(resourceList) == 0 { - return nil, fmt.Errorf("No '%s' resource found in the cluster for given kubectl command", kubeCmds[1]) - } - - return strings.Split(strings.Trim(resourceList, "\""), separator), nil -} - -func writeDiagnosticCRD(crdName string) error { - f, err := os.Create("aks-periscope-diagnostic-crd.yaml") - if err != nil { - return err - } - defer f.Close() - - _, err = f.WriteString("apiVersion: \"aks-periscope.azure.github.com/v1\"\n") - if err != nil { - return err - } - - _, err = f.WriteString("kind: Diagnostic\n") - if err != nil { - return err - } - - _, err = f.WriteString("metadata:\n") - if err != nil { - return err - } - - _, err = f.WriteString(" name: " + crdName + "\n") - if err != nil { - return err - } - - _, err = f.WriteString(" namespace: aks-periscope\n") - if err != nil { - return err - } - - _, err = f.WriteString("spec:\n") - if err != nil { - return err - } - - _, err = f.WriteString(" networkconfig: \"\"\n") - if err != nil { - return err - } - - _, err = f.WriteString(" networkoutbound: \"\"\n") - if err != nil { - return err - } - - return nil -} +package utils + +import ( + "bytes" + "encoding/json" + "errors" + "fmt" + "io/ioutil" + "log" + "net" + "net/http" + "net/url" + "os" + "os/exec" + "path/filepath" + "strings" + "time" +) + +const ( + // PublicAzureStorageEndpointSuffix defines default Storage Endpoint Suffix + PublicAzureStorageEndpointSuffix = "core.windows.net" + // AzureStackCloudName references the value that will be under the key "cloud" in azure.json if the application is running on Azure Stack Cloud + // https://kubernetes-sigs.github.io/cloud-provider-azure/install/configs/#azure-stack-configuration -- See this documentation for the well-known cloud name. + AzureStackCloudName = "AzureStackCloud" +) + +// Azure defines Azure configuration +type Azure struct { + Cloud string `json:"cloud"` +} + +// AzureStackCloud defines Azure Stack Cloud configuration +type AzureStackCloud struct { + StorageEndpointSuffix string `json:"storageEndpointSuffix"` +} + +type CommandOutputStreams struct { + Stdout string + Stderr string +} + +// IsAzureStackCloud returns true if the application is running on Azure Stack Cloud +func IsAzureStackCloud() bool { + azureFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azure.json") + if err != nil { + return false + } + var azure Azure + if err = json.Unmarshal([]byte(azureFile), &azure); err != nil { + return false + } + cloud := azure.Cloud + return strings.EqualFold(cloud, AzureStackCloudName) +} + +// IsRunningInAks returns true if the application is running on AKS +func IsRunningInAks() bool { + //TODO refactor the conditional logic this check guards into new Cluster-Type specific components behind an interface + //test the non-AKS kubeconfig location, if we find something then this is *not* an AKS cluster + _, err := RunCommandOnHost("ls", "/etc/kubernetes/kubelet.conf") + if err == nil { + return false + } + + //test the AKS kubeconfig location + //TODO can we improve this check to be more like the IsAzureStackCloud one, which seems less arbitrary? + _, err = RunCommandOnHost("ls", "/var/lib/kubelet/kubeconfig") + return err == nil +} + +// CopyFileFromHost saves the specified source file to the destination +func CopyFileFromHost(source, destination string) error { + sourceFile, err := RunCommandOnHost("cat", source) + if err != nil { + return fmt.Errorf("unable to retrieve source content: %w", err) + } + if err = WriteToFile(destination, sourceFile); err != nil { + return fmt.Errorf("unable to write source file to destination: %w", err) + } + return nil +} + +// GetStorageEndpointSuffix returns the SES url from the JSON file as a string +func GetStorageEndpointSuffix() string { + if IsAzureStackCloud() { + ascFile, err := RunCommandOnHost("cat", "/etc/kubernetes/azurestackcloud.json") + if err != nil { + log.Fatalf("unable to locate azurestackcloud.json to extract storage endpoint suffix: %v", err) + } + var azurestackcloud AzureStackCloud + if err = json.Unmarshal([]byte(ascFile), &azurestackcloud); err != nil { + log.Fatalf("unable to read azurestackcloud.json file: %v", err) + } + return azurestackcloud.StorageEndpointSuffix + } + return PublicAzureStorageEndpointSuffix +} + +// GetHostName get host name +func GetHostName() (string, error) { + hostname, err := RunCommandOnHost("cat", "/etc/hostname") + if err != nil { + return "", fmt.Errorf("Fail to get host name: %+v", err) + } + + return strings.TrimSuffix(string(hostname), "\n"), nil +} + +//ParseAPIServerFQDNFromKubeConfig parses a kubeConfig and returns the APIServerFQDN +func ParseAPIServerFQDNFromKubeConfig(output string) (string, error) { + lines := strings.Split(output, "\n") + for _, line := range lines { + index := strings.Index(line, "server: ") + if index >= 0 { + fqdn := line[index+len("server: "):] + fqdnurl, err := url.Parse(fqdn) + if err != nil { + return "", fmt.Errorf("Fail to parse url from fqdn: %s", fmt.Sprint(err)+": "+fqdn) + } + + host, _, err := net.SplitHostPort(fqdnurl.Host) + if err != nil { + return "", fmt.Errorf("Fail to split host port from fqdnurl: %s: %w", fqdnurl, err) + } + + return host, nil + } + } + return "", errors.New("Could not find server definitions in kubeconfig") +} + +//ReadKubeletConfig reads the kubeletConfig from the node +func ReadKubeletConfig() (string, error) { + if IsRunningInAks() { + output, err := RunCommandOnHost("cat", "/var/lib/kubelet/kubeconfig") + if err != nil { + return "", fmt.Errorf("Can't open kubeconfig file at /var/lib/kubelet/kubeconfig\": %+v", err) + } + return output, nil + } else { + output, err := RunCommandOnHost("cat", "/etc/kubernetes/kubelet.conf") + if err != nil { + return "", fmt.Errorf("Can't open kubeconfig file at /etc/kubernetes/kubelet.conf\": %+v", err) + } + return output, nil + } +} + +// GetAPIServerFQDN gets the API Server FQDN from the kubeconfig file +func GetAPIServerFQDN() (string, error) { + output, err := ReadKubeletConfig() + if err != nil { + return "", err + } + fqdn, err := ParseAPIServerFQDNFromKubeConfig(output) + if err != nil { + return "", err + } + return fqdn, nil +} + +// RunCommandOnHost runs a command on host system +func RunCommandOnHost(command string, arg ...string) (string, error) { + args := []string{"--target", "1", "--mount", "--uts", "--ipc", "--net", "--pid"} + args = append(args, "--") + args = append(args, command) + args = append(args, arg...) + + cmd := exec.Command("nsenter", args...) + out, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("Fail to run command on host: %+v", err) + } + + return string(out), nil +} + +// RunCommandOnContainerWithOutputStreams runs a command on container system and returns both the stdout and stderr output streams +func RunCommandOnContainerWithOutputStreams(command string, arg ...string) (CommandOutputStreams, error) { + cmd := exec.Command(command, arg...) + + var stdout bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + err := cmd.Run() + outputStreams := CommandOutputStreams{stdout.String(), stderr.String()} + + if err != nil { + return outputStreams, fmt.Errorf("Fail to run command in container: %s", fmt.Sprint(err)+": "+stderr.String()) + } + + return outputStreams, nil +} + +// RunCommandOnContainer runs a command on container system and returns the stdout output stream +func RunCommandOnContainer(command string, arg ...string) (string, error) { + outputStreams, err := RunCommandOnContainerWithOutputStreams(command, arg...) + return outputStreams.Stdout, err +} + +// RunBackgroundCommand starts running a command on a container system in the background and returns its process ID +func RunBackgroundCommand(command string, arg ...string) (int, error) { + cmd := exec.Command(command, arg...) + var out bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &out + cmd.Stderr = &stderr + err := cmd.Start() + if err != nil { + return 0, fmt.Errorf("Start background command in container exited with message %s: %w", stderr.String(), err) + } + return cmd.Process.Pid, nil +} + +// Finds and kills a process with a given process ID +func KillProcess(pid int) error { + process, err := os.FindProcess(pid) + if err != nil { + return fmt.Errorf("Find process with pid %d to kill: %w", pid, err) + } + if err := process.Kill(); err != nil { + return err + } + return nil +} + +// Tries to issue an HTTP GET request up to maxRetries times +func GetUrlWithRetries(url string, maxRetries int) ([]byte, error) { + retry := 1 + for { + resp, err := http.Get(url) + if err != nil { + if retry == maxRetries { + return nil, fmt.Errorf("Max retries reached for request HTTP Get %s: %w", url, err) + } + retry++ + time.Sleep(5 * time.Second) + } else { + defer resp.Body.Close() + return ioutil.ReadAll(resp.Body) + } + } +} + +// WriteToFile writes data to a file +func WriteToFile(fileName string, data string) error { + if err := os.MkdirAll(filepath.Dir(fileName), os.ModePerm); err != nil { + return fmt.Errorf("Fail to create path directories for file %s: %w", fileName, err) + } + f, err := os.Create(fileName) + if err != nil { + return fmt.Errorf("Fail to create file %s: %+v", fileName, err) + } + defer f.Close() + + _, err = f.Write([]byte(data)) + if err != nil { + return fmt.Errorf("Fail to write data to file %s: %+v", fileName, err) + } + + return nil +} + +// CreateCollectorDir creates a working dir for a collector +func CreateCollectorDir(name string) (string, error) { + hostName, err := GetHostName() + if err != nil { + return "", err + } + + creationTimeStamp, err := GetCreationTimeStamp() + if err != nil { + return "", err + } + + rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "collector", name) + err = os.MkdirAll(rootPath, os.ModePerm) + if err != nil { + return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) + } + + return rootPath, nil +} + +// CreateDiagnosticDir creates a working dir for diagnostic +func CreateDiagnosticDir() (string, error) { + hostName, err := GetHostName() + if err != nil { + return "", err + } + + creationTimeStamp, err := GetCreationTimeStamp() + if err != nil { + return "", err + } + + rootPath := filepath.Join("/aks-periscope", strings.Replace(creationTimeStamp, ":", "-", -1), hostName, "diagnoser") + err = os.MkdirAll(rootPath, os.ModePerm) + if err != nil { + return "", fmt.Errorf("Fail to create dir %s: %+v", rootPath, err) + } + + return rootPath, nil +} + +// CreateKubeConfigFromServiceAccount creates kubeconfig based on creds in service account +func CreateKubeConfigFromServiceAccount() error { + token, err := RunCommandOnContainer("cat", "/var/run/secrets/kubernetes.io/serviceaccount/token") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-credentials", "aks-periscope-service-account", "--token="+token) + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-cluster", "aks-periscope-cluster", "--server=https://kubernetes.default.svc.cluster.local:443", "--certificate-authority=/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "set-context", "aks-periscope-context", "--user=aks-periscope-service-account", "--cluster=aks-periscope-cluster") + if err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "config", "use-context", "aks-periscope-context") + if err != nil { + return err + } + + return nil +} + +// GetCreationTimeStamp returns a create timestamp +func GetCreationTimeStamp() (string, error) { + creationTimeStamp, err := RunCommandOnContainer("kubectl", "get", "pods", "--all-namespaces", "-l", "app=aks-periscope", "-o", "jsonpath=\"{.items[0].metadata.creationTimestamp}\"") + if err != nil { + return "", err + } + + return creationTimeStamp[1 : len(creationTimeStamp)-1], nil +} + +// WriteToCRD writes diagnostic data to CRD +func WriteToCRD(fileName string, key string) error { + hostName, err := GetHostName() + if err != nil { + return err + } + + crdName := "aks-periscope-diagnostic" + "-" + hostName + + jsonBytes, err := ioutil.ReadFile(fileName) + if err != nil { + return err + } + + patchContent := fmt.Sprintf("{\"spec\":{%q:%q}}", key, string(jsonBytes)) + + _, err = RunCommandOnContainer("kubectl", "-n", "aks-periscope", "patch", "apd", crdName, "-p", patchContent, "--type=merge") + if err != nil { + return err + } + + return nil +} + +// CreateCRD creates a CRD object +func CreateCRD() error { + hostName, err := GetHostName() + if err != nil { + return err + } + + crdName := "aks-periscope-diagnostic" + "-" + hostName + + if err = writeDiagnosticCRD(crdName); err != nil { + return err + } + + _, err = RunCommandOnContainer("kubectl", "apply", "-f", "aks-periscope-diagnostic-crd.yaml") + if err != nil { + return err + } + + return nil +} + +// GetResourceList gets a list of all resources of given type in a specified namespace +func GetResourceList(kubeCmds []string, separator string) ([]string, error) { + outputStreams, err := RunCommandOnContainerWithOutputStreams("kubectl", kubeCmds...) + + if err != nil { + return nil, err + } + + resourceList := outputStreams.Stdout + // If the resource is not found within the cluster, then log a message and do not return any resources. + if len(resourceList) == 0 { + return nil, fmt.Errorf("No '%s' resource found in the cluster for given kubectl command", kubeCmds[1]) + } + + return strings.Split(strings.Trim(resourceList, "\""), separator), nil +} + +func writeDiagnosticCRD(crdName string) error { + f, err := os.Create("aks-periscope-diagnostic-crd.yaml") + if err != nil { + return err + } + defer f.Close() + + _, err = f.WriteString("apiVersion: \"aks-periscope.azure.github.com/v1\"\n") + if err != nil { + return err + } + + _, err = f.WriteString("kind: Diagnostic\n") + if err != nil { + return err + } + + _, err = f.WriteString("metadata:\n") + if err != nil { + return err + } + + _, err = f.WriteString(" name: " + crdName + "\n") + if err != nil { + return err + } + + _, err = f.WriteString(" namespace: aks-periscope\n") + if err != nil { + return err + } + + _, err = f.WriteString("spec:\n") + if err != nil { + return err + } + + _, err = f.WriteString(" networkconfig: \"\"\n") + if err != nil { + return err + } + + _, err = f.WriteString(" networkoutbound: \"\"\n") + if err != nil { + return err + } + + return nil +} From 0c61fedb45c672839c0f04bc04acec6720dfb8c6 Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 26 Jun 2021 10:00:27 +1200 Subject: [PATCH 25/31] register the containerlogscontainerd collector for the kind deployment file --- deployment/aks-periscope-kind.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/aks-periscope-kind.yaml b/deployment/aks-periscope-kind.yaml index 5f261457..0cb6a50c 100644 --- a/deployment/aks-periscope-kind.yaml +++ b/deployment/aks-periscope-kind.yaml @@ -144,7 +144,7 @@ metadata: name: nodelogs-config namespace: aks-periscope data: - DIAGNOSTIC_NODELOGS_LIST: /var/log/azure/cluster-provision.log /var/log/cloud-init.log + DIAGNOSTIC_NODELOGS_LIST: --- apiVersion: v1 kind: ConfigMap @@ -152,7 +152,7 @@ metadata: name: collectors-config namespace: aks-periscope data: - ENABLED_COLLECTORS: dns containerlogs iptables kubeletcmd kubeobjects networkoutbound systemlogs + ENABLED_COLLECTORS: dns containerlogscontainerd iptables kubeletcmd kubeobjects networkoutbound systemlogs --- apiVersion: v1 kind: ConfigMap From fc7bc0388ec1d3389c123c775d328ac8a23c5184 Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 26 Jun 2021 10:38:16 +1200 Subject: [PATCH 26/31] remove unused config --- deployment/aks-periscope-kind.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/deployment/aks-periscope-kind.yaml b/deployment/aks-periscope-kind.yaml index 0cb6a50c..d042e4a0 100644 --- a/deployment/aks-periscope-kind.yaml +++ b/deployment/aks-periscope-kind.yaml @@ -86,8 +86,6 @@ spec: name: containerlogs-config - configMapRef: name: kubeobjects-config - - configMapRef: - name: nodelogs-config - configMapRef: name: collectors-config - configMapRef: @@ -138,14 +136,6 @@ metadata: data: DIAGNOSTIC_KUBEOBJECTS_LIST: kube-system/pod kube-system/service kube-system/deployment --- -apiVersion: v1 -kind: ConfigMap -metadata: - name: nodelogs-config - namespace: aks-periscope -data: - DIAGNOSTIC_NODELOGS_LIST: ---- apiVersion: v1 kind: ConfigMap metadata: From 248d8fd0d5696e6fe2bafb5c2be9548a96a48eab Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 26 Jun 2021 10:58:21 +1200 Subject: [PATCH 27/31] register new collector in initialization, fix typo + collector name --- cmd/aks-periscope/aks-periscope.go | 4 +++- pkg/collector/containerlogs_collector_containerd.go | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index fa518e5f..c5066dc2 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -73,6 +73,7 @@ func initializeComponents() ([]interfaces.Collector, []interfaces.Diagnoser, []i //collectors containerLogsCollector := collector.NewContainerLogsCollector(selectedExporters) + containerLogsCollectorContainerD := collector.NewContainerLogsCollectorContainerD(selectedExporters) systemLogsCollector := collector.NewSystemLogsCollector(selectedExporters) networkOutboundCollector := collector.NewNetworkOutboundCollector(5, selectedExporters) ipTablesCollector := collector.NewIPTablesCollector(selectedExporters) @@ -87,6 +88,7 @@ func initializeComponents() ([]interfaces.Collector, []interfaces.Diagnoser, []i selectedCollectors := selectCollectors( map[string]interfaces.Collector{ containerLogsCollector.GetName(): containerLogsCollector, + containerLogsCollectorContainerD.GetName(): containerLogsCollectorContainerD, systemLogsCollector.GetName(): systemLogsCollector, networkOutboundCollector.GetName(): networkOutboundCollector, ipTablesCollector.GetName(): ipTablesCollector, @@ -145,7 +147,7 @@ func selectCollectorsUsingCollectorList(collectorList []string) []string { "dns", "containerlogs", "kubeobjects", "networkoutbound") if contains(collectorList, "connectedCluster") { - //select connectedCluster colelctors + //select connectedCluster collectors enabledCollectorNames = append(enabledCollectorNames, "helm") } else { //select non-connectedCluster collectors diff --git a/pkg/collector/containerlogs_collector_containerd.go b/pkg/collector/containerlogs_collector_containerd.go index 4ac66cd8..9a8be2a8 100644 --- a/pkg/collector/containerlogs_collector_containerd.go +++ b/pkg/collector/containerlogs_collector_containerd.go @@ -17,8 +17,8 @@ type ContainerLogsCollectorContainerD struct { var _ interfaces.Collector = &ContainerLogsCollectorContainerD{} // NewContainerLogsCollectorContainerD is a constructor -func NewContainerLogsCollectorContainerD(exporters []interfaces.Exporter) *ContainerLogsCollector { - return &ContainerLogsCollector{ +func NewContainerLogsCollectorContainerD(exporters []interfaces.Exporter) *ContainerLogsCollectorContainerD { + return &ContainerLogsCollectorContainerD{ BaseCollector: BaseCollector{ collectorType: ContainerLogsContainerD, exporters: exporters, From c32622abc2c3e5b3ee450e3bb83dc5da561295ce Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 26 Jun 2021 11:34:39 +1200 Subject: [PATCH 28/31] zip_and_export flag added to exporter config, false for KIND, defaults to true --- cmd/aks-periscope/aks-periscope.go | 25 +++++++++++++++++-------- deployment/aks-periscope-kind.yaml | 5 +++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index c5066dc2..1ada01be 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -3,6 +3,7 @@ package main import ( "log" "os" + "strconv" "strings" "sync" @@ -40,16 +41,24 @@ func main() { runDiagnosers(diagnosers, diagnoserGrp) diagnoserGrp.Wait() - log.Print("Zip result files") - zippedOutputs, err := zipOutputDirectory() - if err != nil { - log.Printf("Failed to zip result files: %+v", err) + zipAndExportString, found := os.LookupEnv("ZIP_AND_EXPORT") + zipAndExport, parseErr := strconv.ParseBool(zipAndExportString) + if !found || parseErr != nil{ + zipAndExport = true } - log.Print("Run exporters for result files") - err = runExporters(exporters, zippedOutputs) - if err != nil { - log.Printf("Failed to export result files: %+v", err) + if zipAndExport { + log.Print("Zip result files") + zippedOutputs, err := zipOutputDirectory() + if err != nil { + log.Printf("Failed to zip result files: %+v", err) + } + + log.Print("Run exporters for result files") + err = runExporters(exporters, zippedOutputs) + if err != nil { + log.Printf("Failed to export result files: %+v", err) + } } // TODO: Hack: for now AKS-Periscope is running as a deamonset so it shall not stop (or the pod will be restarted) diff --git a/deployment/aks-periscope-kind.yaml b/deployment/aks-periscope-kind.yaml index d042e4a0..3c73e03e 100644 --- a/deployment/aks-periscope-kind.yaml +++ b/deployment/aks-periscope-kind.yaml @@ -77,7 +77,7 @@ spec: beta.kubernetes.io/os: linux containers: - name: aks-periscope - image: aksrepos.azurecr.io/staging/aks-periscope:v0.3 + image: dakyddacr.azurecr.io/periscope/kind:v21 securityContext: privileged: true imagePullPolicy: Always @@ -158,7 +158,8 @@ metadata: name: exporters-config namespace: aks-periscope data: - ENABLED_EXPORTERS: azureblob + ENABLED_EXPORTERS: "azureblob" + ZIP_AND_EXPORT: "false" --- apiVersion: apiextensions.k8s.io/v1beta1 kind: CustomResourceDefinition From c575b09abf452b9566c75270cdb124de636cd560 Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 26 Jun 2021 11:35:20 +1200 Subject: [PATCH 29/31] containerD collector needs to copy files from host to container prior to export --- pkg/collector/containerlogs_collector_containerd.go | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pkg/collector/containerlogs_collector_containerd.go b/pkg/collector/containerlogs_collector_containerd.go index 9a8be2a8..499fade7 100644 --- a/pkg/collector/containerlogs_collector_containerd.go +++ b/pkg/collector/containerlogs_collector_containerd.go @@ -3,6 +3,7 @@ package collector import ( "os" "path" + "path/filepath" "strings" "github.com/Azure/aks-periscope/pkg/interfaces" @@ -46,6 +47,7 @@ const containerLogDirectory = "/var/log/containers" // Collect implements the interface method func (collector *ContainerLogsCollectorContainerD) Collect() error { selectorStrings := strings.Fields(os.Getenv("DIAGNOSTIC_CONTAINERLOGS_LIST")) + rootPath, err := utils.CreateCollectorDir(collector.GetName()) containerLogSelectors := collector.ParseContainerLogSelectors(selectorStrings) @@ -59,7 +61,14 @@ func (collector *ContainerLogsCollectorContainerD) Collect() error { containerLogsToCollect := collector.DetermineContainerLogsToCollect(containerLogs, containerLogSelectors) for _, containerLog := range containerLogsToCollect { - collector.AddToCollectorFiles(containerLog.filepath) + output, err := utils.RunCommandOnHost("cat", containerLog.filepath) + containerLogOnContainer := filepath.Join(rootPath, filepath.Base(containerLog.filepath)) + + err = utils.WriteToFile(containerLogOnContainer, output) + if err != nil { + return err + } + collector.AddToCollectorFiles(containerLogOnContainer) } return nil From 99fb06c35dd92793e184c458d4147a595a8ba602 Mon Sep 17 00:00:00 2001 From: david kydd Date: Sat, 26 Jun 2021 15:07:02 +1200 Subject: [PATCH 30/31] fixes for when running on AKS - image: dakyddacr.azurecr.io/periscope/kind:v23 --- cmd/aks-periscope/aks-periscope.go | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index 1ada01be..ad7e4563 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -72,6 +72,7 @@ func main() { // initializeComponents initializes and returns collectors, diagnosers and exporters func initializeComponents() ([]interfaces.Collector, []interfaces.Diagnoser, []interfaces.Exporter) { + //TODO it would be nice if we only instantiated those collector/diagnoser/exporters that were actually selected for execution //exporters azureBlobExporter := exporter.NewAzureBlobExporter() @@ -138,8 +139,6 @@ func selectCollectors(allCollectorsByName map[string]interfaces.Collector) []int enabledCollectorNames = strings.Fields(os.Getenv("ENABLED_COLLECTORS")) } - enabledCollectorNames = strings.Fields(os.Getenv("ENABLED_COLLECTORS")) - for _, collectorName := range enabledCollectorNames { collectors = append(collectors, allCollectorsByName[collectorName]) } @@ -176,7 +175,13 @@ func selectDiagnosers(allDiagnosersByName map[string]interfaces.Diagnoser) []int diagnosers := []interfaces.Diagnoser{} //read list of diagnosers that are enabled - enabledDiagnoserNames := strings.Fields(os.Getenv("ENABLED_DIAGNOSERS")) + enabledDiagnoserString, found := os.LookupEnv("ENABLED_DIAGNOSERS") + if !found{ + //if not defined, default to all diagnosers enabled + enabledDiagnoserString = "networkconfig networkoutbound" + } + + enabledDiagnoserNames := strings.Fields(enabledDiagnoserString) for _, diagnoserName := range enabledDiagnoserNames { diagnosers = append(diagnosers, allDiagnosersByName[diagnoserName]) @@ -189,8 +194,14 @@ func selectDiagnosers(allDiagnosersByName map[string]interfaces.Diagnoser) []int func selectExporters(allExporters map[string]interfaces.Exporter) []interfaces.Exporter { exporters := []interfaces.Exporter{} - //read list of collectors that are enabled - enabledExporterNames := strings.Fields(os.Getenv("ENABLED_EXPORTERS")) + //read list of exporters that are enabled + enabledExportersString, found := os.LookupEnv("ENABLED_EXPORTERS") + if !found{ + //if not defined, default to all exporters enabled + enabledExportersString = "azureblob" + } + + enabledExporterNames := strings.Fields(enabledExportersString) for _, exporterName := range enabledExporterNames { exporters = append(exporters, allExporters[exporterName]) From 3fd258715481f7b083667dd44624a40dbc9f7807 Mon Sep 17 00:00:00 2001 From: david kydd Date: Mon, 28 Jun 2021 15:31:54 +1200 Subject: [PATCH 31/31] fix golint errors --- cmd/aks-periscope/aks-periscope.go | 32 ++++++-------- .../containerlogs_collector_containerd.go | 43 +++++++++++-------- 2 files changed, 40 insertions(+), 35 deletions(-) diff --git a/cmd/aks-periscope/aks-periscope.go b/cmd/aks-periscope/aks-periscope.go index ad7e4563..579d4bba 100644 --- a/cmd/aks-periscope/aks-periscope.go +++ b/cmd/aks-periscope/aks-periscope.go @@ -43,7 +43,7 @@ func main() { zipAndExportString, found := os.LookupEnv("ZIP_AND_EXPORT") zipAndExport, parseErr := strconv.ParseBool(zipAndExportString) - if !found || parseErr != nil{ + if !found || parseErr != nil { zipAndExport = true } @@ -64,10 +64,6 @@ func main() { // TODO: Hack: for now AKS-Periscope is running as a deamonset so it shall not stop (or the pod will be restarted) // Revert from https://github.com/Azure/aks-periscope/blob/b98d66a238e942158ef2628a9315b58937ff9c8f/cmd/aks-periscope/aks-periscope.go#L70 select {} - - // TODO: remove this //nolint comment once the select{} has been removed - //nolint:govet - return } // initializeComponents initializes and returns collectors, diagnosers and exporters @@ -97,18 +93,18 @@ func initializeComponents() ([]interfaces.Collector, []interfaces.Diagnoser, []i selectedCollectors := selectCollectors( map[string]interfaces.Collector{ - containerLogsCollector.GetName(): containerLogsCollector, + containerLogsCollector.GetName(): containerLogsCollector, containerLogsCollectorContainerD.GetName(): containerLogsCollectorContainerD, - systemLogsCollector.GetName(): systemLogsCollector, - networkOutboundCollector.GetName(): networkOutboundCollector, - ipTablesCollector.GetName(): ipTablesCollector, - nodeLogsCollector.GetName(): nodeLogsCollector, - dnsCollector.GetName(): dnsCollector, - kubeObjectsCollector.GetName(): kubeObjectsCollector, - kubeletCmdCollector.GetName(): kubeletCmdCollector, - systemPerfCollector.GetName(): systemPerfCollector, - helmCollector.GetName(): helmCollector, - osmCollector.GetName(): osmCollector, + systemLogsCollector.GetName(): systemLogsCollector, + networkOutboundCollector.GetName(): networkOutboundCollector, + ipTablesCollector.GetName(): ipTablesCollector, + nodeLogsCollector.GetName(): nodeLogsCollector, + dnsCollector.GetName(): dnsCollector, + kubeObjectsCollector.GetName(): kubeObjectsCollector, + kubeletCmdCollector.GetName(): kubeletCmdCollector, + systemPerfCollector.GetName(): systemPerfCollector, + helmCollector.GetName(): helmCollector, + osmCollector.GetName(): osmCollector, }) //diagnosers @@ -176,7 +172,7 @@ func selectDiagnosers(allDiagnosersByName map[string]interfaces.Diagnoser) []int //read list of diagnosers that are enabled enabledDiagnoserString, found := os.LookupEnv("ENABLED_DIAGNOSERS") - if !found{ + if !found { //if not defined, default to all diagnosers enabled enabledDiagnoserString = "networkconfig networkoutbound" } @@ -196,7 +192,7 @@ func selectExporters(allExporters map[string]interfaces.Exporter) []interfaces.E //read list of exporters that are enabled enabledExportersString, found := os.LookupEnv("ENABLED_EXPORTERS") - if !found{ + if !found { //if not defined, default to all exporters enabled enabledExportersString = "azureblob" } diff --git a/pkg/collector/containerlogs_collector_containerd.go b/pkg/collector/containerlogs_collector_containerd.go index 499fade7..73b13ab6 100644 --- a/pkg/collector/containerlogs_collector_containerd.go +++ b/pkg/collector/containerlogs_collector_containerd.go @@ -28,15 +28,15 @@ func NewContainerLogsCollectorContainerD(exporters []interfaces.Exporter) *Conta } type ContainerLog struct { - podname string - namespace string + podname string + namespace string containerName string - containeruid string - filepath string + containeruid string + filepath string } type ContainerLogSelector struct { - namespace string + namespace string containerNamePrefix string } @@ -48,6 +48,9 @@ const containerLogDirectory = "/var/log/containers" func (collector *ContainerLogsCollectorContainerD) Collect() error { selectorStrings := strings.Fields(os.Getenv("DIAGNOSTIC_CONTAINERLOGS_LIST")) rootPath, err := utils.CreateCollectorDir(collector.GetName()) + if err != nil { + return err + } containerLogSelectors := collector.ParseContainerLogSelectors(selectorStrings) @@ -62,6 +65,10 @@ func (collector *ContainerLogsCollectorContainerD) Collect() error { for _, containerLog := range containerLogsToCollect { output, err := utils.RunCommandOnHost("cat", containerLog.filepath) + if err != nil { + return err + } + containerLogOnContainer := filepath.Join(rootPath, filepath.Base(containerLog.filepath)) err = utils.WriteToFile(containerLogOnContainer, output) @@ -79,7 +86,7 @@ func (collector *ContainerLogsCollectorContainerD) DetermineContainerLogsToColle var selectedContainerLogs []ContainerLog for _, containerLog := range allContainers { for _, selector := range selectors { - if collector.DoesSelectorSelectContainerLog(containerLog, selector){ + if collector.DoesSelectorSelectContainerLog(containerLog, selector) { selectedContainerLogs = append(selectedContainerLogs, containerLog) } } @@ -100,15 +107,17 @@ func (collector *ContainerLogsCollectorContainerD) ParseContainerLogSelectors(se for _, selectorString := range selectorStrings { selectorStringParts := strings.Split(selectorString, "/") - if len(selectorStringParts) == 1{ - containerLogSelectors = append(containerLogSelectors, ContainerLogSelector{ - namespace: selectorStringParts[0], - })} - if len(selectorStringParts) == 2{ - containerLogSelectors = append(containerLogSelectors, ContainerLogSelector{ - namespace: selectorStringParts[0], - containerNamePrefix: selectorStringParts[1], - })} + if len(selectorStringParts) == 1 { + containerLogSelectors = append(containerLogSelectors, ContainerLogSelector{ + namespace: selectorStringParts[0], + }) + } + if len(selectorStringParts) == 2 { + containerLogSelectors = append(containerLogSelectors, ContainerLogSelector{ + namespace: selectorStringParts[0], + containerNamePrefix: selectorStringParts[1], + }) + } } return containerLogSelectors @@ -127,7 +136,7 @@ func (collector *ContainerLogsCollectorContainerD) ParseContainerLogFilenames(di containerNameWithIDSplitOnDash := strings.Split(logFileSplitOnUnderscore[2], "-") //uid is the last value - indexOfUid := len(containerNameWithIDSplitOnDash)-1 + indexOfUid := len(containerNameWithIDSplitOnDash) - 1 //containerName is everything except the last value, joined containerName := strings.Join(containerNameWithIDSplitOnDash[0:indexOfUid], "") @@ -145,7 +154,7 @@ func (collector *ContainerLogsCollectorContainerD) ParseContainerLogFilenames(di } //GetAllContainerLogFilesThatHaveEverRunOnHost gets the list of log files for all containers that have ever run on the host -func (collector *ContainerLogsCollectorContainerD) GetAllContainerLogFilesThatHaveEverRunOnHost() ([]string, error){ +func (collector *ContainerLogsCollectorContainerD) GetAllContainerLogFilesThatHaveEverRunOnHost() ([]string, error) { output, err := utils.RunCommandOnHost("ls", containerLogDirectory) if err != nil { return nil, err