Skip to content

Commit

Permalink
Allow multiple 'runs' for Periscope (#196)
Browse files Browse the repository at this point in the history
  • Loading branch information
peterbom committed Aug 31, 2022
1 parent 8dd3720 commit 3082e38
Show file tree
Hide file tree
Showing 29 changed files with 821 additions and 304 deletions.
13 changes: 12 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@
"env": {},
"args": ["-test.v"],
"showLog": true
}
},
{
"name": "Launch Tests with race check",
"type": "go",
"request": "launch",
"program": "${fileDirname}",
"mode": "test",
"env": {},
"buildFlags": "-race",
"args": ["-test.v"],
"showLog": true
},
]
}
32 changes: 23 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,15 +97,17 @@ secretGenerator:
- AZURE_BLOB_CONTAINER_NAME=<CONTAINER_NAME>
- AZURE_BLOB_SAS_KEY=<SAS_KEY>

# Uncomment for optional configuration (default values shown)
# configMapGenerator:
# - name: diagnostic-config
# behavior: replace
# literals:
# - DIAGNOSTIC_CONTAINERLOGS_LIST=kube-system # space-separated namespaces
# - DIAGNOSTIC_KUBEOBJECTS_LIST=kube-system/pod kube-system/service kube-system/deployment # space-separated list of namespace/resource-type[/resource]
# - DIAGNOSTIC_NODELOGS_LIST_LINUX="/var/log/azure/cluster-provision.log /var/log/cloud-init.log" # space-separated log file locations
# - DIAGNOSTIC_NODELOGS_LIST_WINDOWS="C:\AzureData\CustomDataSetupScript.log" # space-separated log file locations
# Commented-out config values are the defaults. Uncomment to change.
configMapGenerator:
- name: diagnostic-config
behavior: merge
literals:
- DIAGNOSTIC_RUN_ID=<RUN_ID>
# - DIAGNOSTIC_CONTAINERLOGS_LIST=kube-system # space-separated namespaces
# - DIAGNOSTIC_KUBEOBJECTS_LIST=kube-system/pod kube-system/service kube-system/deployment # space-separated list of namespace/resource-type[/resource]
# - DIAGNOSTIC_NODELOGS_LIST_LINUX="/var/log/azure/cluster-provision.log /var/log/cloud-init.log" # space-separated log file locations
# - DIAGNOSTIC_NODELOGS_LIST_WINDOWS="C:\AzureData\CustomDataSetupScript.log" # space-separated log file locations
# - COLLECTOR_LIST="" # space-separated list containing any of 'connectedCluster' (enables helm/pods-containerlogs, disables iptables/kubelet/nodelogs/pdb/systemlogs/systemperf), 'OSM' (enables osm/smi), 'SMI' (enables smi).
```

All placeholders in angle brackets (`<`/`>`) need to be replaced with the relevant values:
Expand All @@ -117,12 +119,24 @@ All placeholders in angled brackets (`<`/`>`) need to be substituted for the rel
- `ss`: `b` (Service: blob)
- `srt`: `sco` (Resource types: service, container and object)
- `sp`: `rlacwd` (Permissions: read, list, add, create, write, delete)
- `RUN_ID`: The identifier for a particular 'run' of Periscope, by convention a timestamp formatted as `YYYY-MM-DDThh-mm-ssZ`. This will become the topmost container within `CONTAINER_NAME`.

You can then deploy Periscope by running:
```sh
kubectl apply -k <path-to-kustomize-directory>
```

To re-run without deleting and recreating resources, you can update the `DIAGNOSTIC_RUN_ID` value in the ConfigMap. Depending on the expiry of the SAS token, you may need to update the `AZURE_BLOB_SAS_KEY` value in the Secret first:
```sh
# Update SAS token (if expired)
sas=...
kubectl patch secret -n aks-periscope azureblob-secret -p="{\"data\":{\"AZURE_BLOB_SAS_KEY\": \"$(echo -n ?$sas | base64 -w 0)\"}}"

# Update DIAGNOSTIC_RUN_ID
runId=$(date -u '+%Y-%m-%dT%H-%M-%SZ')
kubectl patch configmap -n aks-periscope diagnostic-config -p="{\"data\":{\"DIAGNOSTIC_RUN_ID\": \"$runId\"}}"
```

### Using Azure Command-Line tool

AKS Periscope can be deployed by using Azure Command-Line tool (CLI). The steps are:
Expand Down
72 changes: 50 additions & 22 deletions cmd/aks-periscope/aks-periscope.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ package main

import (
"bytes"
"fmt"
"log"
"runtime"
"sync"
"time"

Expand All @@ -15,56 +17,87 @@ import (
)

func main() {
config, err := restclient.InClusterConfig()
osIdentifier, err := utils.StringToOSIdentifier(runtime.GOOS)
if err != nil {
log.Fatalf("Cannot load kubeconfig: %v", err)
log.Fatalf("cannot determine OS: %v", err)
}

creationTimeStamp, err := utils.GetCreationTimeStamp(config)
knownFilePaths, err := utils.GetKnownFilePaths(osIdentifier)
if err != nil {
log.Fatalf("Failed to get creation timestamp: %v", err)
log.Fatalf("failed to get file paths: %v", err)
}

runtimeInfo, err := utils.GetRuntimeInfo()
fileSystem := utils.NewFileSystem()

// Create a watcher for the Run ID file that checks its content every 10 seconds
fileWatcher := utils.NewFileContentWatcher(fileSystem, 10*time.Second)

// Create a channel for unrecoverable errors
errChan := make(chan error)

// Add a watcher for the run ID file content
runIdChan := make(chan string)
fileWatcher.AddHandler(knownFilePaths.GetConfigPath(utils.RunIdKey), runIdChan, errChan)

go func() {
for {
runId := <-runIdChan
log.Printf("Starting Periscope run %s", runId)
err := run(osIdentifier, knownFilePaths, fileSystem)
if err != nil {
errChan <- err
}

log.Printf("Completed Periscope run %s", runId)
}
}()

fileWatcher.Start()

// Run until unrecoverable error
err = <-errChan
log.Fatalf("Error running Periscope: %v", err)
}

func run(osIdentifier utils.OSIdentifier, knownFilePaths *utils.KnownFilePaths, fileSystem interfaces.FileSystemAccessor) error {
runtimeInfo, err := utils.GetRuntimeInfo(fileSystem, knownFilePaths)
if err != nil {
log.Fatalf("Failed to get runtime information: %v", err)
}

knownFilePaths, err := utils.GetKnownFilePaths(runtimeInfo)
config, err := restclient.InClusterConfig()
if err != nil {
log.Fatalf("Failed to get file paths: %v", err)
return fmt.Errorf("cannot load kubeconfig: %w", err)
}

exp := exporter.NewAzureBlobExporter(runtimeInfo, knownFilePaths, creationTimeStamp)
exp := exporter.NewAzureBlobExporter(runtimeInfo, knownFilePaths, runtimeInfo.RunId)

// Copies self-signed cert information to container if application is running on Azure Stack Cloud.
// We need the cert in order to communicate with the storage account.
if utils.IsAzureStackCloud(knownFilePaths) {
if err := utils.CopyFile(knownFilePaths.AzureStackCertHost, knownFilePaths.AzureStackCertContainer); err != nil {
log.Fatalf("Cannot copy cert for Azure Stack Cloud environment: %v", err)
return fmt.Errorf("cannot copy cert for Azure Stack Cloud environment: %w", err)
}
}

fileSystem := utils.NewFileSystem()

dnsCollector := collector.NewDNSCollector(runtimeInfo, knownFilePaths, fileSystem)
kubeletCmdCollector := collector.NewKubeletCmdCollector(runtimeInfo)
dnsCollector := collector.NewDNSCollector(osIdentifier, knownFilePaths, fileSystem)
kubeletCmdCollector := collector.NewKubeletCmdCollector(osIdentifier, runtimeInfo)
networkOutboundCollector := collector.NewNetworkOutboundCollector()
collectors := []interfaces.Collector{
dnsCollector,
kubeletCmdCollector,
networkOutboundCollector,
collector.NewHelmCollector(config, runtimeInfo),
collector.NewIPTablesCollector(runtimeInfo),
collector.NewIPTablesCollector(osIdentifier, runtimeInfo),
collector.NewKubeObjectsCollector(config, runtimeInfo),
collector.NewNodeLogsCollector(runtimeInfo, fileSystem),
collector.NewOsmCollector(config, runtimeInfo),
collector.NewPDBCollector(config, runtimeInfo),
collector.NewPodsContainerLogsCollector(config, runtimeInfo),
collector.NewSmiCollector(config, runtimeInfo),
collector.NewSystemLogsCollector(runtimeInfo),
collector.NewSystemLogsCollector(osIdentifier, runtimeInfo),
collector.NewSystemPerfCollector(config, runtimeInfo),
collector.NewWindowsLogsCollector(runtimeInfo, knownFilePaths, fileSystem, 10*time.Second, 20*time.Minute),
collector.NewWindowsLogsCollector(osIdentifier, runtimeInfo, knownFilePaths, fileSystem, 10*time.Second, 20*time.Minute),
}

collectorGrp := new(sync.WaitGroup)
Expand Down Expand Up @@ -136,10 +169,5 @@ func main() {
}
}

// TODO: Hack: for now AKS-Periscope is running as a deamonset so it shall not stop (or the pod will be restarted)
// Revert from https://github.com/Azure/aks-periscope/blob/b98d66a238e942158ef2628a9315b58937ff9c8f/cmd/aks-periscope/aks-periscope.go#L70
select {}

// TODO: remove this //nolint comment once the select{} has been removed
//nolint:govet
return nil
}
30 changes: 20 additions & 10 deletions deployment/base/daemon-set.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,11 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
envFrom:
- configMapRef:
name: diagnostic-config
- secretRef:
name: azureblob-secret
volumeMounts:
- name: diag-config-volume
mountPath: /config
- name: storage-secret-volume
mountPath: /secret
- name: varlog
mountPath: /var/log
- name: resolvlog
Expand All @@ -48,6 +47,12 @@ spec:
memory: "2000Mi"
cpu: "1000m"
volumes:
- name: diag-config-volume
configMap:
name: diagnostic-config
- name: storage-secret-volume
secret:
secretName: azureblob-secret
- name: varlog
hostPath:
path: /var/log
Expand Down Expand Up @@ -86,12 +91,11 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
envFrom:
- configMapRef:
name: diagnostic-config
- secretRef:
name: azureblob-secret
volumeMounts:
- name: diag-config-volume
mountPath: /config
- name: storage-secret-volume
mountPath: /secret
- name: k
mountPath: /k
- name: azuredata
Expand All @@ -104,6 +108,12 @@ spec:
memory: "2000Mi"
cpu: "1000m"
volumes:
- name: diag-config-volume
configMap:
name: diagnostic-config
- name: storage-secret-volume
secret:
secretName: azureblob-secret
- name: k
hostPath:
path: /k
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ require (
github.com/containerd/containerd v1.4.13 // indirect
github.com/docker/docker v20.10.14+incompatible
github.com/google/uuid v1.2.0
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/onsi/gomega v1.13.0 // indirect
helm.sh/helm/v3 v3.6.3
k8s.io/api v0.21.3
Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -420,11 +420,14 @@ github.com/hashicorp/consul/api v1.1.0/go.mod h1:VmuI/Lkw1nC05EYQWNKwWGbkg+FbDBt
github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE=
github.com/hashicorp/consul/sdk v0.1.1/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8=
github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8=
github.com/hashicorp/errwrap v1.0.0 h1:hLrqtEDnRye3+sgx6z4qVLNuviH3MR5aQ0ykNJa/UYA=
github.com/hashicorp/errwrap v1.0.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4=
github.com/hashicorp/go-cleanhttp v0.5.1/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80=
github.com/hashicorp/go-immutable-radix v1.0.0/go.mod h1:0y9vanUI8NX6FsYoO3zeMjhV/C5i9g4Q3DwcSNZ4P60=
github.com/hashicorp/go-msgpack v0.5.3/go.mod h1:ahLV/dePpqEmjfWmKiqvPkv/twdG7iPBM1vqhUKIvfM=
github.com/hashicorp/go-multierror v1.0.0/go.mod h1:dHtQlpGsu+cZNNAkkCN/P3hoUDHhCYQXV3UM06sGGrk=
github.com/hashicorp/go-multierror v1.1.1 h1:H5DkEtf6CXdFp0N0Em5UCwQpXMWke8IA0+lD48awMYo=
github.com/hashicorp/go-multierror v1.1.1/go.mod h1:iw975J/qwKPdAO1clOe2L8331t/9/fmwbPZ6JB6eMoM=
github.com/hashicorp/go-rootcerts v1.0.0/go.mod h1:K6zTfqpRlCUIjkwsN4Z+hiSfzSTQa6eBIzfwKfwNnHU=
github.com/hashicorp/go-sockaddr v1.0.0/go.mod h1:7Xibr9yA9JjQq1JpNB2Vw7kxv8xerXegt+ozgdvDeDU=
github.com/hashicorp/go-syslog v1.0.0/go.mod h1:qPfqrKkXGihmCqbJM2mZgkZGvKG1dFdvsLplgctolz4=
Expand Down
23 changes: 8 additions & 15 deletions pkg/collector/dns_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package collector

import (
"fmt"
"io/ioutil"
"io"

"github.com/Azure/aks-periscope/pkg/interfaces"
"github.com/Azure/aks-periscope/pkg/utils"
Expand All @@ -12,17 +12,17 @@ import (
type DNSCollector struct {
HostConf string
ContainerConf string
runtimeInfo *utils.RuntimeInfo
osIdentifier utils.OSIdentifier
filePaths *utils.KnownFilePaths
fileSystem interfaces.FileSystemAccessor
}

// NewDNSCollector is a constructor
func NewDNSCollector(runtimeInfo *utils.RuntimeInfo, filePaths *utils.KnownFilePaths, fileSystem interfaces.FileSystemAccessor) *DNSCollector {
func NewDNSCollector(osIdentifier utils.OSIdentifier, filePaths *utils.KnownFilePaths, fileSystem interfaces.FileSystemAccessor) *DNSCollector {
return &DNSCollector{
HostConf: "",
ContainerConf: "",
runtimeInfo: runtimeInfo,
osIdentifier: osIdentifier,
filePaths: filePaths,
fileSystem: fileSystem,
}
Expand All @@ -36,8 +36,8 @@ func (collector *DNSCollector) CheckSupported() error {
// NOTE: This *might* be achievable in Windows using APIs that query the registry, see:
// https://kubernetes.io/docs/setup/production-environment/windows/intro-windows-in-kubernetes/#networking
// But for now it's restricted to Linux containers only, in which we can read `resolv.conf`.
if collector.runtimeInfo.OSIdentifier != "linux" {
return fmt.Errorf("unsupported OS: %s", collector.runtimeInfo.OSIdentifier)
if collector.osIdentifier != utils.Linux {
return fmt.Errorf("unsupported OS: %s", collector.osIdentifier)
}

return nil
Expand All @@ -52,19 +52,12 @@ func (collector *DNSCollector) Collect() error {
}

func (collector *DNSCollector) getConfFileContent(filePath string) string {
reader, err := collector.fileSystem.GetFileReader(filePath)
content, err := utils.GetContent(func() (io.ReadCloser, error) { return collector.fileSystem.GetFileReader(filePath) })
if err != nil {
return err.Error()
}

defer reader.Close()

content, err := ioutil.ReadAll(reader)
if err != nil {
return err.Error()
}

return string(content)
return content
}

func (collector *DNSCollector) GetData() map[string]interfaces.DataValue {
Expand Down

0 comments on commit 3082e38

Please sign in to comment.