Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add custom health checks for astra.netapp.io CRDs #17998 #17999

Merged
merged 1 commit into from May 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
13 changes: 13 additions & 0 deletions resource_customizations/astra.netapp.io/AppVault/health.lua
@@ -0,0 +1,13 @@
-- Argo CD health check for astra.netapp.io AppVault resources.
-- Reads obj.status.state and maps it to a health status:
--   "available" / "Available" -> Healthy
--   "failed"    / "Failed"    -> Degraded
-- A missing status, missing state, or any other state value leaves the
-- default Progressing result untouched.
hs = { status = "Progressing", message = "No status available" }

-- Recognised state spellings mapped to { health status, message suffix }.
local outcomes = {
  available = { "Healthy", " Available" },
  Available = { "Healthy", " Available" },
  failed = { "Degraded", " Failed" },
  Failed = { "Degraded", " Failed" },
}

local vaultState = nil
if obj.status ~= nil then
  vaultState = obj.status.state
end

local outcome = nil
if vaultState ~= nil then
  outcome = outcomes[vaultState]
end

if outcome ~= nil then
  hs.status = outcome[1]
  -- The message is the resource kind followed by the suffix.
  hs.message = obj.kind .. outcome[2]
end

return hs
@@ -0,0 +1,13 @@
tests:
- healthStatus:
status: Progressing
message: "No status available"
inputPath: testdata/progressing_nostatus.yaml
- healthStatus:
status: Healthy
message: "AppVault Available"
inputPath: testdata/healthy.yaml
- healthStatus:
status: Degraded
message: "AppVault Failed"
inputPath: testdata/degraded.yaml
@@ -0,0 +1,23 @@
apiVersion: astra.netapp.io/v1
kind: AppVault
metadata:
creationTimestamp: "2024-04-26T14:25:45Z"
generation: 1
name: astra-gcp-backup-743cfd150129
namespace: astra-connector
resourceVersion: "12094908"
uid: 12943b68-323a-4e8a-ba78-604da0801d11
spec:
providerConfig:
bucketName: astra-gcp-backup-743cfd150129
providerCredentials:
credentials:
valueFromSecret:
key: credentials.json
name: astra-gcp-backup-734ced050128-5rdt4
providerType: gcp
status:
error:
'failed to close GCP object "appVault.json" in bucket "astra-gcp-backup-743cfd150129":
googleapi: Error 404: The specified bucket does not exist., notFound'
state: failed
@@ -0,0 +1,21 @@
apiVersion: astra.netapp.io/v1
kind: AppVault
metadata:
creationTimestamp: "2024-04-11T21:28:27Z"
generation: 1
name: astra-gcp-backup-743cfd150129
namespace: astra-connector
resourceVersion: "70908"
uid: d1b552b2-5d8e-467b-829b-1e6af7240400
spec:
providerConfig:
bucketName: astra-gcp-backup-743cfd150129
providerCredentials:
credentials:
valueFromSecret:
key: credentials.json
name: astra-gcp-backup-743cfd150129-5rdt4
providerType: gcp
status:
state: available
uid: c708262e-3944-49bf-af96-ad1c3eb6cafb
@@ -0,0 +1,18 @@
apiVersion: astra.netapp.io/v1
kind: AppVault
metadata:
creationTimestamp: "2024-04-26T14:25:45Z"
generation: 1
name: astra-gcp-backup-743cfd150129
namespace: astra-connector
resourceVersion: "12094608"
uid: 12943b68-323a-4e8a-ba78-604da0801d11
spec:
providerConfig:
bucketName: astra-gcp-backup-743cfd150129
providerCredentials:
credentials:
valueFromSecret:
key: credentials.json
name: astra-gcp-backup-734ced050128-5rdt4
providerType: gcp
17 changes: 17 additions & 0 deletions resource_customizations/astra.netapp.io/Application/health.lua
@@ -0,0 +1,17 @@
-- Argo CD health check for astra.netapp.io Application resources.
-- Scans obj.status.conditions for the first condition of type "Ready":
--   status "True"  -> Healthy, message includes status.protectionState
--   status "False" -> Degraded, message includes the condition's message
-- If there is no status, no conditions, or no Ready condition with a
-- "True"/"False" status, the default Progressing result is returned.
hs = { status = "Progressing", message = "No status available" }
if obj.status ~= nil then
  if obj.status.conditions ~= nil then
    for _, condition in ipairs(obj.status.conditions) do
      if condition.type == "Ready" and condition.status == "True" then
        hs.status = "Healthy"
        -- Concatenating nil raises a Lua error, which would abort the whole
        -- health check; fall back to a placeholder when the field is absent.
        hs.message = "Astra Application Ready, protectionState: " .. (obj.status.protectionState or "unknown")
        return hs
      elseif condition.type == "Ready" and condition.status == "False" then
        hs.status = "Degraded"
        -- Same nil guard: a condition is not guaranteed to carry a message.
        hs.message = "Astra Application Degraded, message: " .. (condition.message or "no message provided")
        return hs
      end
    end
  end
end
return hs
@@ -0,0 +1,13 @@
tests:
- healthStatus:
status: Progressing
message: "No status available"
inputPath: testdata/progressing.yaml
- healthStatus:
status: Healthy
message: "Astra Application Ready, protectionState: protected"
inputPath: testdata/healthy.yaml
- healthStatus:
status: Degraded
message: "Astra Application Degraded, message: namespace wordpress is in terminating state"
inputPath: testdata/degraded.yaml
@@ -0,0 +1,26 @@
apiVersion: astra.netapp.io/v1
kind: Application
metadata:
creationTimestamp: "2024-04-15T20:59:56Z"
finalizers:
- astra.netapp.io/finalizer
generation: 2
name: wordpress
namespace: astra-connector
resourceVersion: "10484469"
uid: 5ab7cd7d-7a9b-4508-9da2-c7dcb10a69b3
spec:
includedNamespaces:
- labelSelector: {}
namespace: wordpress
status:
conditions:
- lastTransitionTime: "2024-04-24T16:13:26Z"
message: namespace wordpress is in terminating state
reason: Ready
status: "False"
type: Ready
protectionState: partial
protectionStateDetails:
- Active backup schedule missing
- Application unavailable
@@ -0,0 +1,24 @@
apiVersion: astra.netapp.io/v1
kind: Application
metadata:
creationTimestamp: "2024-04-15T20:46:16Z"
finalizers:
- astra.netapp.io/finalizer
generation: 3
labels:
argocd.argoproj.io/instance: ghost-demo
name: ghost
namespace: astra-connector
resourceVersion: "3235325"
uid: 0af10ee8-772b-4367-8334-44f9e4ad2849
spec:
includedNamespaces:
- namespace: ghost
status:
conditions:
- lastTransitionTime: "2024-04-15T20:46:16Z"
message: ""
reason: Ready
status: "True"
type: Ready
protectionState: protected
@@ -0,0 +1,16 @@
apiVersion: astra.netapp.io/v1
kind: Application
metadata:
creationTimestamp: "2024-04-15T20:46:16Z"
finalizers:
- astra.netapp.io/finalizer
generation: 3
labels:
argocd.argoproj.io/instance: ghost-demo
name: ghost
namespace: astra-connector
resourceVersion: "3235325"
uid: 0af10ee8-772b-4367-8334-44f9e4ad2849
spec:
includedNamespaces:
- namespace: ghost
16 changes: 16 additions & 0 deletions resource_customizations/astra.netapp.io/Backup/health.lua
@@ -0,0 +1,16 @@
-- Argo CD health check for astra.netapp.io Backup resources.
-- Maps obj.status.state to a health status:
--   "Completed" -> Healthy
--   "Running"   -> Progressing
--   any other non-nil state -> Degraded, with the raw state as the message
-- A missing status or missing state yields the default Progressing result.
hs = { status = "Progressing", message = "No status available" }

-- Nothing to evaluate until a state has been reported.
if obj.status == nil or obj.status.state == nil then
  return hs
end

local backupState = obj.status.state
if backupState == "Running" then
  hs.status = "Progressing"
  hs.message = obj.kind .. " Running"
elseif backupState == "Completed" then
  hs.status = "Healthy"
  hs.message = obj.kind .. " Completed"
else
  -- Every other state is reported as Degraded, surfacing the state verbatim.
  hs.status = "Degraded"
  hs.message = backupState
end

return hs
17 changes: 17 additions & 0 deletions resource_customizations/astra.netapp.io/Backup/health_test.yaml
@@ -0,0 +1,17 @@
tests:
- healthStatus:
status: Progressing
message: "No status available"
inputPath: testdata/progressing_nostatus.yaml
- healthStatus:
status: Progressing
message: "Backup Running"
inputPath: testdata/progressing_status.yaml
- healthStatus:
status: Healthy
message: "Backup Completed"
inputPath: testdata/healthy.yaml
- healthStatus:
status: Degraded
message: "Failed"
inputPath: testdata/degraded.yaml
@@ -0,0 +1,79 @@
apiVersion: astra.netapp.io/v1
kind: Backup
metadata:
creationTimestamp: "2024-04-24T19:54:18Z"
finalizers:
- astra.netapp.io/finalizer
generation: 1
name: backup-20240424193746
namespace: astra-connector
ownerReferences:
- apiVersion: astra.netapp.io/v1
kind: Application
name: ghost
uid: 0af10ee8-772b-4367-8334-44f9e4ad2849
resourceVersion: "10641332"
uid: ad301b6a-6536-4313-89c1-d10ad0275430
spec:
appVaultRef: astra-gcp-backup-743cfd150129
applicationRef: ghost
status:
conditions:
- lastTransitionTime: "2024-04-24T19:54:18Z"
message: Successfully reconciled
reason: Done
status: "True"
type: AppOwnerReferenceCreated
- lastTransitionTime: "2024-04-24T19:54:18Z"
message: Successfully reconciled
reason: Done
status: "True"
type: SourceSnapshotExists
- lastTransitionTime: "2024-04-24T19:54:19Z"
message:
"Source snapshot failed with permanent error: reconcile timeout of 1h0m0s
exceeded"
reason: Failed
status: "False"
type: SourceSnapshotCompleted
- lastTransitionTime: "2024-04-24T19:54:18Z"
message: Not yet reconciled
reason: Pending
status: Unknown
type: SnapshotAppArchiveCopied
- lastTransitionTime: "2024-04-24T19:54:18Z"
message: Not yet reconciled
reason: Pending
status: Unknown
type: PreBackupExecHooksRunCompleted
- lastTransitionTime: "2024-04-24T19:54:18Z"
message: Not yet reconciled
reason: Pending
status: Unknown
type: VolumeBackupsCompleted
- lastTransitionTime: "2024-04-24T19:54:18Z"
message: Not yet reconciled
reason: Pending
status: Unknown
type: PostBackupExecHooksRunCompleted
- lastTransitionTime: "2024-04-24T19:54:18Z"
message: Not yet reconciled
reason: Pending
status: Unknown
type: TemporarySnapshotCleanedUp
- lastTransitionTime: "2024-04-24T19:54:18Z"
message: Not yet reconciled
reason: Pending
status: Unknown
type: Completed
- lastTransitionTime: "2024-04-24T19:54:18Z"
message: Not yet reconciled
reason: Pending
status: Unknown
type: OnFailurePostBackupExecHooksRunCompleted
error:
"Source snapshot failed with permanent error: reconcile timeout of 1h0m0s
exceeded"
progress: {}
sourceSnapshotName: backup-ad301b6a-6536-4313-89c1-d10ad0275430
state: Failed