diff --git a/Documentation/api.md b/Documentation/api.md index e70db4f9c9..3b7b58cbbb 100644 --- a/Documentation/api.md +++ b/Documentation/api.md @@ -810,6 +810,7 @@ PrometheusSpec is a specification of the desired behavior of the Prometheus clus | enforcedLabelValueLengthLimit | Per-scrape limit on length of labels value that will be accepted for a sample. If a label value is longer than this number post metric-relabeling, the entire scrape will be treated as failed. 0 means no limit. Only valid in Prometheus versions 2.27.0 and newer. | *uint64 | false | | enforcedBodySizeLimit | EnforcedBodySizeLimit defines the maximum size of uncompressed response body that will be accepted by Prometheus. Targets responding with a body larger than this many bytes will cause the scrape to fail. Example: 100MB. If defined, the limit will apply to all service/pod monitors and probes. This is an experimental feature, this behaviour could change or be removed in the future. Only valid in Prometheus versions 2.28.0 and newer. | ByteSize | false | | minReadySeconds | Minimum number of seconds for which a newly created pod should be ready without any of its container crashing for it to be considered available. Defaults to 0 (pod will be considered available as soon as it is ready) This is an alpha field and requires enabling StatefulSetMinReadySeconds feature gate. | *uint32 | false | +| terminationGracePeriodSeconds | Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). If this value is nil, the default grace period will be used instead. Default value is set to 10 min because Prometheus may take quite long to shutdown to checkpoint existing data. 
The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. | *uint64 | false | | retention | Time duration Prometheus shall retain data for. Default is '24h' if retentionSize is not set, and must match the regular expression `[0-9]+(ms\|s\|m\|h\|d\|w\|y)` (milliseconds seconds minutes hours days weeks years). | string | false | | retentionSize | Maximum amount of disk space used by blocks. | ByteSize | false | | disableCompaction | Disable prometheus compaction. | bool | false | diff --git a/bundle.yaml b/bundle.yaml index 5b598f65c1..8e702e4b8c 100644 --- a/bundle.yaml +++ b/bundle.yaml @@ -17514,6 +17514,21 @@ spec: use ''image'' instead. The image tag can be specified as part of the image URL.' type: string + terminationGracePeriodSeconds: + default: 600 + description: Optional duration in seconds the pod needs to terminate + gracefully. May be decreased in delete request. Value must be non-negative + integer. The value zero indicates stop immediately via the kill + signal (no opportunity to shut down). If this value is nil, the + default grace period will be used instead. Default value is set + to 10 min because Prometheus may take quite long to shutdown to + checkpoint existing data. The grace period is the duration in seconds + after the processes running in the pod are sent a termination signal + and the time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup time for + your process. + format: int64 + type: integer thanos: description: "Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment. 
\n This section diff --git a/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml b/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml index ad20b79ba8..5870bfab1c 100644 --- a/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml +++ b/example/prometheus-operator-crd/monitoring.coreos.com_prometheuses.yaml @@ -6207,6 +6207,21 @@ spec: use ''image'' instead. The image tag can be specified as part of the image URL.' type: string + terminationGracePeriodSeconds: + default: 600 + description: Optional duration in seconds the pod needs to terminate + gracefully. May be decreased in delete request. Value must be non-negative + integer. The value zero indicates stop immediately via the kill + signal (no opportunity to shut down). If this value is nil, the + default grace period will be used instead. Default value is set + to 10 min because Prometheus may take quite long to shutdown to + checkpoint existing data. The grace period is the duration in seconds + after the processes running in the pod are sent a termination signal + and the time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup time for + your process. + format: int64 + type: integer thanos: description: "Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment. \n This section diff --git a/jsonnet/prometheus-operator/prometheuses-crd.json b/jsonnet/prometheus-operator/prometheuses-crd.json index d11d53e606..398b35a5a4 100644 --- a/jsonnet/prometheus-operator/prometheuses-crd.json +++ b/jsonnet/prometheus-operator/prometheuses-crd.json @@ -5766,6 +5766,12 @@ "description": "Tag of Prometheus container image to be deployed. Defaults to the value of `version`. Version is ignored if Tag is set. Deprecated: use 'image' instead. 
The image tag can be specified as part of the image URL.", "type": "string" }, + "terminationGracePeriodSeconds": { + "default": 600, + "description": "Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). If this value is nil, the default grace period will be used instead. Default value is set to 10 min because Prometheus may take quite long to shutdown to checkpoint existing data. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process.", + "format": "int64", + "type": "integer" + }, "thanos": { "description": "Thanos configuration allows configuring various aspects of a Prometheus server in a Thanos environment. \n This section is experimental, it may change significantly without deprecation notice in any release. \n This is experimental and may change significantly without backward compatibility in any release.", "properties": { diff --git a/pkg/apis/monitoring/v1/types.go b/pkg/apis/monitoring/v1/types.go index 1002759b9e..4a1fe6396b 100644 --- a/pkg/apis/monitoring/v1/types.go +++ b/pkg/apis/monitoring/v1/types.go @@ -335,6 +335,17 @@ type CommonPrometheusFields struct { // This is an alpha field and requires enabling StatefulSetMinReadySeconds feature gate. // +optional MinReadySeconds *uint32 `json:"minReadySeconds,omitempty"` + // Optional duration in seconds the pod needs to terminate gracefully. May be decreased in delete request. + // Value must be non-negative integer. The value zero indicates stop immediately via + // the kill signal (no opportunity to shut down). + // If this value is nil, the default grace period will be used instead. 
Default value is set to + // 10 min because Prometheus may take quite long to shutdown to checkpoint existing data. + // The grace period is the duration in seconds after the processes running in the pod are sent + // a termination signal and the time when the processes are forcibly halted with a kill signal. + // Set this value longer than the expected cleanup time for your process. + // +optional + // +kubebuilder:default:=600 + TerminationGracePeriodSeconds *uint64 `json:"terminationGracePeriodSeconds,omitempty"` } // Prometheus defines a Prometheus deployment. diff --git a/pkg/apis/monitoring/v1/zz_generated.deepcopy.go b/pkg/apis/monitoring/v1/zz_generated.deepcopy.go index 7df92f853f..917daa659f 100644 --- a/pkg/apis/monitoring/v1/zz_generated.deepcopy.go +++ b/pkg/apis/monitoring/v1/zz_generated.deepcopy.go @@ -591,6 +591,11 @@ func (in *CommonPrometheusFields) DeepCopyInto(out *CommonPrometheusFields) { *out = new(uint32) **out = **in } + if in.TerminationGracePeriodSeconds != nil { + in, out := &in.TerminationGracePeriodSeconds, &out.TerminationGracePeriodSeconds + *out = new(uint64) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CommonPrometheusFields. diff --git a/pkg/prometheus/statefulset.go b/pkg/prometheus/statefulset.go index 4aa5585eac..8463ad6f62 100644 --- a/pkg/prometheus/statefulset.go +++ b/pkg/prometheus/statefulset.go @@ -325,10 +325,6 @@ func makeStatefulSetService(p *monitoringv1.Prometheus, config operator.Config) func makeStatefulSetSpec(p monitoringv1.Prometheus, c *operator.Config, shard int32, ruleConfigMapNames []string, tlsAssetSecrets []string, version semver.Version) (*appsv1.StatefulSetSpec, error) { - // Prometheus may take quite long to shut down to checkpoint existing data. - // Allow up to 10 minutes for clean termination. 
- terminationGracePeriod := int64(600) - prometheusImagePath, err := operator.BuildImagePath( operator.StringPtrValOrDefault(p.Spec.Image, ""), operator.StringValOrDefault(p.Spec.BaseImage, c.PrometheusDefaultBaseImage), @@ -904,10 +900,18 @@ func makeStatefulSetSpec(p monitoringv1.Prometheus, c *operator.Config, shard in } } - var minReadySeconds int32 + var ( + minReadySeconds int32 + // Prometheus may take quite long to shut down to checkpoint existing data. + // Fall back to 10 minutes when the spec leaves the grace period unset (e.g. CRD defaulting was not applied). + terminationGracePeriod int64 = 600 + ) if p.Spec.MinReadySeconds != nil { minReadySeconds = int32(*p.Spec.MinReadySeconds) } + if p.Spec.TerminationGracePeriodSeconds != nil { + terminationGracePeriod = int64(*p.Spec.TerminationGracePeriodSeconds) + } operatorInitContainers = append(operatorInitContainers, operator.CreateConfigReloader( diff --git a/pkg/prometheus/statefulset_test.go b/pkg/prometheus/statefulset_test.go index ec57d6debe..e08dfbb045 100644 --- a/pkg/prometheus/statefulset_test.go +++ b/pkg/prometheus/statefulset_test.go @@ -1998,6 +1998,39 @@ func TestExpectedStatefulSetShardNames(t *testing.T) { } } +func TestExpectStatefulSetTerminationGracePeriodSeconds(t *testing.T) { + statefulSet, err := makeStatefulSet("test", monitoringv1.Prometheus{ + Spec: monitoringv1.PrometheusSpec{}, + }, defaultTestConfig, nil, "", 0, nil) + + if err != nil { + t.Fatal(err) + } + // assert falls back to the 10 minute default (600s) if nil + if *statefulSet.Spec.Template.Spec.TerminationGracePeriodSeconds != 600 { + t.Fatalf("expected TerminationGracePeriodSeconds to be 600 but got %d", + *statefulSet.Spec.Template.Spec.TerminationGracePeriodSeconds) + } + + var expect uint64 = 5 + statefulSet, err = makeStatefulSet("test", monitoringv1.Prometheus{ + Spec: monitoringv1.PrometheusSpec{ + CommonPrometheusFields: monitoringv1.CommonPrometheusFields{ + TerminationGracePeriodSeconds: &expect, + }, + }, + }, defaultTestConfig, nil, "", 0, nil) + + if err != nil { + t.Fatal(err) + } + + if *statefulSet.Spec.Template.Spec.TerminationGracePeriodSeconds != int64(expect) { + t.Fatalf("expected TerminationGracePeriodSeconds to be %d but 
got %d", expect, + *statefulSet.Spec.Template.Spec.TerminationGracePeriodSeconds) + } +} + func TestExpectStatefulSetMinReadySeconds(t *testing.T) { statefulSet, err := makeStatefulSet("test", monitoringv1.Prometheus{ Spec: monitoringv1.PrometheusSpec{},