Add feature to wait on ready replicas on scaling up #91

Merged
README.md: 7 additions, 0 deletions
@@ -66,3 +66,10 @@ Finally, deploy StatefulSets of Thanos receivers labeled with `controller.receive.thanos.io/hashring`.
The controller lists all of the StatefulSets with that label and matches the value of their `controller.receive.thanos.io/hashring` labels to the hashring names in the configuration file.
The endpoints for each hashring will be populated automatically by the controller and the complete configuration file will be placed in a ConfigMap named `thanos-receive-generated`.
This configuration should be consumed as a ConfigMap volume by the Thanos receivers.

## About the `--allow-only-ready-replicas` flag
By default, upon a scale-up, the controller adds all new receiver replicas to the hashring as soon as they are in a _running_ state. This means the new replicas receive requests from other replicas in the hashring before they are ready to accept them. Due to the nature of how the receiver works, it can take some time until its storage is ready. Depending on your rollout strategy, you might therefore see an increased failure rate in the hashring until enough replicas are in a ready state.

An alternative is to use the `--allow-only-ready-replicas` flag, which modifies this behavior: upon a scale-up, new replicas are added to the hashring only after they are confirmed to be ready. Concretely, as sketched after this list:
- Old replicas keep operating with the old hashring until all new replicas are ready. Once that is the case, the hashring is updated to include all replicas in the StatefulSet.
- New replicas initially come up with the old hashring configuration, so they serve only as "routers": any requests they receive are forwarded to replicas in the old hashring. Once _all_ new receiver replicas are ready, the hashring is updated to include both old and new replicas.
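
To make the sequencing concrete, here is a minimal sketch of a readiness-gated wait using `client-go`. It is illustrative only, not this controller's actual implementation (the real change is in `main.go` below); the package name, the `waitForReady` helper, and the one-second poll interval are assumptions:

```go
// Package readiness is an illustrative sketch, assuming a recent client-go.
package readiness

import (
	"context"
	"time"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

// waitForReady blocks until the named pod reports the PodReady condition,
// i.e. until the receiver can actually accept remote-write requests, not
// merely until the pod is Running.
func waitForReady(ctx context.Context, client kubernetes.Interface, namespace, name string) error {
	return wait.PollImmediateUntil(time.Second, func() (bool, error) {
		pod, err := client.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
		if err != nil {
			return false, nil // treat lookup errors as "not ready yet" and keep polling
		}
		for _, cond := range pod.Status.Conditions {
			if cond.Type == corev1.PodReady {
				return cond.Status == corev1.ConditionTrue, nil
			}
		}
		return false, nil
	}, ctx.Done())
}
```

With the flag enabled, the controller applies this idea across the whole StatefulSet: the generated hashring ConfigMap is only rewritten once every new replica passes such a check, so existing replicas keep routing against the old configuration in the meantime.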
go.mod: 1 addition, 0 deletions
@@ -11,6 +11,7 @@ require (
	k8s.io/api v0.23.5
	k8s.io/apimachinery v0.23.5
	k8s.io/client-go v12.0.0+incompatible
	k8s.io/kubernetes v1.13.0
)

require (
go.sum: 1 addition, 0 deletions
@@ -2760,6 +2760,7 @@ k8s.io/kube-openapi v0.0.0-20210305001622-591a79e4bda7/go.mod h1:wXW5VT87nVfh/iL
k8s.io/kube-openapi v0.0.0-20211109043538-20434351676c/go.mod h1:vHXdDvt9+2spS2Rx9ql3I8tycm3H9FDfdUoIuKCefvw=
k8s.io/kube-openapi v0.0.0-20211115234752-e816edb12b65 h1:E3J9oCLlaobFUqsjG9DfKbP2BmgwBL2p7pn0A3dG9W4=
k8s.io/kube-openapi v0.0.0-20211115234752-e816edb12b65/go.mod h1:sX9MT8g7NVZM5lVL/j8QyCCJe8YSMW30QvGZWaCIDIk=
k8s.io/kubernetes v1.13.0 h1:qTfB+u5M92k2fCCCVP2iuhgwwSOv1EkAkvQY1tQODD8=
k8s.io/kubernetes v1.13.0/go.mod h1:ocZa8+6APFNC2tX1DZASIbocyYT5jHzqFVsY5aoB7Jk=
k8s.io/utils v0.0.0-20190809000727-6c36bc71fc4a/go.mod h1:sZAwmy6armz5eXlNoLmJcl4F1QuKu7sr+mFQ0byX7Ew=
k8s.io/utils v0.0.0-20191114200735-6ca3b61696b6/go.mod h1:sZAwmy6armz5eXlNoLmJcl4F1QuKu7sr+mFQ0byX7Ew=
main.go: 12 additions, 0 deletions
@@ -35,6 +35,7 @@ import (
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/cache"
	"k8s.io/client-go/tools/clientcmd"
	podutil "k8s.io/kubernetes/pkg/api/v1/pod"
)

type label = string
@@ -69,6 +70,7 @@ func main() {
		Port                   int
		Scheme                 string
		InternalAddr           string
		AllowOnlyReadyReplicas bool
		ScaleTimeout           time.Duration
	}{}

@@ -82,6 +84,7 @@
	flag.IntVar(&config.Port, "port", defaultPort, "The port on which receive components are listening for write requests")
	flag.StringVar(&config.Scheme, "scheme", "http", "The URL scheme on which receive components accept write requests")
	flag.StringVar(&config.InternalAddr, "internal-addr", ":8080", "The address on which internal server runs")
	flag.BoolVar(&config.AllowOnlyReadyReplicas, "allow-only-ready-replicas", false, "Populate only Ready receiver replicas in the hashring configuration")
	flag.DurationVar(&config.ScaleTimeout, "scale-timeout", defaultScaleTimeout, "A timeout to wait for receivers to really start after they report healthy")
	flag.Parse()

@@ -124,6 +127,7 @@ func main() {
		scheme:                 config.Scheme,
		labelKey:               labelKey,
		labelValue:             labelValue,
		allowOnlyReadyReplicas: config.AllowOnlyReadyReplicas,
		scaleTimeout:           config.ScaleTimeout,
	}
	c := newController(klient, logger, opt)
@@ -305,6 +309,7 @@ type options struct {
	scheme                 string
	labelKey               string
	labelValue             string
	allowOnlyReadyReplicas bool
	scaleTimeout           time.Duration
}

@@ -565,6 +570,12 @@ func (c controller) waitForPod(ctx context.Context, name string) error {
		}
		switch pod.Status.Phase {
		case corev1.PodRunning:
			if c.options.allowOnlyReadyReplicas {
				if podutil.IsPodReady(pod) {
					return true, nil
				}
				return false, nil
			}
			return true, nil
		case corev1.PodFailed, corev1.PodPending, corev1.PodSucceeded, corev1.PodUnknown:
			return false, nil
@@ -574,6 +585,7 @@ func (c controller) waitForPod(ctx context.Context, name string) error {
	})
}
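
For context, `podutil.IsPodReady` just checks the pod's `PodReady` condition. A minimal equivalent, sketched here against the `corev1` types this file already uses (the package and helper names are hypothetical):

```go
package readiness

import corev1 "k8s.io/api/core/v1"

// isPodReady reports whether the pod's PodReady condition is True,
// mirroring podutil.IsPodReady from k8s.io/kubernetes/pkg/api/v1/pod.
func isPodReady(pod *corev1.Pod) bool {
	for _, cond := range pod.Status.Conditions {
		if cond.Type == corev1.PodReady {
			return cond.Status == corev1.ConditionTrue
		}
	}
	return false
}
```

This helper is also why the PR adds `k8s.io/kubernetes` to `go.mod`; inlining a check like the one above would avoid importing the full Kubernetes module for a single function.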

//nolint:nestif
func (c *controller) populate(hashrings []receive.HashringConfig, statefulsets map[string]*appsv1.StatefulSet) {
	for i, h := range hashrings {
		if sts, exists := statefulsets[h.Hashring]; exists {