Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reverseproxy: Health as a ratio of successful requests #5398

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
79 changes: 78 additions & 1 deletion modules/caddyhttp/caddyhttp.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package caddyhttp
import (
"bytes"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
Expand Down Expand Up @@ -164,7 +165,7 @@ func (ws *WeakString) UnmarshalJSON(b []byte) error {
return nil
}

// MarshalJSON marshals was a boolean if true or false,
// MarshalJSON marshals as a boolean if true or false,
// a number if an integer, or a string otherwise.
func (ws WeakString) MarshalJSON() ([]byte, error) {
if ws == "true" {
Expand Down Expand Up @@ -204,6 +205,82 @@ func (ws WeakString) String() string {
return string(ws)
}

// Ratio is a fractional value stored as a float64 that can be
// configured either as a number or as a ratio string.
// Valid formats are:
//   - a/b as a fraction (a / b)
//   - a:b as a ratio (a / (a+b))
//   - a floating point number
//
// In the string forms, a and b must be integers, and the resulting
// denominator (b, or a+b for the colon form) must not be zero.
type Ratio float64

// UnmarshalJSON satisfies json.Unmarshaler according to
// this type's documentation. A JSON string must use the a/b or
// a:b form; a bare JSON number is used as-is; null leaves the
// receiver untouched. Empty input yields io.EOF.
func (r *Ratio) UnmarshalJSON(b []byte) error {
	if len(b) == 0 {
		return io.EOF
	}
	// require at least two bytes so that a lone quote character is
	// not mistaken for a quoted string (slicing it would panic)
	if len(b) >= 2 && b[0] == '"' && b[len(b)-1] == '"' {
		unquoted := string(b[1 : len(b)-1])
		ratio, found, err := parseRatioParts(unquoted)
		if err != nil {
			return err
		}
		if !found {
			return fmt.Errorf("ratio string '%s' did not contain a slash '/' or colon ':'", unquoted)
		}
		*r = ratio
		return nil
	}
	if bytes.Equal(b, []byte("null")) {
		return nil
	}
	float, err := strconv.ParseFloat(string(b), 64)
	if err != nil {
		return fmt.Errorf("failed parsing ratio as float %s: %w", b, err)
	}
	*r = Ratio(float)
	return nil
}

// ParseRatio parses r as a Ratio. It accepts the same formats as
// documented on the Ratio type, without JSON quoting: "a/b", "a:b",
// or a floating point number.
func ParseRatio(r string) (Ratio, error) {
	ratio, found, err := parseRatioParts(r)
	if err != nil {
		return 0, err
	}
	if found {
		return ratio, nil
	}
	float, err := strconv.ParseFloat(r, 64)
	if err != nil {
		return 0, fmt.Errorf("failed parsing ratio as float %s: %w", r, err)
	}
	return Ratio(float), nil
}

// parseRatioParts parses the "a/b" and "a:b" forms of a ratio.
// The boolean result reports whether s contained a separator at
// all; when it is false (and the error is nil) the caller should
// fall back to another format. If both separators are present,
// the slash takes precedence.
func parseRatioParts(s string) (Ratio, bool, error) {
	left, right, isFraction := strings.Cut(s, "/")
	if !isFraction {
		var isRatio bool
		left, right, isRatio = strings.Cut(s, ":")
		if !isRatio {
			return 0, false, nil
		}
	}
	num, err := strconv.Atoi(left)
	if err != nil {
		return 0, true, fmt.Errorf("failed parsing numerator as integer %s: %w", left, err)
	}
	denom, err := strconv.Atoi(right)
	if err != nil {
		return 0, true, fmt.Errorf("failed parsing denominator as integer %s: %w", right, err)
	}

	// a/b divides by b; a:b divides by the total a+b
	divisor := float64(denom)
	if !isFraction {
		divisor = float64(num) + float64(denom)
	}
	// dividing by zero would silently produce Inf or NaN, which
	// is never a meaningful ratio, so reject it explicitly
	if divisor == 0 {
		return 0, true, fmt.Errorf("ratio '%s' has a zero denominator", s)
	}
	return Ratio(float64(num) / divisor), true, nil
}

// StatusCodeMatches returns true if a real HTTP status code matches
// the configured status code, which may be either a real HTTP status
// code or an integer representing a class of codes (e.g. 4 for all
Expand Down
76 changes: 76 additions & 0 deletions modules/caddyhttp/caddyhttp_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,3 +149,79 @@ func TestCleanPath(t *testing.T) {
}
}
}

// TestUnmarshalRatio exercises Ratio.UnmarshalJSON with each accepted
// input form (null, "a/b", "a:b", bare number) and with malformed
// inputs. A case with a non-empty errMsg must fail with exactly that
// message; every other case must succeed and produce expect.
func TestUnmarshalRatio(t *testing.T) {
	for i, tc := range []struct {
		input  []byte
		expect float64
		errMsg string
	}{
		{
			input:  []byte("null"),
			expect: 0,
		},
		{
			input:  []byte(`"1/3"`),
			expect: float64(1) / float64(3),
		},
		{
			input:  []byte(`"1/100"`),
			expect: float64(1) / float64(100),
		},
		{
			input:  []byte(`"3:2"`),
			expect: 0.6,
		},
		{
			input:  []byte(`"99:1"`),
			expect: 0.99,
		},
		{
			input:  []byte(`0.1`),
			expect: 0.1,
		},
		{
			input:  []byte(`0.005`),
			expect: 0.005,
		},
		{
			input:  []byte(`0`),
			expect: 0,
		},
		{
			input:  []byte(`"0"`),
			errMsg: `ratio string '0' did not contain a slash '/' or colon ':'`,
		},
		{
			input:  []byte(`a`),
			errMsg: `failed parsing ratio as float a: strconv.ParseFloat: parsing "a": invalid syntax`,
		},
		{
			input:  []byte(`"a/1"`),
			errMsg: `failed parsing numerator as integer a: strconv.Atoi: parsing "a": invalid syntax`,
		},
		{
			input:  []byte(`"1/a"`),
			errMsg: `failed parsing denominator as integer a: strconv.Atoi: parsing "a": invalid syntax`,
		},
		{
			input:  []byte(`"a:1"`),
			errMsg: `failed parsing numerator as integer a: strconv.Atoi: parsing "a": invalid syntax`,
		},
		{
			input:  []byte(`"1:a"`),
			errMsg: `failed parsing denominator as integer a: strconv.Atoi: parsing "a": invalid syntax`,
		},
	} {
		ratio := Ratio(0)
		err := ratio.UnmarshalJSON(tc.input)

		if tc.errMsg != "" {
			// an expected error that never materializes must fail the
			// test rather than fall through to the value comparison
			if err == nil {
				t.Fatalf("Test %d: expected error: %v, got none", i, tc.errMsg)
			}
			if tc.errMsg != err.Error() {
				t.Fatalf("Test %d: expected error: %v, got: %v", i, tc.errMsg, err)
			}
			continue
		}

		if err != nil {
			t.Fatalf("Test %d: invalid ratio: %v", i, err)
		}
		if ratio != Ratio(tc.expect) {
			t.Fatalf("Test %d: expected %v, got %v", i, tc.expect, ratio)
		}
	}
}
2 changes: 2 additions & 0 deletions modules/caddyhttp/reverseproxy/admin.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ type upstreamStatus struct {
Address string `json:"address"`
NumRequests int `json:"num_requests"`
Fails int `json:"fails"`
Successes int `json:"successes"`
}

// CaddyModule returns the Caddy module information.
Expand Down Expand Up @@ -99,6 +100,7 @@ func (adminUpstreams) handleUpstreams(w http.ResponseWriter, r *http.Request) er
Address: address,
NumRequests: upstream.NumRequests(),
Fails: upstream.Fails(),
Successes: upstream.Successes(),
})
return true
})
Expand Down
51 changes: 51 additions & 0 deletions modules/caddyhttp/reverseproxy/caddyfile.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ func parseCaddyfile(h httpcaddyfile.Helper) (caddyhttp.MiddlewareHandler, error)
// # passive health checking
// fail_duration <duration>
// max_fails <num>
// success_duration <duration>
// min_success_ratio <ratio>
// min_successes <num>
// unhealthy_status <status>
// unhealthy_latency <duration>
// unhealthy_request_count <num>
Expand Down Expand Up @@ -422,6 +425,54 @@ func (h *Handler) UnmarshalCaddyfile(d *caddyfile.Dispenser) error {
}
h.HealthChecks.Passive.MaxFails = maxFails

case "success_duration":
if !d.NextArg() {
return d.ArgErr()
}
if h.HealthChecks == nil {
h.HealthChecks = new(HealthChecks)
}
if h.HealthChecks.Passive == nil {
h.HealthChecks.Passive = new(PassiveHealthChecks)
}
dur, err := caddy.ParseDuration(d.Val())
if err != nil {
return d.Errf("bad duration value '%s': %v", d.Val(), err)
}
h.HealthChecks.Passive.SuccessDuration = caddy.Duration(dur)

case "min_success_ratio":
if !d.NextArg() {
return d.ArgErr()
}
if h.HealthChecks == nil {
h.HealthChecks = new(HealthChecks)
}
if h.HealthChecks.Passive == nil {
h.HealthChecks.Passive = new(PassiveHealthChecks)
}
ratio, err := caddyhttp.ParseRatio(d.Val())
if err != nil {
return d.Errf("bad ratio value '%s': %v", d.Val(), err)
}
h.HealthChecks.Passive.MinSuccessRatio = ratio

case "min_successes":
if !d.NextArg() {
return d.ArgErr()
}
if h.HealthChecks == nil {
h.HealthChecks = new(HealthChecks)
}
if h.HealthChecks.Passive == nil {
h.HealthChecks.Passive = new(PassiveHealthChecks)
}
count, err := strconv.Atoi(d.Val())
if err != nil {
return d.Errf("invalid minimum success count '%s': %v", d.Val(), err)
}
h.HealthChecks.Passive.MinSuccesses = count

case "fail_duration":
if !d.NextArg() {
return d.ArgErr()
Expand Down
70 changes: 68 additions & 2 deletions modules/caddyhttp/reverseproxy/healthchecks.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,15 +110,31 @@ type ActiveHealthChecks struct {
// health checks (that is, health checks which occur during
// the normal flow of request proxying).
type PassiveHealthChecks struct {
// How long to remember a failed request to a backend. A duration > 0
// enables passive health checking. Default is 0.
// How long to remember a failed request to a backend.
// A duration > 0 enables passive health checking. Default is 0.
FailDuration caddy.Duration `json:"fail_duration,omitempty"`

// The number of failed requests within the FailDuration window to
// consider a backend as "down". Must be >= 1; default is 1. Requires
// that FailDuration be > 0.
MaxFails int `json:"max_fails,omitempty"`

// How long to remember a successful request to a backend. Default is 0.
SuccessDuration caddy.Duration `json:"success_duration,omitempty"`

// The minimum ratio of successful to failed requests necessary to
// consider a backend as healthy. Both fail and success durations
// must be configured for those stats to be counted. Default is 0 (no ratio).
MinSuccessRatio caddyhttp.Ratio `json:"min_success_ratio,omitempty"`

// The minimum number of successful requests before considering the
// minimum success ratio. Default is 5. Requires MinSuccessRatio >= 0.
//
// If there are fewer than this many successful requests, then the ratio is
// ignored, because of a lack of data. This ensures that the upstream isn't
// prematurely considered unhealthy because no requests have happened yet.
MinSuccesses int `json:"min_successes,omitempty"`

// Limits the number of simultaneous requests to a backend by
// marking the backend as "down" if it has this many concurrent
// requests or more.
Expand Down Expand Up @@ -362,6 +378,56 @@ func (h *Handler) doActiveHealthCheck(dialInfo DialInfo, hostAddr string, upstre
return nil
}

// countSuccess is the passive-health-check hook for a successful
// request: it adds one success to the upstream's host and schedules
// its removal after the configured success duration. It does nothing
// when passive health checks are not configured or when the success
// duration is 0.
func (h *Handler) countSuccess(upstream *Upstream) {
	// nothing to do unless passive health checking is configured
	// with a non-zero success expiry
	checks := h.HealthChecks
	if checks == nil || checks.Passive == nil {
		return
	}
	expiry := time.Duration(checks.Passive.SuccessDuration)
	if expiry == 0 {
		return
	}

	// record the success right away
	if err := upstream.Host.countSuccess(1); err != nil {
		checks.Passive.logger.Error("could not count success",
			zap.String("host", upstream.Dial),
			zap.Error(err))
		return
	}

	// then forget it once the expiry elapses, or as soon as the
	// handler's context is done
	go func(target *Host, wait time.Duration) {
		defer func() {
			if recovered := recover(); recovered != nil {
				h.HealthChecks.Passive.logger.Error("passive health check success forgetter panicked",
					zap.Any("error", recovered),
					zap.ByteString("stack", debug.Stack()))
			}
		}()

		expiryTimer := time.NewTimer(wait)
		select {
		case <-expiryTimer.C:
		case <-h.ctx.Done():
			// stop the timer; if it already fired, drain its channel
			if stopped := expiryTimer.Stop(); !stopped {
				<-expiryTimer.C
			}
		}

		if err := target.countSuccess(-1); err != nil {
			h.HealthChecks.Passive.logger.Error("could not forget success",
				zap.String("host", upstream.Dial),
				zap.Error(err))
		}
	}(upstream.Host, expiry)
}

// countFailure is used with passive health checks. It
// remembers 1 failure for upstream for the configured
// duration. If passive health checks are disabled or
Expand Down