Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add prometheus definitions for key metrics. #9088

Merged
merged 5 commits into from Nov 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 1 addition & 2 deletions go.mod
Expand Up @@ -13,7 +13,7 @@ require (
github.com/NYTimes/gziphandler v1.0.1
github.com/StackExchange/wmi v0.0.0-20180116203802-5d049714c4a6 // indirect
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e
github.com/armon/go-metrics v0.3.5-0.20200914211745-2bc64ebd2914
github.com/armon/go-metrics v0.3.5-0.20201104215618-6fd5a4ddf425
github.com/armon/go-radix v1.0.0
github.com/aws/aws-sdk-go v1.25.41
github.com/coredns/coredns v1.1.2
Expand Down Expand Up @@ -63,7 +63,6 @@ require (
github.com/joyent/triton-go v1.7.1-0.20200416154420-6801d15b779f // indirect
github.com/konsorten/go-windows-terminal-sequences v1.0.2 // indirect
github.com/kr/text v0.1.0
github.com/mattn/go-colorable v0.1.7 // indirect
github.com/miekg/dns v1.1.31
github.com/mitchellh/cli v1.1.0
github.com/mitchellh/copystructure v1.0.0
Expand Down
6 changes: 2 additions & 4 deletions go.sum
Expand Up @@ -59,8 +59,8 @@ github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6/go.mod h1:grANhF5
github.com/armon/go-metrics v0.0.0-20180917152333-f0300d1749da/go.mod h1:Q73ZrmVTwzkszR9V5SSuryQ31EELlFMUz1kKyl939pY=
github.com/armon/go-metrics v0.0.0-20190430140413-ec5e00d3c878/go.mod h1:3AMJUQhVx52RsWOnlkpikZr01T/yAVN2gn0861vByNg=
github.com/armon/go-metrics v0.3.0/go.mod h1:zXjbSimjXTd7vOpY8B0/2LpvNvDoXBuplAD+gJD3GYs=
github.com/armon/go-metrics v0.3.5-0.20200914211745-2bc64ebd2914 h1:Yiw8vrY+7jX6pCOdAkIUNU8QBS9c6HJAct+K36MeANw=
github.com/armon/go-metrics v0.3.5-0.20200914211745-2bc64ebd2914/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
github.com/armon/go-metrics v0.3.5-0.20201104215618-6fd5a4ddf425 h1:23nUvGE+8HYFc0AUXuYxgFws6IdyzOrSJJmKfPMJmi8=
github.com/armon/go-metrics v0.3.5-0.20201104215618-6fd5a4ddf425/go.mod h1:4O98XIr/9W0sxpJ8UaYkvjk10Iff7SnFrb4QAOwNTFc=
github.com/armon/go-radix v0.0.0-20180808171621-7fddfc383310/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI=
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
Expand Down Expand Up @@ -345,8 +345,6 @@ github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaO
github.com/mattn/go-colorable v0.1.4/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE=
github.com/mattn/go-colorable v0.1.6 h1:6Su7aK7lXmJ/U79bYtBjLNaha4Fs1Rg9plHpcH+vvnE=
github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-colorable v0.1.7 h1:bQGKb3vps/j0E9GfJQ03JyhRuxsvdAanXlT9BTw3mdw=
github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc=
github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4=
github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s=
github.com/mattn/go-isatty v0.0.10/go.mod h1:qgIWMr58cqv1PHHyhnkY9lrL7etaEgOFcMEpPG5Rm84=
Expand Down
69 changes: 68 additions & 1 deletion lib/telemetry.go
Expand Up @@ -280,8 +280,75 @@ func prometheusSink(cfg TelemetryConfig, hostname string) (metrics.MetricSink, e
if cfg.PrometheusRetentionTime.Nanoseconds() < 1 {
return nil, nil
}

// TODO(kit) define these in vars in the package/file they're used
gaugeDefs := []prometheus.GaugeDefinition{
{
Name: []string{"consul", "autopilot", "healthy"},
Help: "This tracks the overall health of the local server cluster. 1 if all servers are healthy, 0 if one or more are unhealthy.",
},
}

// TODO(kit) define these in vars in the package/file they're used
counterDefs := []prometheus.CounterDefinition{
{
Name: []string{"consul", "raft", "apply"},
Help: "This counts the number of Raft transactions occurring over the interval.",
},
{
Name: []string{"consul", "raft", "state", "candidate"},
Help: "This increments whenever a Consul server starts an election.",
},
{
Name: []string{"consul", "raft", "state", "leader"},
Help: "This increments whenever a Consul server becomes a leader.",
},
{
Name: []string{"consul", "client", "api", "catalog_register"},
Help: "Increments whenever a Consul agent receives a catalog register request.",
},
{
Name: []string{"consul", "runtime", "total_gc_pause_ns"},
Help: "Number of nanoseconds consumed by stop-the-world garbage collection (GC) pauses since Consul started.",
},
{
Name: []string{"consul", "client", "rpc"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server.",
},
{
Name: []string{"consul", "client", "rpc", "exceeded"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server gets rate limited by that agent's limits configuration.",
},
{
Name: []string{"consul", "client", "rpc", "failed"},
Help: "Increments whenever a Consul agent in client mode makes an RPC request to a Consul server and fails.",
},
}

// TODO(kit) define these in vars in the package/file they're used
summaryDefs := []prometheus.SummaryDefinition{
{
Name: []string{"consul", "kvs", "apply"},
Help: "This measures the time it takes to complete an update to the KV store.",
},
{
Name: []string{"consul", "txn", "apply"},
Help: "This measures the time spent applying a transaction operation.",
},
{
Name: []string{"consul", "raft", "commitTime"},
Help: "This measures the time it takes to commit a new entry to the Raft log on the leader.",
},
{
Name: []string{"consul", "raft", "leader", "lastContact"},
Help: "Measures the time since the leader was last able to contact the follower nodes when checking its leader lease.",
},
}
prometheusOpts := prometheus.PrometheusOpts{
Expiration: cfg.PrometheusRetentionTime,
Expiration: cfg.PrometheusRetentionTime,
GaugeDefinitions: gaugeDefs,
CounterDefinitions: counterDefs,
SummaryDefinitions: summaryDefs,
}
sink, err := prometheus.NewPrometheusSinkFrom(prometheusOpts)
if err != nil {
Expand Down