Skip to content

Commit

Permalink
Prometheus client: fix lock copy + add traces (kiali#3664)
Browse files Browse the repository at this point in the history
- Do not copy prom's default round tripper as it contains a lock
- Add trace-level logs on prom queries
  • Loading branch information
jotak authored and xeviknal committed Feb 3, 2021
1 parent 74f9a54 commit c11329c
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 2 deletions.
23 changes: 23 additions & 0 deletions business/dashboards_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,29 @@ func TestDiscoveryMatcherWithComposition(t *testing.T) {
assert.Equal("dashboard2", runtimes[0].DashboardRefs[0].Template)
}

func TestGetCustomDashboardRefs(t *testing.T) {
assert := assert.New(t)

// Setup mocks
service, k8s, prom := setupService()
d1 := fakeDashboard("1")
d2 := fakeDashboard("2")
k8s.On("GetDashboards", "my-namespace").Return([]v1alpha1.MonitoringDashboard{}, nil)
k8s.On("GetDashboards", "istio-system").Return([]v1alpha1.MonitoringDashboard{*d1, *d2}, nil)
prom.MockMetricsForLabels([]string{"my_metric_1_1", "request_count", "tcp_received", "tcp_sent"})
pods := []*models.Pod{}

runtimes := service.GetCustomDashboardRefs("my-namespace", "app", "", pods)

k8s.AssertNumberOfCalls(t, "GetDashboards", 2)
prom.AssertNumberOfCalls(t, "GetMetricsForLabels", 1)
assert.Len(runtimes, 1)
assert.Equal("Runtime 1", runtimes[0].Name)
assert.Len(runtimes[0].DashboardRefs, 1)
assert.Equal("dashboard1", runtimes[0].DashboardRefs[0].Template)
assert.Equal("Dashboard 1", runtimes[0].DashboardRefs[0].Title)
}

func fakeDashboard(id string) *v1alpha1.MonitoringDashboard {
return &v1alpha1.MonitoringDashboard{
ObjectMeta: v1.ObjectMeta{
Expand Down
14 changes: 12 additions & 2 deletions prometheus/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package prometheus
import (
"context"
"fmt"
"net"
"net/http"
"strings"
"sync"
Expand Down Expand Up @@ -84,9 +85,17 @@ func NewClientForConfig(cfg config.PrometheusConfig) (*Client, error) {
}

// make a copy of the prometheus DefaultRoundTripper to avoid race condition (issue #3518)
defaultRoundTripper := *api.DefaultRoundTripper.(*http.Transport)
// Do not copy the struct itself, it contains a lock. Re-create it from scratch instead.
roundTripper := &http.Transport{
Proxy: http.ProxyFromEnvironment,
DialContext: (&net.Dialer{
Timeout: 30 * time.Second,
KeepAlive: 30 * time.Second,
}).DialContext,
TLSHandshakeTimeout: 10 * time.Second,
}

transportConfig, err := httputil.CreateTransport(&auth, &defaultRoundTripper, httputil.DefaultTimeout)
transportConfig, err := httputil.CreateTransport(&auth, roundTripper, httputil.DefaultTimeout)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -275,6 +284,7 @@ func (in *Client) GetMetricsForLabels(labels []string) ([]string, error) {
// Arbitrarily set time range. Meaning that discovery works with metrics produced within last hour
end := time.Now()
start := end.Add(-time.Hour)
log.Tracef("[Prom] GetMetricsForLabels: %v", labels)
results, warnings, err := in.api.Series(in.ctx, labels, start, end)
if warnings != nil && len(warnings) > 0 {
log.Warningf("GetMetricsForLabels. Prometheus Warnings: [%s]", strings.Join(warnings, ","))
Expand Down
3 changes: 3 additions & 0 deletions prometheus/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ func fetchHistogramValues(ctx context.Context, api prom_v1.API, metricName, labe
queries := buildHistogramQueries(metricName, labels, grouping, rateInterval, avg, quantiles)
histogram := make(map[string]model.Vector, len(queries))
for k, query := range queries {
log.Tracef("[Prom] fetchHistogramValues: %s", query)
result, warnings, err := api.Query(ctx, query, queryTime)
if warnings != nil && len(warnings) > 0 {
log.Warningf("fetchHistogramValues. Prometheus Warnings: [%s]", strings.Join(warnings, ","))
Expand Down Expand Up @@ -93,6 +94,7 @@ func buildHistogramQueries(metricName, labels, grouping, rateInterval string, av
}

func fetchRange(ctx context.Context, api prom_v1.API, query string, bounds prom_v1.Range) Metric {
log.Tracef("[Prom] fetchRange: %s", query)
result, warnings, err := api.QueryRange(ctx, query, bounds)
if warnings != nil && len(warnings) > 0 {
log.Warningf("fetchRange. Prometheus Warnings: [%s]", strings.Join(warnings, ","))
Expand Down Expand Up @@ -172,6 +174,7 @@ func getItemRequestRates(ctx context.Context, api prom_v1.API, namespace, item,

func getRequestRatesForLabel(ctx context.Context, api prom_v1.API, time time.Time, labels, ratesInterval string) (model.Vector, error) {
query := fmt.Sprintf("rate(istio_requests_total{%s}[%s]) > 0", labels, ratesInterval)
log.Tracef("[Prom] getRequestRatesForLabel: %s", query)
promtimer := internalmetrics.GetPrometheusProcessingTimePrometheusTimer("Metrics-GetRequestRates")
result, warnings, err := api.Query(ctx, query, time)
if warnings != nil && len(warnings) > 0 {
Expand Down
5 changes: 5 additions & 0 deletions prometheus/prometheustest/mock.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,11 @@ func (o *PromClientMock) MockWorkloadRequestRates(namespace, wkld string, in, ou
o.On("GetWorkloadRequestRates", namespace, wkld, mock.AnythingOfType("string"), mock.AnythingOfType("time.Time")).Return(in, out, nil)
}

// MockMetricsForLabels mocks GetMetricsForLabels
func (o *PromClientMock) MockMetricsForLabels(metrics []string) {
o.On("GetMetricsForLabels", mock.AnythingOfType("[]string")).Return(metrics, nil)
}

func (o *PromClientMock) GetAllRequestRates(namespace, ratesInterval string, queryTime time.Time) (model.Vector, error) {
args := o.Called(namespace, ratesInterval, queryTime)
return args.Get(0).(model.Vector), args.Error(1)
Expand Down

0 comments on commit c11329c

Please sign in to comment.