Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ddtrace/tracer: add sample_rate_limit field to startup log. #1230

Merged
merged 5 commits into from May 10, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions ddtrace/tracer/log.go
Expand Up @@ -36,6 +36,7 @@ type startupInfo struct {
Debug bool `json:"debug"` // Whether debug mode is enabled
AnalyticsEnabled bool `json:"analytics_enabled"` // True if there is a global analytics rate set
SampleRate string `json:"sample_rate"` // The default sampling rate for the rules sampler
SampleRateLimit string `json:"sample_rate_limit"` // The rate limit configured with the rules sampler
gbbr marked this conversation as resolved.
Show resolved Hide resolved
SamplingRules []SamplingRule `json:"sampling_rules"` // Rules used by the rules sampler
SamplingRulesError string `json:"sampling_rules_error"` // Any errors that occurred while parsing sampling rules
ServiceMappings map[string]string `json:"service_mappings"` // Service Mappings
Expand Down Expand Up @@ -90,6 +91,7 @@ func logStartup(t *tracer) {
Debug: t.config.debug,
AnalyticsEnabled: !math.IsNaN(globalconfig.AnalyticsRate()),
SampleRate: fmt.Sprintf("%f", t.rulesSampling.globalRate),
SampleRateLimit: "disabled",
SamplingRules: t.rulesSampling.rules,
ServiceMappings: t.config.serviceMappings,
Tags: tags,
Expand All @@ -107,6 +109,10 @@ func logStartup(t *tracer) {
if _, err := samplingRulesFromEnv(); err != nil {
info.SamplingRulesError = fmt.Sprintf("%s", err)
}

if limit, ok := t.rulesSampling.limit(); ok {
info.SampleRateLimit = fmt.Sprintf("%v", limit)
}
if !t.config.logToStdout {
if err := checkEndpoint(t.config.transport.endpoint()); err != nil {
info.AgentError = fmt.Sprintf("%s", err)
Expand Down
39 changes: 35 additions & 4 deletions ddtrace/tracer/log_test.go
Expand Up @@ -26,7 +26,7 @@ func TestStartupLog(t *testing.T) {
logStartup(tracer)
lines := removeAppSec(tp.Lines())
assert.Len(lines, 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":null,"sampling_rules_error":"","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, lines[1])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"disabled","sampling_rules":null,"sampling_rules_error":"","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, lines[1])
})

t.Run("configured", func(t *testing.T) {
Expand Down Expand Up @@ -55,7 +55,38 @@ func TestStartupLog(t *testing.T) {
tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"100","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1])
})

t.Run("limit", func(t *testing.T) {
assert := assert.New(t)
tp := new(testLogger)
os.Setenv("DD_TRACE_SAMPLE_RATE", "0.123")
defer os.Unsetenv("DD_TRACE_SAMPLE_RATE")
os.Setenv("DD_TRACE_RATE_LIMIT", "1000.001")
defer os.Unsetenv("DD_TRACE_RATE_LIMIT")
tracer, _, _, stop := startTestTracer(t,
WithLogger(tp),
WithService("configured.service"),
WithAgentAddr("test.host:1234"),
WithEnv("configuredEnv"),
WithServiceMapping("initial_service", "new_service"),
WithGlobalTag("tag", "value"),
WithGlobalTag("tag2", math.NaN()),
WithRuntimeMetrics(),
WithAnalyticsRate(1.0),
WithServiceVersion("2.3.4"),
WithSamplingRules([]SamplingRule{ServiceRule("mysql", 0.75)}),
WithDebugMode(true),
)
defer globalconfig.SetAnalyticsRate(math.NaN())
defer globalconfig.SetServiceName("")
defer stop()

tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sample_rate_limit":"1000.001","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1])
})

t.Run("errors", func(t *testing.T) {
Expand All @@ -69,7 +100,7 @@ func TestStartupLog(t *testing.T) {
tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"100","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1])
})

t.Run("lambda", func(t *testing.T) {
Expand All @@ -81,7 +112,7 @@ func TestStartupLog(t *testing.T) {
tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 1)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":null,"sampling_rules_error":"","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"true","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[0])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sample_rate_limit":"disabled","sampling_rules":null,"sampling_rules_error":"","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"true","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[0])
})
}

Expand Down
15 changes: 14 additions & 1 deletion ddtrace/tracer/sampler.go
Expand Up @@ -280,12 +280,16 @@ func newRateLimiter() *rateLimiter {
}
}

func (rs *rulesSampler) enabled() bool {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

<3

return len(rs.rules) > 0 || !math.IsNaN(rs.globalRate)
}

// apply uses the sampling rules to determine the sampling rate for the
// provided span. If the rules don't match, and a default rate hasn't been
// set using DD_TRACE_SAMPLE_RATE, then it returns false and the span is not
// modified.
func (rs *rulesSampler) apply(span *span) bool {
if len(rs.rules) == 0 && math.IsNaN(rs.globalRate) {
if !rs.enabled() {
// short path when disabled
return false
}
Expand Down Expand Up @@ -325,6 +329,15 @@ func (rs *rulesSampler) applyRate(span *span, rate float64, now time.Time) {
span.SetTag(keyRulesSamplerLimiterRate, rate)
}

// limit returns the rate limit set in the rules sampler, controlled by DD_TRACE_RATE_LIMIT, and
// true if rules sampling is enabled. If not present it returns math.NaN() and false.
func (rs *rulesSampler) limit() (float64, bool) {
if rs.enabled() {
return float64(rs.limiter.limiter.Limit()), true
Copy link
Contributor Author

@knusbaum knusbaum May 5, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should be OK without nil checks here. Other places throughout the rulesSampler code assume these fields are not nil and all of the constructors populate them.

If we ever changed rulesSampler to allow nil members, we would be unlikely to miss this line, since it lives within the rulesSampler methods.

}
return math.NaN(), false
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This works, but to remove complexity, I would probably return -1, false in this case. Then 284 can just be rs.globalRate >= 0. Unless the rate can actually be negative, which would be surprising.
Don't have a strong preference though.

Copy link
Contributor Author

@knusbaum knusbaum May 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

-1 would probably be a better choice, however we already use NaN in various places in the sampler and elsewhere as the canonical "not present" float64 value so I would like to stay consistent.

}

// SamplingRule is used for applying sampling rates to spans that match
// the service name, operation name or both.
// For basic usage, consider using the helper functions ServiceRule, NameRule, etc.
Expand Down