Skip to content

Commit

Permalink
profiler, ddtrace/tracer: enable code hotspots & endpoints by default…
Browse files Browse the repository at this point in the history
… with 100% CPU profiling (#1169)

* profiler: set default CPU profiling interval to match duration

After testing this default on many high-volume internal workloads, we've
determined this default is safe for production. It's also important for
the code hotspots feature to be able to associate as many traces as
possible with profiling data.

* ddtrace/tracer: enable code hotspots & endpoints by default

After testing the feature internally on several high-volume workloads,
we've determined this feature can be safely enabled by default for
production workloads.

Co-authored-by: Felix Geisendörfer <felix@datadoghq.com>
  • Loading branch information
nsrip-dd and felixge committed Feb 24, 2022
1 parent 24cd31a commit 54604a1
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 16 deletions.
8 changes: 4 additions & 4 deletions ddtrace/tracer/log_test.go
Expand Up @@ -26,7 +26,7 @@ func TestStartupLog(t *testing.T) {
logStartup(tracer)
lines := removeAppSec(tp.Lines())
assert.Len(lines, 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":null,"sampling_rules_error":"","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":false,"profiler_endpoints_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, lines[1])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":null,"sampling_rules_error":"","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, lines[1])
})

t.Run("configured", func(t *testing.T) {
Expand Down Expand Up @@ -55,7 +55,7 @@ func TestStartupLog(t *testing.T) {
tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":false,"profiler_endpoints_enabled":false,"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"configuredEnv","service":"configured.service","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":true,"analytics_enabled":true,"sample_rate":"0\.123000","sampling_rules":\[{"service":"mysql","name":"","sample_rate":0\.75}\],"sampling_rules_error":"","service_mappings":{"initial_service":"new_service"},"tags":{"runtime-id":"[^"]*","tag":"value","tag2":"NaN"},"runtime_metrics_enabled":true,"health_metrics_enabled":true,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"2.3.4","architecture":"[^"]*","global_service":"configured.service","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1])
})

t.Run("errors", func(t *testing.T) {
Expand All @@ -69,7 +69,7 @@ func TestStartupLog(t *testing.T) {
tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 2)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":false,"profiler_endpoints_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"Post .*","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":\[{"service":"some.service","name":"","sample_rate":0\.234}\],"sampling_rules_error":"found errors:\\n\\tat index 1: rate not provided","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"false","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[1])
})

t.Run("lambda", func(t *testing.T) {
Expand All @@ -81,7 +81,7 @@ func TestStartupLog(t *testing.T) {
tp.Reset()
logStartup(tracer)
assert.Len(tp.Lines(), 1)
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":null,"sampling_rules_error":"","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":false,"profiler_endpoints_enabled":false,"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"true","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[0])
assert.Regexp(`Datadog Tracer v[0-9]+\.[0-9]+\.[0-9]+ INFO: DATADOG TRACER CONFIGURATION {"date":"[^"]*","os_name":"[^"]*","os_version":"[^"]*","version":"[^"]*","lang":"Go","lang_version":"[^"]*","env":"","service":"tracer\.test","agent_url":"http://localhost:9/v0.4/traces","agent_error":"","debug":false,"analytics_enabled":false,"sample_rate":"NaN","sampling_rules":null,"sampling_rules_error":"","service_mappings":null,"tags":{"runtime-id":"[^"]*"},"runtime_metrics_enabled":false,"health_metrics_enabled":false,"profiler_code_hotspots_enabled":((false)|(true)),"profiler_endpoints_enabled":((false)|(true)),"dd_version":"","architecture":"[^"]*","global_service":"","lambda_mode":"true","appsec":((true)|(false)),"agent_features":{"DropP0s":false,"Stats":false,"StatsdPort":0}}`, tp.Lines()[0])
})
}

Expand Down
9 changes: 4 additions & 5 deletions ddtrace/tracer/option.go
Expand Up @@ -223,9 +223,8 @@ func newConfig(opts ...StartOption) *config {
c.runtimeMetrics = internal.BoolEnv("DD_RUNTIME_METRICS_ENABLED", false)
c.debug = internal.BoolEnv("DD_TRACE_DEBUG", false)
c.enabled = internal.BoolEnv("DD_TRACE_ENABLED", true)
// TODO(fg): set these to true before going GA with this.
c.profilerEndpoints = internal.BoolEnv(traceprof.EndpointEnvVar, false)
c.profilerHotspots = internal.BoolEnv(traceprof.CodeHotspotsEnvVar, false)
c.profilerEndpoints = internal.BoolEnv(traceprof.EndpointEnvVar, true)
c.profilerHotspots = internal.BoolEnv(traceprof.CodeHotspotsEnvVar, true)

for _, fn := range opts {
fn(c)
Expand Down Expand Up @@ -690,7 +689,7 @@ func WithLogStartup(enabled bool) StartOption {
// called "span id" and "local root span id" when new spans are created. You
// should not use these label names in your own code when this is enabled. The
// enabled value defaults to the value of the
// DD_PROFILING_CODE_HOTSPOTS_COLLECTION_ENABLED env variable or false.
// DD_PROFILING_CODE_HOTSPOTS_COLLECTION_ENABLED env variable or true.
func WithProfilerCodeHotspots(enabled bool) StartOption {
return func(c *config) {
c.profilerHotspots = enabled
Expand All @@ -703,7 +702,7 @@ func WithProfilerCodeHotspots(enabled bool) StartOption {
// its type is "http", "rpc" or "" (default). You should not use this label
// name in your own code when this is enabled. The enabled value defaults to
// the value of the DD_PROFILING_ENDPOINT_COLLECTION_ENABLED env variable or
// false.
// true.
func WithProfilerEndpoints(enabled bool) StartOption {
return func(c *config) {
c.profilerEndpoints = enabled
Expand Down
12 changes: 6 additions & 6 deletions ddtrace/tracer/option_test.go
Expand Up @@ -375,28 +375,28 @@ func TestTracerOptionsDefaults(t *testing.T) {
t.Run("profiler-endpoints", func(t *testing.T) {
t.Run("default", func(t *testing.T) {
c := newConfig()
assert.False(t, c.profilerEndpoints)
assert.True(t, c.profilerEndpoints)
})

t.Run("override", func(t *testing.T) {
os.Setenv(traceprof.EndpointEnvVar, "true")
os.Setenv(traceprof.EndpointEnvVar, "false")
defer os.Unsetenv(traceprof.EndpointEnvVar)
c := newConfig()
assert.True(t, c.profilerEndpoints)
assert.False(t, c.profilerEndpoints)
})
})

t.Run("profiler-hotspots", func(t *testing.T) {
t.Run("default", func(t *testing.T) {
c := newConfig()
assert.False(t, c.profilerHotspots)
assert.True(t, c.profilerHotspots)
})

t.Run("override", func(t *testing.T) {
os.Setenv(traceprof.CodeHotspotsEnvVar, "true")
os.Setenv(traceprof.CodeHotspotsEnvVar, "false")
defer os.Unsetenv(traceprof.CodeHotspotsEnvVar)
c := newConfig()
assert.True(t, c.profilerHotspots)
assert.False(t, c.profilerHotspots)
})
})

Expand Down
2 changes: 1 addition & 1 deletion profiler/options.go
Expand Up @@ -45,7 +45,7 @@ const (
DefaultPeriod = time.Minute

// DefaultDuration specifies the default length of the CPU profile snapshot.
DefaultDuration = time.Second * 15
DefaultDuration = time.Minute

// DefaultUploadTimeout specifies the default timeout for uploading profiles.
// It can be overwritten using the DD_PROFILING_UPLOAD_TIMEOUT env variable
Expand Down

0 comments on commit 54604a1

Please sign in to comment.