Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CURATOR-695] Open Telemetry tracing #494

Draft
wants to merge 15 commits into
base: master
Choose a base branch
from
8 changes: 3 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,9 @@ Thumbs.db
.gradle

# Build output directies
/target
*/target
/build
*/build
*/bin
target/
build/
bin/

# IntelliJ specific files/directories
out
Expand Down
4 changes: 2 additions & 2 deletions curator-client/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
<parent>
<groupId>org.apache.curator</groupId>
<artifactId>apache-curator</artifactId>
<version>5.5.1-SNAPSHOT</version>
<version>5.6.1-SNAPSHOT</version>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we should bump the version in a separate commit, can you please revert ?

</parent>

<artifactId>curator-client</artifactId>
<version>5.5.1-SNAPSHOT</version>
<version>5.6.1-SNAPSHOT</version>
<packaging>bundle</packaging>

<name>Curator Client</name>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,43 @@
*/
public abstract class AdvancedTracerDriver implements TracerDriver {
/**
* Record the given trace event
* Records the start of a new operation that will later complete successfully or erroneously via a call to
* {@link #endTrace(OperationTrace)}. The call may optionally return driver specific state which can be
* accessed in {@link #endTrace(OperationTrace) endTrace} via {@link OperationTrace#getDriverTrace()}. The
* driver implementation is responsible for checking that any state returned is valid. Additionally, while it is
* expected that all calls to {@code startTrace} will have a corresponding call to
* {@link #endTrace(OperationTrace) endTrace}, it is the responsibility of the driver implementation to manage any
* leaking of non-terminal {@link OperationTrace OperationTraces}.
*
* @param trace the metrics of the operation
* @return The internal trace representation of the driver implementation. Driver dependent, may be {@code null}.
*/
public abstract void addTrace(OperationTrace trace);
public abstract Object startTrace(OperationTrace trace);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be better to introduce a new sub-class/sub-interface of TracerDriver so we won't break third-party usage of AdvancedTracerDriver.


/**
* Add to a named counter
* Signals the completion, successful or otherwise, of the specified {@link OperationTrace}.
*
* @param trace the metrics of the operation
*/
public abstract void endTrace(OperationTrace trace);

/**
* Record the given trace event after the completion of the event. This is equivalent to calling
* {@link #startTrace(OperationTrace) startTrace} followed by {@link #endTrace(OperationTrace) endTrace}.
*
* @param trace the metrics of the operation
* @deprecated Prefer the use of {@link #startTrace(OperationTrace)} followed by {@link #endTrace(OperationTrace)}
*/
@Deprecated
public void addTrace(OperationTrace trace) {
startTrace(trace);
endTrace(trace);
}

/**
* Record the given trace event
*
* @param name name of the counter
* @param increment amount to increment
* @param trace name of the counter
*/
public abstract void addEvent(EventTrace trace);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,28 +19,33 @@

package org.apache.curator.drivers;

import java.io.UnsupportedEncodingException;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;

import java.util.concurrent.TimeUnit;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.data.Stat;

import static java.nio.charset.StandardCharsets.UTF_8;

/**
* Used to trace the metrics of a certain Zookeeper operation.
*/
public class OperationTrace {
private final String name;
private final TracerDriver driver;

private final long startTimeNanos = System.nanoTime();
private final long sessionId;
private long endTimeNanos = -1L;
private int returnCode = KeeperException.Code.OK.intValue();
private long latencyMs;
private long requestBytesLength;
private long responseBytesLength;
private String path;
private boolean withWatcher;
private long sessionId;
private Stat stat;

private final long startTimeNanos = System.nanoTime();
// This would ideally be a parameterised type, but we do not wish to break the existing API at this time.
private Object driverTrace;

public OperationTrace(String name, TracerDriver driver) {
this(name, driver, -1);
Expand All @@ -50,6 +55,9 @@ public OperationTrace(String name, TracerDriver driver, long sessionId) {
this.name = name;
this.driver = driver;
this.sessionId = sessionId;
if (this.driver instanceof AdvancedTracerDriver) {
driverTrace = ((AdvancedTracerDriver) this.driver).startTrace(this);
}
}

public OperationTrace setReturnCode(int returnCode) {
Expand All @@ -66,13 +74,7 @@ public OperationTrace setRequestBytesLength(String data) {
if (data == null) {
return this;
}

try {
this.setRequestBytesLength(data.getBytes("UTF-8").length);
} catch (UnsupportedEncodingException e) {
// Ignore the exception.
}

this.setRequestBytesLength(data.getBytes(UTF_8).length);
return this;
}

Expand Down Expand Up @@ -112,6 +114,10 @@ public OperationTrace setStat(Stat stat) {
return this;
}

public Object getDriverTrace() {
return driverTrace;
}

public String getName() {
return this.name;
}
Expand Down Expand Up @@ -148,11 +154,23 @@ public Stat getStat() {
return this.stat;
}

public long getStartTimeNanos() {
return this.startTimeNanos;
}

public long getEndTimeNanos() {
if (endTimeNanos < startTimeNanos) {
throw new IllegalStateException("End time requested but trace has not yet ended.");
}
return this.endTimeNanos;
}

public void commit() {
long elapsed = System.nanoTime() - startTimeNanos;
endTimeNanos = System.nanoTime();
long elapsed = endTimeNanos - startTimeNanos;
this.latencyMs = TimeUnit.MILLISECONDS.convert(elapsed, TimeUnit.NANOSECONDS);
if (this.driver instanceof AdvancedTracerDriver) {
((AdvancedTracerDriver) this.driver).addTrace(this);
((AdvancedTracerDriver) this.driver).endTrace(this);
} else {
this.driver.addTrace(this.name, elapsed, TimeUnit.NANOSECONDS);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public int getBaseSleepTimeMs() {
@Override
protected long getSleepTimeMs(int retryCount, long elapsedTimeMs) {
// copied from Hadoop's RetryPolicies.java
long sleepMs = baseSleepTimeMs * Math.max(1, random.nextInt(1 << (retryCount + 1)));
long sleepMs = (long) baseSleepTimeMs * Math.max(1, random.nextInt(1 << (retryCount + 1)));
if (sleepMs > maxSleepMs) {
log.warn(String.format("Sleep extension too large (%d). Pinning to %d", sleepMs, maxSleepMs));
sleepMs = maxSleepMs;
Expand Down
79 changes: 79 additions & 0 deletions curator-drivers/open-telemetry/pom.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--

Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.

-->

<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<parent>
<groupId>org.apache.curator</groupId>
<artifactId>curator-drivers</artifactId>
<version>5.6.1-SNAPSHOT</version>
</parent>

<artifactId>curator-drivers-open-telemetry</artifactId>
<version>5.6.1-SNAPSHOT</version>

<name>Curator OpenTelemetry Tracing Driver</name>
<description>A tracing driver driver that emits OpenTelemetry spans.</description>
<inceptionYear>2023</inceptionYear>

<properties>
<opentelemetry.version>1.31.0</opentelemetry.version>
</properties>

<dependencies>
<dependency>
<groupId>org.apache.curator</groupId>
<artifactId>curator-client</artifactId>
<version>${project.version}</version>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be at scope 'provided'

<scope>provided</scope>
</dependency>
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-api</artifactId>
<version>${opentelemetry.version}</version>
</dependency>
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-sdk-testing</artifactId>
<version>${opentelemetry.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.junit.jupiter</groupId>
<artifactId>junit-jupiter-api</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>