Skip to content

Commit 1a2b623

Browse files
authored
Merge pull request #33 from braintrustdata/ark/vcr-otel
Distributed Tracing for remote scorers
2 parents 316b7b2 + 61f2cee commit 1a2b623

178 files changed

Lines changed: 3502 additions & 738 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

src/main/java/dev/braintrust/api/BraintrustApiClient.java

Lines changed: 82 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,14 @@ Optional<Prompt> getPrompt(
7777
Optional<Function> getFunction(
7878
@Nonnull String projectName, @Nonnull String slug, @Nullable String version);
7979

80+
/**
81+
* Get a function by its ID.
82+
*
83+
* @param functionId the ID of the function
84+
* @return the function if found
85+
*/
86+
Optional<Function> getFunctionById(@Nonnull String functionId);
87+
8088
/**
8189
* Invoke a function (scorer, prompt, or tool) by its ID.
8290
*
@@ -86,6 +94,15 @@ Optional<Function> getFunction(
8694
*/
8795
Object invokeFunction(@Nonnull String functionId, @Nonnull FunctionInvokeRequest request);
8896

97+
/**
98+
* Execute a BTQL (Braintrust Query Language) query. Supports both BTQL pipe syntax and standard
99+
* SQL syntax.
100+
*
101+
* @param query the BTQL/SQL query string
102+
* @return the query result containing rows of data
103+
*/
104+
BtqlQueryResponse btqlQuery(@Nonnull String query);
105+
89106
static BraintrustApiClient of(BraintrustConfig config) {
90107
return new HttpImpl(config);
91108
}
@@ -351,6 +368,21 @@ public Optional<Function> getFunction(
351368
}
352369
}
353370

371+
@Override
372+
public Optional<Function> getFunctionById(@Nonnull String functionId) {
373+
Objects.requireNonNull(functionId, "functionId must not be null");
374+
try {
375+
String path = "/v1/function/" + functionId;
376+
return Optional.of(getAsync(path, Function.class).get());
377+
} catch (InterruptedException | ExecutionException e) {
378+
if (e.getCause() instanceof ApiException apiEx
379+
&& apiEx.getMessage().contains("404")) {
380+
return Optional.empty();
381+
}
382+
throw new RuntimeException(e);
383+
}
384+
}
385+
354386
@Override
355387
public Object invokeFunction(
356388
@Nonnull String functionId, @Nonnull FunctionInvokeRequest request) {
@@ -364,6 +396,17 @@ public Object invokeFunction(
364396
}
365397
}
366398

399+
@Override
400+
public BtqlQueryResponse btqlQuery(@Nonnull String query) {
401+
Objects.requireNonNull(query, "query must not be null");
402+
try {
403+
var request = new BtqlQueryRequest(query);
404+
return postAsync("/btql", request, BtqlQueryResponse.class).get();
405+
} catch (InterruptedException | ExecutionException e) {
406+
throw new ApiException("Failed to execute BTQL query", e);
407+
}
408+
}
409+
367410
private <T> CompletableFuture<T> getAsync(String path, Class<T> responseType) {
368411
var request =
369412
HttpRequest.newBuilder()
@@ -661,11 +704,21 @@ public Optional<Function> getFunction(
661704
throw new RuntimeException("will not be invoked");
662705
}
663706

707+
@Override
708+
public Optional<Function> getFunctionById(@Nonnull String functionId) {
709+
throw new RuntimeException("will not be invoked");
710+
}
711+
664712
@Override
665713
public Object invokeFunction(
666714
@Nonnull String functionId, @Nonnull FunctionInvokeRequest request) {
667715
throw new RuntimeException("will not be invoked");
668716
}
717+
718+
@Override
719+
public BtqlQueryResponse btqlQuery(@Nonnull String query) {
720+
throw new RuntimeException("will not be invoked");
721+
}
669722
}
670723

671724
// Request/Response DTOs
@@ -794,50 +847,58 @@ record FunctionListResponse(List<Function> objects) {}
794847
*
795848
* <p>For remote Python/TypeScript scorers, the scorer handler parameters (input, output,
796849
* expected, metadata) must be wrapped in the outer input field.
850+
*
851+
* <p>The parent field enables distributed tracing by linking the remote function's spans to the
852+
* caller's span context. It can be either a base64-encoded SpanComponents string or an object
853+
* with object_type, object_id, and row_ids.
797854
*/
798-
record FunctionInvokeRequest(@Nullable Object input, @Nullable String version) {
855+
record FunctionInvokeRequest(
856+
@Nullable Object input, @Nullable String version, @Nullable Object parent) {
799857

800858
/** Create a simple invoke request with just input */
801859
public static FunctionInvokeRequest of(Object input) {
802-
return new FunctionInvokeRequest(input, null);
860+
return new FunctionInvokeRequest(input, null, null);
803861
}
804862

805863
/** Create a simple invoke request with input and version */
806864
public static FunctionInvokeRequest of(Object input, @Nullable String version) {
807-
return new FunctionInvokeRequest(input, version);
808-
}
809-
810-
/**
811-
* Create an invoke request for a scorer with input, output, expected, and metadata. This
812-
* maps to the standard scorer handler signature: handler(input, output, expected, metadata)
813-
*
814-
* <p>The scorer args are wrapped in the outer input field as required by the invoke API.
815-
*/
816-
public static FunctionInvokeRequest forScorer(
817-
Object input, Object output, Object expected, Object metadata) {
818-
return forScorer(input, output, expected, metadata, null);
865+
return new FunctionInvokeRequest(input, version, null);
819866
}
820867

821868
/**
822-
* Create an invoke request for a scorer with input, output, expected, metadata, and
823-
* version. This maps to the standard scorer handler signature: handler(input, output,
824-
* expected, metadata)
869+
* Create an invoke request for a scorer with distributed tracing support.
825870
*
826-
* <p>The scorer args are wrapped in the outer input field as required by the invoke API.
871+
* @param input the input to the task being scored
872+
* @param output the output from the task being scored
873+
* @param expected the expected output
874+
* @param metadata additional metadata
875+
* @param version optional function version
876+
* @param parent optional parent for distributed tracing - can be a base64-encoded
877+
* SpanComponents string or a Map with object_type, object_id, and row_ids
827878
*/
828-
public static FunctionInvokeRequest forScorer(
879+
public static FunctionInvokeRequest of(
829880
Object input,
830881
Object output,
831882
Object expected,
832883
Object metadata,
833-
@Nullable String version) {
884+
@Nullable String version,
885+
@Nullable Object parent) {
834886
// Wrap scorer args in an inner map that becomes the outer "input" field
835887
var scorerArgs = new java.util.LinkedHashMap<String, Object>();
836888
scorerArgs.put("input", input);
837889
scorerArgs.put("output", output);
838890
scorerArgs.put("expected", expected);
839891
scorerArgs.put("metadata", metadata);
840-
return new FunctionInvokeRequest(scorerArgs, version);
892+
return new FunctionInvokeRequest(scorerArgs, version, parent);
841893
}
842894
}
895+
896+
/** Request body for BTQL queries. */
897+
record BtqlQueryRequest(String query) {}
898+
899+
/**
900+
* Response from a BTQL query. The data field contains the rows returned by the query, where
901+
* each row is a map of column names to values.
902+
*/
903+
record BtqlQueryResponse(List<Map<String, Object>> data) {}
843904
}

src/main/java/dev/braintrust/config/BraintrustConfig.java

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ public final class BraintrustConfig extends BaseConfig {
3737
private final boolean enableTraceConsoleLog =
3838
getConfig("BRAINTRUST_ENABLE_TRACE_CONSOLE_LOG", false);
3939
private final boolean debug = getConfig("BRAINTRUST_DEBUG", false);
40-
private final boolean experimentalOtelLogs = getConfig("BRAINTRUST_X_OTEL_LOGS", false);
4140
private final Duration requestTimeout =
4241
Duration.ofSeconds(getConfig("BRAINTRUST_REQUEST_TIMEOUT", 30));
4342

@@ -47,10 +46,6 @@ public final class BraintrustConfig extends BaseConfig {
4746
/** Custom X509 trust manager for OTLP exporter. Builder-only field, not backed by envars. */
4847
private final X509TrustManager x509TrustManager;
4948

50-
/** Setting for unit testing. Do not use in production. */
51-
private final boolean exportSpansInMemoryForUnitTest =
52-
getConfig("BRAINTRUST_JAVA_EXPORT_SPANS_IN_MEMORY_FOR_UNIT_TEST", false);
53-
5449
/** CORS origins to allow when running remote eval devserver */
5550
private final String devserverCorsOriginWhitelistCsv =
5651
getConfig(
@@ -192,19 +187,6 @@ public Builder requestTimeout(Duration value) {
192187
return this;
193188
}
194189

195-
// hiding visibility. only used for testing
196-
Builder experimentalOtelLogs(boolean value) {
197-
envOverrides.put("BRAINTRUST_X_OTEL_LOGS", String.valueOf(value));
198-
return this;
199-
}
200-
201-
// only used for testing
202-
public Builder exportSpansInMemoryForUnitTest(boolean value) {
203-
envOverrides.put(
204-
"BRAINTRUST_JAVA_EXPORT_SPANS_IN_MEMORY_FOR_UNIT_TEST", String.valueOf(value));
205-
return this;
206-
}
207-
208190
public Builder sslContext(SSLContext value) {
209191
this.sslContext = value;
210192
return this;

src/main/java/dev/braintrust/devserver/Devserver.java

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -594,10 +594,12 @@ private void setScoreSpanAttributes(
594594
scoreSpanAttrs.put("generation", braintrustGeneration);
595595
}
596596

597+
var scoresJson = json(scorerScores);
597598
scoreSpan
598599
.setAttribute(PARENT, braintrustParent.toParentValue())
599600
.setAttribute("braintrust.span_attributes", json(scoreSpanAttrs))
600-
.setAttribute("braintrust.output_json", json(scorerScores));
601+
.setAttribute("braintrust.output_json", scoresJson)
602+
.setAttribute("braintrust.scores", scoresJson);
601603
}
602604

603605
private void sendSSEEvent(OutputStream os, String eventType, String data) throws IOException {
@@ -1075,10 +1077,7 @@ private static Scorer<Object, Object> resolveRemoteScorer(
10751077
}
10761078

10771079
return new ScorerBrainstoreImpl<>(
1078-
apiClient,
1079-
functionIdSpec.getFunctionId(),
1080-
remoteScorer.getName(),
1081-
functionIdSpec.getVersion());
1080+
apiClient, functionIdSpec.getFunctionId(), functionIdSpec.getVersion());
10821081
}
10831082

10841083
public static class Builder {

src/main/java/dev/braintrust/eval/Eval.java

Lines changed: 20 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -116,32 +116,33 @@ private void evalOne(String experimentId, DatasetCase<INPUT, OUTPUT> datasetCase
116116
throw new RuntimeException(e);
117117
}
118118
}
119-
{ // run scorers
119+
// run scorers - one span per scorer
120+
for (var scorer : scorers) {
120121
var scoreSpan =
121122
tracer.spanBuilder("score")
122123
.setAttribute(PARENT, "experiment_id:" + experimentId)
123-
.setAttribute(
124-
"braintrust.span_attributes", json(Map.of("type", "score")))
125124
.startSpan();
126125
try (var unused =
127126
BraintrustContext.ofExperiment(experimentId, scoreSpan).makeCurrent()) {
127+
var scores = scorer.score(taskResult);
128128
// linked map to preserve ordering. Not in the spec but nice user experience
129-
final Map<String, Double> nameToScore = new LinkedHashMap<>();
130-
scorers.forEach(
131-
scorer -> {
132-
var scores = scorer.score(taskResult);
133-
scores.forEach(
134-
score -> {
135-
if (score.value() < 0.0 || score.value() > 1.0) {
136-
throw new RuntimeException(
137-
"score must be between 0 and 1: %s : %s"
138-
.formatted(
139-
scorer.getName(), score));
140-
}
141-
nameToScore.put(score.name(), score.value());
142-
});
143-
});
144-
scoreSpan.setAttribute("braintrust.scores", json(nameToScore));
129+
final Map<String, Double> scorerScores = new LinkedHashMap<>();
130+
for (var score : scores) {
131+
if (score.value() < 0.0 || score.value() > 1.0) {
132+
throw new RuntimeException(
133+
"score must be between 0 and 1: %s : %s"
134+
.formatted(scorer.getName(), score));
135+
}
136+
scorerScores.put(score.name(), score.value());
137+
}
138+
// Set span attributes with scorer name
139+
Map<String, Object> spanAttrs = new LinkedHashMap<>();
140+
spanAttrs.put("type", "score");
141+
spanAttrs.put("name", scorer.getName());
142+
scoreSpan.setAttribute("braintrust.span_attributes", json(spanAttrs));
143+
var scoresJson = json(scorerScores);
144+
scoreSpan.setAttribute("braintrust.output_json", scoresJson);
145+
scoreSpan.setAttribute("braintrust.scores", scoresJson);
145146
} finally {
146147
scoreSpan.end();
147148
}

src/main/java/dev/braintrust/eval/Scorer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,6 @@ static <INPUT, OUTPUT> Scorer<INPUT, OUTPUT> fetchFromBraintrust(
7878
+ ", slug="
7979
+ scorerSlug));
8080

81-
return new ScorerBrainstoreImpl<>(apiClient, function.id(), function.name(), version);
81+
return new ScorerBrainstoreImpl<>(apiClient, function.id(), version);
8282
}
8383
}

0 commit comments

Comments
 (0)