braintrustdata · Stephen Belanger (Qard) · May 8, 2026 · Apr 22, 2026 · Apr 22, 2026 · Apr 22, 2026
diff --git a/examples/eval/classifiers.rb b/examples/eval/classifiers.rb
@@ -0,0 +1,144 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require "bundler/setup"
+require "braintrust"
+require "opentelemetry/sdk"
+
+# Example: Classifiers
+#
+# Classifiers categorize and label eval outputs. Unlike scorers (which return
+# numeric 0-1 values), classifiers return structured Classification items —
+# each with an :id and optional :name (defaults to the classifier's), :label, :metadata.
+#
+# Results are stored as a dictionary keyed by classifier name:
+#
+#   { "sentiment" => [{ id: "positive", label: "Positive" }] }
+#
+# Three patterns are shown:
+#
+#   1. Block-based (Braintrust::Classifier.new):
+#      Returns a single Classification hash. Good for concise, one-off classifiers.
+#
+#   2. Multi-label block-based:
+#      Returns an Array of Classification hashes — useful when a single
+#      classifier assigns multiple labels to the same output.
+#
+#   3. Class-based (include Braintrust::Classifier):
+#      Define a class with a #call method. Good for reusable classifiers
+#      that carry their own logic and state.
+#
+# Classifiers and scorers run independently. You can use both together, or
+# use only classifiers when you don't need numeric scores.
+#
+# Usage:
+#   bundle exec ruby examples/eval/classifiers.rb
+
+Braintrust.init
+
+# ---------------------------------------------------------------------------
+# Test cases: customer support messages, one {input: ...} Hash per eval case
+# ---------------------------------------------------------------------------
+MESSAGES = [
+  {input: "Hi! I just wanted to say thank you, the product is amazing!"},
+  {input: "I've been waiting 2 weeks for my order. This is unacceptable!"},
+  {input: "How do I reset my password? I can't find the option anywhere."},
+  {input: "The item arrived damaged. I need a refund immediately."},
+  {input: "Just checking in — any update on my ticket #4821?"}
+]
+
+# ---------------------------------------------------------------------------
+# Simulated task: canned support replies, first matching pattern wins (swap in a real LLM call)
+# ---------------------------------------------------------------------------
+def generate_response(message)
+  canned_replies = {
+    /thank/i => "You're welcome! So glad you're enjoying it.",
+    /waiting|order/i => "I sincerely apologise for the delay. Let me look into this right away.",
+    /password|reset/i => "To reset your password, go to Settings > Account > Reset Password.",
+    /damaged|refund/i => "I'm sorry to hear that. I'll process your refund immediately."
+  }
+  canned_replies.find { |pattern, _| pattern === message }&.last || "Thanks for reaching out! Let me check on that for you."
+end
+
+# ---------------------------------------------------------------------------
+# Pattern 1: block-based single-label classifier
+#
+# Maps each message to exactly one intent id (first matching pattern wins).
+# Declare only the kwargs you need — extras are filtered automatically.
+# ---------------------------------------------------------------------------
+intent_classifier = Braintrust::Classifier.new("intent") do |input:|
+  intent_patterns = {
+    "praise" => /thank/i,
+    "follow_up" => /waiting|order|update/i,
+    "how_to" => /password|reset|find/i,
+    "complaint" => /damaged|refund/i
+  }
+  id = intent_patterns.find { |_, pattern| pattern === input }&.first || "other"
+
+  {name: "intent", id: id, label: id.tr("_", " ").capitalize}
+end
+
+# ---------------------------------------------------------------------------
+# Pattern 2: block-based multi-label classifier
+#
+# Returning an Array lets one classifier attach several labels to a case.
+# All items sharing the same :name are grouped into the same results array.
+# ---------------------------------------------------------------------------
+tone_classifier = Braintrust::Classifier.new("tone") do |input:|
+  tone_ids = []
+  tone_ids << "urgent" if input.match?(/immediately|unacceptable|waiting/i)
+  tone_ids << "polite" if input.match?(/please|thank|just checking/i)
+  tone_ids << "frustrated" if input.match?(/unacceptable|damaged|waiting/i)
+  tone_ids << "neutral" if tone_ids.empty?
+  tone_ids.map { |tone_id| {name: "tone", id: tone_id, label: tone_id.capitalize} }
+end
+
+# ---------------------------------------------------------------------------
+# Pattern 3: class-based classifier
+#
+# Include Braintrust::Classifier and define #call with keyword args.
+# The class name is snake_cased to derive the default classifier name
+# (ResponseQualityClassifier -> "response_quality_classifier").
+# Override #name to customise it.
+# ---------------------------------------------------------------------------
+class ResponseQualityClassifier
+  include Braintrust::Classifier
+
+  # Overrides the default name that would be derived from the class name.
+  def name
+    "response_quality"
+  end
+
+  # Labels the task output by length and by whether it promises action.
+  # The output is coerced with #to_s once so every check sees the same String.
+  def call(input:, output:)
+    text = output.to_s
+    word_count = text.split.length
+
+    id = if text.strip.empty?
+      "no_response"
+    elsif word_count < 5
+      "too_short"
+    elsif text.match?(/immediately|right away|look into/i)
+      "action_oriented"
+    else
+      "informational"
+    end
+
+    metadata = {word_count: word_count}
+    {name: "response_quality", id: id, label: id.tr("_", " ").capitalize, metadata: metadata}
+  end
+end
+
+# ---------------------------------------------------------------------------
+# Run the eval with classifiers only; scorers: is omitted (no numeric scores needed here)
+# ---------------------------------------------------------------------------
+Braintrust::Eval.run(
+  project: "ruby-sdk-examples",
+  experiment: "classifiers-example",
+  cases: MESSAGES,
+  task: ->(input:) { generate_response(input) },
+  classifiers: [intent_classifier, tone_classifier, ResponseQualityClassifier.new]
+)
+
+OpenTelemetry.tracer_provider.shutdown # NOTE(review): not reached if Eval.run raises; consider begin/ensure
diff --git a/lib/braintrust/classifier.rb b/lib/braintrust/classifier.rb
@@ -0,0 +1,157 @@
+# frozen_string_literal: true
+
+require_relative "internal/callable"
+
+module Braintrust
+  # Classifier wraps a classification function that categorizes and labels eval outputs.
+  #
+  # Unlike scorers (which return numeric 0-1 values), classifiers return structured
+  # {Classification} items with an id and optional label and metadata.
+  #
+  # Use inline with a block (keyword args):
+  #   classifier = Classifier.new("category") { |output:| {name: "category", id: "greeting", label: "Greeting"} }
+  #
+  # Or include in a class and define #call with keyword args:
+  #   class CategoryClassifier
+  #     include Braintrust::Classifier
+  #
+  #     def call(output:)
+  #       {name: "category", id: "greeting", label: "Greeting"}
+  #     end
+  #   end
+  #
+  # Classifiers may return a single Classification hash, an Array of them, or nil
+  # (meaning no classifications for this case).
+  module Classifier
+    DEFAULT_NAME = "classifier"
+
+    # @param base [Class] the class including Classifier; Callable is mixed into it
+    def self.included(base)
+      base.include(Callable)
+    end
+
+    # Build a block-based classifier.
+    #
+    # @param name [String, nil] classifier name; falls back to DEFAULT_NAME ("classifier")
+    # @param block [Proc] classification logic. Declare only the keyword args it needs;
+    #   kwargs it does not declare are filtered out before it is called.
+    #   Supported kwargs: +input:+, +expected:+, +output:+, +metadata:+, +trace:+, +parameters:+
+    # @return [Classifier::Block]
+    # @raise [ArgumentError] if the block is neither zero-arity nor keyword-based
+    def self.new(name = nil, &block)
+      resolved_name = name || DEFAULT_NAME
+      Block.new(name: resolved_name, &block)
+    end
+
+    # Included into classes that +include Classifier+. Prepends KeywordFilter and
+    # ClassificationNormalizer so #call receives only declared kwargs and always returns
+    # Array<Hash>. Also provides a default #name and #call_parameters.
+    module Callable
+      # Wraps #call so that its raw return value always comes back as Array<Hash>.
+      # Nested inside Callable because the :name default relies on the #name
+      # that Callable provides.
+      module ClassificationNormalizer
+        # Runs the underlying #call (via super) and normalizes whatever it returns.
+        # @return [Array<Hash>] classification hashes, each carrying a String :name
+        def call(**kwargs)
+          raw = super
+          normalize_classification_result(raw)
+        end
+
+        private
+
+        # Coerces nil, a single item, or an Array of items into an Array of items.
+        # Anything that is not a non-empty Hash is rejected by the per-item check.
+        # @param result [Hash, Array<Hash>, nil] raw return value from #call
+        # @return [Array<Hash>] zero or more normalized classification hashes
+        # @raise [ArgumentError] if any item is not a non-empty Hash
+        def normalize_classification_result(result)
+          return [] if result.nil?
+
+          items = result.is_a?(Array) ? result : [result]
+          items.map { |item| normalize_classification_item(item) }
+        end
+
+        # Checks that the item is a non-empty Hash and defaults its :name.
+        # No other key (including :id) is inspected here.
+        # @param item [Hash] a classification hash
+        # @return [Hash] the item itself, or a copy with :name set to the classifier's name
+        # @raise [ArgumentError] if item is not a non-empty Hash
+        def normalize_classification_item(item)
+          if !item.is_a?(Hash) || item.empty?
+            raise ArgumentError, "When returning structured classifier results, each classification must be a non-empty object. Got: #{item.inspect}"
+          end
+
+          # A usable :name is a non-empty String; anything else is replaced.
+          has_name = item[:name].is_a?(String) && !item[:name].empty?
+          has_name ? item : item.merge(name: name)
+        end
+      end
+
+      # Infrastructure modules prepended, in this order, onto every classifier class;
+      # the last entry therefore ends up outermost in the #call chain. Also used by
+      # #call_parameters to skip past them so KeywordFilter sees the real call signature.
+      PREPENDED = [Internal::Callable::KeywordFilter, ClassificationNormalizer].freeze
+
+      # @param base [Class] the class including Callable; receives the PREPENDED modules
+      def self.included(base)
+        PREPENDED.each { |mod| base.prepend(mod) }
+      end
+
+      # Fallback classifier name: the last segment of the class name in snake_case
+      # (e.g. CategoryClassifier -> "category_classifier"); DEFAULT_NAME when the class has no name.
+      # @return [String]
+      def name
+        short_name = self.class.name&.split("::")&.last
+        short_name ? short_name.gsub(/([a-z])([A-Z])/, '\1_\2').downcase : Classifier::DEFAULT_NAME
+      end
+
+      # Parameter list of the real #call, for KeywordFilter to filter kwargs against:
+      # walks up super methods while the current #call is owned by a PREPENDED module.
+      # @return [Array<Array>] parameter list
+      def call_parameters
+        impl = method(:call)
+        while PREPENDED.include?(impl.owner) && (parent = impl.super_method)
+          impl = parent
+        end
+        impl.parameters
+      end
+    end
+
+    # Block-based classifier. Stores a Proc and delegates #call to it.
+    # Includes Classifier so it satisfies +Classifier ===+ checks.
+    # Exposes #call_parameters so KeywordFilter can introspect the block's
+    # declared kwargs rather than Block#call's **kwargs signature.
+    class Block
+      include Classifier
+
+      # @return [String]
+      attr_reader :name
+
+      # @param name [String] classifier name
+      # @param block [Proc] classification implementation; must use keyword args or zero-arity
+      # @raise [ArgumentError] if no block is given or the block uses positional params
+      def initialize(name: DEFAULT_NAME, &block)
+        # Fail with a clear error instead of NoMethodError on nil.parameters below.
+        raise ArgumentError, "Classifier requires a block" unless block
+        unless Internal::Callable::KeywordFilter.has_any_keywords?(block.parameters) || block.arity == 0
+          raise ArgumentError, "Classifier block must use keyword args (got arity #{block.arity})"
+        end
+        @name = name
+        @block = block
+      end
+
+      # @param kwargs [Hash] keyword arguments (filtered by KeywordFilter)
+      # @return [Array<Hash>] normalized classification results
+      def call(**kwargs)
+        @block.call(**kwargs)
+      end
+
+      # Exposes the block's parameters so KeywordFilter filters kwargs against them.
+      # @return [Array<Array>] parameter list from Proc#parameters
+      def call_parameters
+        @block.parameters
+      end
+    end
+  end
+end
diff --git a/lib/braintrust/eval.rb b/lib/braintrust/eval.rb
@@ -1,5 +1,6 @@
 # frozen_string_literal: true
 
+require_relative "classifier"
 require_relative "scorer"
 require_relative "task"
 require_relative "functions"
@@ -160,7 +161,10 @@ def scorer(name, callable = nil, &block)
       #   - String: dataset name (fetches from same project)
       #   - Hash: {name:, id:, project:, version:, limit:}
       # @param task [#call] The task to evaluate (must be callable)
-      # @param scorers [Array<String, Scorer, #call>] The scorers to use (String names, Scorer objects, or callables)
+      # @param scorers [Array<String, Scorer, #call>, nil] The scorers to use (String names, Scorer objects, or callables).
+      #   At least one of scorers or classifiers must be provided.
+      # @param classifiers [Array<Classifier, #call>, nil] The classifiers to use.
+      #   At least one of scorers or classifiers must be provided.
       # @param on_progress [#call, nil] Optional callback fired after each test case.
       #   Receives a Hash: {"data" => output, "scores" => {name => value}} on success,
       #   or {"error" => message} on failure.
@@ -177,13 +181,16 @@ def scorer(name, callable = nil, &block)
       # @param parent [Hash, nil] Parent span context ({object_type:, object_id:, generation:})
       # @param parameters [Hash, nil] Runtime parameters passed to task and scorers as a `parameters:` keyword argument
       # @return [Result]
-      def run(task:, scorers:, project: nil, experiment: nil,
-        cases: nil, dataset: nil, on_progress: nil,
+      def run(task:, scorers: nil, classifiers: nil, project: nil,
+        experiment: nil, cases: nil, dataset: nil, on_progress: nil,
         parallelism: 1, tags: nil, metadata: nil, update: false, quiet: false,
         state: nil, tracer_provider: nil, project_id: nil, parent: nil,
         parameters: nil)
         # Validate required parameters
-        validate_params!(task: task, scorers: scorers, cases: cases, dataset: dataset)
+        validate_params!(task: task, scorers: scorers,
+          classifiers: classifiers, cases: cases, dataset: dataset)
+        scorers ||= []
+        classifiers ||= []
 
         experiment_id = nil
         project_name = project
@@ -216,6 +223,7 @@ def run(task:, scorers:, project: nil, experiment: nil,
         context = Context.build(
           task: task,
           scorers: scorers,
+          classifiers: classifiers,
           cases: cases,
           experiment_id: experiment_id,
           experiment_name: experiment,
@@ -245,9 +253,19 @@ def print_result(result)
 
       # Validate required parameters
       # @raise [ArgumentError] if validation fails
-      def validate_params!(task:, scorers:, cases:, dataset:)
+      def validate_params!(task:, scorers:, classifiers:, cases:, dataset:)
         raise ArgumentError, "task is required" unless task
-        raise ArgumentError, "scorers is required" unless scorers
+
+        # Validate task is callable before anything else
+        unless task.respond_to?(:call)
+          raise ArgumentError, "task must be callable (respond to :call)"
+        end
+
+        has_scorers = scorers && !scorers.empty?
+        has_classifiers = classifiers && !classifiers.empty?
+        unless has_scorers || has_classifiers
+          raise ArgumentError, "at least one of scorers or classifiers is required"
+        end
 
         # Validate cases and dataset are mutually exclusive
         if cases && dataset
@@ -258,11 +276,6 @@ def validate_params!(task:, scorers:, cases:, dataset:)
         unless cases || dataset
           raise ArgumentError, "must specify either 'cases' or 'dataset'"
         end
-
-        # Validate task is callable
-        unless task.respond_to?(:call)
-          raise ArgumentError, "task must be callable (respond to :call)"
-        end
       end
 
       # Resolve project by name or ID. Creates if needed.