Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
462 changes: 385 additions & 77 deletions .github/workflows/validate-samples.yml

Large diffs are not rendered by default.

10 changes: 7 additions & 3 deletions ai/select-algorithm-dotnet/src/CompareAll.cs
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,13 @@ public static void Run()
{
var database = mongoClient.GetDatabase(databaseName);

// Drop collection for a clean comparison
database.DropCollection("hotels");
Console.WriteLine("Dropped existing 'hotels' collection (if any)");
// Drop collection if it already exists (clean start)
var collectionNames = database.ListCollectionNames().ToList();
if (collectionNames.Contains("hotels"))
{
database.DropCollection("hotels");
Console.WriteLine("Dropped existing 'hotels' collection.");
}

var collection = database.GetCollection<BsonDocument>("hotels");

Expand Down
15 changes: 9 additions & 6 deletions ai/select-algorithm-go/src/compare_all.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,18 @@ func RunCompareAll(ctx context.Context, config *Config, dbClient *mongo.Client,
fmt.Printf("Top-K: %d\n", topK)
fmt.Printf("Verbose: %v\n", verbose)

// 1. Drop collection for clean comparison, then load data
// 1. Drop collection if it exists for clean comparison, then load data
database := dbClient.Database(config.DatabaseName)
collection := database.Collection("hotels")

// Drop existing collection for a clean comparison
if err := collection.Drop(ctx); err != nil {
fmt.Printf("Note: could not drop collection (may not exist): %v\n", err)
} else {
fmt.Println("Dropped existing 'hotels' collection")
// Drop existing collection if it exists (clean start)
names, _ := database.ListCollectionNames(ctx, bson.M{"name": "hotels"})
if len(names) > 0 {
if err := collection.Drop(ctx); err != nil {
fmt.Printf("Note: could not drop collection: %v\n", err)
} else {
fmt.Println("Dropped existing 'hotels' collection")
}
}

// Ensure cleanup on exit
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,80 +49,85 @@ public static void run() {
MongoDatabase database = mongoClient.getDatabase(databaseName);
MongoCollection<Document> collection = database.getCollection(COLLECTION_NAME);

// Load data ONCE into the single collection
System.out.println(" Loading data from: " + dataFile);
List<Document> data = Utils.readJsonFile(dataFile);
System.out.printf(" Loaded %d documents%n", data.size());

collection.drop();
System.out.println(" Collection reset.");
Utils.insertData(collection, data, 100);

// Generate ONE embedding for the query (reused for all 9 searches)
OpenAIClient aiClient = Utils.getOpenAIClient();
System.out.printf("%n Generating embedding for: \"%s\"%n", queryText);
List<Float> queryVector = Utils.getEmbedding(aiClient, queryText, model);
System.out.printf(" Embedding generated (%d dimensions)%n%n", queryVector.size());

// Convert to doubles for BSON
List<Double> vectorAsDoubles = queryVector.stream()
.map(Float::doubleValue)
.toList();

// Create all 9 indexes idempotently
System.out.println(" Creating 9 vector indexes...");
for (String algo : ALGORITHMS) {
for (String metric : METRICS) {
createIndex(collection, vectorField, dimensions, algo, metric);
try {
// Load data ONCE into the single collection
System.out.println(" Loading data from: " + dataFile);
List<Document> data = Utils.readJsonFile(dataFile);
System.out.printf(" Loaded %d documents%n", data.size());

// Drop collection if it already exists (clean start)
if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) {
collection.drop();
System.out.println(" Dropped existing collection.");
}
}
System.out.println(" All indexes created.\n");

// Run searches sequentially for fair timing
System.out.println(" Running searches...");
for (String algo : ALGORITHMS) {
for (String metric : METRICS) {
String indexName = String.format("vector_%s_%s", algo, metric.toLowerCase());

long startNs = System.nanoTime();
List<Document> searchResults = performSearch(
collection, vectorAsDoubles, vectorField, topK);
long elapsedNs = System.nanoTime() - startNs;
double elapsedMs = elapsedNs / 1_000_000.0;

// Extract top result info
String topHotel = "-";
double topScore = 0.0;
if (!searchResults.isEmpty()) {
Document top = searchResults.get(0);
topHotel = top.getString("HotelName") != null
? top.getString("HotelName") : "-";
topScore = top.getDouble("score") != null
? top.getDouble("score") : 0.0;
Utils.insertData(collection, data, 100);

// Generate ONE embedding for the query (reused for all 9 searches)
OpenAIClient aiClient = Utils.getOpenAIClient();
System.out.printf("%n Generating embedding for: \"%s\"%n", queryText);
List<Float> queryVector = Utils.getEmbedding(aiClient, queryText, model);
System.out.printf(" Embedding generated (%d dimensions)%n%n", queryVector.size());

// Convert to doubles for BSON
List<Double> vectorAsDoubles = queryVector.stream()
.map(Float::doubleValue)
.toList();

// Create all 9 indexes idempotently
System.out.println(" Creating 9 vector indexes...");
for (String algo : ALGORITHMS) {
for (String metric : METRICS) {
createIndex(collection, vectorField, dimensions, algo, metric);
}
}
System.out.println(" All indexes created.\n");

// Run searches sequentially for fair timing
System.out.println(" Running searches...");
for (String algo : ALGORITHMS) {
for (String metric : METRICS) {
String indexName = String.format("vector_%s_%s", algo, metric.toLowerCase());

long startNs = System.nanoTime();
List<Document> searchResults = performSearch(
collection, vectorAsDoubles, vectorField, topK);
long elapsedNs = System.nanoTime() - startNs;
double elapsedMs = elapsedNs / 1_000_000.0;

// Extract top result info
String topHotel = "-";
double topScore = 0.0;
if (!searchResults.isEmpty()) {
Document top = searchResults.get(0);
topHotel = top.getString("HotelName") != null
? top.getString("HotelName") : "-";
topScore = top.getDouble("score") != null
? top.getDouble("score") : 0.0;
}

results.add(new SearchResult(
algo.toUpperCase(), metric, indexName,
elapsedMs, searchResults.size(), topHotel, topScore));

if (verbose) {
System.out.printf(" [%s] %d results in %.2f ms%n",
indexName, searchResults.size(), elapsedMs);
for (int i = 0; i < searchResults.size(); i++) {
Document doc = searchResults.get(i);
System.out.printf(" %d. %s (%.4f)%n",
i + 1,
doc.getString("HotelName"),
doc.getDouble("score"));
results.add(new SearchResult(
algo.toUpperCase(), metric, indexName,
elapsedMs, searchResults.size(), topHotel, topScore));

if (verbose) {
System.out.printf(" [%s] %d results in %.2f ms%n",
indexName, searchResults.size(), elapsedMs);
for (int i = 0; i < searchResults.size(); i++) {
Document doc = searchResults.get(i);
System.out.printf(" %d. %s (%.4f)%n",
i + 1,
doc.getString("HotelName"),
doc.getDouble("score"));
}
}
}
}
} finally {
// Cleanup: always drop the comparison collection
System.out.println("\n Cleanup: dropping comparison collection...");
collection.drop();
System.out.println(" Cleanup: dropped collection 'hotels'");
}

// Cleanup: drop the comparison collection
System.out.println("\n Cleanup: dropping comparison collection...");
collection.drop();
System.out.println(" Cleanup: dropped collection 'hotels'");
}

// Print comparison table
Expand Down
7 changes: 4 additions & 3 deletions ai/select-algorithm-python/src/compare_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,10 @@ def main():
try:
database = mongo_client[config["database_name"]]

# Drop collection for a clean comparison
database.drop_collection("hotels")
print("Dropped existing 'hotels' collection (if any)")
# Drop collection if it already exists (clean start)
if "hotels" in database.list_collection_names():
database.drop_collection("hotels")
print("Dropped existing 'hotels' collection")

# Create fresh collection and load data
collection = database["hotels"]
Expand Down
29 changes: 25 additions & 4 deletions ai/select-algorithm-typescript/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,21 +75,42 @@ npm run start:diskann

## Compare All Algorithms

Run all 9 combinations (3 algorithms × 3 similarity metrics) in a single invocation and view a formatted comparison table:
Run all 9 combinations (3 algorithms × 3 similarity metrics) across multiple diverse queries and view formatted comparison tables with a ranking divergence summary:

```bash
npm run start:compare-all
```

By default, the script runs **5 diverse queries** designed to stress different aspects of similarity ranking:

1. `outdoor adventure with family activities`
2. `quiet romantic getaway with ocean view`
3. `budget-friendly downtown hotel with free WiFi`
4. `historic building with fine dining and spa`
5. `ski resort with yoga and winter sports`

**Environment variables** (optional overrides):

| Variable | Default | Description |
|---|---|---|
| `QUERY_TEXT` | `luxury hotel near the beach` | Search query text |
| `TOP_K` | `3` | Number of results per combination |
| `QUERY_TEXT` | *(5 built-in queries)* | Override with a single custom query |
| `TOP_K` | `5` | Number of results per combination |
| `VERBOSE` | `false` | When `true`, shows all k results per combo |

The script creates one collection per algorithm/metric pair (9 in total), loads the data into each, creates one vector index per collection, and runs searches sequentially for fair timing comparison.
### Architecture

> **DocumentDB limitation:** Only ONE vector index per field per collection is allowed. The script creates 9 separate collections (one per algorithm×metric pair), loads data into each, creates one index per collection, runs searches, and cleans up all collections on exit.

### Output

The script produces:
- **Per-query comparison table** — shows algorithm, metric, latency, top score, and #1 result for each of the 9 combinations
- **Ranking divergence summary** — highlights queries where algorithms/metrics disagreed on the #1 result
- **Score gap analysis** — shows the confidence margin between #1 and #2 results

### Small dataset caveat

With ~50 hotel documents, all algorithms typically return identical rankings. This is expected — the dataset is too small for algorithmic differences to surface. For meaningful differentiation, use 1000+ documents with varied embeddings. The diverse queries help by combining attributes that no single hotel perfectly satisfies, which can reveal metric-level differences (COS vs L2 vs IP) even on small data.

## Algorithm comparison

Expand Down
Loading
Loading