Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 34 additions & 14 deletions ai/vector-search-dotnet/Services/VectorSearchService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,24 +43,32 @@ public VectorSearchService(ILogger<VectorSearchService> logger, MongoDbService m
/// <param name="indexType">The vector search algorithm to use (IVF, HNSW, or DiskANN)</param>
public async Task RunSearchAsync(VectorIndexType indexType)
{
_logger.LogInformation($"Starting {indexType} vector search workflow");

// Setup collection
var collectionSuffix = indexType switch
{
VectorIndexType.IVF => "ivf",
VectorIndexType.HNSW => "hnsw",
VectorIndexType.DiskANN => "diskann",
_ => throw new ArgumentException($"Unknown index type: {indexType}")
};
var collectionName = $"hotels_{collectionSuffix}";
var indexName = $"vectorIndex_{collectionSuffix}";

// Drop collection if it already exists (clean start)
var database = _mongoService.GetDatabase(_config.VectorSearch.DatabaseName);
var existingCollections = (await database.ListCollectionNamesAsync()).ToList();
if (existingCollections.Contains(collectionName))
{
await _mongoService.DropCollectionAsync(_config.VectorSearch.DatabaseName, collectionName);
}

try
{
_logger.LogInformation($"Starting {indexType} vector search workflow");

// Setup collection
var collectionSuffix = indexType switch
{
VectorIndexType.IVF => "ivf",
VectorIndexType.HNSW => "hnsw",
VectorIndexType.DiskANN => "diskann",
_ => throw new ArgumentException($"Unknown index type: {indexType}")
};
var collectionName = $"hotels_{collectionSuffix}";
var indexName = $"vectorIndex_{collectionSuffix}";

var collection = _mongoService.GetCollection<HotelData>(_config.VectorSearch.DatabaseName, collectionName);

// Load data from file if collection is empty
// Load data from file
var assemblyLocation = Path.GetDirectoryName(Assembly.GetExecutingAssembly().Location) ?? string.Empty;
var dataFilePath = Path.Combine(assemblyLocation, _config.DataFiles.WithVectors);
await _mongoService.LoadDataIfNeededAsync(collection, dataFilePath);
Expand Down Expand Up @@ -137,6 +145,18 @@ await _mongoService.CreateVectorIndexAsync(
_logger.LogError(ex, $"{indexType} vector search failed");
throw;
}
finally
{
// Cleanup: always drop the collection
try
{
await _mongoService.DropCollectionAsync(_config.VectorSearch.DatabaseName, collectionName);
}
catch (Exception ex)
{
_logger.LogWarning(ex, $"Cleanup warning: failed to drop collection '{collectionName}'");
}
}
}

/// <summary>
Expand Down
17 changes: 9 additions & 8 deletions ai/vector-search-go/src/create_embeddings.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ func CreateEmbeddings(ctx context.Context, texts []string, openAIClient openai.C
})

if err != nil {
return nil, fmt.Errorf("error generating embeddings: %v", err)
return nil, fmt.Errorf("error generating embeddings: %w", err)
}

// Extract embedding vectors from the API response
Expand Down Expand Up @@ -87,7 +87,7 @@ func ProcessEmbeddingBatch(ctx context.Context, dataBatch []map[string]interface
if len(textsToEmbed) > 0 {
embeddings, err := CreateEmbeddings(ctx, textsToEmbed, openAIClient, modelName)
if err != nil {
return fmt.Errorf("failed to create embeddings: %v", err)
return fmt.Errorf("failed to create embeddings: %w", err)
}

// Add embeddings back to the original documents
Expand Down Expand Up @@ -118,7 +118,7 @@ func LoadEmbeddingConfig() *EmbeddingConfig {
// Load environment variables from .env file
err := godotenv.Load()
if err != nil {
log.Printf("Warning: Error loading .env file: %v", err)
log.Printf("Warning: Error loading .env file: %w", err)
}

batchSize, _ := strconv.Atoi(getEnvOrDefault("EMBEDDING_SIZE_BATCH", "16"))
Expand All @@ -141,7 +141,8 @@ func LoadEmbeddingConfig() *EmbeddingConfig {
// 3. Processes data in batches to generate embeddings
// 4. Saves the enhanced data with embeddings
func main() {
ctx := context.Background()
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
defer cancel()

fmt.Println("Starting embedding creation process...")

Expand All @@ -158,9 +159,9 @@ func main() {

// Initialize clients for MongoDB and Azure OpenAI
fmt.Println("\nInitializing Azure OpenAI client...")
mongoClient, azureOpenAIClient, err := GetClientsPasswordless()
mongoClient, azureOpenAIClient, err := GetClientsPasswordless(ctx)
if err != nil {
log.Fatalf("Failed to initialize clients: %v", err)
log.Fatalf("Failed to initialize clients: %w", err)
}
defer func() {
if mongoClient != nil {
Expand All @@ -172,7 +173,7 @@ func main() {
fmt.Printf("\nReading input data from %s...\n", config.DataWithoutVectors)
data, err := ReadFileReturnJSON(config.DataWithoutVectors)
if err != nil {
log.Fatalf("Failed to read input file: %v", err)
log.Fatalf("Failed to read input file: %w", err)
}
fmt.Printf("Loaded %d documents\n", len(data))

Expand Down Expand Up @@ -215,7 +216,7 @@ func main() {
fmt.Printf("\nSaving enhanced data to %s...\n", config.DataWithVectors)
err = WriteFileJSON(data, config.DataWithVectors)
if err != nil {
log.Fatalf("Failed to save output file: %v", err)
log.Fatalf("Failed to save output file: %w", err)
}

fmt.Println("\nEmbedding creation completed successfully!")
Expand Down
31 changes: 22 additions & 9 deletions ai/vector-search-go/src/diskann.go
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,28 @@ func main() {
database := mongoClient.Database(config.DatabaseName)
collection := database.Collection("hotels_diskann")

// Drop collection if it already exists (clean start)
names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_diskann"})
if err != nil {
log.Fatalf("Failed to list collections: %v", err)
}
if len(names) > 0 {
if err := collection.Drop(ctx); err != nil {
log.Fatalf("Failed to drop existing collection: %v", err)
}
fmt.Println("Dropped existing collection 'hotels_diskann'")
}

// Ensure cleanup on exit
defer func() {
fmt.Println("Cleanup: dropping collection 'hotels_diskann'...")
if dropErr := collection.Drop(ctx); dropErr != nil {
fmt.Printf("Cleanup warning: %v\n", dropErr)
} else {
fmt.Println("Cleanup: dropped collection 'hotels_diskann'")
}
}()

// Load data with embeddings
fmt.Printf("\nLoading data from %s...\n", config.DataFile)
data, err := ReadFileReturnJSON(config.DataFile)
Expand All @@ -177,15 +199,6 @@ func main() {
// Insert data into collection
fmt.Printf("\nInserting data into collection '%s'...\n", config.CollectionName)

// Clear existing data to ensure clean state
deleteResult, err := collection.DeleteMany(ctx, bson.M{})
if err != nil {
log.Fatalf("Failed to clear existing data: %v", err)
}
if deleteResult.DeletedCount > 0 {
fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount)
}

// Insert the hotel data
stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil)
if err != nil {
Expand Down
31 changes: 22 additions & 9 deletions ai/vector-search-go/src/hnsw.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,28 @@ func main() {
database := mongoClient.Database(config.DatabaseName)
collection := database.Collection("hotels_hnsw")

// Drop collection if it already exists (clean start)
names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_hnsw"})
if err != nil {
log.Fatalf("Failed to list collections: %v", err)
}
if len(names) > 0 {
if err := collection.Drop(ctx); err != nil {
log.Fatalf("Failed to drop existing collection: %v", err)
}
fmt.Println("Dropped existing collection 'hotels_hnsw'")
}

// Ensure cleanup on exit
defer func() {
fmt.Println("Cleanup: dropping collection 'hotels_hnsw'...")
if dropErr := collection.Drop(ctx); dropErr != nil {
fmt.Printf("Cleanup warning: %v\n", dropErr)
} else {
fmt.Println("Cleanup: dropped collection 'hotels_hnsw'")
}
}()

// Load hotel data with embeddings
fmt.Printf("\nLoading data from %s...\n", config.DataFile)
data, err := ReadFileReturnJSON(config.DataFile)
Expand All @@ -178,15 +200,6 @@ func main() {
// Insert data into MongoDB collection
fmt.Printf("\nPreparing collection '%s'...\n", config.CollectionName)

// Clear any existing data to start fresh
deleteResult, err := collection.DeleteMany(ctx, bson.M{})
if err != nil {
log.Fatalf("Failed to clear existing data: %v", err)
}
if deleteResult.DeletedCount > 0 {
fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount)
}

// Insert hotel data with embeddings
stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil)
if err != nil {
Expand Down
31 changes: 22 additions & 9 deletions ai/vector-search-go/src/ivf.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,28 @@ func main() {
database := mongoClient.Database(config.DatabaseName)
collection := database.Collection("hotels_ivf")

// Drop collection if it already exists (clean start)
names, err := database.ListCollectionNames(ctx, bson.M{"name": "hotels_ivf"})
if err != nil {
log.Fatalf("Failed to list collections: %v", err)
}
if len(names) > 0 {
if err := collection.Drop(ctx); err != nil {
log.Fatalf("Failed to drop existing collection: %v", err)
}
fmt.Println("Dropped existing collection 'hotels_ivf'")
}

// Ensure cleanup on exit
defer func() {
fmt.Println("Cleanup: dropping collection 'hotels_ivf'...")
if dropErr := collection.Drop(ctx); dropErr != nil {
fmt.Printf("Cleanup warning: %v\n", dropErr)
} else {
fmt.Println("Cleanup: dropped collection 'hotels_ivf'")
}
}()

// Load hotel data with embeddings
fmt.Printf("\nLoading data from %s...\n", config.DataFile)
data, err := ReadFileReturnJSON(config.DataFile)
Expand All @@ -175,15 +197,6 @@ func main() {
// Prepare collection with fresh data
fmt.Printf("\nPreparing collection '%s'...\n", config.CollectionName)

// Remove any existing data for clean state
deleteResult, err := collection.DeleteMany(ctx, bson.M{})
if err != nil {
log.Fatalf("Failed to clear existing data: %v", err)
}
if deleteResult.DeletedCount > 0 {
fmt.Printf("Cleared %d existing documents from collection\n", deleteResult.DeletedCount)
}

// Insert hotel data with embeddings
stats, err := InsertData(ctx, collection, documentsWithEmbeddings, config.BatchSize, nil)
if err != nil {
Expand Down
12 changes: 7 additions & 5 deletions ai/vector-search-go/src/show_indexes.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"log"
"strings"
"time"

"go.mongodb.org/mongo-driver/bson"
"go.mongodb.org/mongo-driver/mongo"
Expand Down Expand Up @@ -138,7 +139,7 @@ func showCollectionIndexes(ctx context.Context, collection *mongo.Collection, co

var indexes []IndexInfo
if err := cursor.All(ctx, &indexes); err != nil {
return fmt.Errorf("error decoding indexes: %v", err)
return fmt.Errorf("error decoding indexes: %w", err)
}

if len(indexes) == 0 {
Expand Down Expand Up @@ -172,7 +173,7 @@ func showDatabaseCollectionsAndIndexes(ctx context.Context, database *mongo.Data
// Get list of all collections in the database
collectionNames, err := database.ListCollectionNames(ctx, bson.M{})
if err != nil {
return fmt.Errorf("error accessing database '%s': %v", databaseName, err)
return fmt.Errorf("error accessing database '%s': %w", databaseName, err)
}

if len(collectionNames) == 0 {
Expand Down Expand Up @@ -208,7 +209,8 @@ func showDatabaseCollectionsAndIndexes(ctx context.Context, database *mongo.Data

// main function displays vector indexes and collection information
func main() {
ctx := context.Background()
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute)
defer cancel()

fmt.Println("Vector Index Information Display")
fmt.Printf("%s\n", strings.Repeat("=", 50))
Expand All @@ -221,9 +223,9 @@ func main() {

// Initialize MongoDB client
fmt.Println("\nConnecting to MongoDB...")
mongoClient, _, err := GetClientsPasswordless()
mongoClient, _, err := GetClientsPasswordless(ctx)
if err != nil {
log.Fatalf("Failed to initialize MongoDB client: %v", err)
log.Fatalf("Failed to initialize MongoDB client: %w", err)
}
defer mongoClient.Disconnect(ctx)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,24 +47,33 @@ public void run() {
var database = mongoClient.getDatabase(DATABASE_NAME);
var collection = database.getCollection(COLLECTION_NAME, Document.class);

// Drop and recreate collection
collection.drop();
// Drop collection if it already exists (clean start)
if (database.listCollectionNames().into(new ArrayList<>()).contains(COLLECTION_NAME)) {
collection.drop();
System.out.println("Dropped existing collection: " + COLLECTION_NAME);
}
database.createCollection(COLLECTION_NAME);
System.out.println("Created collection: " + COLLECTION_NAME);

// Load and insert data
var hotelData = loadHotelData();
insertDataInBatches(collection, hotelData);
try {
// Load and insert data
var hotelData = loadHotelData();
insertDataInBatches(collection, hotelData);

// Create standard indexes
createStandardIndexes(collection);
// Create standard indexes
createStandardIndexes(collection);

// Create vector index
createVectorIndex(database);
// Create vector index
createVectorIndex(database);

// Perform vector search
var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY);
performVectorSearch(collection, queryEmbedding);
// Perform vector search
var queryEmbedding = createEmbedding(openAIClient, SAMPLE_QUERY);
performVectorSearch(collection, queryEmbedding);
} finally {
// Cleanup: always drop collection at end
collection.drop();
System.out.println("Cleanup: dropped collection '" + COLLECTION_NAME + "'");
}

} catch (Exception e) {
System.err.println("Error: " + e.getMessage());
Expand Down
Loading
Loading