
Commit 8a96113: Fixed embeddings typos
1 parent 3f34700 commit 8a96113

4 files changed, 5 additions & 5 deletions

README.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -758,7 +758,7 @@ To generate embeddings, use the `LlamaEmbedding` class. It automatically configu
 ```python
 from llama_cpp.llama_embedding import LlamaEmbedding
 
-# Initialize the model (automatically sets embedding=True)
+# Initialize the model (automatically sets embeddings=True)
 llm = LlamaEmbedding(model_path="path/to/bge-m3.gguf", n_gpu_layers=-1)
 
 # 1. Simple usage (OpenAI-compatible format)
````
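For context on what the corrected `embeddings=True` flag enables: `embed()` returns vectors, and a common next step is comparing them with cosine similarity. A minimal pure-Python sketch (the helper and the toy vectors are mine, not part of llama-cpp-python or this commit):

```python
import math

def cosine_similarity(a, b):
    # Dot product divided by the product of magnitudes; assumes
    # both vectors are non-zero and of equal length.
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

# Toy vectors standing in for real embedding output.
print(cosine_similarity([1.0, 0.0], [1.0, 0.0]))  # identical direction -> 1.0
print(cosine_similarity([1.0, 0.0], [0.0, 1.0]))  # orthogonal -> 0.0
```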

llama_cpp/llama.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -1118,7 +1118,7 @@ def embed(
 
         if self.context_params.embeddings is False:
             raise RuntimeError(
-                "Llama model must be created with embedding=True to call this method"
+                "Llama model must be created with embeddings=True to call this method"
             )
 
         if self.verbose:
```
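The hunk above only fixes the error message, but the guard pattern it touches is worth noting: the method checks a construction-time flag and raises `RuntimeError` naming the exact keyword the caller must pass, so the message is actionable. A standalone sketch of that pattern (class and names are illustrative, not the library's code):

```python
class EmbeddingModel:
    def __init__(self, embeddings=False):
        # Capability is fixed at construction time.
        self.embeddings = embeddings

    def embed(self, text):
        if self.embeddings is False:
            # Name the exact constructor keyword so the error is actionable.
            raise RuntimeError(
                "Model must be created with embeddings=True to call this method"
            )
        return [float(len(text))]  # placeholder vector

try:
    EmbeddingModel().embed("hi")
except RuntimeError as e:
    print(e)  # tells the caller which keyword to pass
```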

llama_cpp/llama_embedding.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -28,7 +28,7 @@ class LlamaEmbedding(Llama):
     Inherits from the base Llama class but is optimized for vector operations.
 
     Key Features:
-    1. Auto-configuration: Automatically sets embedding=True.
+    1. Auto-configuration: Automatically sets embeddings=True.
     2. Streaming Batch: Handles massive datasets without OOM (Out Of Memory).
     3. Native Reranking Support: Specifically handles `LLAMA_POOLING_TYPE_RANK` models (like BGE-Reranker).
        It correctly identifies classification heads to output scalar relevance scores instead of high-dimensional vectors.
```
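The "Streaming Batch" feature in the docstring above is, presumably, the standard generator-chunking idea: embed fixed-size slices so only one batch of inputs is resident in memory at a time. A sketch under that assumption (the `batched` helper is mine, not `LlamaEmbedding` internals):

```python
def batched(items, batch_size):
    # Yield successive fixed-size slices; the last slice may be shorter.
    for i in range(0, len(items), batch_size):
        yield items[i:i + batch_size]

docs = [f"doc {i}" for i in range(10)]
# Each batch would be passed to the embedding model in turn,
# keeping peak memory proportional to batch_size, not len(docs).
print([len(b) for b in batched(docs, 4)])  # -> [4, 4, 2]
```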

tests/test_llama.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -232,9 +232,9 @@ def test_real_llama_embeddings(llama_cpp_model_path):
         n_threads=multiprocessing.cpu_count(),
         n_threads_batch=multiprocessing.cpu_count(),
         logits_all=False,
-        swa_full=True,
+        embeddings=True,
         kv_unified=True,
-        embedding=True
+        swa_full=True,
     )
     # Smoke test for now
     model.embed("Hello World")
```
