feat(LlamaContext): add safety checks and docstrings to logits retrieval

JamePeng · JamePeng · commit 5364cf914b59 · 2026-05-23T12:58:02.000+08:00
- Add explicit null pointer validation to `get_logits` and `get_logits_ith`.
  These methods now raise a `RuntimeError` instead of silently returning
  invalid pointers when logits are unavailable or the index is out of bounds.
- Add comprehensive docstrings to both methods, detailing the underlying
  buffer shape and memory layout.
- Include a performance warning in `get_logits_ith` about the internal
  synchronization/reordering overhead to discourage its use on the hot path.

Signed-off-by: JamePeng <jame_peng@sina.com>
diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py
@@ -755,12 +755,36 @@ def synchronize(self):
         llama_cpp.llama_synchronize(self.ctx)
 
     def get_logits(self):
+        """
+        Token logits obtained from the last call to llama_decode().
+        The logits for which llama_batch.logits[i] != 0 are stored contiguously
+        in the order they have appeared in the batch.
+        Rows: number of tokens for which llama_batch.logits[i] != 0
+        Cols: n_vocab
+
+        Returns:
+            Pointer to the logits buffer of shape (n_outputs, n_vocab), where n_outputs is the row count given above.
+        """
         self._assert_ctx()
-        return llama_cpp.llama_get_logits(self.ctx)
+        logits = llama_cpp.llama_get_logits(self.ctx)
+        if not logits:
+            raise RuntimeError(f"LlamaContext.get_logits: failed to get logits")
+        return logits
 
     def get_logits_ith(self, i: int):
+        """
+        Return logits for the ith output row from the last llama_decode call.
+
+        Note:
+            This calls llama_get_logits_ith(), which may reorder/synchronize
+            the output buffer internally. Avoid calling it on the hot path unless
+            Python-side logits are required.
+        """
         self._assert_ctx()
-        return llama_cpp.llama_get_logits_ith(self.ctx, i)
+        logits = llama_cpp.llama_get_logits_ith(self.ctx, i)
+        if not logits:
+            raise RuntimeError(f"LlamaContext.get_logits_ith: invalid logits index {i}")
+        return logits
 
     def set_embeddings(self, embeddings: bool):
         self._assert_ctx()