Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/examples/intrinsics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ Assesses if retrieved context is relevant to a query.
### hallucination_detection.py
Detects when model outputs contain hallucinated information.

**Note:** For RAG workflows, consider using `HallucinationRequirement` from `mellea.stdlib.requirements` which wraps this intrinsic with automatic thresholding and validation logic. See `docs/examples/requirements/hallucination_requirement.py` for details.

### query_rewrite.py
Rewrites queries for better retrieval or understanding.

Expand Down
12 changes: 11 additions & 1 deletion docs/examples/intrinsics/intrinsics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# pytest: huggingface, requires_heavy_ram, llm

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

import mellea.stdlib.functional as mfuncs
from mellea.backends.adapters.adapter import AdapterType, IntrinsicAdapter
from mellea.backends.huggingface import LocalHFBackend
Expand All @@ -9,7 +12,14 @@
# This is an example for how you would directly use intrinsics. See `mellea/stdlib/intrinsics/rag.py`
# for helper functions.

backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-8b-instruct")
# Force CPU usage by building a custom (tokenizer, model, device) config
# instead of letting LocalHFBackend pick a device automatically.
model_id = "ibm-granite/granite-3.3-8b-instruct"
device = torch.device("cpu")
tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map="cpu" keeps all model weights on the CPU.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")

# LocalHFBackend accepts a pre-built (tokenizer, model, device) triple via
# custom_config — presumably in this exact order; confirm against the
# backend's signature.
custom_config = (tokenizer, model, device)
backend = LocalHFBackend(model_id=model_id, custom_config=custom_config)

# Create the Adapter. IntrinsicAdapter's default to ALORAs.
req_adapter = IntrinsicAdapter(
Expand Down
159 changes: 159 additions & 0 deletions docs/examples/rag/rag_with_hallucination_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# pytest: huggingface, requires_heavy_ram, llm, qualitative

"""RAG example with hallucination detection using HallucinationRequirement.

This example demonstrates how to integrate hallucination detection into a RAG
pipeline using Mellea's HallucinationRequirement.

To run this script from the root of the Mellea source tree, use the command:
```
uv run python docs/examples/rag/rag_with_hallucination_detection.py
```
"""

from mellea import start_session
from mellea.backends import model_ids
from mellea.stdlib.components import Document, Message
from mellea.stdlib.context import ChatContext
from mellea.stdlib.functional import validate
from mellea.stdlib.requirements import HallucinationRequirement

# Sample documents for RAG
# Sample documents for RAG — these play the role of a retrieval step's output.
docs = [
    "The purple bumble fish is a rare species found in tropical waters. It has a distinctive yellow coloration.",
    "Purple bumble fish typically grow to 15-20 cm in length and feed primarily on small crustaceans.",
    "Conservation efforts have helped stabilize purple bumble fish populations in recent years.",
]

print("=" * 60)
print("RAG with Hallucination Detection Example")
print("=" * 60)

# Create session with the Granite 4 Micro model.
m = start_session(model_id=model_ids.IBM_GRANITE_4_MICRO_3B)

# User query
query = "What do we know about purple bumble fish?"

# Step 1: Generate answer using the RAG pattern.
# user_variables fills the {{query}} template; grounding_context supplies the
# documents the model should ground its answer in.
print("\nStep 1: Generating answer with grounded context...")
answer = m.instruct(
    "Based on the provided documents, answer the question: {{query}}",
    user_variables={"query": query},
    grounding_context={f"doc{i}": doc for i, doc in enumerate(docs)},
)

print(f"Generated answer: {answer.value}")

# Step 2: Validate for hallucinations
print("\nStep 2: Validating answer for hallucinations...")

# Create Document objects for validation.
doc_objects = [Document(doc_id=str(i), text=doc) for i, doc in enumerate(docs)]

# Build validation context with documents attached to the assistant message,
# so the hallucination check can compare the answer against its sources.
validation_context = (
    ChatContext()
    .add(Message("user", query))
    .add(Message("assistant", str(answer.value), documents=doc_objects))
)

# Create hallucination requirement.
hallucination_req = HallucinationRequirement(
    threshold=0.5,
    max_hallucinated_ratio=0.0,  # Strict: no hallucinations allowed
)

# Validate the generated answer against the requirement.
validation_results = validate(
    reqs=[hallucination_req], context=validation_context, backend=m.backend
)

print(f"Validation passed: {validation_results[0].as_bool()}")
print(f"Validation reason: {validation_results[0].reason}")
# score may be None when no faithfulness score is reported.
if validation_results[0].score is not None:
    print(f"Faithfulness score: {validation_results[0].score:.2f}")

# Step 3: Example with potential hallucination
print("\n" + "=" * 60)
print("Example with Hallucinated Content")
print("=" * 60)

# Manually create a response with hallucination for demonstration; the last
# sentence is not supported by any of the source documents above.
hallucinated_answer = (
    "Purple bumble fish are rare tropical fish with yellow coloration. "
    "They grow to 15-20 cm and feed on small crustaceans. "
    "They are known to migrate thousands of miles each year."  # Hallucinated!
)

# Reuse the same query and documents; only the assistant message differs.
validation_context2 = (
    ChatContext()
    .add(Message("user", query))
    .add(Message("assistant", hallucinated_answer, documents=doc_objects))
)

validation_results2 = validate(
    reqs=[hallucination_req], context=validation_context2, backend=m.backend
)

print(f"Response: {hallucinated_answer}")
print(f"Validation passed: {validation_results2[0].as_bool()}")
print(f"Validation reason: {validation_results2[0].reason}")
if validation_results2[0].score is not None:
    print(f"Faithfulness score: {validation_results2[0].score:.2f}")

# Step 4: Complete RAG pipeline with validation
print("\n" + "=" * 60)
print("Complete RAG Pipeline with Validation")
print("=" * 60)


def rag_with_validation(session, query, documents, requirement):
    """Run one RAG turn and check the answer against its source documents.

    Args:
        session: MelleaSession instance used for generation and validation
        query: User question
        documents: List of document strings to ground the answer in
        requirement: HallucinationRequirement instance to validate against

    Returns:
        tuple: (answer, validation_result)
    """
    # Ground the generation on the supplied documents; {{query}} is filled
    # from user_variables.
    grounding = {f"doc{i}": text for i, text in enumerate(documents)}
    response = session.instruct(
        "Based on the provided documents, answer the question: {{query}}",
        user_variables={"query": query},
        grounding_context=grounding,
    )

    # Wrap the raw strings as Document objects and rebuild the exchange as a
    # chat context so the validator sees the answer together with its sources.
    wrapped_docs = [
        Document(doc_id=str(i), text=text) for i, text in enumerate(documents)
    ]
    ctx = ChatContext()
    ctx = ctx.add(Message("user", query))
    ctx = ctx.add(
        Message("assistant", str(response.value), documents=wrapped_docs)
    )

    # Run the hallucination requirement against the grounded exchange.
    results = validate(reqs=[requirement], context=ctx, backend=session.backend)

    return response.value, results[0]


# Use the pipeline on a fresh question.
query2 = "How big do purple bumble fish grow?"
answer, validation = rag_with_validation(m, query2, docs, hallucination_req)

print(f"Query: {query2}")
print(f"Answer: {answer}")
print(f"Validated: {validation.as_bool()}")
# score may be None when no faithfulness score is reported.
if validation.score is not None:
    print(f"Faithfulness score: {validation.score:.2f}")

print("\n" + "=" * 60)
print("Example complete!")
print("=" * 60)
148 changes: 148 additions & 0 deletions docs/examples/rag/rag_with_instruct_and_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""Example: RAG with instruct() and hallucination_check factory.

This example demonstrates using the hallucination_check() factory function
with instruct() and sampling strategies for automatic validation and repair.

# pytest: ollama, llm, huggingface
"""

import asyncio

from mellea.backends.hf import HuggingFaceBackend

from mellea.stdlib.components import Document
from mellea.stdlib.functional import instruct
from mellea.stdlib.requirements import hallucination_check
from mellea.stdlib.sampling import RejectionSamplingStrategy


async def main():
    """Demonstrate RAG with instruct() and the hallucination_check factory.

    Walks through four scenarios: strict checking with rejection sampling,
    template variables, combining multiple requirements, and a lenient
    hallucination tolerance for creative output.
    """
    # Initialize backend; device_map="auto" lets the backend place the model.
    backend = HuggingFaceBackend(
        "ibm-granite/granite-3.0-2b-instruct", device_map="auto"
    )

    # Sample documents for RAG — stand-ins for a retrieval step's output.
    documents = [
        Document(
            doc_id="1",
            text="The only type of fish that is yellow is the purple bumble fish.",
        ),
        Document(
            doc_id="2",
            text="The purple bumble fish is a rare species found in tropical waters.",
        ),
        Document(
            doc_id="3",
            text="Purple bumble fish typically grow to 6-8 inches in length.",
        ),
    ]

    # Example 1: Basic usage with rejection sampling
    print("=" * 80)
    print("Example 1: instruct() with hallucination_check and rejection sampling")
    print("=" * 80)

    # Create requirement with the factory function.
    req = hallucination_check(
        documents=documents,
        threshold=0.5,
        max_hallucinated_ratio=0.0,  # Strict: no hallucinations allowed
    )

    # Use with instruct() - automatic validation and retry on failure.
    result = await instruct(
        """Based on the provided documents, answer the following question.

Question: What color are purple bumble fish?

Answer:""",
        backend=backend,
        requirements=[req],
        strategy=RejectionSamplingStrategy(loop_budget=3),
    )

    print(f"Response: {result}")
    print()

    # Example 2: Template variables in the prompt
    print("=" * 80)
    print("Example 2: Using user_variables with hallucination_check")
    print("=" * 80)

    query = "How big do purple bumble fish grow?"

    # Create requirement with documents.
    req2 = hallucination_check(
        documents=documents,
        threshold=0.5,
        max_hallucinated_ratio=0.1,  # Allow up to 10% hallucination
    )

    # BUGFIX: template variables like {{query}} are filled from
    # user_variables, not grounding_context — grounding_context carries
    # retrieval documents (see docs/examples/rag/rag_with_hallucination_detection.py).
    # The original passed {"query": query} as grounding_context, so {{query}}
    # was never substituted into the prompt.
    result2 = await instruct(
        """Based on the provided documents, answer: {{query}}

Answer:""",
        backend=backend,
        user_variables={"query": query},
        requirements=[req2],
        strategy=RejectionSamplingStrategy(loop_budget=3),
    )

    print(f"Query: {query}")
    print(f"Response: {result2}")
    print()

    # Example 3: Multiple requirements including hallucination check
    print("=" * 80)
    print("Example 3: Combining hallucination_check with other requirements")
    print("=" * 80)

    # Local import keeps the top of the file focused on the core example.
    from mellea.stdlib.requirements import Requirement

    # The sampling strategy must satisfy every requirement simultaneously.
    requirements = [
        hallucination_check(documents=documents, threshold=0.5),
        Requirement("Response must be concise (under 50 words)"),
        Requirement("Response must be in complete sentences"),
    ]

    result3 = await instruct(
        "Describe purple bumble fish based on the documents.",
        backend=backend,
        requirements=requirements,
        strategy=RejectionSamplingStrategy(loop_budget=5),
    )

    print(f"Response: {result3}")
    print()

    # Example 4: Lenient hallucination tolerance
    print("=" * 80)
    print("Example 4: Lenient hallucination tolerance")
    print("=" * 80)

    # Allow some hallucination (useful for creative responses).
    lenient_req = hallucination_check(
        documents=documents,
        threshold=0.3,  # Lower threshold
        max_hallucinated_ratio=0.3,  # Allow up to 30% hallucination
    )

    result4 = await instruct(
        """Based on the documents, write a creative description of purple bumble fish.

Description:""",
        backend=backend,
        requirements=[lenient_req],
        strategy=RejectionSamplingStrategy(loop_budget=3),
    )

    print(f"Response: {result4}")
    print()


# Entry point: drive the async demo with asyncio's default event loop.
if __name__ == "__main__":
    asyncio.run(main())
Loading
Loading