Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/examples/intrinsics/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ Assesses if retrieved context is relevant to a query.
### hallucination_detection.py
Detects when model outputs contain hallucinated information.

**Note:** For RAG workflows, consider using `HallucinationRequirement` from `mellea.stdlib.requirements` which wraps this intrinsic with automatic thresholding and validation logic. See `docs/examples/requirements/hallucination_requirement.py` for details.

### query_rewrite.py
Rewrites queries for better retrieval or understanding.

Expand Down
12 changes: 11 additions & 1 deletion docs/examples/intrinsics/intrinsics.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# pytest: huggingface, requires_heavy_ram, llm

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

import mellea.stdlib.functional as mfuncs
from mellea.backends.adapters.adapter import AdapterType, IntrinsicAdapter
from mellea.backends.huggingface import LocalHFBackend
Expand All @@ -9,7 +12,14 @@
# This is an example for how you would directly use intrinsics. See `mellea/stdlib/intrinsics/rag.py`
# for helper functions.

backend = LocalHFBackend(model_id="ibm-granite/granite-3.3-8b-instruct")
# Force CPU usage by building a custom (tokenizer, model, device) config
# instead of letting LocalHFBackend pick a device automatically.
model_id = "ibm-granite/granite-3.3-8b-instruct"
device = torch.device("cpu")
tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map="cpu" keeps all model weights on the CPU.
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="cpu")

# LocalHFBackend accepts a pre-built (tokenizer, model, device) triple via
# custom_config — presumably in this exact order; confirm against the
# backend's signature.
custom_config = (tokenizer, model, device)
backend = LocalHFBackend(model_id=model_id, custom_config=custom_config)

# Create the Adapter. IntrinsicAdapter's default to ALORAs.
req_adapter = IntrinsicAdapter(
Expand Down
159 changes: 159 additions & 0 deletions docs/examples/rag/rag_with_hallucination_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# pytest: huggingface, requires_heavy_ram, llm, qualitative

"""RAG example with hallucination detection using HallucinationRequirement.

This example demonstrates how to integrate hallucination detection into a RAG
pipeline using Mellea's HallucinationRequirement.

To run this script from the root of the Mellea source tree, use the command:
```
uv run python docs/examples/rag/rag_with_hallucination_detection.py
```
"""

from mellea import start_session
from mellea.backends import model_ids
from mellea.stdlib.components import Document, Message
from mellea.stdlib.context import ChatContext
from mellea.stdlib.functional import validate
from mellea.stdlib.requirements import HallucinationRequirement

# Sample documents for RAG
# Sample documents for RAG — these play the role of a retrieval step's output.
docs = [
    "The purple bumble fish is a rare species found in tropical waters. It has a distinctive yellow coloration.",
    "Purple bumble fish typically grow to 15-20 cm in length and feed primarily on small crustaceans.",
    "Conservation efforts have helped stabilize purple bumble fish populations in recent years.",
]

print("=" * 60)
print("RAG with Hallucination Detection Example")
print("=" * 60)

# Create session with the Granite 4 Micro model.
m = start_session(model_id=model_ids.IBM_GRANITE_4_MICRO_3B)

# User query
query = "What do we know about purple bumble fish?"

# Step 1: Generate answer using the RAG pattern.
# user_variables fills the {{query}} template; grounding_context supplies the
# documents the model should ground its answer in.
print("\nStep 1: Generating answer with grounded context...")
answer = m.instruct(
    "Based on the provided documents, answer the question: {{query}}",
    user_variables={"query": query},
    grounding_context={f"doc{i}": doc for i, doc in enumerate(docs)},
)

print(f"Generated answer: {answer.value}")

# Step 2: Validate for hallucinations
print("\nStep 2: Validating answer for hallucinations...")

# Create Document objects for validation.
doc_objects = [Document(doc_id=str(i), text=doc) for i, doc in enumerate(docs)]

# Build validation context with documents attached to the assistant message,
# so the hallucination check can compare the answer against its sources.
validation_context = (
    ChatContext()
    .add(Message("user", query))
    .add(Message("assistant", str(answer.value), documents=doc_objects))
)

# Create hallucination requirement.
hallucination_req = HallucinationRequirement(
    threshold=0.5,
    max_hallucinated_ratio=0.0,  # Strict: no hallucinations allowed
)

# Validate the generated answer against the requirement.
validation_results = validate(
    reqs=[hallucination_req], context=validation_context, backend=m.backend
)

print(f"Validation passed: {validation_results[0].as_bool()}")
print(f"Validation reason: {validation_results[0].reason}")
# score may be None when no faithfulness score is reported.
if validation_results[0].score is not None:
    print(f"Faithfulness score: {validation_results[0].score:.2f}")

# Step 3: Example with potential hallucination
print("\n" + "=" * 60)
print("Example with Hallucinated Content")
print("=" * 60)

# Manually create a response with hallucination for demonstration; the last
# sentence is not supported by any of the source documents above.
hallucinated_answer = (
    "Purple bumble fish are rare tropical fish with yellow coloration. "
    "They grow to 15-20 cm and feed on small crustaceans. "
    "They are known to migrate thousands of miles each year."  # Hallucinated!
)

# Reuse the same query and documents; only the assistant message differs.
validation_context2 = (
    ChatContext()
    .add(Message("user", query))
    .add(Message("assistant", hallucinated_answer, documents=doc_objects))
)

validation_results2 = validate(
    reqs=[hallucination_req], context=validation_context2, backend=m.backend
)

print(f"Response: {hallucinated_answer}")
print(f"Validation passed: {validation_results2[0].as_bool()}")
print(f"Validation reason: {validation_results2[0].reason}")
if validation_results2[0].score is not None:
    print(f"Faithfulness score: {validation_results2[0].score:.2f}")

# Step 4: Complete RAG pipeline with validation
print("\n" + "=" * 60)
print("Complete RAG Pipeline with Validation")
print("=" * 60)


def rag_with_validation(session, query, documents, requirement):
    """Run one RAG turn and check the answer against its source documents.

    Args:
        session: MelleaSession instance used for generation and validation
        query: User question
        documents: List of document strings to ground the answer in
        requirement: HallucinationRequirement instance to validate against

    Returns:
        tuple: (answer, validation_result)
    """
    # Ground the generation on the supplied documents; {{query}} is filled
    # from user_variables.
    grounding = {f"doc{i}": text for i, text in enumerate(documents)}
    response = session.instruct(
        "Based on the provided documents, answer the question: {{query}}",
        user_variables={"query": query},
        grounding_context=grounding,
    )

    # Wrap the raw strings as Document objects and rebuild the exchange as a
    # chat context so the validator sees the answer together with its sources.
    wrapped_docs = [
        Document(doc_id=str(i), text=text) for i, text in enumerate(documents)
    ]
    ctx = ChatContext()
    ctx = ctx.add(Message("user", query))
    ctx = ctx.add(
        Message("assistant", str(response.value), documents=wrapped_docs)
    )

    # Run the hallucination requirement against the grounded exchange.
    results = validate(reqs=[requirement], context=ctx, backend=session.backend)

    return response.value, results[0]


# Use the pipeline on a fresh question.
query2 = "How big do purple bumble fish grow?"
answer, validation = rag_with_validation(m, query2, docs, hallucination_req)

print(f"Query: {query2}")
print(f"Answer: {answer}")
print(f"Validated: {validation.as_bool()}")
# score may be None when no faithfulness score is reported.
if validation.score is not None:
    print(f"Faithfulness score: {validation.score:.2f}")

print("\n" + "=" * 60)
print("Example complete!")
print("=" * 60)
148 changes: 148 additions & 0 deletions docs/examples/rag/rag_with_instruct_and_factory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"""Example: RAG with instruct() and hallucination_check factory.

This example demonstrates using the hallucination_check() factory function
with instruct() and sampling strategies for automatic validation and repair.

# pytest: ollama, llm, huggingface
"""

import asyncio

from mellea.backends.hf import HuggingFaceBackend

from mellea.stdlib.components import Document
from mellea.stdlib.functional import instruct
from mellea.stdlib.requirements import hallucination_check
from mellea.stdlib.sampling import RejectionSamplingStrategy


async def main():
    """Demonstrate RAG with instruct() and the hallucination_check factory.

    Walks through four scenarios: strict checking with rejection sampling,
    template variables, combining multiple requirements, and a lenient
    hallucination tolerance for creative output.
    """
    # Initialize backend; device_map="auto" lets the backend place the model.
    backend = HuggingFaceBackend(
        "ibm-granite/granite-3.0-2b-instruct", device_map="auto"
    )

    # Sample documents for RAG — stand-ins for a retrieval step's output.
    documents = [
        Document(
            doc_id="1",
            text="The only type of fish that is yellow is the purple bumble fish.",
        ),
        Document(
            doc_id="2",
            text="The purple bumble fish is a rare species found in tropical waters.",
        ),
        Document(
            doc_id="3",
            text="Purple bumble fish typically grow to 6-8 inches in length.",
        ),
    ]

    # Example 1: Basic usage with rejection sampling
    print("=" * 80)
    print("Example 1: instruct() with hallucination_check and rejection sampling")
    print("=" * 80)

    # Create requirement with the factory function.
    req = hallucination_check(
        documents=documents,
        threshold=0.5,
        max_hallucinated_ratio=0.0,  # Strict: no hallucinations allowed
    )

    # Use with instruct() - automatic validation and retry on failure.
    result = await instruct(
        """Based on the provided documents, answer the following question.

Question: What color are purple bumble fish?

Answer:""",
        backend=backend,
        requirements=[req],
        strategy=RejectionSamplingStrategy(loop_budget=3),
    )

    print(f"Response: {result}")
    print()

    # Example 2: Template variables in the prompt
    print("=" * 80)
    print("Example 2: Using user_variables with hallucination_check")
    print("=" * 80)

    query = "How big do purple bumble fish grow?"

    # Create requirement with documents.
    req2 = hallucination_check(
        documents=documents,
        threshold=0.5,
        max_hallucinated_ratio=0.1,  # Allow up to 10% hallucination
    )

    # BUGFIX: template variables like {{query}} are filled from
    # user_variables, not grounding_context — grounding_context carries
    # retrieval documents (see docs/examples/rag/rag_with_hallucination_detection.py).
    # The original passed {"query": query} as grounding_context, so {{query}}
    # was never substituted into the prompt.
    result2 = await instruct(
        """Based on the provided documents, answer: {{query}}

Answer:""",
        backend=backend,
        user_variables={"query": query},
        requirements=[req2],
        strategy=RejectionSamplingStrategy(loop_budget=3),
    )

    print(f"Query: {query}")
    print(f"Response: {result2}")
    print()

    # Example 3: Multiple requirements including hallucination check
    print("=" * 80)
    print("Example 3: Combining hallucination_check with other requirements")
    print("=" * 80)

    # Local import keeps the top of the file focused on the core example.
    from mellea.stdlib.requirements import Requirement

    # The sampling strategy must satisfy every requirement simultaneously.
    requirements = [
        hallucination_check(documents=documents, threshold=0.5),
        Requirement("Response must be concise (under 50 words)"),
        Requirement("Response must be in complete sentences"),
    ]

    result3 = await instruct(
        "Describe purple bumble fish based on the documents.",
        backend=backend,
        requirements=requirements,
        strategy=RejectionSamplingStrategy(loop_budget=5),
    )

    print(f"Response: {result3}")
    print()

    # Example 4: Lenient hallucination tolerance
    print("=" * 80)
    print("Example 4: Lenient hallucination tolerance")
    print("=" * 80)

    # Allow some hallucination (useful for creative responses).
    lenient_req = hallucination_check(
        documents=documents,
        threshold=0.3,  # Lower threshold
        max_hallucinated_ratio=0.3,  # Allow up to 30% hallucination
    )

    result4 = await instruct(
        """Based on the documents, write a creative description of purple bumble fish.

Description:""",
        backend=backend,
        requirements=[lenient_req],
        strategy=RejectionSamplingStrategy(loop_budget=3),
    )

    print(f"Response: {result4}")
    print()


# Entry point: drive the async demo with asyncio's default event loop.
if __name__ == "__main__":
    asyncio.run(main())
Loading
Loading