Skip to content

Commit fb26a5f

Browse files
committed
chore: add RAG sample
1 parent 86343b7 commit fb26a5f

8 files changed

Lines changed: 6662 additions & 0 deletions

File tree

samples/RAG-sample/langgraph.json

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"dependencies": ["."],
3+
"graphs": {
4+
"researcher-and-uploader-agent": "./src/agents/researcher-and-uploader.py:graph",
5+
"quiz-generator-RAG-agent": "./src/agents/quiz-generator-RAG.py:graph"
6+
},
7+
"env": ".env"
8+
}

samples/RAG-sample/pyproject.toml

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
[project]
2+
name = "RAG-agents"
3+
version = "0.0.6"
4+
description = "Package containing 2 agents. The first one crawls the internet and adds relevant information to a storage bucket, the second one generates quizzes based on the gathered info and user input."
5+
authors = [
6+
{ name = "Radu Mocanu" }
7+
]
8+
requires-python = ">=3.10"
9+
dependencies = [
10+
"langgraph>=0.2.55",
11+
"langchain-community>=0.3.9",
12+
"langchain-anthropic>=0.3.8",
13+
"langchain-experimental>=0.3.4",
14+
"tavily-python>=0.5.0",
15+
"uipath==2.0.1",
16+
"uipath-langchain==0.0.87"
17+
]
18+
19+
[project.optional-dependencies]
20+
dev = ["mypy>=1.11.1", "ruff>=0.6.1"]
21+
22+
[build-system]
23+
requires = ["setuptools>=73.0.0", "wheel"]
24+
build-backend = "setuptools.build_meta"
25+
26+
[tool.setuptools.package-data]
27+
"*" = ["py.typed"]
28+
29+
[tool.ruff]
30+
lint.select = [
31+
"E", # pycodestyle
32+
"F", # pyflakes
33+
"I", # isort
34+
"D", # pydocstyle
35+
"D401", # First line should be in imperative mood
36+
"T201",
37+
"UP",
38+
]
39+
lint.ignore = [
40+
"UP006",
41+
"UP007",
42+
"UP035",
43+
"D417",
44+
"E501",
45+
]
46+
47+
[tool.ruff.lint.per-file-ignores]
48+
"tests/*" = ["D", "UP"]
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
---
2+
config:
3+
flowchart:
4+
curve: linear
5+
---
6+
graph TD;
7+
__start__([<p>__start__</p>]):::first
8+
invoke_researcher(invoke_researcher)
9+
create_quiz(create_quiz)
10+
__end__([<p>__end__</p>]):::last
11+
__start__ --> invoke_researcher;
12+
create_quiz --> __end__;
13+
invoke_researcher --> create_quiz;
14+
classDef default fill:#f2f0ff,line-height:1.2
15+
classDef first fill-opacity:0
16+
classDef last fill:#bfb6fc
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
---
2+
config:
3+
flowchart:
4+
curve: linear
5+
---
6+
graph TD;
7+
__start__([<p>__start__</p>]):::first
8+
upload_to_bucket(upload_to_bucket)
9+
prepare_input(prepare_input)
10+
__end__([<p>__end__</p>]):::last
11+
__start__ --> prepare_input;
12+
prepare_input --> researcher___start__;
13+
researcher___end__ --> upload_to_bucket;
14+
upload_to_bucket --> __end__;
15+
subgraph researcher
16+
researcher___start__(<p>__start__</p>)
17+
researcher_agent(agent)
18+
researcher_tools(tools)
19+
researcher___end__(<p>__end__</p>)
20+
researcher___start__ --> researcher_agent;
21+
researcher_tools --> researcher_agent;
22+
researcher_agent -.-> researcher_tools;
23+
researcher_agent -.-> researcher___end__;
24+
end
25+
classDef default fill:#f2f0ff,line-height:1.2
26+
classDef first fill-opacity:0
27+
classDef last fill:#bfb6fc
Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
import asyncio
import logging
import time
from typing import List, Literal, Optional

from langchain_anthropic import ChatAnthropic
from langchain_core.messages import HumanMessage
from langchain_core.output_parsers import PydanticOutputParser
from langgraph.graph import END, START, MessagesState, StateGraph
from langgraph.types import Command, interrupt
from pydantic import BaseModel, Field, ValidationInfo, field_validator
from uipath import UiPath
from uipath.models import IngestionInProgressException, InvokeProcess
from uipath_langchain.retrievers import ContextGroundingRetriever
13+
14+
15+
# Module-level logger; handlers and levels are configured by the host runtime.
logger = logging.getLogger(__name__)

# Anthropic chat model used by create_quiz to draft the quiz JSON.
llm = ChatAnthropic(model="claude-3-5-sonnet-latest")
18+
19+
# One generated quiz question together with its grading metadata.
# (# comments rather than a docstring: pydantic folds class docstrings into
# the JSON schema that PydanticOutputParser sends to the LLM.)
class QuizItem(BaseModel):
    question: str = Field(
        description="One quiz question"
    )
    # Normalized difficulty score; pydantic enforces the 0.0-1.0 bounds.
    difficulty: float = Field(
        description="How difficult is the question", ge=0.0, le=1.0
    )
    answer: str = Field(
        description="The expected answer to the question",
    )
29+
# Container model: a quiz is simply an ordered list of QuizItem entries.
class Quiz(BaseModel):
    quiz_items: List[QuizItem] = Field(
        description="A list of quiz items"
    )
33+
# Parsed LLM reply: either a finished quiz, or a request for more source data.
class QuizOrInsufficientInfo(BaseModel):
    quiz: Optional[Quiz] = Field(
        description="A quiz based on user input and available documents."
    )
    # "false" means no further data is needed; anything else is a
    # natural-language request fed back to the researcher agent.
    additional_info: Optional[str] = Field(
        description="String that controls whether additional information is required",
    )

    @field_validator("additional_info")
    def check_quiz(cls, v, info: ValidationInfo):
        """Require a quiz whenever the LLM reports that no extra info is needed.

        The validator was originally attached to "quiz", but pydantic runs
        field validators in declaration order, so info.data could never
        contain "additional_info" at that point and the check was dead code.
        Validating "additional_info" (declared after "quiz") makes the
        cross-field check effective; the error message also now matches the
        condition it guards (it previously stated the inverse).
        """
        if v == "false" and info.data.get("quiz") is None:
            raise ValueError("quiz must not be None when additional_info is 'false'")
        return v
47+
48+
# Parser that converts the LLM's JSON reply into a QuizOrInsufficientInfo.
output_parser = PydanticOutputParser(pydantic_object=QuizOrInsufficientInfo)

# Prompt template; {quiz_topic}, {context} and {format_instructions} are
# filled in by create_quiz. Runtime string — content left byte-identical.
system_message ="""You are a quiz generator. Try to generate a quiz about {quiz_topic} with multiple questions ONLY based on the following documents. Do not use any extra information from your knowledgebase.
If the documents do not provide enough info, respond with as little words as possible in the format 'additional_info=Need data about ...'. The additional_info should be around 10-15 words.
If they provide enough info, create the quiz and set additional_info='false'

This is the context data: {context}

{format_instructions}

Respond with the classification in the requested JSON format."""

# UiPath SDK client used for context-grounding index management.
uipath = UiPath()
61+
62+
63+
# Typed output of the graph: the completed quiz.
class GraphOutput(BaseModel):
    quiz: Quiz
65+
66+
# Typed input of the graph, validated by pydantic before the first node runs.
class GraphInput(BaseModel):
    # Topic the quiz should cover.
    quiz_topic: str
    # Storage bucket holding (or receiving) the source documents.
    bucket_name: str
    # Context-grounding index built over the bucket.
    index_name: str
    # Optional folder path inside the bucket.
    bucket_folder: Optional[str] = None
71+
72+
# Working state threaded through the graph nodes (extends MessagesState,
# so it also carries the accumulated "messages" list).
class GraphState(MessagesState):
    quiz_topic: str
    bucket_name: str
    bucket_folder: Optional[str]
    index_name: str
    # LLM's additional_info string; "false" means no more data is needed.
    # Annotation corrected from Optional[bool]: prepare_input stores the
    # string "false" and check_quiz_creation compares against "false".
    additional_info: Optional[str]
    quiz: Optional[Quiz]
79+
80+
def prepare_input(state: GraphInput) -> GraphState:
    """Seed the working graph state from the validated graph input."""
    opening_message = ("user", f"create a quiz about {state.quiz_topic}")
    initial_state = GraphState(
        quiz_topic=state.quiz_topic,
        bucket_name=state.bucket_name,
        bucket_folder=state.bucket_folder,
        index_name=state.index_name,
        # "false" = no extra research requested yet.
        additional_info="false",
        messages=opening_message,
    )
    return initial_state
89+
90+
async def invoke_researcher(state: GraphState) -> Command:
    """Hand off to the researcher agent to gather the missing information.

    Appends the current additional_info request to the conversation, then
    interrupts this graph with an InvokeProcess payload; the orchestrator
    resumes the node with the researcher's output. Only the researcher's
    final message is merged back into this graph's message history.
    """
    # Fix: the original statement ended with a stray trailing comma, which
    # silently wrapped the append's None result in a throwaway 1-tuple.
    state["messages"].append(HumanMessage(f"{state['additional_info']}"))

    input_args_json = {
        "messages": state["messages"],
        "bucket_name": state["bucket_name"],
        "bucket_folder": state.get("bucket_folder", None),
    }
    # interrupt() suspends this graph until the invoked process completes.
    agent_response = interrupt(InvokeProcess(
        name="researcher-and-uploader-agent",
        input_arguments=input_args_json,
    ))

    return Command(
        update={
            "messages": [agent_response["messages"][-1]],
        })
107+
108+
async def create_quiz(state: GraphState) -> Command:
    """Generate a quiz from context-grounded documents, or request more data.

    Ensures the context-grounding index exists, triggers ingestion, then
    polls the retriever (bounded retries) until ingestion finishes. The
    retrieved documents are passed to the LLM, whose JSON reply is parsed
    into QuizOrInsufficientInfo; the state is updated with the quiz (when
    complete) and the additional_info flag read by check_quiz_creation.

    Raises:
        Exception: if ingestion is still in progress after all retries.
    """
    no_of_retries = 5
    context_data = None
    data_queried = False
    index = uipath.context_grounding.get_or_create_index(
        state["index_name"],
        storage_bucket_name=state["bucket_name"],
        storage_bucket_folder_path=state["bucket_folder"],
    )
    uipath.context_grounding.ingest_data(index)
    while no_of_retries != 0:
        try:
            context_data = await ContextGroundingRetriever(
                index_name=state["index_name"],
                uipath_sdk=uipath,
                number_of_results=10
            ).ainvoke(state["quiz_topic"])
            data_queried = True
            break
        except IngestionInProgressException as ex:
            logger.info(ex.message)
            no_of_retries -= 1
            logger.info(f"{no_of_retries} retries left")
            # Fix: was time.sleep(5) — a blocking sleep inside an async node
            # stalls the whole event loop; asyncio.sleep yields control.
            await asyncio.sleep(5)
    if not data_queried:
        raise Exception("Ingestion is taking too long.")
    message = system_message.format(
        format_instructions=output_parser.get_format_instructions(),
        context=context_data if context_data else "No context available yet",
        quiz_topic=state["quiz_topic"],
    )
    result = llm.invoke(message)
    try:
        llm_response = output_parser.parse(result.content)
        return Command(
            update={
                "quiz": llm_response.quiz if llm_response.additional_info == "false" else None,
                "additional_info": llm_response.additional_info,
            }
        )
    except Exception:
        # Fix: was print(...) — the project's own ruff config selects T201
        # (no print); logger.exception also records the traceback.
        logger.exception("Failed to parse LLM response")
        return Command(goto=END)
145+
146+
def check_quiz_creation(state: GraphState) -> Literal["invoke_researcher", "return_quiz"]:
    """Route back to the researcher when more data is needed, else finish."""
    needs_more_info = state["additional_info"] != "false"
    return "invoke_researcher" if needs_more_info else "return_quiz"
150+
151+
def return_quiz(state: GraphState) -> GraphOutput:
    """Expose the completed quiz as the graph's typed output."""
    final_quiz = state["quiz"]
    return GraphOutput(quiz=final_quiz)
153+
154+
# Build the state graph: prepare_input seeds the state, create_quiz attempts
# quiz generation, and check_quiz_creation loops through invoke_researcher
# until enough context exists, after which return_quiz produces the output.
builder = StateGraph(input=GraphInput, output=GraphOutput)
builder.add_node("invoke_researcher", invoke_researcher)
builder.add_node("create_quiz", create_quiz)
builder.add_node("return_quiz", return_quiz)
builder.add_node("prepare_input", prepare_input)

builder.add_edge(START, "prepare_input")
builder.add_edge("prepare_input", "create_quiz")
# Routing target is the node name returned by check_quiz_creation's Literal.
builder.add_conditional_edges("create_quiz", check_quiz_creation)
builder.add_edge("invoke_researcher", "create_quiz")
builder.add_edge("return_quiz", END)

# Compile the graph
graph = builder.compile()
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
from typing import Optional
2+
import time
3+
from langchain_anthropic import ChatAnthropic
4+
from langchain_community.tools.tavily_search import TavilySearchResults
5+
from langgraph.graph import END, START, MessagesState, StateGraph
6+
from langgraph.prebuilt import create_react_agent
7+
from langgraph.types import Command
8+
from uipath import UiPath
9+
from langchain_core.messages import AIMessage, SystemMessage
10+
11+
# UiPath SDK client used for bucket uploads.
uipath = UiPath()
# Web-search tool; returns at most 5 results per query.
tavily_tool = TavilySearchResults(max_results=5)
anthropic_model = "claude-3-5-sonnet-latest"


llm = ChatAnthropic(model=anthropic_model)

# ReAct-style agent that searches the web and returns raw, unsummarized text.
research_agent = create_react_agent(
    llm, tools=[tavily_tool], prompt="You are a researcher. Search relevant information given the user topic. Don't do summarizations. Retrieve raw, unstructured data."
)
21+
22+
# Graph input: conversation messages plus the upload destination.
class GraphInput(MessagesState):
    # Destination storage bucket for the gathered research.
    bucket_name: str
    # Optional folder path inside the bucket.
    bucket_folder: Optional[str]
25+
26+
# Working state threaded through the graph nodes.
class GraphState(MessagesState):
    # Raw text gathered by the researcher agent.
    web_results: str
    # File name generated by create_file_name (None until that node runs).
    file_name: Optional[str]
    bucket_name: str
    bucket_folder: Optional[str]
31+
32+
def prepare_input(state: GraphInput) -> GraphState:
    """Expand the incoming GraphInput into the full working GraphState."""
    initial_state = GraphState(
        messages=state["messages"],
        bucket_name=state["bucket_name"],
        bucket_folder=state.get("bucket_folder", None),
        # Not yet gathered / generated at this point.
        web_results="",
        file_name=None,
    )
    return initial_state
40+
41+
async def research_node(state: GraphState) -> Command:
    """Run the ReAct researcher and store its final answer in web_results."""
    agent_result = await research_agent.ainvoke(state)
    final_answer = agent_result["messages"][-1].content
    update_payload = {
        "web_results": final_answer,
        # file_name is re-written with its current value (kept for parity
        # with the original behavior).
        "file_name": state["file_name"],
    }
    return Command(update=update_payload)
49+
50+
async def create_file_name(state: GraphState) -> GraphState:
    """Ask the LLM to turn the latest message into an underscore-separated file name."""
    file_name = await llm.ainvoke(
        [SystemMessage(
            """
            You are a message summarizer.
            Generate a file name from the received message, replacing spaces with underscores,
            to create a succinct and descriptive identification.
            For instance, 'Need data about formula 1' should be converted to format like 'data_about_formula_1'.
            """
        ),
        state['messages'][-1]])
    # Rebuild the state with the generated name; web_results stays empty
    # until research_node runs next.
    return GraphState(
        messages=state["messages"],
        web_results="",
        bucket_name=state["bucket_name"],
        bucket_folder=state.get("bucket_folder", None),
        file_name=file_name.content,
    )
68+
69+
70+
def upload_to_bucket(state: GraphState) -> MessagesState:
    """Upload the researcher's findings to the storage bucket as a text file.

    The blob name combines the LLM-generated file name with a Unix timestamp
    so repeated runs never overwrite each other. Returns a single AI message
    confirming the upload.
    """
    current_timestamp = int(time.time())
    file_name = state["file_name"]
    uipath.buckets.upload_from_memory(
        bucket_name=state["bucket_name"],
        blob_file_path=f"{file_name}-{current_timestamp}.txt",
        # Fix: was "application/txt", which is not a registered MIME type;
        # plain text is "text/plain" (RFC 2046).
        content_type="text/plain",
        content=state["web_results"],
    )
    return MessagesState(messages=[AIMessage("Relevant information uploaded to bucket.")])
79+
80+
81+
# Build the state graph: prepare_input seeds the state, create_file_name
# derives the blob name, researcher gathers raw web data, and
# upload_to_bucket persists it to the storage bucket.
builder = StateGraph(input=GraphInput, output=MessagesState)
builder.add_node("researcher", research_node)
builder.add_node("upload_to_bucket", upload_to_bucket)
builder.add_node("prepare_input", prepare_input)
builder.add_node("create_file_name", create_file_name)

builder.add_edge(START, "prepare_input")
builder.add_edge("prepare_input", "create_file_name")
builder.add_edge("create_file_name", "researcher")
builder.add_edge("researcher", "upload_to_bucket")
builder.add_edge("upload_to_bucket", END)

# Compile the graph
graph = builder.compile()

0 commit comments

Comments
 (0)