10 changes: 1 addition & 9 deletions bun.lock
@@ -7,11 +7,7 @@
},
"packages/fluent-ai": {
"name": "fluent-ai",
"version": "0.4.4",
"dependencies": {
"eventsource-parser": "^3.0.6",
"partial-json": "^0.1.7",
},
"version": "0.4.7",
"devDependencies": {
"@types/bun": "1.3.0",
"bun-plugin-dts": "^0.3.0",
@@ -504,8 +500,6 @@

"etag": ["etag@1.8.1", "", {}, "sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg=="],

"eventsource-parser": ["eventsource-parser@3.0.6", "", {}, "sha512-Vo1ab+QXPzZ4tCa8SwIHJFaSzy4R6SHf7BY79rFBDf0idraZWAkYrDjDj8uWaSm3S2TK+hJ7/t1CEmZ7jXw+pg=="],

"exit-hook": ["exit-hook@2.2.1", "", {}, "sha512-eNTPlAD67BmP31LDINZ3U7HSF8l57TxOY2PmBJ1shpCvpnxBF93mWCE8YHBnXs8qiUZJc9WDcWIeC3a2HIAMfw=="],

"express": ["express@4.21.2", "", { "dependencies": { "accepts": "~1.3.8", "array-flatten": "1.1.1", "body-parser": "1.20.3", "content-disposition": "0.5.4", "content-type": "~1.0.4", "cookie": "0.7.1", "cookie-signature": "1.0.6", "debug": "2.6.9", "depd": "2.0.0", "encodeurl": "~2.0.0", "escape-html": "~1.0.3", "etag": "~1.8.1", "finalhandler": "1.3.1", "fresh": "0.5.2", "http-errors": "2.0.0", "merge-descriptors": "1.0.3", "methods": "~1.1.2", "on-finished": "2.4.1", "parseurl": "~1.3.3", "path-to-regexp": "0.1.12", "proxy-addr": "~2.0.7", "qs": "6.13.0", "range-parser": "~1.2.1", "safe-buffer": "5.2.1", "send": "0.19.0", "serve-static": "1.16.2", "setprototypeof": "1.2.0", "statuses": "2.0.1", "type-is": "~1.6.18", "utils-merge": "1.0.1", "vary": "~1.1.2" } }, "sha512-28HqgMZAmih1Czt9ny7qr6ek2qddF4FclbMzwhCREB6OFfH+rXAnuNCwo1/wFvrtbgsQDb4kSbX9de9lFbrXnA=="],
@@ -664,8 +658,6 @@

"parseurl": ["parseurl@1.3.3", "", {}, "sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ=="],

"partial-json": ["partial-json@0.1.7", "", {}, "sha512-Njv/59hHaokb/hRUjce3Hdv12wd60MtM9Z5Olmn+nehe0QDAsRtRbJPvJ0Z91TusF0SuZRIvnM+S4l6EIP8leA=="],

"path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="],

"path-scurry": ["path-scurry@1.11.1", "", { "dependencies": { "lru-cache": "^10.2.0", "minipass": "^5.0.0 || ^6.0.2 || ^7.0.0" } }, "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA=="],
1 change: 1 addition & 0 deletions packages/fluent-ai/.gitignore
@@ -0,0 +1 @@
/dist
41 changes: 41 additions & 0 deletions packages/fluent-ai/examples/ollama-agent.ts
@@ -0,0 +1,41 @@
import z from "zod";
import { agent, agentTool, inspectAgentStream, ollama } from "~/src/index";

const retrieveContext = async (args: { query: string }) => {
await Bun.sleep(500); // Simulate latency

return {
context:
"The Eiffel Tower is a wrought-iron lattice tower on the Champ de Mars in Paris, France. It is named after the engineer Gustave Eiffel, whose company designed and built the tower from 1887 to 1889.",
};
};

const retrieveContextTool = agentTool("retrieve_context")
.description("Retrieve information to help answer a query.")
.input(
z.object({
query: z.string().describe("The query to retrieve context for."),
}),
)
.execute(retrieveContext);

const chatAgent = agent("chat-agent")
.model(ollama().chat("qwen3:1.7b"))
.tool(retrieveContextTool)
.instructions(
() => `You have access to a tool that retrieves context.
Use the tool to help answer user queries.`,
);

const stream = chatAgent.generate(
[
{
id: "1",
role: "user",
text: "Tell me about the Eiffel Tower.",
},
],
{ maxSteps: 8 },
);

await inspectAgentStream(stream);
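
Note (not part of this diff): the stream can also be consumed by hand instead of through inspectAgentStream. A minimal sketch, assuming the chunk / tool / message event shapes added in src/agent/agent.ts and reusing the chatAgent defined above:

// Sketch only — iterates the same event union the agent loop yields.
for await (const event of chatAgent.generate(
  [{ id: "1", role: "user", text: "Tell me about the Eiffel Tower." }],
  { maxSteps: 8 },
)) {
  if (event.type === "chunk" && event.chunk.text) {
    process.stdout.write(event.chunk.text); // streamed answer text
  } else if (event.type === "tool") {
    // emitted once when the call starts and again with result/error attached
    console.log("\n[tool]", event.tool.name, event.tool.result ?? event.tool.error ?? "(running)");
  } else if (event.type === "message") {
    console.log("\n[message]", event.message.role);
  }
}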
21 changes: 19 additions & 2 deletions packages/fluent-ai/examples/ollama-chat.ts
@@ -5,13 +5,30 @@ const models = await ollama().models().run();
console.log(models);

const response = await ollama()
.chat(models[0].name)
.chat(models[0].id)
.messages([
{
role: "user",
content: "What is the capital of France?",
text: "What is the capital of France?",
},
])
.run();

console.log(response);

const streamResponse = await ollama()
.chat(models[0].id)
.messages([
{
role: "user",
text: "What is the capital of Spain?",
},
])
.stream()
.run();

for await (const chunk of streamResponse) {
if (chunk.message?.text) {
process.stdout.write(chunk.message.text);
}
}
8 changes: 8 additions & 0 deletions packages/fluent-ai/examples/ollama-embed.ts
@@ -0,0 +1,8 @@
import { ollama } from "../src";

const result = await ollama()
.embedding("embeddinggemma")
.input(["Why is the sky blue?", "Why is the grass green?"])
.run();

console.log(result.embeddings);
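
Not part of the diff: a quick sanity check on the embeddings. A sketch assuming result.embeddings is a number[][] with one vector per input string:

// Cosine similarity between the two example inputs (illustrative only).
const cosine = (a: number[], b: number[]) => {
  let dot = 0;
  let na = 0;
  let nb = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    na += a[i] * a[i];
    nb += b[i] * b[i];
  }
  return dot / (Math.sqrt(na) * Math.sqrt(nb));
};

const [sky, grass] = result.embeddings;
console.log("cosine similarity:", cosine(sky, grass));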
4 changes: 2 additions & 2 deletions packages/fluent-ai/examples/openrouter-chat.ts
@@ -7,8 +7,8 @@ const job: Job = {
input: {
model: "google/gemini-2.5-flash",
messages: [
{ role: "system", content: "You are a helpful assistant." },
{ role: "user", content: "Hi" },
{ role: "system", text: "You are a helpful assistant." },
{ role: "user", text: "Hi" },
],
},
};
16 changes: 16 additions & 0 deletions packages/fluent-ai/examples/openrouter-reasoning.ts
@@ -0,0 +1,16 @@
import { openrouter } from "~/src/index";

const stream = await openrouter()
.chat("deepseek/deepseek-r1")
.messages([
{
role: "user",
text: "How would you build the world's tallest skyscraper?",
},
])
.stream()
.run();

for await (const chunk of stream) {
console.log(JSON.stringify(chunk, null, 2));
}
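
An alternative loop body (replacing the raw JSON dump above), assuming the chunks carry optional text and reasoning fields as the agent loop in src/agent/agent.ts consumes them; the actual OpenRouter chunk shape is best confirmed from the dump above:

// Sketch only — prints the reasoning trace and the answer text separately.
for await (const chunk of stream) {
  if (chunk.reasoning) {
    process.stdout.write(chunk.reasoning); // model's reasoning trace, streamed first
  }
  if (chunk.text) {
    process.stdout.write(chunk.text); // final answer text
  }
}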
7 changes: 2 additions & 5 deletions packages/fluent-ai/package.json
@@ -9,7 +9,8 @@
".": {
"import": "./dist/index.js",
"types": "./dist/index.d.ts"
}
},
"./src": "./src/index.ts"
},
"files": [
"src",
@@ -19,10 +20,6 @@
"build": "bun run build.ts",
"prepublishOnly": "rm -rf dist && bun run build"
},
"dependencies": {
"eventsource-parser": "^3.0.6",
"partial-json": "^0.1.7"
},
"keywords": [
"ai",
"openai",
171 changes: 101 additions & 70 deletions packages/fluent-ai/src/agent/agent.ts
@@ -1,11 +1,15 @@
import { z } from "zod";
import { convertMessagesForChatCompletion } from "~/src/agent/message";
import {
agentToolSchema,
type AgentToolBuilder,
type AgentTool,
} from "~/src/agent/tool";
import type { Message } from "~/src/job/schema";
import type {
Message,
ToolMessage,
MessageChunk,
AssistantMessage,
} from "~/src/job/schema";
import type { ChatBuilder } from "~/src/builder/chat";

export const agentSchema = z.object({
@@ -14,7 +18,30 @@ export const agentSchema = z.object({
tools: z.array(agentToolSchema),
});

interface GenerateOptions {
interface ChunkEvent {
type: "chunk";
chunk: {
text?: string;
reasoning?: string;
};
}

interface ToolEvent {
type: "tool";
tool: {
name: string;
args: any;
result?: any;
error?: any;
};
}

interface MessageEvent {
type: "message";
message: Message;
}

export interface AgentGenerateOptions {
maxSteps: number;
}

@@ -46,28 +73,27 @@ export class Agent<TContext = any> {
generate = async function* (
this: Agent<TContext>,
initialMessages: Message[],
options: GenerateOptions,
options: AgentGenerateOptions,
context?: TContext,
) {
const body = agentSchema.parse(this.body);

let shouldBreak = false;
let shouldFinish = false;
let newMessages: Message[] = [];
for (let iteration = 0; iteration < options.maxSteps; iteration++) {
if (shouldBreak) {
if (shouldFinish) {
break;
}

const instructions =
typeof body.instructions === "function"
? body.instructions()
? body.instructions() // TODO: more context
: body.instructions;
const allMessages = initialMessages.concat(newMessages);
const convertedMessages = convertMessagesForChatCompletion(allMessages);
const messages = [{ role: "system", content: instructions }].concat(
convertedMessages as any,
const systemMessage = { role: "system", text: instructions };
const messages = ([systemMessage] as Message[]).concat(
initialMessages,
newMessages,
);
// TODO: agent tool vs chat tool
const tools = body.tools.map((tool) => ({
name: tool.name,
description: tool.description,
@@ -78,82 +104,87 @@
.stream()
.run();

let totalText = "";
for await (const chunk of result) {
const delta = chunk.raw.choices[0].delta;
let newAssistantMessage: AssistantMessage = {
role: "assistant",
text: "",
reasoning: "",
};

// TODO: tool calls with content??
if (delta.tool_calls) {
// TODO: tool call with content
// TODO: tool call with input streaming
// TODO: support multiple tool calls
const toolCall = delta.tool_calls[0];
const toolName = toolCall.function.name;
const input = JSON.parse(toolCall.function.arguments); // TODO: parsing error handling
for await (const chunk of result as AsyncIterable<MessageChunk>) {
if (chunk.toolCalls) {
// existing assistant message chunked out before tool call
if (newAssistantMessage.text || newAssistantMessage.reasoning) {
yield {
type: "message",
message: newAssistantMessage,
} as MessageEvent;
newMessages.push(newAssistantMessage);
newAssistantMessage = {
role: "assistant",
text: "",
reasoning: "",
};
}

const agentTool = body.tools.find((t) => t.name === toolName);
const toolCall = chunk.toolCalls[0];
const { name, arguments: args } = toolCall.function;
const agentTool = body.tools.find((t) => t.name === name);
if (!agentTool) {
throw new Error(`Unknown tool: ${toolName}`);
throw new Error(`Unknown tool: ${name}`);
}

const toolPart = {
type: "tool-" + toolName,
toolCallId: toolCall.id,
input: input,
};

yield { type: "tool-call-input", data: toolPart };

let output = null;
let outputError = null;
yield { type: "tool", tool: { name, args } };

let result = null;
let error = null;
try {
output = await agentTool.execute(input, context!);
result = await agentTool.execute(args, context!);
} catch (err) {
outputError = (err as Error).message;
error = (err as Error).message;
}

if (outputError) {
yield {
type: "tool-call-output",
data: { ...toolPart, outputError },
};
} else {
yield { type: "tool-call-output", data: { ...toolPart, output } };
}
yield {
type: "tool",
tool: { name, args, result, error },
} as ToolEvent;

const newMessage: Message = {
const newMessage: ToolMessage = {
role: "tool",
parts: [
{
type: `tool-${toolName}`,
toolCallId: toolCall.id,
input: input,
output: output,
outputError: outputError,
},
],
text: "",
content: {
callId: toolCall.id,
name: name,
args: args,
result: result,
error: error,
},
};

yield { type: "message-created", data: newMessage };
yield { type: "message", message: newMessage } as MessageEvent;
newMessages.push(newMessage);
} else if (delta.content) {
const text = delta.content as string;
yield { type: "text-delta", data: { text } };
totalText += text;
shouldBreak = true;
shouldFinish = false;
} else if (chunk.text || chunk.reasoning) {
yield {
type: "chunk",
chunk: {
text: chunk.text,
reasoning: chunk.reasoning,
},
} as ChunkEvent;

if (chunk.text) {
newAssistantMessage.text += chunk.text;
}
if (chunk.reasoning) {
newAssistantMessage.reasoning += chunk.reasoning;
}
shouldFinish = true;
}
}

if (totalText.trim()) {
const newMessage: Message = {
role: "assistant",
parts: [{ type: "text", text: totalText.trim() }],
};

yield { type: "message-created", data: newMessage };
newMessages.push(newMessage);
shouldBreak = true;
if (newAssistantMessage.text || newAssistantMessage.reasoning) {
yield { type: "message", message: newAssistantMessage } as MessageEvent;
newMessages.push(newAssistantMessage);
}
}
};
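
For reviewers, a sketch of how a throwing tool now surfaces: execute errors are caught, attached to the tool event, and stored on the tool message's content.error instead of the old parts array. Illustrative only, using the builders shown in the examples; whether the model actually calls the tool depends on the model.

import z from "zod";
import { agent, agentTool, ollama } from "~/src/index";

// A tool that always throws, to exercise the error path in the agent loop.
const flakyTool = agentTool("always_fails")
  .description("Always throws, to exercise the error path.")
  .input(z.object({ query: z.string() }))
  .execute(async () => {
    throw new Error("upstream unavailable");
  });

const failingAgent = agent("failing-agent")
  .model(ollama().chat("qwen3:1.7b"))
  .tool(flakyTool)
  .instructions(() => "Call the always_fails tool before answering.");

for await (const event of failingAgent.generate(
  [{ id: "1", role: "user", text: "Trigger the tool." }],
  { maxSteps: 2 },
)) {
  if (event.type === "tool" && event.tool.error) {
    console.log("tool failed:", event.tool.error); // "upstream unavailable"
  }
}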