callstackincubator · JKobrynski · May 20, 2026 · May 21, 2026 · May 21, 2026 · May 22, 2026
diff --git a/apps/expo-example/src/screens/ChatScreen/index.tsx b/apps/expo-example/src/screens/ChatScreen/index.tsx
@@ -107,6 +107,7 @@ export default function ChatScreen() {
       setToolExecutionReporter(({ toolName, args, result }) => {
         addToolExecutionMessage(chatId, toolName, args, result)
       })
+      let streamError: unknown
       const result = streamText({
         model: selectedAdapter.model,
         messages: [
@@ -129,6 +130,9 @@ export default function ChatScreen() {
                 'If the user asks, tell who you are (assistant) and what is this (Callstack AI demo app).',
             })
           : 'You are a helpful assistant. If the user asks, tell who you are (assistant) and what is this (Callstack AI demo app).',
+        onError: ({ error }) => {
+          streamError = error
+        },
       })
 
       let accumulated = ''
@@ -139,6 +143,8 @@ export default function ChatScreen() {
         updateMessageContent(chatId, assistantMessageId, accumulated)
       }
 
+      if (streamError) throw streamError
+
       if (accumulated.trim().length === 0) {
         updateMessageContent(
           chatId,

diff --git a/packages/apple-llm/ios/AppleLLM.mm b/packages/apple-llm/ios/AppleLLM.mm
@@ -155,8 +155,12 @@ - (nonnull NSString *)generateStream:(nonnull NSArray *)messages options:(JS::Na
                                  onComplete:^(NSString *streamId) {
     [self emitOnStreamComplete:@{@"streamId": streamId}];
   }
-                                    onError:^(NSString *streamId, NSString *error) {
-    [self emitOnStreamError:@{@"streamId": streamId, @"error": error}];
+                                    onError:^(NSString *streamId, NSString *code, NSString *error) {
+    NSMutableDictionary *payload = [@{@"streamId": streamId, @"error": error} mutableCopy];
+    if (code.length > 0) {
+      payload[@"code"] = code;
+    }
+    [self emitOnStreamError:payload];
   }
                                 toolInvoker:callToolBlock];
 

diff --git a/packages/apple-llm/ios/AppleLLMError.swift b/packages/apple-llm/ios/AppleLLMError.swift
@@ -17,6 +17,7 @@ enum AppleLLMError: Error, LocalizedError {
   case invalidSchema(String)
   case toolCallError(Error)
   case unknownToolCallError
+  case contextWindowExceeded
 
   var errorDescription: String? {
     switch self {
@@ -38,9 +39,20 @@ enum AppleLLMError: Error, LocalizedError {
       return "Error calling tool: \(error.localizedDescription)"
     case .unknownToolCallError:
       return "Unknown tool call error"
+    case .contextWindowExceeded:
+      return "Context window exceeded"
     }
 
   }
+
+  /// String error code forwarded to JavaScript for context-window overflows.
+  /// `nil` for every other case.
+  var contextWindowErrorCode: String? {
+    guard case .contextWindowExceeded = self else {
+      return nil
+    }
+    return "CONTEXT_WINDOW_EXCEEDED"
+  }
 
   var code: Int {
     switch self {
@@ -53,6 +65,7 @@ enum AppleLLMError: Error, LocalizedError {
     case .invalidSchema: return 7
     case .unknownToolCallError: return 8
     case .toolCallError: return 9
+    case .contextWindowExceeded: return 10
     }
   }
 }
diff --git a/packages/apple-llm/ios/AppleLLMImpl.swift b/packages/apple-llm/ios/AppleLLMImpl.swift
@@ -74,7 +74,12 @@ public class AppleLLMImpl: NSObject {
             resolve(response.toModelMessages())
           }
         } catch {
-          reject("AppleLLM", error.localizedDescription, error)
+          if let appleError = self.createContextWindowError(from: error),
+             let code = appleError.contextWindowErrorCode {
+            reject(code, appleError.localizedDescription, appleError)
+          } else {
+            reject("AppleLLM", error.localizedDescription, error)
+          }
         }
       }
     } else {
@@ -93,14 +98,15 @@ public class AppleLLMImpl: NSObject {
     options: [String: Any],
     onUpdate: @escaping (String, String) -> Void,
     onComplete: @escaping (String) -> Void,
-    onError: @escaping (String, String) -> Void,
+    onError: @escaping (String, String, String) -> Void,
     toolInvoker: @escaping ToolInvoker
   ) throws -> String {
 #if canImport(FoundationModels)
     if #available(iOS 26, *) {
       let streamId = UUID().uuidString
       guard SystemLanguageModel.default.availability == .available else {
-        onError(streamId, "Apple Intelligence model is not available")
+        let error = AppleLLMError.modelUnavailable
+        onError(streamId, "", error.localizedDescription)
         return streamId
       }
 
@@ -140,7 +146,12 @@ public class AppleLLMImpl: NSObject {
             onComplete(streamId)
           }
         } catch {
-          onError(streamId, error.localizedDescription)
+          if let appleError = self.createContextWindowError(from: error),
+             let code = appleError.contextWindowErrorCode {
+            onError(streamId, code, appleError.localizedDescription)
+          } else {
+            onError(streamId, "", error.localizedDescription)
+          }
         }
 
         // Clean up task from map when completed
@@ -171,6 +182,19 @@ public class AppleLLMImpl: NSObject {
 
   // MARK: - Private Methods
 #if canImport(FoundationModels)
+
+  /// Translates a FoundationModels context-window overflow into `.contextWindowExceeded`;
+  /// yields `nil` for any other error.
+  @available(iOS 26, *)
+  private func createContextWindowError(from error: Error) -> AppleLLMError? {
+    guard
+      let sessionError = error as? LanguageModelSession.GenerationError,
+      case .exceededContextWindowSize = sessionError
+    else {
+      return nil
+    }
+    return .contextWindowExceeded
+  }
 
   @available(iOS 26, *)
   private func createTools(from options: [String: Any], toolInvoker: @escaping ToolInvoker) throws -> [any Tool] {

diff --git a/packages/apple-llm/src/NativeAppleLLM.ts b/packages/apple-llm/src/NativeAppleLLM.ts
@@ -30,6 +30,7 @@ export type StreamCompleteEvent = {
 
 export type StreamErrorEvent = {
   streamId: string
+  code?: string // present only when the native side reports a non-empty error code
   error: string
 }
 

diff --git a/packages/apple-llm/src/ai-sdk.ts b/packages/apple-llm/src/ai-sdk.ts
@@ -21,6 +21,7 @@ import {
   ToolCallOptions,
 } from '@ai-sdk/provider-utils'
 
+import { createAppleLLMError } from './errors'
 import NativeAppleEmbeddings from './NativeAppleEmbeddings'
 import NativeAppleLLM, { type AppleMessage } from './NativeAppleLLM'
 import NativeAppleSpeech from './NativeAppleSpeech'
@@ -464,7 +465,7 @@ class AppleLLMChatLanguageModel implements LanguageModelV3 {
             if (data.streamId === streamId) {
               controller.enqueue({
                 type: 'error',
-                error: data.error,
+                error: createAppleLLMError(data.error, data.code),
               })
               cleanup()
               controller.close()

diff --git a/packages/apple-llm/src/errors.ts b/packages/apple-llm/src/errors.ts
@@ -0,0 +1,21 @@
+export type AppleLLMError = Error & {
+  code?: string // machine-readable code; left unset when none was supplied
+}
+
+export const AppleLLMErrorCodes = {
+  ContextWindowExceeded: 'CONTEXT_WINDOW_EXCEEDED', // same string the native Swift layer reports
+} as const
+
+export type AppleLLMErrorCode =
+  (typeof AppleLLMErrorCodes)[keyof typeof AppleLLMErrorCodes] // union of the code string literals
+
+/** Wraps `message` in an `Error`, attaching `code` only when it is non-empty. */
+export function createAppleLLMError(
+  message: string,
+  code?: string
+): AppleLLMError {
+  const tagged = new Error(message) as AppleLLMError
+  if (!code) return tagged
+  tagged.code = code
+  return tagged
+}
diff --git a/packages/apple-llm/src/index.ts b/packages/apple-llm/src/index.ts
@@ -1,4 +1,6 @@
 export { apple, createAppleProvider } from './ai-sdk'
+export type { AppleLLMError, AppleLLMErrorCode } from './errors'
+export { AppleLLMErrorCodes } from './errors'
 export { default as AppleEmbeddings } from './NativeAppleEmbeddings'
 export { default as AppleFoundationModels } from './NativeAppleLLM'
 export { default as AppleSpeech, VoiceInfo } from './NativeAppleSpeech'

diff --git a/packages/apple-llm/src/stream.ts b/packages/apple-llm/src/stream.ts
@@ -1,5 +1,6 @@
 import type { LanguageModelV2StreamPart } from '@ai-sdk/provider'
 
+import { createAppleLLMError } from './errors'
 import NativeAppleLLMSpec, {
   type AppleGenerationOptions,
   type AppleMessage,
@@ -67,7 +68,7 @@ export function generateStream(
           if (data.streamId === streamId) {
             controller.enqueue({
               type: 'error',
-              error: data.error,
+              error: createAppleLLMError(data.error, data.code),
             })
             cleanup()
             controller.close()

diff --git a/website/src/docs/apple/generating.md b/website/src/docs/apple/generating.md
@@ -203,6 +203,104 @@ if (!apple.isAvailable()) {
 }
 ```
 
+## Context Window
+
+Apple Foundation Models have a fixed context window of 4096 tokens. This limit applies to the full request context, including system instructions, previous conversation messages, tool definitions, schemas, and the current user prompt.
+
+The `maxTokens` option only limits how many tokens the model can generate in its response. It does not increase the available context window or reserve enough room for a long prompt.
+
+If the full context is too large, Apple may fail generation with a context-window overflow error. The provider does not automatically estimate tokens, remove messages from your prompt, or retry the request, because token estimates can vary by language and different apps need different memory strategies. Handle this at the application level by catching the error and choosing the recovery behavior that fits your product:
+
+- Start a new conversation without the previous transcript, which is Apple's recommended baseline after this error
+- Keep a sliding window of recent messages
+- Summarize older messages and include the summary instead of the full transcript
+- Ask the user to shorten the prompt or start a new chat
+
+```typescript
+import {
+  AppleLLMErrorCodes,
+  type AppleLLMError,
+  apple,
+} from '@react-native-ai/apple';
+import { generateText } from 'ai';
+
+try {
+  const result = await generateText({
+    model: apple(),
+    messages
+  });
+} catch (error) {
+  const appleError = error as AppleLLMError;
+
+  if (appleError.code === AppleLLMErrorCodes.ContextWindowExceeded) {
+    // Apply your app's recovery strategy here.
+    // For example: retry with fewer messages or start a new chat.
+  }
+
+  throw error;
+}
+```
+
+For streaming calls, use `fullStream` when you need to inspect provider error parts:
+
+```typescript
+import {
+  AppleLLMErrorCodes,
+  type AppleLLMError,
+  apple,
+} from '@react-native-ai/apple';
+import { streamText } from 'ai';
+
+const result = streamText({
+  model: apple(),
+  messages
+});
+
+for await (const part of result.fullStream) {
+  if (part.type === 'error') {
+    const error = part.error as AppleLLMError;
+
+    if (error.code === AppleLLMErrorCodes.ContextWindowExceeded) {
+      // Apply your app's recovery strategy here.
+    }
+  }
+}
+```
+
+If you only consume `textStream`, pass `onError` to `streamText`. The AI SDK does not emit error parts through the text-only stream, so capture the error in the `onError` callback and handle it once the `for await` loop has finished:
+
+```typescript
+import {
+  AppleLLMErrorCodes,
+  type AppleLLMError,
+  apple,
+} from '@react-native-ai/apple';
+import { streamText } from 'ai';
+
+let streamError: unknown;
+const result = streamText({
+  model: apple(),
+  messages,
+  onError: ({ error }) => {
+    streamError = error;
+  },
+});
+
+for await (const delta of result.textStream) {
+  console.log(delta);
+}
+
+if (streamError) {
+  const error = streamError as AppleLLMError;
+
+  if (error.code === AppleLLMErrorCodes.ContextWindowExceeded) {
+    // Apply your app's recovery strategy here.
+  }
+
+  throw streamError;
+}
+```
+
 ## Available Options
 
 Configure model behavior with generation options: