tattn · tattn · Apr 25, 2026 · Apr 25, 2026 · Apr 20, 2026 · Apr 20, 2026
diff --git a/.github/workflows/docc.yml b/.github/workflows/docc.yml
@@ -16,9 +16,9 @@ concurrency:
 
 jobs:
   generate-docc:
-    runs-on: macos-15
+    runs-on: macos-26
     env:
-      DEVELOPER_DIR: "/Applications/Xcode_16.4.app/Contents/Developer"
+      DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer"
     steps:
       - uses: actions/checkout@v4
         with:
@@ -27,6 +27,10 @@ jobs:
       - name: Setup Pages
         uses: actions/configure-pages@v4
 
+      - name: Download Metal Toolchain
+        continue-on-error: true
+        run: xcodebuild -downloadComponent MetalToolchain
+
       - name: Build DocC
         # NOTE: LocalLLMClientMLX documentation is excluded because mlx-swift
         # symbol extraction requires Metal GPU support which is not available

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -35,7 +35,7 @@ jobs:
         # To enable MLX tests, use self-hosted runners: runs-on: [self-hosted, macos]
         test-type: [Llama, FoundationModels]
     env:
-      DEVELOPER_DIR: "/Applications/Xcode_26.2.app/Contents/Developer"
+      DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer"
       TEST_RUNNER_GITHUB_MODEL_CACHE: "${{ github.workspace }}/model_cache"
     steps:
       - &checkout
@@ -58,7 +58,7 @@ jobs:
           xcodebuild -downloadComponent MetalToolchain
 
       - name: Run ${{ matrix.test-type }} tests with Xcode 26
-        run: TEST_RUNNER_GITHUB_ACTIONS_TEST="${{ matrix.test-type }}" xcodebuild test -scheme LocalLLMClient-Package -destination 'platform=macOS'
+        run: TEST_RUNNER_GITHUB_ACTIONS_TEST="${{ matrix.test-type }}" xcodebuild test -skipMacroValidation -scheme LocalLLMClient-Package -destination 'platform=macOS'
 
       - name: Upload test results
         if: failure()
@@ -74,7 +74,7 @@ jobs:
     runs-on: macos-26
     needs: test-macos
     env:
-      DEVELOPER_DIR: "/Applications/Xcode_26.2.app/Contents/Developer"
+      DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer"
     steps:
       - *checkout
 
@@ -84,13 +84,13 @@ jobs:
         working-directory: Example
         run: |
           xcodebuild -downloadPlatform iOS
-          xcodebuild build -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=macOS' CODE_SIGN_IDENTITY="-"
+          xcodebuild build -skipMacroValidation -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=macOS' CODE_SIGN_IDENTITY="-"
 
   build-example-ios:
     runs-on: macos-26
     needs: test-macos
     env:
-      DEVELOPER_DIR: "/Applications/Xcode_26.2.app/Contents/Developer"
+      DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer"
     steps:
       - *checkout
 
@@ -100,7 +100,7 @@ jobs:
         working-directory: Example
         run: |
           xcodebuild -downloadPlatform iOS
-          xcodebuild build -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=iOS Simulator,name=iPhone 17 Pro,OS=26.2' CODE_SIGN_IDENTITY="-"
+          xcodebuild build -skipMacroValidation -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=iOS Simulator,name=iPhone 17 Pro,OS=26.4' CODE_SIGN_IDENTITY="-"
 
   test-ubuntu-x86_64:
     runs-on: ubuntu-latest

diff --git a/.github/workflows/update-dependencies.yml b/.github/workflows/update-dependencies.yml
@@ -7,9 +7,9 @@ on:
 
 jobs:
   update-dependencies:
-    runs-on: macos-15
+    runs-on: macos-26
     env:
-      DEVELOPER_DIR: "/Applications/Xcode_16.4.app/Contents/Developer"
+      DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer"
     permissions:
       contents: write
       pull-requests: write

diff --git a/Example/LocalLLMClientExample/AI.swift b/Example/LocalLLMClientExample/AI.swift
@@ -12,9 +12,11 @@ enum LLMModel: Sendable, CaseIterable, Identifiable {
     case qwen3_4b
     case qwen2_5VL_3b
     case gemma3_4b_mlx
+    case gemma4_e2b_mlx
     case phi4mini
     case gemma3
     case gemma3_4b
+    case gemma4_E2B
     case mobileVLM_3b
 
     static let `default` = qwen3
@@ -25,9 +27,11 @@ enum LLMModel: Sendable, CaseIterable, Identifiable {
         case .qwen3_4b: "MLX / Qwen3 4B"
         case .qwen2_5VL_3b: "MLX / Qwen2.5VL 3B"
         case .gemma3_4b_mlx: "MLX / Gemma3 4B"
+        case .gemma4_e2b_mlx: "MLX / Gemma4 E2B (4bit)"
         case .phi4mini: "llama.cpp / Phi-4 Mini 3.8B"
         case .gemma3: "llama.cpp / Gemma3 1B"
         case .gemma3_4b: "llama.cpp / Gemma3 4B"
+        case .gemma4_E2B: "llama.cpp / Gemma4 E2B"
         case .mobileVLM_3b: "llama.cpp / MobileVLM 3B"
         }
     }
@@ -38,30 +42,35 @@ enum LLMModel: Sendable, CaseIterable, Identifiable {
         case .qwen3_4b: "mlx-community/Qwen3-4B-4bit"
         case .qwen2_5VL_3b: "mlx-community/Qwen2.5-VL-3B-Instruct-abliterated-4bit"
         case .gemma3_4b_mlx: "mlx-community/gemma-3-4b-it-qat-4bit"
+        case .gemma4_e2b_mlx: "mlx-community/gemma-4-e2b-it-4bit"
         case .phi4mini: "unsloth/Phi-4-mini-instruct-GGUF"
         case .gemma3: "lmstudio-community/gemma-3-1B-it-qat-GGUF"
         case .gemma3_4b: "lmstudio-community/gemma-3-4B-it-qat-GGUF"
+        case .gemma4_E2B: "lmstudio-community/gemma-4-E2B-it-GGUF"
         case .mobileVLM_3b: "Blombert/MobileVLM-3B-GGUF"
         }
     }
 
     var filename: String? {
         switch self {
-        case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx: nil
+        case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx: nil
         case .phi4mini: "Phi-4-mini-instruct-Q4_K_M.gguf"
         case .gemma3: "gemma-3-1B-it-QAT-Q4_0.gguf"
         case .gemma3_4b: "gemma-3-4B-it-QAT-Q4_0.gguf"
+        case .gemma4_E2B: "gemma-4-E2B-it-Q4_K_M.gguf"
         case .mobileVLM_3b: "ggml-MobileVLM-3B-q5_k_s.gguf"
         }
     }
 
     var mmprojFilename: String? {
         switch self {
-        case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .phi4mini, .gemma3: nil
+        case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx, .phi4mini, .gemma3: nil
 #if os(macOS)
         case .gemma3_4b: "mmproj-model-f16.gguf"
+        case .gemma4_E2B: "mmproj-gemma-4-E2B-it-BF16.gguf"
 #elseif os(iOS)
-        case .gemma3_4b: nil
+        // Total footprint (model + mmproj ≈ 6 GB) exceeds what most iPhones can map; text-only on iOS.
+        case .gemma3_4b, .gemma4_E2B: nil
 #endif
         case .mobileVLM_3b: "mmproj-model-f16.gguf"
         }
@@ -75,26 +84,28 @@ enum LLMModel: Sendable, CaseIterable, Identifiable {
         switch self {
         case .qwen3, .qwen3_4b, .phi4mini, .gemma3: false
 #if os(macOS)
-        case .gemma3_4b: true
+        case .gemma3_4b, .gemma4_E2B: true
 #elseif os(iOS)
-        case .gemma3_4b: false
+        case .gemma3_4b, .gemma4_E2B: false
 #endif
-        case .qwen2_5VL_3b, .gemma3_4b_mlx, .mobileVLM_3b: true
+        case .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx, .mobileVLM_3b: true
         }
     }
 
     var extraEOSTokens: Set<String> {
         switch self {
         case .gemma3_4b_mlx:
             return ["<end_of_turn>"]
-        case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .phi4mini, .gemma3, .gemma3_4b, .mobileVLM_3b:
+        case .gemma4_e2b_mlx:
+            return ["<turn|>"]
+        case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .phi4mini, .gemma3, .gemma3_4b, .gemma4_E2B, .mobileVLM_3b:
             return []
         }
     }
-    
+
     var supportsTools: Bool {
         switch self {
-        case .qwen3, .qwen3_4b, .phi4mini, .gemma3, .gemma3_4b:
+        case .qwen3, .qwen3_4b, .phi4mini, .gemma3, .gemma3_4b, .gemma4_E2B, .gemma4_e2b_mlx:
             return true
         case .qwen2_5VL_3b, .gemma3_4b_mlx, .mobileVLM_3b:
             return false

diff --git a/Example/LocalLLMClientExample/ChatViewModel.swift b/Example/LocalLLMClientExample/ChatViewModel.swift
@@ -14,9 +14,14 @@ final class ChatViewModel {
     private var ai: AI
     private var generateTask: Task<Void, Never>?
     private var generatingText = ""
+    /// Optimistically displayed user message until it lands in `ai.messages`.
+    private var pendingUserMessage: LLMInput.Message?
 
     var messages: [LLMInput.Message] {
         var messages = ai.messages
+        if let pendingUserMessage, messages.last?.role != .user {
+            messages.append(pendingUserMessage)
+        }
         if !generatingText.isEmpty, messages.last?.role != .assistant {
             messages.append(.assistant(generatingText))
         }
@@ -33,6 +38,7 @@ final class ChatViewModel {
         let currentInput = (text: inputText, images: inputAttachments)
         inputText = ""
         inputAttachments = []
+        pendingUserMessage = .user(currentInput.text, attachments: currentInput.images)
 
         generateTask = Task {
             generatingText = ""
@@ -46,6 +52,7 @@ final class ChatViewModel {
                 (inputText, inputAttachments) = currentInput
             }
 
+            pendingUserMessage = nil
             generateTask = nil
             generatingText = ""
         }

diff --git a/Example/LocalLLMClientExample/Downloader.swift b/Example/LocalLLMClientExample/Downloader.swift
@@ -5,8 +5,8 @@ struct Downloader: Sendable {
     init(model: LLMModel) {
         self.model = model
         let globs: Globs = switch model {
-        case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx: .mlx
-        case .phi4mini, .gemma3, .gemma3_4b, .mobileVLM_3b: .init(
+        case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx: .mlx
+        case .phi4mini, .gemma3, .gemma3_4b, .gemma4_E2B, .mobileVLM_3b: .init(
             (model.filename.map { [$0] } ?? []) + (model.mmprojFilename.map { [$0] } ?? [])
         )}
 #if os(macOS)

diff --git a/Package.resolved b/Package.resolved
diff --git a/Package.swift b/Package.swift
@@ -3,36 +3,21 @@
 import PackageDescription
 import CompilerPluginSupport
 
-let llamaVersion = "b6871"
+let llamaVersion = "b8851"
+let llamaBuildNumber = String(llamaVersion.dropFirst())
 
 // MARK: - Package Dependencies
 
 var packageDependencies: [Package.Dependency] = [
     .package(url: "https://github.com/apple/swift-argument-parser.git", .upToNextMinor(from: "1.4.0")),
-    .package(url: "https://github.com/huggingface/swift-jinja", .upToNextMinor(from: "2.0.0")),
+    .package(url: "https://github.com/huggingface/swift-jinja", from: "2.3.5"),
     .package(url: "https://github.com/swiftlang/swift-syntax", from: "600.0.0")
 ]
 
 #if os(iOS) || os(macOS)
 packageDependencies.append(contentsOf: [
-    // mlx-swift-lm v3 (PR #118 merged 2026-04-01) removed
-    // `loadTokenizer(configuration:hub:)` and reshaped the Hub/Downloader
-    // API; `LocalLLMClientMLX/Context.swift` still uses the old API. Until
-    // the MLX backend is migrated to v3 (`AutoTokenizer.from(directory:)` +
-    // `Downloader`), pin to the last pre-v3 commit so consumers can build.
-    // Tracked in LocalLLMClient#93 — switch back to `branch: "main"` once
-    // Context.swift is migrated.
-    .package(
-        url: "https://github.com/ml-explore/mlx-swift-lm",
-        revision: "2a296f145c3129fea4290bb6e4a0a5fb458efa06"  // 2026-03-27, last pre-v3
-    ),
-    // `Tokenizers` (from swift-transformers) is what `LocalLLMClientMLX`
-    // imports for `any Tokenizer`. Pre-v3 mlx-swift-lm transitively pulled
-    // swift-transformers in, but its Package.swift didn't declare it as a
-    // public re-export, so consumers still need to depend on it directly.
-    // Range matches the pre-v3 mlx-swift-lm transitive pin so SPM resolves.
-    // Bump to `from: "1.3.0"` once Context.swift is migrated to mlx-swift-lm v3.
-    .package(url: "https://github.com/huggingface/swift-transformers.git", "1.2.0"..<"1.3.0"),
+    .package(url: "https://github.com/ml-explore/mlx-swift-lm", from: "3.31.3"),
+    .package(url: "https://github.com/huggingface/swift-transformers", from: "1.3.0"),
     .package(url: "https://github.com/apple/swift-docc-plugin", from: "1.4.0")
 ])
 #endif
@@ -152,6 +137,7 @@ packageTargets.append(contentsOf: [
             "LocalLLMClientCore",
             .product(name: "MLXLLM", package: "mlx-swift-lm"),
             .product(name: "MLXVLM", package: "mlx-swift-lm"),
+            .product(name: "MLXHuggingFace", package: "mlx-swift-lm"),
             .product(name: "Tokenizers", package: "swift-transformers"),
         ],
     ),
@@ -172,18 +158,23 @@ packageTargets.append(contentsOf: [
         name: "LocalLLMClientLlamaFramework",
         url:
             "https://github.com/ggml-org/llama.cpp/releases/download/\(llamaVersion)/llama-\(llamaVersion)-xcframework.zip",
-        checksum: "ac657d70112efadbf5cd1db5c4f67eea94ca38556ada9e7442d5a5a461010d6f"
+        checksum: "f5eb26820b9890ae026aee4963cd4f43af1c567d39534012f2685601a59c2519"
     ),
     .target(
         name: "LocalLLMClientLlamaC",
         dependencies: ["LocalLLMClientLlamaFramework"],
         exclude: ["exclude"],
         cSettings: [
             .unsafeFlags(["-w"]),
-            .headerSearchPath(".")
+            .define("LLAMA_BUILD_NUMBER", to: llamaBuildNumber),
+            .headerSearchPath("."),
+            .headerSearchPath("common")
         ],
         cxxSettings: [
-            .headerSearchPath(".")
+            .unsafeFlags(["-UDEBUG"]),
+            .define("LLAMA_BUILD_NUMBER", to: llamaBuildNumber),
+            .headerSearchPath("."),
+            .headerSearchPath("common")
         ],
         swiftSettings: [
             .interoperabilityMode(.Cxx)
@@ -194,7 +185,8 @@ packageTargets.append(contentsOf: [
         name: "LocalLLMClientUtilityTests",
         dependencies: [
             "LocalLLMClientUtility",
-            .product(name: "MLXLMCommon", package: "mlx-swift-lm")
+            .product(name: "MLXLMCommon", package: "mlx-swift-lm"),
+            .product(name: "Hub", package: "swift-transformers"),
         ]
     )
 ])