Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions .github/workflows/docc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ concurrency:

jobs:
generate-docc:
runs-on: macos-15
runs-on: macos-26
env:
DEVELOPER_DIR: "/Applications/Xcode_16.4.app/Contents/Developer"
DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer"
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -27,6 +27,10 @@ jobs:
- name: Setup Pages
uses: actions/configure-pages@v4

- name: Download Metal Toolchain
continue-on-error: true
run: xcodebuild -downloadComponent MetalToolchain

- name: Build DocC
# NOTE: LocalLLMClientMLX documentation is excluded because mlx-swift
# symbol extraction requires Metal GPU support which is not available
Expand Down
12 changes: 6 additions & 6 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
# To enable MLX tests, use self-hosted runners: runs-on: [self-hosted, macos]
test-type: [Llama, FoundationModels]
env:
DEVELOPER_DIR: "/Applications/Xcode_26.2.app/Contents/Developer"
DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer"
TEST_RUNNER_GITHUB_MODEL_CACHE: "${{ github.workspace }}/model_cache"
steps:
- &checkout
Expand All @@ -58,7 +58,7 @@ jobs:
xcodebuild -downloadComponent MetalToolchain

- name: Run ${{ matrix.test-type }} tests with Xcode 26
run: TEST_RUNNER_GITHUB_ACTIONS_TEST="${{ matrix.test-type }}" xcodebuild test -scheme LocalLLMClient-Package -destination 'platform=macOS'
run: TEST_RUNNER_GITHUB_ACTIONS_TEST="${{ matrix.test-type }}" xcodebuild test -skipMacroValidation -scheme LocalLLMClient-Package -destination 'platform=macOS'

- name: Upload test results
if: failure()
Expand All @@ -74,7 +74,7 @@ jobs:
runs-on: macos-26
needs: test-macos
env:
DEVELOPER_DIR: "/Applications/Xcode_26.2.app/Contents/Developer"
DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer"
steps:
- *checkout

Expand All @@ -84,13 +84,13 @@ jobs:
working-directory: Example
run: |
xcodebuild -downloadPlatform iOS
xcodebuild build -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=macOS' CODE_SIGN_IDENTITY="-"
xcodebuild build -skipMacroValidation -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=macOS' CODE_SIGN_IDENTITY="-"

build-example-ios:
runs-on: macos-26
needs: test-macos
env:
DEVELOPER_DIR: "/Applications/Xcode_26.2.app/Contents/Developer"
DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer"
steps:
- *checkout

Expand All @@ -100,7 +100,7 @@ jobs:
working-directory: Example
run: |
xcodebuild -downloadPlatform iOS
xcodebuild build -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=iOS Simulator,name=iPhone 17 Pro,OS=26.2' CODE_SIGN_IDENTITY="-"
xcodebuild build -skipMacroValidation -project LocalLLMClientExample.xcodeproj -scheme LocalLLMClientExample -destination 'platform=iOS Simulator,name=iPhone 17 Pro,OS=26.4' CODE_SIGN_IDENTITY="-"

test-ubuntu-x86_64:
runs-on: ubuntu-latest
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/update-dependencies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ on:

jobs:
update-dependencies:
runs-on: macos-15
runs-on: macos-26
env:
DEVELOPER_DIR: "/Applications/Xcode_16.4.app/Contents/Developer"
DEVELOPER_DIR: "/Applications/Xcode_26.4.app/Contents/Developer"
permissions:
contents: write
pull-requests: write
Expand Down
29 changes: 20 additions & 9 deletions Example/LocalLLMClientExample/AI.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ enum LLMModel: Sendable, CaseIterable, Identifiable {
case qwen3_4b
case qwen2_5VL_3b
case gemma3_4b_mlx
case gemma4_e2b_mlx
case phi4mini
case gemma3
case gemma3_4b
case gemma4_E2B
case mobileVLM_3b

static let `default` = qwen3
Expand All @@ -25,9 +27,11 @@ enum LLMModel: Sendable, CaseIterable, Identifiable {
case .qwen3_4b: "MLX / Qwen3 4B"
case .qwen2_5VL_3b: "MLX / Qwen2.5VL 3B"
case .gemma3_4b_mlx: "MLX / Gemma3 4B"
case .gemma4_e2b_mlx: "MLX / Gemma4 E2B (4bit)"
case .phi4mini: "llama.cpp / Phi-4 Mini 3.8B"
case .gemma3: "llama.cpp / Gemma3 1B"
case .gemma3_4b: "llama.cpp / Gemma3 4B"
case .gemma4_E2B: "llama.cpp / Gemma4 E2B"
case .mobileVLM_3b: "llama.cpp / MobileVLM 3B"
}
}
Expand All @@ -38,30 +42,35 @@ enum LLMModel: Sendable, CaseIterable, Identifiable {
case .qwen3_4b: "mlx-community/Qwen3-4B-4bit"
case .qwen2_5VL_3b: "mlx-community/Qwen2.5-VL-3B-Instruct-abliterated-4bit"
case .gemma3_4b_mlx: "mlx-community/gemma-3-4b-it-qat-4bit"
case .gemma4_e2b_mlx: "mlx-community/gemma-4-e2b-it-4bit"
case .phi4mini: "unsloth/Phi-4-mini-instruct-GGUF"
case .gemma3: "lmstudio-community/gemma-3-1B-it-qat-GGUF"
case .gemma3_4b: "lmstudio-community/gemma-3-4B-it-qat-GGUF"
case .gemma4_E2B: "lmstudio-community/gemma-4-E2B-it-GGUF"
case .mobileVLM_3b: "Blombert/MobileVLM-3B-GGUF"
}
}

/// File name of the model weights for this case, or `nil` when the case names no single file.
///
/// The `nil` cases are the same ones `Downloader.init` maps to `.mlx` globs; every other
/// case returns the name of one `.gguf` file.
var filename: String? {
switch self {
case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx: nil
case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx: nil
case .phi4mini: "Phi-4-mini-instruct-Q4_K_M.gguf"
case .gemma3: "gemma-3-1B-it-QAT-Q4_0.gguf"
case .gemma3_4b: "gemma-3-4B-it-QAT-Q4_0.gguf"
case .gemma4_E2B: "gemma-4-E2B-it-Q4_K_M.gguf"
case .mobileVLM_3b: "ggml-MobileVLM-3B-q5_k_s.gguf"
}
}

var mmprojFilename: String? {
switch self {
case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .phi4mini, .gemma3: nil
case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx, .phi4mini, .gemma3: nil
#if os(macOS)
case .gemma3_4b: "mmproj-model-f16.gguf"
case .gemma4_E2B: "mmproj-gemma-4-E4B-it-BF16.gguf"
#elseif os(iOS)
case .gemma3_4b: nil
// Total footprint (model + mmproj ≈ 6 GB) exceeds what most iPhones can map; text-only on iOS.
case .gemma3_4b, .gemma4_E2B: nil
#endif
case .mobileVLM_3b: "mmproj-model-f16.gguf"
}
Expand All @@ -75,26 +84,28 @@ enum LLMModel: Sendable, CaseIterable, Identifiable {
switch self {
case .qwen3, .qwen3_4b, .phi4mini, .gemma3: false
#if os(macOS)
case .gemma3_4b: true
case .gemma3_4b, .gemma4_E2B: true
#elseif os(iOS)
case .gemma3_4b: false
case .gemma3_4b, .gemma4_E2B: false
#endif
case .qwen2_5VL_3b, .gemma3_4b_mlx, .mobileVLM_3b: true
case .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx, .mobileVLM_3b: true
}
}

/// Extra end-of-sequence token strings for this model; empty for models that list none.
///
/// NOTE(review): presumably the generator treats these as additional stop markers on top of
/// the tokenizer's own EOS token — confirm at the call site, which is not visible here.
var extraEOSTokens: Set<String> {
switch self {
case .gemma3_4b_mlx:
return ["<end_of_turn>"]
case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .phi4mini, .gemma3, .gemma3_4b, .mobileVLM_3b:
case .gemma4_e2b_mlx:
return ["<turn|>"]
case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .phi4mini, .gemma3, .gemma3_4b, .gemma4_E2B, .mobileVLM_3b:
return []
}
}

var supportsTools: Bool {
switch self {
case .qwen3, .qwen3_4b, .phi4mini, .gemma3, .gemma3_4b:
case .qwen3, .qwen3_4b, .phi4mini, .gemma3, .gemma3_4b, .gemma4_E2B, .gemma4_e2b_mlx:
return true
case .qwen2_5VL_3b, .gemma3_4b_mlx, .mobileVLM_3b:
return false
Expand Down
7 changes: 7 additions & 0 deletions Example/LocalLLMClientExample/ChatViewModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,14 @@ final class ChatViewModel {
private var ai: AI
private var generateTask: Task<Void, Never>?
private var generatingText = ""
/// The user's just-sent message, shown optimistically in `messages` until it appears in `ai.messages`.
private var pendingUserMessage: LLMInput.Message?

var messages: [LLMInput.Message] {
var messages = ai.messages
if let pendingUserMessage, messages.last?.role != .user {
messages.append(pendingUserMessage)
}
if !generatingText.isEmpty, messages.last?.role != .assistant {
messages.append(.assistant(generatingText))
}
Expand All @@ -33,6 +38,7 @@ final class ChatViewModel {
let currentInput = (text: inputText, images: inputAttachments)
inputText = ""
inputAttachments = []
pendingUserMessage = .user(currentInput.text, attachments: currentInput.images)

generateTask = Task {
generatingText = ""
Expand All @@ -46,6 +52,7 @@ final class ChatViewModel {
(inputText, inputAttachments) = currentInput
}

pendingUserMessage = nil
generateTask = nil
generatingText = ""
}
Expand Down
4 changes: 2 additions & 2 deletions Example/LocalLLMClientExample/Downloader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ struct Downloader: Sendable {
init(model: LLMModel) {
self.model = model
let globs: Globs = switch model {
case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx: .mlx
case .phi4mini, .gemma3, .gemma3_4b, .mobileVLM_3b: .init(
case .qwen3, .qwen3_4b, .qwen2_5VL_3b, .gemma3_4b_mlx, .gemma4_e2b_mlx: .mlx
case .phi4mini, .gemma3, .gemma3_4b, .gemma4_E2B, .mobileVLM_3b: .init(
(model.filename.map { [$0] } ?? []) + (model.mmprojFilename.map { [$0] } ?? [])
)}
#if os(macOS)
Expand Down
17 changes: 9 additions & 8 deletions Package.resolved

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 16 additions & 24 deletions Package.swift
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,21 @@
import PackageDescription
import CompilerPluginSupport

let llamaVersion = "b6871"
let llamaVersion = "b8851"
let llamaBuildNumber = String(llamaVersion.dropFirst())

// MARK: - Package Dependencies

var packageDependencies: [Package.Dependency] = [
.package(url: "https://github.com/apple/swift-argument-parser.git", .upToNextMinor(from: "1.4.0")),
.package(url: "https://github.com/huggingface/swift-jinja", .upToNextMinor(from: "2.0.0")),
.package(url: "https://github.com/huggingface/swift-jinja", from: "2.3.5"),
.package(url: "https://github.com/swiftlang/swift-syntax", from: "600.0.0")
]

#if os(iOS) || os(macOS)
packageDependencies.append(contentsOf: [
// mlx-swift-lm v3 (PR #118 merged 2026-04-01) removed
// `loadTokenizer(configuration:hub:)` and reshaped the Hub/Downloader
// API; `LocalLLMClientMLX/Context.swift` still uses the old API. Until
// the MLX backend is migrated to v3 (`AutoTokenizer.from(directory:)` +
// `Downloader`), pin to the last pre-v3 commit so consumers can build.
// Tracked in LocalLLMClient#93 — switch back to `branch: "main"` once
// Context.swift is migrated.
.package(
url: "https://github.com/ml-explore/mlx-swift-lm",
revision: "2a296f145c3129fea4290bb6e4a0a5fb458efa06" // 2026-03-27, last pre-v3
),
// `Tokenizers` (from swift-transformers) is what `LocalLLMClientMLX`
// imports for `any Tokenizer`. Pre-v3 mlx-swift-lm transitively pulled
// swift-transformers in, but its Package.swift didn't declare it as a
// public re-export, so consumers still need to depend on it directly.
// Range matches the pre-v3 mlx-swift-lm transitive pin so SPM resolves.
// Bump to `from: "1.3.0"` once Context.swift is migrated to mlx-swift-lm v3.
.package(url: "https://github.com/huggingface/swift-transformers.git", "1.2.0"..<"1.3.0"),
.package(url: "https://github.com/ml-explore/mlx-swift-lm", from: "3.31.3"),
.package(url: "https://github.com/huggingface/swift-transformers", from: "1.3.0"),
.package(url: "https://github.com/apple/swift-docc-plugin", from: "1.4.0")
])
#endif
Expand Down Expand Up @@ -152,6 +137,7 @@ packageTargets.append(contentsOf: [
"LocalLLMClientCore",
.product(name: "MLXLLM", package: "mlx-swift-lm"),
.product(name: "MLXVLM", package: "mlx-swift-lm"),
.product(name: "MLXHuggingFace", package: "mlx-swift-lm"),
.product(name: "Tokenizers", package: "swift-transformers"),
],
),
Expand All @@ -172,18 +158,23 @@ packageTargets.append(contentsOf: [
name: "LocalLLMClientLlamaFramework",
url:
"https://github.com/ggml-org/llama.cpp/releases/download/\(llamaVersion)/llama-\(llamaVersion)-xcframework.zip",
checksum: "ac657d70112efadbf5cd1db5c4f67eea94ca38556ada9e7442d5a5a461010d6f"
checksum: "f5eb26820b9890ae026aee4963cd4f43af1c567d39534012f2685601a59c2519"
),
.target(
name: "LocalLLMClientLlamaC",
dependencies: ["LocalLLMClientLlamaFramework"],
exclude: ["exclude"],
cSettings: [
.unsafeFlags(["-w"]),
.headerSearchPath(".")
.define("LLAMA_BUILD_NUMBER", to: llamaBuildNumber),
.headerSearchPath("."),
.headerSearchPath("common")
],
cxxSettings: [
.headerSearchPath(".")
.unsafeFlags(["-UDEBUG"]),
.define("LLAMA_BUILD_NUMBER", to: llamaBuildNumber),
.headerSearchPath("."),
.headerSearchPath("common")
],
swiftSettings: [
.interoperabilityMode(.Cxx)
Expand All @@ -194,7 +185,8 @@ packageTargets.append(contentsOf: [
name: "LocalLLMClientUtilityTests",
dependencies: [
"LocalLLMClientUtility",
.product(name: "MLXLMCommon", package: "mlx-swift-lm")
.product(name: "MLXLMCommon", package: "mlx-swift-lm"),
.product(name: "Hub", package: "swift-transformers"),
]
)
])
Expand Down
Loading
Loading