Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ fun ModelLoadingScreen(
// Create and insert config based on provider type
val config = when (model.providerType) {
ProviderType.GGUF -> {
val defaultSchema = GgufEngineSchema()
val defaultSchema = GgufEngineSchema.defaultsForModel(model.modelName)
ModelConfig(
modelId = model.id,
modelLoadingParams = defaultSchema.toLoadingJson(),
Expand Down Expand Up @@ -1060,4 +1060,4 @@ sealed class InstallState {
data object Installing : InstallState()
data object Installed : InstallState()
data class Error(val message: String) : InstallState()
}
}
22 changes: 22 additions & 0 deletions app/src/main/java/com/dark/tool_neuron/engine/GGUFEngine.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package com.dark.tool_neuron.engine

import android.app.ActivityManager
import android.content.Context
import android.util.Log
import com.dark.tool_neuron.models.table_schema.Model
import com.dark.tool_neuron.models.table_schema.ModelConfig
import com.dark.tool_neuron.models.engine_schema.DeviceTier
Expand Down Expand Up @@ -66,6 +67,7 @@ class GGUFEngine {
if (inference.chatTemplate.isNotEmpty()) {
nativeLib.nativeSetChatTemplate(inference.chatTemplate)
}
setChatTemplateKwargs(inference.chatTemplateKwargs)
}

success
Expand Down Expand Up @@ -117,6 +119,7 @@ class GGUFEngine {
if (inference.chatTemplate.isNotEmpty()) {
nativeLib.nativeSetChatTemplate(inference.chatTemplate)
}
setChatTemplateKwargs(inference.chatTemplateKwargs)
}

success
Expand Down Expand Up @@ -231,6 +234,16 @@ class GGUFEngine {
nativeLib.nativeStopGeneration()
}

/**
 * Forwards chat-template kwargs JSON to the native library via reflection.
 *
 * The native symbol `nativeSetChatTemplateKwargs` only exists in newer builds;
 * [setChatTemplateKwargsMethod] is null on older libs, in which case this is a
 * silent no-op (backward compatible). Blank input is ignored.
 *
 * Fix: `Method.invoke` wraps an exception thrown *inside* the native call in
 * InvocationTargetException, which is itself a ReflectiveOperationException —
 * previously a genuine native failure was mis-logged as "not supported".
 * Catch it first and report the real cause.
 */
private fun setChatTemplateKwargs(kwargsJson: String) {
    if (kwargsJson.isBlank()) return
    try {
        setChatTemplateKwargsMethod?.invoke(nativeLib, kwargsJson)
    } catch (e: java.lang.reflect.InvocationTargetException) {
        // The method exists but the native call itself failed — surface it.
        Log.w(TAG, "nativeSetChatTemplateKwargs failed: ${e.cause?.message ?: e.message}")
    } catch (e: ReflectiveOperationException) {
        // Backward-compatible with native libs that don't expose chat template kwargs
        Log.d(TAG, "Chat template kwargs not supported in this native lib version (expected for older versions): ${e.message}")
    }
}

suspend fun unload() = withContext(Dispatchers.IO) {
if (isLoaded) {
nativeLib.nativeRelease()
Expand Down Expand Up @@ -414,6 +427,15 @@ class GGUFEngine {
fun hasToolsEnabled(): Boolean = !currentToolsJson.isNullOrEmpty()

companion object {
private const val TAG = "GGUFEngine"
// Reflective lookup of nativeSetChatTemplateKwargs(String) so this class keeps
// working against native libs built before the symbol existed; resolves to null
// when the loaded lib does not expose it (callers must null-check / safe-call).
// Lazy so the Method scan runs at most once per process.
private val setChatTemplateKwargsMethod by lazy {
    GGUFNativeLib::class.java.methods.firstOrNull {
        it.name == "nativeSetChatTemplateKwargs" &&
            it.parameterTypes.size == 1 &&
            it.parameterTypes[0] == String::class.java
    }
}

/**
* Detect device tier based on available RAM
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import android.app.ActivityManager
import android.content.Context
import kotlinx.serialization.Serializable
import kotlinx.serialization.json.Json
import java.util.Locale

enum class DeviceTier {
LOW_END, // < 4GB RAM
Expand Down Expand Up @@ -72,6 +73,7 @@ data class GgufInferenceParams(
val maxTokens: Int = 4096,
val systemPrompt: String = "",
val chatTemplate: String = "",
val chatTemplateKwargs: String = "",
val toolsJson: String = "" // JSON array of tool definitions
)

Expand All @@ -85,6 +87,9 @@ data class GgufEngineSchema(
fun toInferenceJson(): String = json.encodeToString(inferenceParams)

companion object {
private const val QWEN_THINKING_DISABLED_KWARGS = """{"enable_thinking": false}"""
// Match common Qwen 3.5 naming styles used across local files and HF repos.
private val qwen35ModelMarkers = listOf("qwen3.5", "qwen-3.5", "qwen3_5")
private val json = Json {
ignoreUnknownKeys = true
encodeDefaults = true
Expand All @@ -109,5 +114,20 @@ data class GgufEngineSchema(

return GgufEngineSchema(loading, inference)
}

/**
 * Builds the default engine schema for [modelName].
 *
 * Qwen 3.5 models get chat-template kwargs that disable "thinking" output;
 * every other model receives the plain defaults unchanged.
 */
fun defaultsForModel(modelName: String): GgufEngineSchema {
    val defaults = GgufEngineSchema()
    return when {
        isQwen35Model(modelName) -> defaults.copy(
            inferenceParams = defaults.inferenceParams.copy(
                chatTemplateKwargs = QWEN_THINKING_DISABLED_KWARGS
            )
        )
        else -> defaults
    }
}

// True when the (case-insensitively) normalized model name contains any of the
// known Qwen 3.5 naming markers.
private fun isQwen35Model(modelName: String): Boolean {
    val lowered = modelName.lowercase(Locale.ROOT)
    return qwen35ModelMarkers.any(lowered::contains)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ class ModelRepositoryDataStore(private val context: Context) {
isEnabled = true,
category = ModelCategory.GENERAL
),
HFModelRepository(
id = "qwen3_5_4b_instruct",
name = "Qwen3.5 Instruct (4B)",
repoPath = "Qwen/Qwen3.5-4B-Instruct-GGUF",
modelType = ModelType.GGUF,
isEnabled = true,
category = ModelCategory.GENERAL
),
HFModelRepository(
id = "liquidai-lfm2-350m",
name = "LFM2 350M",
Expand Down Expand Up @@ -180,4 +188,4 @@ class ModelRepositoryDataStore(private val context: Context) {
if (it.id == repo.id) repo else it
})
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ class ModelDownloadService : Service() {
}

ProviderType.GGUF -> {
val ggufSchema = GgufEngineSchema()
val ggufSchema = GgufEngineSchema.defaultsForModel(modelName)
ModelConfig(
modelId = checksum,
modelLoadingParams = ggufSchema.toLoadingJson(),
Expand Down Expand Up @@ -671,4 +671,4 @@ class ModelDownloadService : Service() {
super.onDestroy()
serviceScope.cancel()
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,14 @@ private fun GgufConfigEditor(viewModel: ModelConfigEditorViewModel) {
multiline = true,
minLines = 3
)

TextField(
label = "Chat Template Kwargs (Optional JSON)",
value = ggufConfig.inferenceParams.chatTemplateKwargs,
onValueChange = { viewModel.updateGgufChatTemplateKwargs(it) },
multiline = true,
minLines = 2
)
}
}
}
Expand Down Expand Up @@ -884,4 +892,4 @@ private fun SuccessMessage() {
)
}
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,12 @@ class ModelConfigEditorViewModel @Inject constructor(
}
}

/** Stores the user-edited chat-template kwargs JSON on the GGUF inference params. */
fun updateGgufChatTemplateKwargs(value: String) {
    _ggufConfig.update { config ->
        config.copy(
            inferenceParams = config.inferenceParams.copy(chatTemplateKwargs = value)
        )
    }
}

// ==================== Diffusion Config Updates ====================

fun updateDiffusionEmbeddingSize(value: Int) {
Expand Down Expand Up @@ -328,4 +334,4 @@ class ModelConfigEditorViewModel @Inject constructor(
it.copy(showDiffusionStride = value)
}
}
}
}