Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion app/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,10 @@ dependencies {
implementation(libs.jna) { artifact { type = "aar" } }
implementation(libs.vosk.android)

// LiteRT / Tensorflow Lite
// ONNX Runtime for Android (runs the Parakeet encoder and TDT decoder)
implementation(libs.onnxruntime.android)

// LiteRT / TensorFlow Lite (used by OpenWakeWord)
implementation(libs.litert)

// OkHttp
Expand Down
16 changes: 15 additions & 1 deletion app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,14 @@ import org.stypox.dicio.io.input.InputEvent
import org.stypox.dicio.io.input.SttInputDevice
import org.stypox.dicio.io.input.SttState
import org.stypox.dicio.io.input.external_popup.ExternalPopupInputDevice
import org.stypox.dicio.io.input.parakeet.ParakeetInputDevice
import org.stypox.dicio.io.input.scribe.ScribeRealtimeInputDevice
import org.stypox.dicio.io.input.vosk.VoskInputDevice
import org.stypox.dicio.settings.datastore.InputDevice
import org.stypox.dicio.settings.datastore.InputDevice.INPUT_DEVICE_EXTERNAL_POPUP
import org.stypox.dicio.settings.datastore.InputDevice.INPUT_DEVICE_NOTHING
import org.stypox.dicio.settings.datastore.InputDevice.INPUT_DEVICE_PARAKEET
import org.stypox.dicio.settings.datastore.InputDevice.INPUT_DEVICE_SCRIBE_REALTIME
import org.stypox.dicio.settings.datastore.InputDevice.INPUT_DEVICE_UNSET
import org.stypox.dicio.settings.datastore.InputDevice.INPUT_DEVICE_VOSK
import org.stypox.dicio.settings.datastore.InputDevice.UNRECOGNIZED
Expand Down Expand Up @@ -60,6 +64,7 @@ class SttInputDeviceWrapperImpl(
private var inputDeviceSetting: InputDevice
private var sttPlaySoundSetting: SttPlaySound
private val silencesBeforeStop: StateFlow<Int>
private val scribeApiKey: StateFlow<String>
private var sttInputDevice: SttInputDevice?

// null means that the user has not enabled any STT input device
Expand All @@ -77,7 +82,9 @@ class SttInputDeviceWrapperImpl(

inputDeviceSetting = firstSettings.first
sttPlaySoundSetting = firstSettings.second
silencesBeforeStop = dataStore.data.map(SttInputDevice::getSttSilenceDurationOrDefault)
silencesBeforeStop = MutableStateFlow(SttInputDevice.DEFAULT_STT_SILENCE_DURATION)
scribeApiKey = dataStore.data
.map { it.scribeApiKey.trim() }
.toStateFlowDistinctBlockingFirst(scope)
sttInputDevice = buildInputDevice(inputDeviceSetting)
scope.launch {
Expand Down Expand Up @@ -107,6 +114,13 @@ class SttInputDeviceWrapperImpl(
UNRECOGNIZED,
INPUT_DEVICE_UNSET,
INPUT_DEVICE_VOSK -> VoskInputDevice(appContext, okHttpClient, localeManager, silencesBeforeStop)
INPUT_DEVICE_PARAKEET -> ParakeetInputDevice(appContext, okHttpClient, localeManager)
INPUT_DEVICE_SCRIBE_REALTIME -> ScribeRealtimeInputDevice(
okHttpClient,
localeManager,
scribeApiKey,
silencesBeforeStop,
)
INPUT_DEVICE_EXTERNAL_POPUP ->
ExternalPopupInputDevice(appContext, activityForResultManager, localeManager)
INPUT_DEVICE_NOTHING -> null
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.stypox.dicio.io.input

import kotlinx.coroutines.flow.StateFlow
import org.stypox.dicio.settings.datastore.UserSettings

interface SttInputDevice {
val uiState: StateFlow<SttState>
Expand All @@ -16,9 +15,5 @@ interface SttInputDevice {

companion object {
/** Fallback returned by [getSttSilenceDurationOrDefault] when the stored value is not positive. */
const val DEFAULT_STT_SILENCE_DURATION = 2
/**
 * Returns the silence duration stored in [settings], or [DEFAULT_STT_SILENCE_DURATION] when
 * the stored value is zero or negative.
 *
 * NOTE(review): the unit of this value is not visible here; a consumer stores it in a field
 * named `silencesBeforeStop`, so it is presumably a count of silences rather than a time
 * span — confirm.
 */
fun getSttSilenceDurationOrDefault(settings: UserSettings): Int {
// Unfortunately there is no way to tell protobuf to use "2" as the default value, so every
// non-positive value (presumably what an unset field reads as — confirm) maps to the default.
return settings.sttSilenceDuration.takeIf { it > 0 } ?: DEFAULT_STT_SILENCE_DURATION
}
}
}
11 changes: 11 additions & 0 deletions app/src/main/kotlin/org/stypox/dicio/io/input/SttState.kt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,17 @@ sealed interface SttState {
*/
data object Listening : SttState

/**
 * Speech has ended and silence was detected. This state is expected to be very short-lived
 * and acts as user feedback before the final inference starts.
 *
 * NOTE(review): the final inference is presumably reported through [Thinking] — confirm
 * against the input device implementations.
 */
data object SilenceDetected : SttState

/**
 * The model is processing recorded audio and generating the final recognition result.
 *
 * NOTE(review): presumably entered right after [SilenceDetected]; which input devices emit
 * this state is not visible here — confirm.
 */
data object Thinking : SttState

/**
* An external Android app has been asked to listen (e.g. through
* `RecognizerIntent.ACTION_RECOGNIZE_SPEECH`), and may be listening but we don't know for
Expand Down
Loading