Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions charts/openab/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,9 @@ data:
api_key = "${STT_API_KEY}"
model = {{ ($cfg.stt).model | default "whisper-large-v3-turbo" | toJson }}
base_url = {{ ($cfg.stt).baseUrl | default "https://api.groq.com/openai/v1" | toJson }}
{{- if hasKey ($cfg.stt | default dict) "echoTranscript" }}
echo_transcript = {{ ($cfg.stt).echoTranscript }}
{{- end }}
{{- end }}
{{- if ($cfg.gateway).enabled }}
{{- if not ($cfg.gateway).url }}
Expand Down
3 changes: 3 additions & 0 deletions charts/openab/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,9 @@ agents:
apiKey: ""
model: "whisper-large-v3-turbo"
baseUrl: "https://api.groq.com/openai/v1"
# Echo the transcribed text back to the thread before the agent reply
# so users can verify STT accuracy. Default: false (opt-in).
echoTranscript: false
gateway:
enabled: false # set to true + provide url to enable the [gateway] config block
deploy: true # set to false to skip Gateway Deployment/Service (config-only mode)
Expand Down
1 change: 1 addition & 0 deletions docs/config-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ Speech-to-text transcription for voice messages. Uses an OpenAI-compatible `/aud
| `api_key` | string | `""` | API key for the STT service. When empty and `base_url` contains `groq.com`, the `GROQ_API_KEY` environment variable is used automatically. For local servers, use `api_key = "not-needed"`. |
| `model` | string | `"whisper-large-v3-turbo"` | Model name to use for transcription. |
| `base_url` | string | `"https://api.groq.com/openai/v1"` | Base URL of the STT API. Any OpenAI-compatible `/audio/transcriptions` endpoint works. |
| `echo_transcript` | bool | `false` | When set to `true` and STT runs, post a `> 🎤 <transcript>` message to the thread before the agent reply so users can verify what was heard. Failures show `(transcription failed)` and add a ⚠️ reaction to the original message. |

---

Expand Down
9 changes: 9 additions & 0 deletions docs/stt.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ enabled = true # default: false
api_key = "${GROQ_API_KEY}" # required for cloud providers
model = "whisper-large-v3-turbo" # default
base_url = "https://api.groq.com/openai/v1" # default
echo_transcript = true # default: false (opt-in)
```

| Field | Required | Default | Description |
Expand All @@ -58,6 +59,7 @@ base_url = "https://api.groq.com/openai/v1" # default
| `api_key` | no* | — | API key for the STT provider. *Auto-detected from `GROQ_API_KEY` env var if not set. For local servers, use any non-empty string (e.g. `"not-needed"`). |
| `model` | no | `whisper-large-v3-turbo` | Whisper model name. Varies by provider. |
| `base_url` | no | `https://api.groq.com/openai/v1` | OpenAI-compatible API base URL. |
| `echo_transcript` | no | `false` | When set to `true` and STT runs, post a `> 🎤 <transcript>` message to the thread before the agent reply so users can verify what was heard. Failures show `(transcription failed)` and add a ⚠️ reaction to the original message. |

## Deployment Options

Expand Down Expand Up @@ -147,6 +149,13 @@ helm upgrade openab openab/openab \
--set agents.kiro.stt.baseUrl=https://api.groq.com/openai/v1
```

```bash
helm upgrade openab openab/openab \
--set agents.kiro.stt.enabled=true \
--set agents.kiro.stt.apiKey=gsk_xxx \
--set agents.kiro.stt.echoTranscript=true # opt in to transcript echo
```

## Disabling STT

Omit the `[stt]` section entirely, or set:
Expand Down
80 changes: 55 additions & 25 deletions src/acp/connection.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::acp::protocol::{ConfigOption, JsonRpcMessage, JsonRpcRequest, JsonRpcResponse, parse_config_options};
use crate::acp::protocol::{
parse_config_options, ConfigOption, JsonRpcMessage, JsonRpcRequest, JsonRpcResponse,
};
use anyhow::{anyhow, Result};
use serde_json::{json, Value};
use std::collections::HashMap;
Expand All @@ -10,7 +12,6 @@ use tokio::sync::{mpsc, oneshot, Mutex};
use tokio::task::JoinHandle;
use tracing::{debug, error, info};


/// Pick the most permissive selectable permission option from ACP options.
fn pick_best_option(options: &[Value]) -> Option<String> {
let mut fallback: Option<&Value> = None;
Expand Down Expand Up @@ -187,20 +188,39 @@ impl AcpConnection {
// Preserve the real HOME so agents can find OAuth/auth files (~/.codex,
// ~/.claude, ~/.config/gh, etc.). working_dir is already set via
// current_dir() above and is not necessarily the user's home directory.
cmd.env("HOME", std::env::var("HOME").unwrap_or_else(|_| working_dir.into()));
cmd.env("PATH", std::env::var("PATH").unwrap_or_else(|_| "/usr/local/bin:/usr/bin:/bin".into()));
cmd.env(
"HOME",
std::env::var("HOME").unwrap_or_else(|_| working_dir.into()),
);
cmd.env(
"PATH",
std::env::var("PATH").unwrap_or_else(|_| "/usr/local/bin:/usr/bin:/bin".into()),
);
#[cfg(unix)]
{
cmd.env("USER", std::env::var("USER").unwrap_or_else(|_| "agent".into()));
cmd.env(
"USER",
std::env::var("USER").unwrap_or_else(|_| "agent".into()),
);
}
#[cfg(windows)]
{
// Windows requires SystemRoot for DLL loading and basic OS functionality.
// USERPROFILE is the Windows equivalent of HOME.
cmd.env("USERPROFILE", std::env::var("USERPROFILE").unwrap_or_else(|_| working_dir.into()));
cmd.env("USERNAME", std::env::var("USERNAME").unwrap_or_else(|_| "agent".into()));
if let Ok(v) = std::env::var("SystemRoot") { cmd.env("SystemRoot", v); }
if let Ok(v) = std::env::var("SystemDrive") { cmd.env("SystemDrive", v); }
cmd.env(
"USERPROFILE",
std::env::var("USERPROFILE").unwrap_or_else(|_| working_dir.into()),
);
cmd.env(
"USERNAME",
std::env::var("USERNAME").unwrap_or_else(|_| "agent".into()),
);
if let Ok(v) = std::env::var("SystemRoot") {
cmd.env("SystemRoot", v);
}
if let Ok(v) = std::env::var("SystemDrive") {
cmd.env("SystemDrive", v);
}
}
for (k, v) in env {
cmd.env(k, expand_env(v));
Expand All @@ -223,8 +243,7 @@ impl AcpConnection {
let mut proc = cmd
.spawn()
.map_err(|e| anyhow!("failed to spawn {command}: {e}"))?;
let child_pgid = proc.id()
.and_then(|pid| i32::try_from(pid).ok());
let child_pgid = proc.id().and_then(|pid| i32::try_from(pid).ok());

let stdout = proc.stdout.take().ok_or_else(|| anyhow!("no stdout"))?;
let stdin = proc.stdin.take().ok_or_else(|| anyhow!("no stdin"))?;
Expand Down Expand Up @@ -403,19 +422,22 @@ impl AcpConnection {
.and_then(|c| c.get("loadSession"))
.and_then(|v| v.as_bool())
.unwrap_or(false);
info!(agent = agent_name, load_session = self.supports_load_session, "initialized");
info!(
agent = agent_name,
load_session = self.supports_load_session,
"initialized"
);
Ok(())
}

pub async fn session_new(&mut self, cwd: &str) -> Result<String> {
let resp = self
.send_request(
"session/new",
Some(json!({"cwd": cwd, "mcpServers": []})),
)
.send_request("session/new", Some(json!({"cwd": cwd, "mcpServers": []})))
.await?;

let session_id = resp.result.as_ref()
let session_id = resp
.result
.as_ref()
.and_then(|r| r.get("sessionId"))
.and_then(|s| s.as_str())
.ok_or_else(|| anyhow!("no sessionId in session/new response"))?
Expand All @@ -434,7 +456,11 @@ impl AcpConnection {

/// Set a config option (e.g. model, mode) via ACP session/set_config_option.
/// Returns the updated list of all config options.
pub async fn set_config_option(&mut self, config_id: &str, value: &str) -> Result<Vec<ConfigOption>> {
pub async fn set_config_option(
&mut self,
config_id: &str,
value: &str,
) -> Result<Vec<ConfigOption>> {
let session_id = self
.acp_session_id
.as_ref()
Expand Down Expand Up @@ -462,7 +488,10 @@ impl AcpConnection {
Err(_) => {
// Fall back: send as a slash command (e.g. "/model claude-sonnet-4")
let cmd = format!("/{config_id} {value}");
info!(cmd, "set_config_option not supported, falling back to prompt");
info!(
cmd,
"set_config_option not supported, falling back to prompt"
);
let _resp = self
.send_request(
"session/prompt",
Expand Down Expand Up @@ -503,10 +532,7 @@ impl AcpConnection {
let id = self.next_id();

// Convert content blocks to JSON
let prompt_json: Vec<Value> = content_blocks
.iter()
.map(|b| b.to_json())
.collect();
let prompt_json: Vec<Value> = content_blocks.iter().map(|b| b.to_json()).collect();

let req = JsonRpcRequest::new(
id,
Expand Down Expand Up @@ -572,11 +598,15 @@ impl AcpConnection {
#[cfg(unix)]
{
// Stage 1: SIGTERM the process group
unsafe { libc::kill(-pgid, libc::SIGTERM); }
unsafe {
libc::kill(-pgid, libc::SIGTERM);
}
// Stage 2: SIGKILL after brief grace (std::thread survives runtime shutdown)
std::thread::spawn(move || {
std::thread::sleep(std::time::Duration::from_millis(1500));
unsafe { libc::kill(-pgid, libc::SIGKILL); }
unsafe {
libc::kill(-pgid, libc::SIGKILL);
}
});
}
#[cfg(not(unix))]
Expand Down
2 changes: 1 addition & 1 deletion src/acp/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@ pub mod connection;
pub mod pool;
pub mod protocol;

pub use connection::ContentBlock;
pub use pool::SessionPool;
pub use protocol::{classify_notification, AcpEvent};
pub use connection::ContentBlock;
42 changes: 26 additions & 16 deletions src/acp/pool.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,7 @@ pub struct SessionPool {
mapping_path: PathBuf,
}

type EvictionCandidate = (
String,
Arc<Mutex<AcpConnection>>,
Instant,
Option<String>,
);
type EvictionCandidate = (String, Arc<Mutex<AcpConnection>>, Instant, Option<String>);

fn remove_if_same_handle<T>(
map: &mut HashMap<String, Arc<Mutex<T>>>,
Expand All @@ -54,10 +49,7 @@ fn remove_if_same_handle<T>(
}
}

fn get_or_insert_gate(
map: &mut HashMap<String, Arc<Mutex<()>>>,
key: &str,
) -> Arc<Mutex<()>> {
fn get_or_insert_gate(map: &mut HashMap<String, Arc<Mutex<()>>>, key: &str) -> Arc<Mutex<()>> {
map.entry(key.to_string())
.or_insert_with(|| Arc::new(Mutex::new(())))
.clone()
Expand Down Expand Up @@ -104,7 +96,9 @@ impl SessionPool {
}
};
let tmp = self.mapping_path.with_extension("json.tmp");
if let Err(e) = std::fs::write(&tmp, &data).and_then(|_| std::fs::rename(&tmp, &self.mapping_path)) {
if let Err(e) =
std::fs::write(&tmp, &data).and_then(|_| std::fs::rename(&tmp, &self.mapping_path))
{
warn!(path = %self.mapping_path.display(), error = %e, "failed to persist thread mapping");
}
}
Expand Down Expand Up @@ -157,7 +151,12 @@ impl SessionPool {
skipped_locked_candidates += 1;
continue;
};
let candidate = (key, conn_handle, conn.last_active, conn.acp_session_id.clone());
let candidate = (
key,
conn_handle,
conn.last_active,
conn.acp_session_id.clone(),
);
match &eviction_candidate {
Some((_, _, oldest_last_active, _)) if candidate.2 >= *oldest_last_active => {}
_ => eviction_candidate = Some(candidate),
Expand Down Expand Up @@ -250,7 +249,9 @@ impl SessionPool {
state.active.insert(thread_id.to_string(), new_conn);
self.save_mapping(&state.suspended);
if !cancel_session_id.is_empty() {
state.cancel_handles.insert(thread_id.to_string(), (cancel_handle, cancel_session_id));
state
.cancel_handles
.insert(thread_id.to_string(), (cancel_handle, cancel_session_id));
}
Ok(())
}
Expand All @@ -260,7 +261,9 @@ impl SessionPool {
where
F: for<'a> FnOnce(
&'a mut AcpConnection,
) -> std::pin::Pin<Box<dyn std::future::Future<Output = Result<R>> + Send + 'a>>,
) -> std::pin::Pin<
Box<dyn std::future::Future<Output = Result<R>> + Send + 'a>,
>,
{
let conn = {
let state = self.state.read().await;
Expand Down Expand Up @@ -311,7 +314,10 @@ impl SessionPool {
pub async fn cancel_session(&self, thread_id: &str) -> Result<()> {
let (stdin, session_id) = {
let state = self.state.read().await;
state.cancel_handles.get(thread_id).cloned()
state
.cancel_handles
.get(thread_id)
.cloned()
.ok_or_else(|| anyhow!("no session for thread {thread_id}"))?
};
let data = serde_json::to_string(&serde_json::json!({
Expand Down Expand Up @@ -414,7 +420,11 @@ impl SessionPool {
// awaiting a connection lock).
let snapshot: Vec<(String, Arc<Mutex<AcpConnection>>)> = {
let state = self.state.read().await;
state.active.iter().map(|(k, v)| (k.clone(), Arc::clone(v))).collect()
state
.active
.iter()
.map(|(k, v)| (k.clone(), Arc::clone(v)))
.collect()
};

let mut session_ids: Vec<(String, String)> = Vec::new();
Expand Down
Loading
Loading