Merged
107 changes: 107 additions & 0 deletions examples/openai_quickstart.py
@@ -0,0 +1,107 @@
# Adapted from: https://github.com/openai/openai-cua-sample-app
import os

import requests

from valk import Computer


def create_response(**kwargs):
    url = "https://api.openai.com/v1/responses"
    headers = {
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
        "Content-Type": "application/json",
    }

    response = requests.post(url, headers=headers, json=kwargs)

    if response.status_code != 200:
        print(f"Error: {response.status_code} {response.text}")

    return response.json()


def acknowledge_safety_check_callback(message: str) -> bool:
    response = input(
        f"Safety Check Warning: {message}\nDo you want to acknowledge and proceed? (y/n): "
    ).lower()
    return response.strip() == "y"


def handle_item(item, computer: Computer):
    """Handle each item; may cause a computer action + screenshot."""
    if item["type"] == "message":  # print messages
        print(item["content"][0]["text"])

    if item["type"] == "computer_call":  # perform computer actions
        action = item["action"]
        action_type = action["type"]
        action_args = {k: v for k, v in action.items() if k != "type"}
        print(f"{action_type}({action_args})")

        # give our computer environment action to perform
        getattr(computer, action_type)(**action_args)

        screenshot_base64 = computer.screenshot()

        pending_checks = item.get("pending_safety_checks", [])
        for check in pending_checks:
            if not acknowledge_safety_check_callback(check["message"]):
                raise ValueError(f"Safety check failed: {check['message']}")

        # return value informs model of the latest screenshot
        call_output = {
            "type": "computer_call_output",
            "call_id": item["call_id"],
            "acknowledged_safety_checks": pending_checks,
            "output": {
                "type": "input_image",
                "image_url": f"data:image/png;base64,{screenshot_base64}",
            },
        }

        return [call_output]

    return []


def main():
    """Run the CUA (Computer Use Assistant) loop, using Valk."""
    with Computer() as computer:
        tools = [
            {
                "type": "computer_use_preview",
                "display_width": computer.dimensions[0],
                "display_height": computer.dimensions[1],
                "environment": computer.environment,
            }
        ]

        items = []
        while True:  # get user input forever
            user_input = input("> ")
            items.append({"role": "user", "content": user_input})

            while True:  # keep looping until we get a final response
                response = create_response(
                    model="computer-use-preview",
                    input=items,
                    tools=tools,
                    truncation="auto",
                )

                if "output" not in response:
                    print(response)
                    raise ValueError("No output from model")

                items += response["output"]

                for item in response["output"]:
                    items += handle_item(item, computer)

                if items[-1].get("role") == "assistant":
                    break


if __name__ == "__main__":
    main()
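
For anyone trying the example end to end, here is a minimal non-interactive sketch of the same loop (not part of this PR) that drives a single instruction to completion. It assumes the file above is importable as openai_quickstart and that OPENAI_API_KEY is exported; Computer, dimensions, and environment are used exactly as the example uses them.

# Sketch only (not part of this PR): a non-interactive variant of the loop above.
# Assumes examples/openai_quickstart.py is importable as openai_quickstart and that
# OPENAI_API_KEY is set in the environment.
from openai_quickstart import create_response, handle_item

from valk import Computer


def run_once(instruction: str) -> None:
    with Computer() as computer:
        tools = [
            {
                "type": "computer_use_preview",
                "display_width": computer.dimensions[0],
                "display_height": computer.dimensions[1],
                "environment": computer.environment,
            }
        ]

        items = [{"role": "user", "content": instruction}]
        while True:  # loop until the model returns a final assistant message
            response = create_response(
                model="computer-use-preview",
                input=items,
                tools=tools,
                truncation="auto",
            )
            if "output" not in response:
                raise ValueError(f"No output from model: {response}")
            items += response["output"]
            for item in response["output"]:
                items += handle_item(item, computer)
            if items[-1].get("role") == "assistant":
                break


if __name__ == "__main__":
    run_once("Open a text editor and type hello")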
1 change: 1 addition & 0 deletions pyproject.toml
@@ -42,6 +42,7 @@ dev = [
]
examples = [
    "anthropic>=0.45.2",
    "requests>=2.32.3",
]
tests = [
    "docker>=7.1.0",
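
Since requests is added under the examples extra rather than the core dependencies, it would presumably be installed via that extra (for instance pip install -e ".[examples]" or uv sync --extra examples, assuming the project's usual setup). A trivial check after installing:

# Sketch only: confirm the examples extra pulled in the new dependency.
import requests

print(requests.__version__)  # expected to satisfy the >=2.32.3 pin above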
7 changes: 6 additions & 1 deletion uv.lock

Some generated files are not rendered by default.

38 changes: 3 additions & 35 deletions valk-server/src/main.rs
@@ -5,7 +5,6 @@ use axum::{
    routing::{get, post},
    Json, Router,
};
use serde::Serialize;
use std::time::Duration;

use std::sync::Arc;
@@ -18,49 +17,18 @@ mod action_types;
mod config;
mod key_press;
mod monitor;
mod system_info;

use action_queue::{create_action_queue, SharedQueue};
use action_types::{ActionError, ActionRequest, ActionResponse, ActionResponseStatus};
use config::Config;
use monitor::monitor_websocket;
use system_info::system_info;

async fn root() -> &'static str {
    "Valk is running"
}

#[derive(Debug, Serialize)]
struct ComputerInfo {
    os_type: String,
    os_version: String,
    display_width: u32,
    display_height: u32,
}

/// Get information about the computer system
async fn system_info() -> Result<Json<ComputerInfo>, (StatusCode, String)> {
    let monitor = xcap::Monitor::all()
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Failed to get display info: {}", e),
            )
        })?
        .first()
        .cloned()
        .ok_or((
            StatusCode::INTERNAL_SERVER_ERROR,
            "No monitor found".to_string(),
        ))?;

    let os_info = os_info::get();

    Ok(Json(ComputerInfo {
        os_type: os_info.os_type().to_string(),
        os_version: os_info.version().to_string(),
        display_width: monitor.width(),
        display_height: monitor.height(),
    }))
}

/// A single RCP style action request.
async fn action(
    extract::State(state): extract::State<Arc<AppState>>,
37 changes: 37 additions & 0 deletions valk-server/src/system_info.rs
@@ -0,0 +1,37 @@
use axum::http::StatusCode;
use axum::Json;
use serde::Serialize;

#[derive(Debug, Serialize)]
pub struct ComputerInfo {
    os_type: String,
    os_version: String,
    display_width: u32,
    display_height: u32,
}

/// Get information about the computer system
pub async fn system_info() -> Result<Json<ComputerInfo>, (StatusCode, String)> {
    let monitor = xcap::Monitor::all()
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Failed to get display info: {}", e),
            )
        })?
        .first()
        .cloned()
        .ok_or((
            StatusCode::INTERNAL_SERVER_ERROR,
            "No monitor found".to_string(),
        ))?;

    let os_info = os_info::get();

    Ok(Json(ComputerInfo {
        os_type: os_info.os_type().to_string(),
        os_version: os_info.version().to_string(),
        display_width: monitor.width(),
        display_height: monitor.height(),
    }))
}
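
For context on the extracted module, here is a sketch (not part of this PR) of what a client of this handler would see: ComputerInfo serializes to JSON with the four fields below. The /system_info path and the port are assumptions for illustration only, since the Router wiring is outside the hunks shown above.

# Sketch only -- the route path and port are assumed, not taken from this diff.
import requests

VALK_BASE_URL = "http://localhost:8080"  # hypothetical address of a running valk-server

resp = requests.get(f"{VALK_BASE_URL}/system_info", timeout=5)
resp.raise_for_status()
info = resp.json()  # JSON produced from the ComputerInfo struct above

# Fields mirror ComputerInfo: os_type, os_version, display_width, display_height
print(
    f"{info['os_type']} {info['os_version']}: "
    f"{info['display_width']}x{info['display_height']}"
)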