Merged
107 changes: 107 additions & 0 deletions examples/openai_quickstart.py
@@ -0,0 +1,107 @@
# Adapted from: https://github.com/openai/openai-cua-sample-app
import os

import requests

from valk import Computer


def create_response(**kwargs):
    url = "https://api.openai.com/v1/responses"
    headers = {
        "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
        "Content-Type": "application/json",
    }

    response = requests.post(url, headers=headers, json=kwargs)

    if response.status_code != 200:
        print(f"Error: {response.status_code} {response.text}")

    return response.json()


def acknowledge_safety_check_callback(message: str) -> bool:
    response = input(
        f"Safety Check Warning: {message}\nDo you want to acknowledge and proceed? (y/n): "
    ).lower()
    return response.strip() == "y"


def handle_item(item, computer: Computer):
    """Handle each item; may cause a computer action + screenshot."""
    if item["type"] == "message":  # print messages
        print(item["content"][0]["text"])

    if item["type"] == "computer_call":  # perform computer actions
        action = item["action"]
        action_type = action["type"]
        action_args = {k: v for k, v in action.items() if k != "type"}
        print(f"{action_type}({action_args})")

        # give our computer environment action to perform
        getattr(computer, action_type)(**action_args)

        screenshot_base64 = computer.screenshot()

        pending_checks = item.get("pending_safety_checks", [])
        for check in pending_checks:
            if not acknowledge_safety_check_callback(check["message"]):
                raise ValueError(f"Safety check failed: {check['message']}")

        # return value informs model of the latest screenshot
        call_output = {
            "type": "computer_call_output",
            "call_id": item["call_id"],
            "acknowledged_safety_checks": pending_checks,
            "output": {
                "type": "input_image",
                "image_url": f"data:image/png;base64,{screenshot_base64}",
            },
        }

        return [call_output]

    return []


def main():
    """Run the CUA (Computer Use Assistant) loop, using Valk."""
    with Computer() as computer:
        tools = [
            {
                "type": "computer_use_preview",
                "display_width": computer.dimensions[0],
                "display_height": computer.dimensions[1],
                "environment": computer.environment,
            }
        ]

        items = []
        while True:  # get user input forever
            user_input = input("> ")
            items.append({"role": "user", "content": user_input})

            while True:  # keep looping until we get a final response
                response = create_response(
                    model="computer-use-preview",
                    input=items,
                    tools=tools,
                    truncation="auto",
                )

                if "output" not in response:
                    print(response)
                    raise ValueError("No output from model")

                items += response["output"]

                for item in response["output"]:
                    items += handle_item(item, computer)

                if items[-1].get("role") == "assistant":
                    break


if __name__ == "__main__":
    main()
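
For anyone trying the example end to end, here is a minimal non-interactive sketch of the same loop (not part of this PR) that drives a single instruction to completion. It assumes the file above is importable as openai_quickstart and that OPENAI_API_KEY is exported; Computer, dimensions, and environment are used exactly as the example uses them.

# Sketch only (not part of this PR): a non-interactive variant of the loop above.
# Assumes examples/openai_quickstart.py is importable as openai_quickstart and that
# OPENAI_API_KEY is set in the environment.
from openai_quickstart import create_response, handle_item

from valk import Computer


def run_once(instruction: str) -> None:
    with Computer() as computer:
        tools = [
            {
                "type": "computer_use_preview",
                "display_width": computer.dimensions[0],
                "display_height": computer.dimensions[1],
                "environment": computer.environment,
            }
        ]

        items = [{"role": "user", "content": instruction}]
        while True:  # loop until the model returns a final assistant message
            response = create_response(
                model="computer-use-preview",
                input=items,
                tools=tools,
                truncation="auto",
            )
            if "output" not in response:
                raise ValueError(f"No output from model: {response}")
            items += response["output"]
            for item in response["output"]:
                items += handle_item(item, computer)
            if items[-1].get("role") == "assistant":
                break


if __name__ == "__main__":
    run_once("Open a text editor and type hello")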
1 change: 1 addition & 0 deletions pyproject.toml
@@ -42,6 +42,7 @@ dev = [
]
examples = [
    "anthropic>=0.45.2",
    "requests>=2.32.3",
]
tests = [
    "docker>=7.1.0",
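
Since requests is added under the examples extra rather than the core dependencies, it would presumably be installed via that extra (for instance pip install -e ".[examples]" or uv sync --extra examples, assuming the project's usual setup). A trivial check after installing:

# Sketch only: confirm the examples extra pulled in the new dependency.
import requests

print(requests.__version__)  # expected to satisfy the >=2.32.3 pin above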
7 changes: 6 additions & 1 deletion uv.lock

Some generated files are not rendered by default.

38 changes: 3 additions & 35 deletions valk-server/src/main.rs
@@ -5,7 +5,6 @@ use axum::{
    routing::{get, post},
    Json, Router,
};
use serde::Serialize;
use std::time::Duration;

use std::sync::Arc;
@@ -18,49 +17,18 @@ mod action_types;
mod config;
mod key_press;
mod monitor;
mod system_info;

use action_queue::{create_action_queue, SharedQueue};
use action_types::{ActionError, ActionRequest, ActionResponse, ActionResponseStatus};
use config::Config;
use monitor::monitor_websocket;
use system_info::system_info;

async fn root() -> &'static str {
    "Valk is running"
}

#[derive(Debug, Serialize)]
struct ComputerInfo {
    os_type: String,
    os_version: String,
    display_width: u32,
    display_height: u32,
}

/// Get information about the computer system
async fn system_info() -> Result<Json<ComputerInfo>, (StatusCode, String)> {
    let monitor = xcap::Monitor::all()
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Failed to get display info: {}", e),
            )
        })?
        .first()
        .cloned()
        .ok_or((
            StatusCode::INTERNAL_SERVER_ERROR,
            "No monitor found".to_string(),
        ))?;

    let os_info = os_info::get();

    Ok(Json(ComputerInfo {
        os_type: os_info.os_type().to_string(),
        os_version: os_info.version().to_string(),
        display_width: monitor.width(),
        display_height: monitor.height(),
    }))
}

/// A single RCP style action request.
async fn action(
    extract::State(state): extract::State<Arc<AppState>>,
37 changes: 37 additions & 0 deletions valk-server/src/system_info.rs
@@ -0,0 +1,37 @@
use axum::http::StatusCode;
use axum::Json;
use serde::Serialize;

#[derive(Debug, Serialize)]
pub struct ComputerInfo {
    os_type: String,
    os_version: String,
    display_width: u32,
    display_height: u32,
}

/// Get information about the computer system
pub async fn system_info() -> Result<Json<ComputerInfo>, (StatusCode, String)> {
    let monitor = xcap::Monitor::all()
        .map_err(|e| {
            (
                StatusCode::INTERNAL_SERVER_ERROR,
                format!("Failed to get display info: {}", e),
            )
        })?
        .first()
        .cloned()
        .ok_or((
            StatusCode::INTERNAL_SERVER_ERROR,
            "No monitor found".to_string(),
        ))?;

    let os_info = os_info::get();

    Ok(Json(ComputerInfo {
        os_type: os_info.os_type().to_string(),
        os_version: os_info.version().to_string(),
        display_width: monitor.width(),
        display_height: monitor.height(),
    }))
}
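
For context on the extracted module, here is a sketch (not part of this PR) of what a client of this handler would see: ComputerInfo serializes to JSON with the four fields below. The /system_info path and the port are assumptions for illustration only, since the Router wiring is outside the hunks shown above.

# Sketch only -- the route path and port are assumed, not taken from this diff.
import requests

VALK_BASE_URL = "http://localhost:8080"  # hypothetical address of a running valk-server

resp = requests.get(f"{VALK_BASE_URL}/system_info", timeout=5)
resp.raise_for_status()
info = resp.json()  # JSON produced from the ComputerInfo struct above

# Fields mirror ComputerInfo: os_type, os_version, display_width, display_height
print(
    f"{info['os_type']} {info['os_version']}: "
    f"{info['display_width']}x{info['display_height']}"
)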