Edison-Watch · Miyamura80 · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026 · Mar 23, 2026
diff --git a/IDEA.md b/IDEA.md
diff --git a/TODO.md b/TODO.md
@@ -0,0 +1,45 @@
+# TODO
+
+## Security Hardening
+
+Items identified during a security audit (2026-03-23). Prioritized by impact.
+
+### High Priority
+
+- ~~**Eliminate SYS_ADMIN for AppImages**~~: Done — `--appimage-extract-and-run` was already in use for all AppImages, so SYS_ADMIN + `/dev/fuse` have been removed entirely from container creation.
+
+- **`needs_fuse` config escape hatch**: Add a `needs_fuse: true` config/task field so custom Docker images that internally depend on FUSE can opt in to `CAP_SYS_ADMIN` + `/dev/fuse`. No app types currently receive these privileges.
+
+- **Container network egress restrictions**: Add an option (e.g., `--no-network` or a config field) to disable outbound network access inside containers. Especially important for CI use cases where LLM-generated code runs inside the container with full internet access.
+
+- **Path traversal in evaluators**: `expected_path` in `file_compare` and `script_path` in `script_replay` are read directly from task JSON without validation. Canonicalize paths and verify they stay within the project/working directory before reading.
+
+- **Path traversal in tar extraction**: `docker/transfer.rs` `copy_from()` strips the first tar component but doesn't reject `..` sequences or absolute paths in remaining components. A compromised container could write files outside the intended destination.
+
+- **Sanitize API error responses**: `provider/http_base.rs` and `provider/anthropic.rs` include raw error bodies in error messages. These could leak sensitive info into logs or artifacts. Truncate and sanitize before logging.
+
+- **SSRF via custom base URL**: The `api_base_url` config field accepts any URL with no validation. Add protocol enforcement (require HTTPS unless explicitly overridden) and optionally block private/link-local IP ranges.
+
+### Medium Priority
+
+- **Container resource limits**: Add default memory/CPU/PID limits to container creation. A runaway process (or malicious LLM-generated code) can currently consume all host resources.
+
+- **Prompt injection awareness**: Raw accessibility tree text, bash output, and error feedback are interpolated directly into LLM prompts (`agent/context.rs`). A malicious application could embed prompt injection payloads in its UI text or command output. Consider structured delimiters or content-length limits.
+
+- **Evaluator temp file races**: `evaluator/file_compare.rs` uses fixed filenames (`eval_actual_file`) in the artifacts directory. Use unique names (e.g., with UUIDs) to prevent corruption during concurrent suite runs.
+
+- **ReDoS in evaluator regex**: Regex patterns from task JSON (`MatchMode::Regex`) are compiled without complexity limits. Consider adding a timeout or using `regex` crate's built-in linear-time guarantees (already the case — verify this is sufficient).
+
+- **API key hygiene**: The `api_key` config field is a plain string in JSON files that could be accidentally committed. Consider documenting env-var-only usage as the recommended approach and adding a warning when `api_key` is found in a config file.
+
+### Low Priority / Future Consideration
+
+- **Process-level sandbox for execute-action.py**: The current `exec()` + restricted builtins approach has documented bypass vectors (attribute traversal, `__globals__`, module-level access). RestrictedPython or a seccomp-based sandbox would be stronger, though the container boundary remains the primary isolation.
+
+- **VNC authentication option**: Add an optional `vnc_password` config field for users who need LAN-accessible VNC. Default is now localhost-only, but password auth would be useful for remote debugging workflows.
+
+- **Monitor bind address override**: Add a `monitor_bind_addr` config field (defaulting to `127.0.0.1`) to match the VNC pattern. Users running desktest on remote machines currently have no way to expose the monitor dashboard to their workstation.
+
+- **Image digest pinning**: Allow task JSON or config to specify image digests (`image@sha256:...`) for custom Docker images, enabling reproducible and tamper-resistant builds in CI.
+
+- **TLS certificate pinning**: The HTTP client uses default TLS settings. For known API endpoints (api.openai.com, api.anthropic.com), certificate pinning would protect against MITM.
diff --git a/src/config.rs b/src/config.rs
@@ -21,7 +21,7 @@ fn default_height() -> u32 {
 }
 
 fn default_vnc_addr() -> String {
-    "0.0.0.0".into()
+    "127.0.0.1".into()
 }
 
 fn default_timeout() -> u64 {
@@ -199,6 +199,13 @@ impl Config {
             }
         }
 
+        if self.vnc_bind_addr.parse::<std::net::IpAddr>().is_err() {
+            return Err(AppError::Config(format!(
+                "vnc_bind_addr is not a valid IP address: {:?}",
+                self.vnc_bind_addr
+            )));
+        }
+
         if let Some(port) = self.vnc_port {
             if port == 0 {
                 return Err(AppError::Config("vnc_port must be > 0".into()));
@@ -215,6 +222,15 @@ impl Config {
     }
 }
 
+/// Format a host:port string, wrapping IPv6 addresses in brackets.
+pub fn format_host_port(addr: &str, port: u16) -> String {
+    if addr.contains(':') {
+        format!("[{addr}]:{port}")
+    } else {
+        format!("{addr}:{port}")
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -284,7 +300,7 @@ mod tests {
         assert_eq!(config.model, "claude-sonnet-4-5-20250929");
         assert_eq!(config.display_width, 1920);
         assert_eq!(config.display_height, 1080);
-        assert_eq!(config.vnc_bind_addr, "0.0.0.0");
+        assert_eq!(config.vnc_bind_addr, "127.0.0.1");
         assert!(config.vnc_port.is_none());
         assert_eq!(config.startup_timeout_seconds, 30);
     }
@@ -393,6 +409,62 @@ mod tests {
         assert!(err.to_string().contains("vnc_port must be > 0"));
     }
 
+    #[test]
+    fn test_vnc_bind_addr_invalid() {
+        let json = r#"{
+            "api_key": "sk-test",
+            "app_type": "docker_image",
+            "vnc_bind_addr": "not-an-ip"
+        }"#;
+        let err = Config::parse_and_validate(json).unwrap_err();
+        assert!(err.to_string().contains("vnc_bind_addr is not a valid IP address"));
+    }
+
+    #[test]
+    fn test_vnc_bind_addr_empty() {
+        let json = r#"{
+            "api_key": "sk-test",
+            "app_type": "docker_image",
+            "vnc_bind_addr": ""
+        }"#;
+        let err = Config::parse_and_validate(json).unwrap_err();
+        assert!(err.to_string().contains("vnc_bind_addr is not a valid IP address"));
+    }
+
+    #[test]
+    fn test_vnc_bind_addr_valid_ipv4() {
+        let json = r#"{
+            "api_key": "sk-test",
+            "app_type": "docker_image",
+            "vnc_bind_addr": "0.0.0.0"
+        }"#;
+        let config = Config::parse_and_validate(json).unwrap();
+        assert_eq!(config.vnc_bind_addr, "0.0.0.0");
+    }
+
+    #[test]
+    fn test_vnc_bind_addr_valid_ipv6() {
+        let json = r#"{
+            "api_key": "sk-test",
+            "app_type": "docker_image",
+            "vnc_bind_addr": "::1"
+        }"#;
+        let config = Config::parse_and_validate(json).unwrap();
+        assert_eq!(config.vnc_bind_addr, "::1");
+    }
+
+    #[test]
+    fn test_format_host_port_ipv4() {
+        assert_eq!(format_host_port("127.0.0.1", 5900), "127.0.0.1:5900");
+        assert_eq!(format_host_port("0.0.0.0", 8080), "0.0.0.0:8080");
+    }
+
+    #[test]
+    fn test_format_host_port_ipv6() {
+        assert_eq!(format_host_port("::1", 5900), "[::1]:5900");
+        assert_eq!(format_host_port("::0", 7860), "[::0]:7860");
+    }
+
     #[test]
     fn test_valid_docker_image_config() {
         let json = r#"{