Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
284 changes: 266 additions & 18 deletions crates/openshell-core/src/gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,164 @@

//! Shared GPU request helpers.

use std::fmt;
use std::sync::atomic::{AtomicUsize, Ordering};

use crate::config::CDI_GPU_DEVICE_ALL;

const CDI_NVIDIA_GPU_PREFIX: &str = "nvidia.com/gpu=";
const CDI_NVIDIA_GPU_ALL_SUFFIX: &str = "all";

/// Sorted, de-duplicated list of NVIDIA CDI GPU device IDs reported by a
/// container runtime.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct CdiGpuInventory {
    /// Trimmed IDs that start with `CDI_NVIDIA_GPU_PREFIX`, in ascending
    /// string order with duplicates removed.
    device_ids: Vec<String>,
}

impl CdiGpuInventory {
    /// Build an inventory from the CDI device IDs a runtime reported.
    ///
    /// Surrounding whitespace is trimmed from every ID. IDs that do not start
    /// with the NVIDIA GPU prefix are dropped; the remaining IDs are sorted
    /// and de-duplicated. Indexed IDs and UUID-style IDs are both kept here;
    /// they are only separated into naming families when a default device is
    /// chosen, because the two families may name the same devices.
    #[must_use]
    pub fn new(device_ids: impl IntoIterator<Item = impl AsRef<str>>) -> Self {
        let mut normalized = Vec::new();
        for reported in device_ids {
            let candidate = reported.as_ref().trim();
            if candidate.starts_with(CDI_NVIDIA_GPU_PREFIX) {
                normalized.push(candidate.to_owned());
            }
        }
        normalized.sort_unstable();
        normalized.dedup();
        Self {
            device_ids: normalized,
        }
    }

    /// Borrow the normalized device IDs in sorted order.
    #[must_use]
    pub fn as_slice(&self) -> &[String] {
        self.device_ids.as_slice()
    }

    /// Report whether no NVIDIA GPU device IDs were retained.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.as_slice().is_empty()
    }

    /// Pick the single naming family used for default GPU selection.
    ///
    /// Preference order:
    /// 1. IDs whose suffix parses as an unsigned integer, ordered by that
    ///    number (ties broken by the ID string).
    /// 2. Every other ID except the `all` alias, ordered as strings.
    /// 3. The all-GPU ID on its own, when the inventory contains it.
    ///
    /// Returns [`CdiGpuSelectionError::NoAvailableDevices`] when none of the
    /// above yields a device.
    fn default_device_family(&self) -> Result<Vec<String>, CdiGpuSelectionError> {
        // One pass splits the inventory into the two candidate families.
        let mut indexed: Vec<(u64, &String)> = Vec::new();
        let mut named: Vec<&String> = Vec::new();
        for id in &self.device_ids {
            if let Some(suffix) = cdi_nvidia_gpu_suffix(id) {
                match suffix.parse::<u64>() {
                    Ok(index) => indexed.push((index, id)),
                    Err(_) if suffix != CDI_NVIDIA_GPU_ALL_SUFFIX => named.push(id),
                    Err(_) => {}
                }
            }
        }

        if !indexed.is_empty() {
            // Tuple ordering compares the numeric index first, then the ID.
            indexed.sort_unstable();
            return Ok(indexed.into_iter().map(|(_, id)| id.clone()).collect());
        }

        if !named.is_empty() {
            named.sort_unstable();
            return Ok(named.into_iter().cloned().collect());
        }

        let has_all_alias = self.device_ids.iter().any(|id| id == CDI_GPU_DEVICE_ALL);
        if has_all_alias {
            Ok(vec![CDI_GPU_DEVICE_ALL.to_string()])
        } else {
            Err(CdiGpuSelectionError::NoAvailableDevices)
        }
    }
}

/// Round-robin cursor for default CDI GPU selection.
///
/// The cursor is an atomic counter, so a single selector can be advanced
/// through a shared reference.
#[derive(Debug, Default)]
pub struct CdiGpuRoundRobin {
    /// Number of selections handed out so far; reduced modulo the size of the
    /// default device family each time it is used.
    next: AtomicUsize,
}

impl CdiGpuRoundRobin {
    /// Create a selector whose cursor starts at zero.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            next: AtomicUsize::new(0),
        }
    }

    /// Return the default device ID at the cursor and advance the cursor.
    ///
    /// # Errors
    ///
    /// Propagates the error from the inventory when it has no default device
    /// family. The cursor is not advanced in that case.
    pub fn next_default_device_id(
        &self,
        inventory: &CdiGpuInventory,
    ) -> Result<String, CdiGpuSelectionError> {
        // Resolve the family first so a failed lookup does not consume a slot.
        let family = inventory.default_device_family()?;
        let ticket = self.next.fetch_add(1, Ordering::Relaxed);
        Ok(Self::device_at(family, ticket))
    }

    /// Return the default device ID at the cursor without advancing it.
    ///
    /// # Errors
    ///
    /// Propagates the error from the inventory when it has no default device
    /// family.
    pub fn peek_default_device_id(
        &self,
        inventory: &CdiGpuInventory,
    ) -> Result<String, CdiGpuSelectionError> {
        inventory
            .default_device_family()
            .map(|family| Self::device_at(family, self.next.load(Ordering::Relaxed)))
    }

    /// Take the device that `cursor` maps to, wrapping around the family size.
    fn device_at(mut family: Vec<String>, cursor: usize) -> String {
        let slot = cursor % family.len();
        family.swap_remove(slot)
    }
}

/// Reasons a CDI GPU device could not be selected for a request.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CdiGpuSelectionError {
    /// The inventory yielded no NVIDIA CDI GPU device to choose from.
    NoAvailableDevices,
    /// A GPU was requested without an explicit device ID and no default
    /// device had been selected for it.
    MissingDefaultDevice,
}

impl fmt::Display for CdiGpuSelectionError {
    /// Write the human-readable message for this error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::NoAvailableDevices => "no NVIDIA CDI GPU devices were discovered",
            Self::MissingDefaultDevice => {
                "GPU request requires a selected default CDI GPU device"
            }
        };
        f.write_str(message)
    }
}

impl std::error::Error for CdiGpuSelectionError {}

/// Resolve the existing GPU request fields into CDI device identifiers.
///
/// `None` means no GPU was requested. A GPU request with no explicit device
/// ID uses the CDI all-GPU request; otherwise the driver-native ID passes
/// through unchanged.
#[must_use]
pub fn cdi_gpu_device_ids(gpu: bool, gpu_device: &str) -> Option<Vec<String>> {
gpu.then(|| {
if gpu_device.is_empty() {
vec![CDI_GPU_DEVICE_ALL.to_string()]
} else {
vec![gpu_device.to_string()]
}
})
/// Resolve a GPU request into the CDI device IDs to hand to the driver.
///
/// - `gpu` is `false`: no GPU was requested, so the result is `Ok(None)`.
/// - `gpu_device` is non-empty: that ID is returned unchanged.
/// - otherwise: the ID in `selected_default_device` is returned.
///
/// # Errors
///
/// Returns [`CdiGpuSelectionError::MissingDefaultDevice`] when a GPU is
/// requested, `gpu_device` is empty, and `selected_default_device` is `None`.
pub fn cdi_gpu_device_ids(
    gpu: bool,
    gpu_device: &str,
    selected_default_device: Option<&str>,
) -> Result<Option<Vec<String>>, CdiGpuSelectionError> {
    let chosen = match (gpu, gpu_device.is_empty()) {
        (false, _) => return Ok(None),
        // An explicit device always wins over the selected default.
        (true, false) => gpu_device,
        (true, true) => {
            selected_default_device.ok_or(CdiGpuSelectionError::MissingDefaultDevice)?
        }
    };
    Ok(Some(vec![chosen.to_owned()]))
}

/// Return the part of `id` that follows the NVIDIA GPU CDI prefix, or `None`
/// when `id` does not start with that prefix.
fn cdi_nvidia_gpu_suffix(id: &str) -> Option<&str> {
    str::strip_prefix(id, CDI_NVIDIA_GPU_PREFIX)
}

#[cfg(test)]
Expand All @@ -27,22 +169,128 @@ mod tests {

#[test]
fn cdi_gpu_device_ids_returns_none_when_absent() {
assert_eq!(cdi_gpu_device_ids(false, ""), None);
assert_eq!(cdi_gpu_device_ids(false, "", None), Ok(None));
}

#[test]
fn cdi_gpu_device_ids_uses_selected_default_device() {
    // A GPU request with no explicit device resolves to the supplied default.
    let resolved = cdi_gpu_device_ids(true, "", Some("nvidia.com/gpu=0"));
    let expected = Ok(Some(vec!["nvidia.com/gpu=0".to_string()]));

    assert_eq!(resolved, expected);
}

#[test]
fn cdi_gpu_device_ids_defaults_empty_request_to_all_gpus() {
fn cdi_gpu_device_ids_rejects_missing_default_device() {
assert_eq!(
cdi_gpu_device_ids(true, ""),
Some(vec![CDI_GPU_DEVICE_ALL.to_string()])
cdi_gpu_device_ids(true, "", None),
Err(CdiGpuSelectionError::MissingDefaultDevice)
);
}

#[test]
fn cdi_gpu_device_ids_passes_explicit_device_id_through() {
assert_eq!(
cdi_gpu_device_ids(true, "nvidia.com/gpu=0"),
Some(vec!["nvidia.com/gpu=0".to_string()])
cdi_gpu_device_ids(true, "nvidia.com/gpu=0", None),
Ok(Some(vec!["nvidia.com/gpu=0".to_string()]))
);
}

#[test]
fn inventory_filters_and_deduplicates_nvidia_gpu_ids() {
    // The input mixes a duplicate, a foreign vendor ID, and a padded ID.
    let reported = [
        "nvidia.com/gpu=1",
        "vendor.example/device=0",
        "nvidia.com/gpu=1",
        " nvidia.com/gpu=0 ",
    ];
    let inventory = CdiGpuInventory::new(reported);

    let expected = [
        "nvidia.com/gpu=0".to_string(),
        "nvidia.com/gpu=1".to_string(),
    ];
    assert_eq!(inventory.as_slice(), expected.as_slice());
}

#[test]
fn round_robin_prefers_indexed_family_and_sorts_numerically() {
    let inventory = CdiGpuInventory::new([
        "nvidia.com/gpu=10",
        "nvidia.com/gpu=UUID-b",
        "nvidia.com/gpu=2",
        "nvidia.com/gpu=all",
    ]);
    let selector = CdiGpuRoundRobin::new();

    // Only the indexed IDs rotate: 2 comes before 10, then the cursor wraps.
    for expected in ["nvidia.com/gpu=2", "nvidia.com/gpu=10", "nvidia.com/gpu=2"] {
        assert_eq!(
            selector.next_default_device_id(&inventory),
            Ok(expected.to_string())
        );
    }
}

#[test]
fn round_robin_uses_uuid_family_when_no_indexed_ids_exist() {
    let reported = ["nvidia.com/gpu=UUID-b", "nvidia.com/gpu=UUID-a"];
    let inventory = CdiGpuInventory::new(reported);
    let selector = CdiGpuRoundRobin::new();

    // With no indexed IDs, the first pick is the smallest UUID-style ID.
    let first = selector.next_default_device_id(&inventory);
    assert_eq!(first, Ok("nvidia.com/gpu=UUID-a".to_string()));
}

#[test]
fn round_robin_uses_all_only_inventory() {
    // An inventory holding only the all-GPU ID selects that ID.
    let inventory = CdiGpuInventory::new([CDI_GPU_DEVICE_ALL]);
    let selector = CdiGpuRoundRobin::new();

    let picked = selector.next_default_device_id(&inventory);
    assert_eq!(picked, Ok(CDI_GPU_DEVICE_ALL.to_string()));
}

#[test]
fn round_robin_rejects_empty_inventory() {
    // A non-NVIDIA ID is filtered out, leaving nothing to select from.
    let inventory = CdiGpuInventory::new(["vendor.example/device=0"]);
    let selector = CdiGpuRoundRobin::new();

    let outcome = selector.next_default_device_id(&inventory);
    assert_eq!(outcome, Err(CdiGpuSelectionError::NoAvailableDevices));
}

#[test]
fn peek_does_not_advance_round_robin_cursor() {
    let inventory = CdiGpuInventory::new(["nvidia.com/gpu=0", "nvidia.com/gpu=1"]);
    let selector = CdiGpuRoundRobin::new();

    // Peeking twice keeps reporting the first device.
    for _ in 0..2 {
        assert_eq!(
            selector.peek_default_device_id(&inventory),
            Ok("nvidia.com/gpu=0".to_string())
        );
    }

    // Advancing starts at the peeked device and then moves to the next one.
    for expected in ["nvidia.com/gpu=0", "nvidia.com/gpu=1"] {
        assert_eq!(
            selector.next_default_device_id(&inventory),
            Ok(expected.to_string())
        );
    }
}
}
9 changes: 8 additions & 1 deletion crates/openshell-driver-docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,17 @@ contract:
| `apparmor=unconfined` | Avoids Docker's default profile blocking required mount operations. |
| `restart_policy = unless-stopped` | Keeps managed sandboxes resumable across daemon or gateway restarts. |
| `PidsLimit` | Enforces the sandbox PID budget at the Docker cgroup layer. Set `[openshell.drivers.docker].sandbox_pids_limit = 0` to inherit the Docker/runtime default. |
| CDI GPU request | Uses the sandbox `gpu_device` value when set; otherwise requests all NVIDIA GPUs when the sandbox spec asks for GPU support and daemon CDI support is detected. |
| CDI GPU request | Uses the sandbox `gpu_device` value when set; otherwise selects one NVIDIA CDI GPU from daemon `DiscoveredDevices` inventory. |

The agent child process does not retain these supervisor privileges.

For bare GPU requests (a GPU is requested without a `gpu_device` value), the
driver prefers indexed CDI IDs such as `nvidia.com/gpu=0`. If Docker reports
only UUID-style IDs, the driver selects from that family instead. The two
families are never mixed because they may refer to the same physical devices.
If Docker reports CDI support through `CDISpecDirs` but `DiscoveredDevices` is
missing or empty, bare GPU requests fail with a precondition error; explicit
`gpu_device` values still pass through to Docker.

## Supervisor Binary Resolution

The Docker driver bind-mounts a host-side Linux `openshell-sandbox` binary into
Expand Down
Loading
Loading