Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions rs/orchestrator/image_upgrader/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use ic_http_utils::file_downloader::FileDownloadError;
use std::error::Error;
use std::ffi::OsStr;
use std::fmt;
use std::io;
use tokio::process::Command;

pub type UpgradeResult<T> = Result<T, UpgradeError>;

Expand Down Expand Up @@ -31,8 +31,11 @@ impl UpgradeError {
UpgradeError::RebootTimeError(msg.to_string())
}

pub(crate) fn file_command_error(e: io::Error, cmd: &Command) -> Self {
UpgradeError::IoError(format!("Failed to executing command: {cmd:?}"), e)
/// Wraps an I/O failure from running manageboot into an `UpgradeError::IoError`.
///
/// `args` are the arguments that were passed to the command; they are rendered
/// with their `Debug` formatting into the error message so the failing
/// invocation can be identified. `e` is stored alongside that message.
pub(crate) fn manageboot_error(e: io::Error, args: &[&OsStr]) -> Self {
    let context = format!("Failed to execute manageboot command with args {args:?}");
    UpgradeError::IoError(context, e)
}
}

Expand Down
72 changes: 48 additions & 24 deletions rs/orchestrator/image_upgrader/src/image_upgrader.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
use async_trait::async_trait;
use ic_http_utils::file_downloader::FileDownloader;
use ic_logger::{ReplicaLogger, error, info, warn};
use std::ffi::OsStr;
use std::str::FromStr;
use std::{
fmt::Debug,
io::Write,
path::PathBuf,
process::Output,
time::{Duration, SystemTime},
};
use tokio::process::Command;
Expand All @@ -14,6 +16,32 @@ use crate::error::{UpgradeError, UpgradeResult};

pub mod error;

/// Trait for running manageboot.sh commands.
///
/// [`ImageUpgrader`] obtains its runner through
/// [`ImageUpgrader::manageboot_runner`]; [`ManagebootRunnerImpl`] is the
/// implementation that spawns a real child process.
#[async_trait]
pub trait ManagebootRunner: Send + Sync {
/// Run the given manageboot command and return its output.
///
/// `args` is the argument list handed to the command (for example
/// `["guestos", "confirm"]`); it does not include the program itself.
///
/// An `Ok` value carries the collected [`Output`] of the command. The callers
/// in this file that care about the outcome inspect `Output::status`
/// themselves, so `Ok` alone should not be read as "the command succeeded".
async fn run(&self, args: &[&OsStr]) -> std::io::Result<Output>;
}

/// Production implementation of [`ManagebootRunner`] that executes the command
/// as a child process.
///
/// The type derives `Debug` and `Clone` so it can be logged with `{:?}`,
/// embedded in other `Debug`-deriving types, and duplicated cheaply (it only
/// owns a path).
#[derive(Clone, Debug)]
pub struct ManagebootRunnerImpl {
    /// Path of the executable that is spawned for every invocation.
    binary: PathBuf,
}

impl ManagebootRunnerImpl {
    /// Creates a runner that will execute the program located at `binary`.
    ///
    /// The path is stored as given; it is not checked for existence here.
    pub fn new(binary: PathBuf) -> Self {
        Self { binary }
    }
}

#[async_trait]
impl ManagebootRunner for ManagebootRunnerImpl {
    /// Spawns `self.binary` with `args` as a child process, waits for it to
    /// finish and returns whatever `Command::output` yields.
    async fn run(&self, args: &[&OsStr]) -> std::io::Result<Output> {
        let mut command = Command::new(&self.binary);
        command.args(args);
        command.output().await
    }
}

/// Used to signal that the system is rebooting.
///
/// This is a unit struct: it carries no data, so a value of this type conveys
/// only the fact that a reboot is under way.
pub struct Rebooting;

Expand Down Expand Up @@ -95,8 +123,6 @@ pub trait ImageUpgrader<V: Clone + Debug + PartialEq + Eq + Send + Sync>: Send +
/// Set or unset the currently prepared version. Default is No-op.
/// The prepared version is set during `prepare_upgrade()` and unset during the `execute_upgrade()` step.
fn set_prepared_version(&mut self, _version: Option<V>) {}
/// Path to the directory containing boot scripts.
fn binary_dir(&self) -> &PathBuf;
/// Path to the image download and unpacking destination.
///
/// This path is also passed as the final argument of the
/// `guestos upgrade-install` manageboot command.
fn image_path(&self) -> &PathBuf;
/// Optional data path, used for storing latest reboot time. Default is None.
Expand All @@ -115,16 +141,16 @@ pub trait ImageUpgrader<V: Clone + Debug + PartialEq + Eq + Send + Sync>: Send +
/// Runs the disk encryption key exchange process if SEV is active. NOOP otherwise.
async fn maybe_exchange_disk_encryption_key(&mut self) -> UpgradeResult<()>;

/// Return the implementation of [`ManagebootRunner`] to be used for running the
/// `manageboot.sh` commands.
///
/// The `guestos confirm`, `guestos upgrade-install` and `guestos upgrade-commit`
/// invocations made by this trait all go through the returned runner.
fn manageboot_runner(&self) -> &dyn ManagebootRunner;

/// Calls a corresponding script to "confirm" that the base OS could boot
/// successfully. Without a confirmation the image will be reverted on the next
/// restart.
async fn confirm_boot(&self) {
if let Err(err) = Command::new(self.binary_dir().join("manageboot.sh").into_os_string())
.arg("guestos")
.arg("confirm")
.output()
.await
{
let args = ["guestos".as_ref(), "confirm".as_ref()];
if let Err(err) = self.manageboot_runner().run(&args).await {
error!(self.log(), "Could not confirm the boot: {:?}", err);
}
}
Expand Down Expand Up @@ -195,16 +221,16 @@ pub trait ImageUpgrader<V: Clone + Debug + PartialEq + Eq + Send + Sync>: Send +
// clear it here.
self.set_prepared_version(None);

let mut script = self.binary_dir().clone();
script.push("manageboot.sh");
let mut c = Command::new(script.clone().into_os_string());
let out = c
.arg("guestos")
.arg("upgrade-install")
.arg(self.image_path())
.output()
let args = [
"guestos".as_ref(),
"upgrade-install".as_ref(),
self.image_path().as_os_str(),
];
let out = self
.manageboot_runner()
.run(&args)
.await
.map_err(|e| UpgradeError::file_command_error(e, &c))?;
.map_err(|e| UpgradeError::manageboot_error(e, &args))?;

if !out.status.success() {
warn!(self.log(), "upgrade-install has failed");
Expand Down Expand Up @@ -245,14 +271,12 @@ pub trait ImageUpgrader<V: Clone + Debug + PartialEq + Eq + Send + Sync>: Send +
.map_err(|e| UpgradeError::IoError("Couldn't delete the image".to_string(), e))?;

info!(self.log(), "Attempting to reboot");
let script = self.binary_dir().join("manageboot.sh");
let mut cmd = Command::new(script.into_os_string());
let out = cmd
.arg("guestos")
.arg("upgrade-commit")
.output()
let args = ["guestos".as_ref(), "upgrade-commit".as_ref()];
let out = self
.manageboot_runner()
.run(&args)
.await
.map_err(|e| UpgradeError::file_command_error(e, &cmd))?;
.map_err(|e| UpgradeError::manageboot_error(e, &args))?;

if !out.status.success() {
warn!(self.log(), "upgrade-commit has failed: {:?}", out.status);
Expand Down
48 changes: 27 additions & 21 deletions rs/orchestrator/src/boundary_node.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,24 @@
use crate::{
error::{OrchestratorError, OrchestratorResult},
metrics::OrchestratorMetrics,
process_manager::{Process, ProcessManager},
process_manager::{Process, ProcessManager, ProcessManagerImpl},
registry_helper::RegistryHelper,
};
use ic_config::crypto::CryptoConfig;
use ic_logger::{ReplicaLogger, info, warn};
use ic_types::{NodeId, ReplicaVersion};
use std::{
collections::HashMap,
path::PathBuf,
ffi::OsString,
path::{Path, PathBuf},
sync::{Arc, Mutex},
};

struct BoundaryNodeProcess {
version: ReplicaVersion,
binary: String,
args: Vec<String>,
env: HashMap<String, String>,
binary: PathBuf,
args: Vec<OsString>,
env: HashMap<OsString, OsString>,
}

impl Process for BoundaryNodeProcess {
Expand All @@ -29,23 +30,23 @@ impl Process for BoundaryNodeProcess {
&self.version
}

fn get_binary(&self) -> &str {
fn get_binary(&self) -> &Path {
&self.binary
}

fn get_args(&self) -> &[String] {
fn get_args(&self) -> &[OsString] {
&self.args
}

fn get_env(&self) -> HashMap<String, String> {
fn get_env(&self) -> HashMap<OsString, OsString> {
self.env.clone()
}
}

pub(crate) struct BoundaryNodeManager {
registry: Arc<RegistryHelper>,
_metrics: Arc<OrchestratorMetrics>,
process: Arc<Mutex<ProcessManager<BoundaryNodeProcess>>>,
process: Arc<Mutex<dyn ProcessManager<BoundaryNodeProcess>>>,
ic_binary_dir: PathBuf,
crypto_config: CryptoConfig,
version: ReplicaVersion,
Expand All @@ -67,7 +68,7 @@ impl BoundaryNodeManager {
Self {
registry,
_metrics: metrics,
process: Arc::new(Mutex::new(ProcessManager::new(logger.clone()))),
process: Arc::new(Mutex::new(ProcessManagerImpl::new(logger.clone()))),
ic_binary_dir,
crypto_config,
version,
Expand Down Expand Up @@ -153,29 +154,34 @@ impl BoundaryNodeManager {
}
info!(self.logger, "Starting new boundary node process");

let binary = self
.ic_binary_dir
.join("ic-boundary")
.as_path()
.display()
.to_string();
let binary = self.ic_binary_dir.join("ic-boundary");

let domain_name = self
.domain_name
.as_ref()
.ok_or_else(|| OrchestratorError::DomainNameMissingError(self.node_id))?;

let env = env_file_reader::read_file("/opt/ic/share/ic-boundary.env").map_err(|e| {
OrchestratorError::IoError("unable to read ic-boundary environment variables".into(), e)
})?;
let env = match env_file_reader::read_file("/opt/ic/share/ic-boundary.env") {
Ok(env) => env
.into_iter()
.map(|(k, v)| (OsString::from(k), OsString::from(v)))
.collect(),
Err(e) => {
return Err(OrchestratorError::IoError(
"unable to read ic-boundary environment variables".to_string(),
e,
));
}
};

let args = vec![
format!("--tls-hostname={}", domain_name),
format!("--tls-hostname={}", domain_name).into(),
format!(
"--crypto-config={}",
serde_json::to_string(&self.crypto_config)
.map_err(OrchestratorError::SerializeCryptoConfigError)?
),
)
.into(),
];

process
Expand Down
4 changes: 2 additions & 2 deletions rs/orchestrator/src/dashboard.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ pub(crate) struct OrchestratorDashboard {
last_applied_firewall_version: Arc<RwLock<RegistryVersion>>,
last_applied_ipv4_config_version: Arc<RwLock<RegistryVersion>>,
last_poll_certified_time: Arc<RwLock<Time>>,
replica_process: Arc<Mutex<ProcessManager<ReplicaProcess>>>,
replica_process: Arc<Mutex<dyn ProcessManager<ReplicaProcess>>>,
subnet_assignment: Arc<RwLock<SubnetAssignment>>,
replica_version: ReplicaVersion,
hostos_version: Option<HostosVersion>,
Expand Down Expand Up @@ -90,7 +90,7 @@ impl OrchestratorDashboard {
last_applied_firewall_version: Arc<RwLock<RegistryVersion>>,
last_applied_ipv4_config_version: Arc<RwLock<RegistryVersion>>,
last_poll_certified_time: Arc<RwLock<Time>>,
replica_process: Arc<Mutex<ProcessManager<ReplicaProcess>>>,
replica_process: Arc<Mutex<dyn ProcessManager<ReplicaProcess>>>,
subnet_assignment: Arc<RwLock<SubnetAssignment>>,
replica_version: ReplicaVersion,
hostos_version: Option<HostosVersion>,
Expand Down
12 changes: 8 additions & 4 deletions rs/orchestrator/src/orchestrator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use crate::{
hostos_upgrade::HostosUpgrader,
ipv4_network::Ipv4Configurator,
metrics::OrchestratorMetrics,
process_manager::ProcessManager,
process_manager::ProcessManagerImpl,
registration::NodeRegistration,
registry_helper::RegistryHelper,
ssh_access_manager::SshAccessManager,
Expand All @@ -24,7 +24,7 @@ use ic_config::{
use ic_crypto::CryptoComponent;
use ic_crypto_node_key_generation::{NodeKeyGenerationError, generate_node_keys_once};
use ic_http_endpoints_metrics::MetricsHttpEndpoint;
use ic_image_upgrader::ImageUpgrader;
use ic_image_upgrader::{ImageUpgrader, ManagebootRunnerImpl};
use ic_logger::{ReplicaLogger, error, info, warn};
use ic_metrics::MetricsRegistry;
use ic_registry_replicator::RegistryReplicator;
Expand Down Expand Up @@ -240,12 +240,15 @@ impl Orchestrator {
Arc::clone(&crypto) as _,
);

let replica_process = Arc::new(Mutex::new(ProcessManager::new(logger.clone())));
let replica_process = Arc::new(Mutex::new(ProcessManagerImpl::new(logger.clone())));
let ic_binary_directory = args
.ic_binary_directory
.as_ref()
.unwrap_or(&PathBuf::from("/tmp"))
.clone();
let manageboot_runner = Box::new(ManagebootRunnerImpl::new(
ic_binary_directory.join("manageboot.sh"),
));

// Create a read-only CUP reader that can be shared among Dashboard and Firewall
// They read from the same file, so they'll see the same persisted CUP
Expand Down Expand Up @@ -278,7 +281,8 @@ impl Orchestrator {
Upgrade::new(
Arc::clone(&registry) as _,
Arc::clone(&metrics),
Arc::clone(&replica_process),
Arc::clone(&replica_process) as _,
manageboot_runner,
cup_provider,
Arc::clone(&subnet_assignment),
replica_version.clone(),
Expand Down
Loading
Loading