Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ from upstream in the cluster:
```bash
BUCKET=<your_init_actions_bucket>
CLUSTER=<cluster_name>
gsutil cp presto/presto.sh gs://${BUCKET}/
gcloud storage cp presto/presto.sh gs://${BUCKET}/
gcloud dataproc clusters create ${CLUSTER} --initialization-actions gs://${BUCKET}/presto.sh
```

Expand Down
2 changes: 1 addition & 1 deletion alluxio/alluxio.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ download_file() {
local -r uri="$1"

if [[ "${uri}" == gs://* ]]; then
gsutil cp "${uri}" ./
gcloud storage cp "${uri}" ./
else
# TODO Add metadata header tag to the wget for filtering out in download metrics.
wget -nv --timeout=30 --tries=5 --retry-connrefused "${uri}"
Expand Down
2 changes: 1 addition & 1 deletion beam/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Then, upload the jar to a Cloud Storage path that clusters can access during
initialization.

```bash
gsutil cp \
gcloud storage cp \
./runners/flink/job-server/build/libs/beam-runners-flink_2.11-job-server-*-SNAPSHOT.jar \
<BEAM_JOB_SERVICE_DESTINATION>/beam-runners-flink_2.11-job-server-latest-SNAPSHOT.jar
```
Expand Down
2 changes: 1 addition & 1 deletion beam/beam.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ function download_snapshot() {
readonly snapshot_url="${1}"
readonly protocol="$(echo "${snapshot_url}" | head -c5)"
if [ "${protocol}" = "gs://" ]; then
gsutil cp "${snapshot_url}" "${LOCAL_JAR_NAME}"
gcloud storage cp "${snapshot_url}" "${LOCAL_JAR_NAME}"
else
curl -o "${LOCAL_JAR_NAME}" "${snapshot_url}"
fi
Expand Down
10 changes: 5 additions & 5 deletions conda/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ Where `create-my-cluster.sh` specifies a list of conda and/or pip packages to in
```
#!/usr/bin/env bash

gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
Comment on lines +80 to +81
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The --recursive flag is unnecessary when copying single files. Its use here could be confusing for users. For clarity, it's better to remove it.

Suggested change
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .


chmod 755 ./*conda*.sh

Expand All @@ -100,9 +100,9 @@ Similarly, one can also specify a [conda environment yml file](https://github.co
CONDA_ENV_YAML_GSC_LOC="gs://my-bucket/path/to/conda-environment.yml"
CONDA_ENV_YAML_PATH="/root/conda-environment.yml"
echo "Downloading conda environment at $CONDA_ENV_YAML_GSC_LOC to $CONDA_ENV_YAML_PATH ... "
gsutil -m cp -r $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH
gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
gcloud storage cp --recursive $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
Comment on lines +103 to +105
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The --recursive flag is unnecessary when copying single files. Its use here could be confusing for users. For clarity, it's better to remove it.

Suggested change
gcloud storage cp --recursive $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
gcloud storage cp $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH
gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .


chmod 755 ./*conda*.sh

Expand Down
2 changes: 1 addition & 1 deletion connectors/connectors.sh
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ update_connector_url() {

find "${vm_connectors_dir}/" -name "${pattern}" -delete

gsutil cp -P "${url}" "${vm_connectors_dir}/"
gcloud storage cp --preserve-posix "${url}" "${vm_connectors_dir}/"

local -r jar_name=${url##*/}

Expand Down
2 changes: 1 addition & 1 deletion dask/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ You can also `ssh` into the cluster and execute Dask jobs from Python files. To
run jobs, you can either `scp` a file onto your cluster or use `gcloud storage` on the
cluster to download the Python file.

`gcloud compute ssh <cluster-name> --command="gsutil cp gs://path/to/file.py .;
`gcloud compute ssh <cluster-name> --command="gcloud storage cp gs://path/to/file.py .;
python file.py`

### Accessing Web UIs
Expand Down
10 changes: 5 additions & 5 deletions hbase/hbase.sh
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ EOF
kadmin.local -q "addprinc -randkey hbase/${m}.${DOMAIN}@${REALM}"
echo "Generating hbase keytab..."
kadmin.local -q "xst -k ${HBASE_HOME}/conf/hbase-${m}.keytab hbase/${m}.${DOMAIN}"
gsutil cp "${HBASE_HOME}/conf/hbase-${m}.keytab" \
gcloud storage cp "${HBASE_HOME}/conf/hbase-${m}.keytab" \
"${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${m}.keytab"
done

Expand All @@ -232,17 +232,17 @@ EOF
kadmin.local -q "addprinc -randkey hbase/${CLUSTER_NAME}-w-${c}.${DOMAIN}"
echo "Generating hbase keytab..."
kadmin.local -q "xst -k ${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab hbase/${CLUSTER_NAME}-w-${c}.${DOMAIN}"
gsutil cp "${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab" \
gcloud storage cp "${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab" \
"${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${CLUSTER_NAME}-w-${c}.keytab"
done
touch /tmp/_success
gsutil cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success"
gcloud storage cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success"
fi
success=1
while [[ $success == "1" ]]; do
sleep 1
success=$(
gsutil -q stat "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success"
gcloud storage objects list --stat --fetch-encrypted-object-hashes "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The original gsutil -q stat command was quiet. The replacement gcloud storage objects list will print an error to stderr if the object doesn't exist, causing log noise inside this while loop. A more accurate replacement for checking object existence is gcloud storage objects describe, with its output redirected to /dev/null to replicate the quiet behavior. The --fetch-encrypted-object-hashes flag is also unnecessary.

Suggested change
gcloud storage objects list --stat --fetch-encrypted-object-hashes "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success"
gcloud storage objects describe "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" &> /dev/null

echo $?
)
done
Expand All @@ -255,7 +255,7 @@ EOF
fi

# Copy keytab to machine
gsutil cp "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-$(hostname -s).keytab" $hbase_keytab_path
gcloud storage cp "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-$(hostname -s).keytab" $hbase_keytab_path

# Change owner of keytab to hbase with read only permissions
if [ -f $hbase_keytab_path ]; then
Expand Down
2 changes: 1 addition & 1 deletion hive-llap/llap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ function download_init_actions() {
# Download initialization actions locally. This will download the start_llap.sh file to the cluster for execution Check if metadata is supplied
echo "download init actions supplied as metadata..."
mkdir -p "${INIT_ACTIONS_DIR}"
gsutil cp "${INIT_ACTIONS_REPO}/hive-llap/start_llap.sh" "${INIT_ACTIONS_DIR}"
gcloud storage cp "${INIT_ACTIONS_REPO}/hive-llap/start_llap.sh" "${INIT_ACTIONS_DIR}"
chmod 700 "${INIT_ACTIONS_DIR}/start_llap.sh"
}

Expand Down
8 changes: 4 additions & 4 deletions mlvm/mlvm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ function download_init_actions() {
# Download initialization actions locally.
mkdir "${INIT_ACTIONS_DIR}"/{gpu,rapids,dask}

gsutil -m rsync -r "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/"
gsutil -m rsync -r "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/"
gsutil -m rsync -r "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/"
gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/"
gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/"
gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/"

find "${INIT_ACTIONS_DIR}" -name '*.sh' -exec chmod +x {} \;
}
Expand Down Expand Up @@ -167,7 +167,7 @@ function install_spark_nlp() {
function install_connectors() {
local -r url="gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-${SPARK_BIGQUERY_VERSION}.jar"

gsutil cp "${url}" "${CONNECTORS_DIR}/"
gcloud storage cp "${url}" "${CONNECTORS_DIR}/"

local -r jar_name=${url##*/}

Expand Down