Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ from upstream in the cluster:
```bash
BUCKET=<your_init_actions_bucket>
CLUSTER=<cluster_name>
gsutil cp presto/presto.sh gs://${BUCKET}/
gcloud storage cp presto/presto.sh gs://${BUCKET}/
gcloud dataproc clusters create ${CLUSTER} --initialization-actions gs://${BUCKET}/presto.sh
```

Expand Down
2 changes: 1 addition & 1 deletion alluxio/alluxio.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ download_file() {
local -r uri="$1"

if [[ "${uri}" == gs://* ]]; then
gsutil cp "${uri}" ./
gcloud storage cp "${uri}" ./
else
# TODO Add metadata header tag to the wget for filtering out in download metrics.
wget -nv --timeout=30 --tries=5 --retry-connrefused "${uri}"
Expand Down
2 changes: 1 addition & 1 deletion beam/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Then, upload the jar to a Cloud Storage path that clusters can access during
initialization.

```bash
gsutil cp \
gcloud storage cp \
./runners/flink/job-server/build/libs/beam-runners-flink_2.11-job-server-*-SNAPSHOT.jar \
<BEAM_JOB_SERVICE_DESTINATION>/beam-runners-flink_2.11-job-server-latest-SNAPSHOT.jar
```
Expand Down
2 changes: 1 addition & 1 deletion beam/beam.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ function download_snapshot() {
readonly snapshot_url="${1}"
readonly protocol="$(echo "${snapshot_url}" | head -c5)"
if [ "${protocol}" = "gs://" ]; then
gsutil cp "${snapshot_url}" "${LOCAL_JAR_NAME}"
gcloud storage cp "${snapshot_url}" "${LOCAL_JAR_NAME}"
else
curl -o "${LOCAL_JAR_NAME}" "${snapshot_url}"
fi
Expand Down
10 changes: 5 additions & 5 deletions conda/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ Where `create-my-cluster.sh` specifies a list of conda and/or pip packages to in
```
#!/usr/bin/env bash

gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
Comment on lines +80 to +81
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The --recursive flag is unnecessary when copying single files. Its use here could be confusing for users. For clarity, it's better to remove it.

Suggested change
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .


chmod 755 ./*conda*.sh

Expand All @@ -100,9 +100,9 @@ Similarly, one can also specify a [conda environment yml file](https://github.co
CONDA_ENV_YAML_GSC_LOC="gs://my-bucket/path/to/conda-environment.yml"
CONDA_ENV_YAML_PATH="/root/conda-environment.yml"
echo "Downloading conda environment at $CONDA_ENV_YAML_GSC_LOC to $CONDA_ENV_YAML_PATH ... "
gsutil -m cp -r $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH
gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gsutil -m cp -r gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
gcloud storage cp --recursive $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
Comment on lines +103 to +105
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The --recursive flag is unnecessary when copying single files. Its use here could be confusing for users. For clarity, it's better to remove it.

Suggested change
gcloud storage cp --recursive $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp --recursive gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .
gcloud storage cp $CONDA_ENV_YAML_GSC_LOC $CONDA_ENV_YAML_PATH
gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/bootstrap-conda.sh .
gcloud storage cp gs://goog-dataproc-initialization-actions-${REGION}/conda/install-conda-env.sh .


chmod 755 ./*conda*.sh

Expand Down
2 changes: 1 addition & 1 deletion connectors/connectors.sh
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ update_connector_url() {

find "${vm_connectors_dir}/" -name "${pattern}" -delete

gsutil cp -P "${url}" "${vm_connectors_dir}/"
gcloud storage cp --preserve-posix "${url}" "${vm_connectors_dir}/"

local -r jar_name=${url##*/}

Expand Down
2 changes: 1 addition & 1 deletion dask/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ You can also `ssh` into the cluster and execute Dask jobs from Python files. To
run jobs, you can either `scp` a file onto your cluster or use `gcloud storage` on the
cluster to download the Python file.

`gcloud compute ssh <cluster-name> --command="gsutil cp gs://path/to/file.py .;
`gcloud compute ssh <cluster-name> --command="gcloud storage cp gs://path/to/file.py .;
python file.py`

### Accessing Web UIs
Expand Down
10 changes: 5 additions & 5 deletions hbase/hbase.sh
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ EOF
kadmin.local -q "addprinc -randkey hbase/${m}.${DOMAIN}@${REALM}"
echo "Generating hbase keytab..."
kadmin.local -q "xst -k ${HBASE_HOME}/conf/hbase-${m}.keytab hbase/${m}.${DOMAIN}"
gsutil cp "${HBASE_HOME}/conf/hbase-${m}.keytab" \
gcloud storage cp "${HBASE_HOME}/conf/hbase-${m}.keytab" \
"${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${m}.keytab"
done

Expand All @@ -232,17 +232,17 @@ EOF
kadmin.local -q "addprinc -randkey hbase/${CLUSTER_NAME}-w-${c}.${DOMAIN}"
echo "Generating hbase keytab..."
kadmin.local -q "xst -k ${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab hbase/${CLUSTER_NAME}-w-${c}.${DOMAIN}"
gsutil cp "${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab" \
gcloud storage cp "${HBASE_HOME}/conf/hbase-${CLUSTER_NAME}-w-${c}.keytab" \
"${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-${CLUSTER_NAME}-w-${c}.keytab"
done
touch /tmp/_success
gsutil cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success"
gcloud storage cp /tmp/_success "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success"
fi
success=1
while [[ $success == "1" ]]; do
sleep 1
success=$(
gsutil -q stat "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success"
gcloud storage objects list --stat --fetch-encrypted-object-hashes "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The original gsutil -q stat command was quiet. The replacement gcloud storage objects list will print an error to stderr if the object doesn't exist, causing log noise inside this while loop. A more accurate replacement for checking object existence is gcloud storage objects describe, with its output redirected to /dev/null to replicate the quiet behavior. The --fetch-encrypted-object-hashes flag is also unnecessary.

Suggested change
gcloud storage objects list --stat --fetch-encrypted-object-hashes "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success"
gcloud storage objects describe "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/_success" &> /dev/null

echo $?
)
done
Expand All @@ -255,7 +255,7 @@ EOF
fi

# Copy keytab to machine
gsutil cp "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-$(hostname -s).keytab" $hbase_keytab_path
gcloud storage cp "${KEYTAB_BUCKET}/keytabs/${CLUSTER_NAME}/hbase-$(hostname -s).keytab" $hbase_keytab_path

# Change owner of keytab to hbase with read only permissions
if [ -f $hbase_keytab_path ]; then
Expand Down
2 changes: 1 addition & 1 deletion hive-llap/llap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ function download_init_actions() {
# Download initialization actions locally. This will download the start_llap.sh file to the cluster for execution Check if metadata is supplied
echo "download init actions supplied as metadata..."
mkdir -p "${INIT_ACTIONS_DIR}"
gsutil cp "${INIT_ACTIONS_REPO}/hive-llap/start_llap.sh" "${INIT_ACTIONS_DIR}"
gcloud storage cp "${INIT_ACTIONS_REPO}/hive-llap/start_llap.sh" "${INIT_ACTIONS_DIR}"
chmod 700 "${INIT_ACTIONS_DIR}/start_llap.sh"
}

Expand Down
8 changes: 4 additions & 4 deletions mlvm/mlvm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -97,9 +97,9 @@ function download_init_actions() {
# Download initialization actions locally.
mkdir "${INIT_ACTIONS_DIR}"/{gpu,rapids,dask}

gsutil -m rsync -r "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/"
gsutil -m rsync -r "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/"
gsutil -m rsync -r "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/"
gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/rapids/" "${INIT_ACTIONS_DIR}/rapids/"
gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/gpu/" "${INIT_ACTIONS_DIR}/gpu/"
gcloud storage rsync --recursive "${INIT_ACTIONS_REPO}/dask/" "${INIT_ACTIONS_DIR}/dask/"

find "${INIT_ACTIONS_DIR}" -name '*.sh' -exec chmod +x {} \;
}
Expand Down Expand Up @@ -167,7 +167,7 @@ function install_spark_nlp() {
function install_connectors() {
local -r url="gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-${SPARK_BIGQUERY_VERSION}.jar"

gsutil cp "${url}" "${CONNECTORS_DIR}/"
gcloud storage cp "${url}" "${CONNECTORS_DIR}/"

local -r jar_name=${url##*/}

Expand Down