Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions cloudbuild/run-presubmit-on-k8s.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,19 +42,46 @@ EOF
# Apply the Kubernetes config file referenced by POD_CONFIG. The expansion is
# quoted so a path containing spaces or glob characters stays a single argument.
kubectl apply -f "${POD_CONFIG}"

# Delete POD on exit and describe it before deletion if exit was unsuccessful
trap '[[ $? != 0 ]] && kubectl describe "pod/${POD_NAME}"; kubectl delete pods "${POD_NAME}"' EXIT
#######################################
# EXIT handler: on failure, describe the pod and upload its logs to GCS;
# always delete the pod; then re-exit with the script's original status.
# Globals:   POD_NAME (read), BUILD_ID (read)
# Outputs:   progress and diagnostic messages on stdout
# Returns:   does not return; exits with the status the script was exiting with
#######################################
cleanup_presubmit_pod() {
  # Must be the first statement so $? still holds the script's exit status.
  local exit_code=$?
  local project_id bucket log_gcs_path

  if [[ "${exit_code}" != 0 ]]; then
    echo "Presubmit failed for ${POD_NAME}. Describing pod..."
    kubectl describe "pod/${POD_NAME}" || echo "Failed to describe pod."

    # gcloud can exit 0 while printing nothing when no project is configured,
    # so fall back on empty output as well as on a non-zero status.
    project_id=$(gcloud config get-value project 2>/dev/null) || project_id=""
    project_id=${project_id:-unknown-project}
    bucket="dataproc-init-actions-test-${project_id}"
    # Default BUILD_ID so an unset value neither aborts the handler under
    # `set -u` nor produces a path with an empty component.
    log_gcs_path="gs://${bucket}/${BUILD_ID:-unknown-build}/logs/${POD_NAME}.log"

    echo "Attempting to upload logs to ${log_gcs_path}"
    # pipefail (scoped to a subshell) makes a failing `kubectl logs` count as
    # an upload failure instead of being hidden behind gsutil's exit status.
    if (set -o pipefail; kubectl logs "${POD_NAME}" | gsutil cp - "${log_gcs_path}"); then
      echo "Logs for failed pod ${POD_NAME} uploaded to: ${log_gcs_path}"
    else
      echo "Log upload to ${log_gcs_path} failed."
    fi
  fi

  echo "Deleting pod ${POD_NAME}..."
  # Best effort: a failed delete must not replace the presubmit's own status.
  kubectl delete pods "${POD_NAME}" --ignore-not-found=true \
    || echo "Failed to delete pod ${POD_NAME}."
  exit "${exit_code}"
}
trap cleanup_presubmit_pod EXIT

# Block until the pod reports the Ready condition, giving up after 15 minutes.
kubectl wait --for=condition=Ready "pod/${POD_NAME}" --timeout=15m

# To mitigate problems with early test failure, retry kubectl logs
sleep 10s
# Keep streaming until the `kubectl describe` output mentions "Terminated".
while ! kubectl describe "pod/${POD_NAME}" | grep -q Terminated; do
kubectl logs -f "${POD_NAME}" --since-time="${LOGS_SINCE_TIME}" --timestamps=true
# Try to stream logs, but primary log capture is now in the trap
kubectl logs -f "${POD_NAME}" --since-time="${LOGS_SINCE_TIME}" --timestamps=true || true
# Move the --since-time cursor to now; the next iteration streams from here.
LOGS_SINCE_TIME=$(date --iso-8601=seconds)
sleep 2 # Short sleep to avoid busy waiting if logs -f exits
done

# Read the terminated exit code of each container status in the pod; the
# template prints them back to back with no separator.
EXIT_CODE=$(kubectl get pod "${POD_NAME}" \
-o go-template="{{range .status.containerStatuses}}{{.state.terminated.exitCode}}{{end}}")
# Final check on the pod exit code; if kubectl itself fails, "1" is emitted so
# the run is treated as failed.
EXIT_CODE=$(kubectl get pod "${POD_NAME}" -o go-template="{{range .status.containerStatuses}}{{.state.terminated.exitCode}}{{end}}" || echo "1")

# String comparison: anything other than exactly "0" (including empty output)
# is treated as a failure.
if [[ ${EXIT_CODE} != 0 ]]; then
echo "Presubmit failed!"
echo "Presubmit final state for ${POD_NAME} indicates failure (Exit Code: ${EXIT_CODE})."
# The trap will handle the log upload and cleanup
exit 1
fi

echo "Presubmit for ${POD_NAME} successful."
# Exit 0 explicitly; the EXIT trap records this status, deletes the pod, and
# re-exits with it.
exit 0
12 changes: 12 additions & 0 deletions gpu/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,18 @@ sometimes found in the "building from source" sections.
modulus md5sum of the files referenced by both the private and
public secret names.

- `http-proxy: <HOST>:<PORT>` - Optional. The address of an HTTP
proxy to use for internet egress. The script will configure `apt`,
`curl`, `gsutil`, `pip`, `java`, and `gpg` to use this proxy.

- `http-proxy-pem-uri: <GS_PATH>` - Optional. A `gs://` path to the
PEM-encoded certificate file used by the proxy specified in
`http-proxy`. This is needed if the proxy uses TLS and its
certificate is not already trusted by the cluster's default trust
store (e.g., if it's a self-signed certificate or signed by an
internal CA). The script will install this certificate into the
system and Java trust stores.

#### Loading built kernel module

For platforms which do not have pre-built binary kernel drivers, the
Expand Down
Loading