Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/actionlint.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# actionlint configuration: declares runner labels that are not GitHub-hosted
# defaults, so that workflows referencing them in 'runs-on' pass linting.
self-hosted-runner:
labels:
# Custom label for GPU-enabled self-hosted runners; the GPU test job selects
# its runner with 'runs-on: [self-hosted, Linux, gpu]'
- gpu
139 changes: 139 additions & 0 deletions .github/workflows/core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ on:
description: Whether to test using macOS
type: boolean
default: false
test_gpu:
description: Whether to test using CUDA-enabled PETSc
type: boolean
default: false
deploy_website:
description: Whether to deploy the website
type: boolean
Expand Down Expand Up @@ -54,6 +58,10 @@ on:
description: Whether to test using macOS
type: boolean
default: false
test_gpu:
description: Whether to test using CUDA-enabled PETSc
type: boolean
default: false
deploy_website:
description: Whether to deploy the website
type: boolean
Expand Down Expand Up @@ -465,6 +473,137 @@ jobs:
run: |
find . -delete

# GPU CI job: builds a CUDA-enabled PETSc and Firedrake inside an Ubuntu
# container on a self-hosted runner labelled 'gpu', then runs the smoke tests.
# Only runs when the 'test_gpu' workflow input is true.
test_gpu:
  name: Build and test Firedrake (Linux CUDA)
  runs-on: [self-hosted, Linux, gpu]
  container:
    image: ubuntu:latest
    # Pass the host GPUs through to the container
    options: --gpus all
  if: inputs.test_gpu
  env:
    OMPI_ALLOW_RUN_AS_ROOT: 1
    OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
    OMP_NUM_THREADS: 1
    OPENBLAS_NUM_THREADS: 1
    FIREDRAKE_CI: 1
    PYOP2_SPMD_STRICT: 1
    # Disable fast math as it exposes compiler bugs
    PYOP2_CFLAGS: -fno-fast-math
    # NOTE: One should occasionally update test_durations.json by running
    # 'make test_durations' inside a 'firedrake:latest' Docker image.
    EXTRA_PYTEST_ARGS: --splitting-algorithm least_duration --timeout=600 --timeout-method=thread -o faulthandler_timeout=660 --durations-path=./firedrake-repo/tests/test_durations.json --durations=50
    PYTEST_MPI_MAX_NPROCS: 8
    # Prevent PETSc from exiting with an error due to using non-GPU aware system MPI
    PETSC_OPTIONS: -use_gpu_aware_mpi 0
  steps:
    - name: Confirm Nvidia GPUs are enabled
      # The presence of the nvidia-smi command indicates that the Nvidia drivers have
      # successfully been imported into the container, there is no point continuing
      # if nvidia-smi is not present
      run: nvidia-smi

    - name: Fix HOME
      # For unknown reasons GitHub actions overwrite HOME to /github/home
      # which will break everything unless fixed
      # (https://github.com/actions/runner/issues/863)
      run: echo "HOME=/root" >> "$GITHUB_ENV"

    # Git is needed for actions/checkout and Python for firedrake-configure
    # curl needed for adding new deb repositories to ubuntu
    - name: Install system dependencies (1)
      run: |
        apt-get update
        apt-get -y install git python3 curl

    - name: Pre-run cleanup
      # Make sure the current directory is empty
      run: find . -delete

    - uses: actions/checkout@v5
      with:
        path: firedrake-repo
        ref: ${{ inputs.source_ref }}

    - name: Add Nvidia CUDA deb repositories
      # firedrake-configure prints the URL of the .deb that registers the
      # extra apt repository; download it, install it and refresh the index
      run: |
        deburl=$( python3 ./firedrake-repo/scripts/firedrake-configure --show-extra-repo-pkg-url --gpu-arch cuda )
        debfile=$( basename "${deburl}" )
        curl -fsSLO "${deburl}"
        dpkg -i "${debfile}"
        apt-get update

    - name: Install system dependencies (2)
      run: |
        apt-get -y install \
          $(python3 ./firedrake-repo/scripts/firedrake-configure --arch default --gpu-arch cuda --show-system-packages)
        apt-get -y install python3-venv
        : # Dependencies needed to run the test suite
        apt-get -y install fonts-dejavu graphviz graphviz-dev parallel poppler-utils

    - name: Install PETSc
      env:
        EXTRA_OPTIONS: -use_gpu_aware_mpi 0
        # Pass the workflow input through the environment rather than
        # expanding it inside the script, so that it can be safely quoted
        TARGET_BRANCH: ${{ inputs.target_branch }}
      run: |
        if [ "$TARGET_BRANCH" = 'release' ]; then
          git clone --depth 1 \
            --branch $(python3 ./firedrake-repo/scripts/firedrake-configure --gpu-arch cuda --show-petsc-version) \
            https://gitlab.com/petsc/petsc.git
        else
          git clone --depth 1 https://gitlab.com/petsc/petsc.git
        fi
        cd petsc
        python3 ../firedrake-repo/scripts/firedrake-configure \
          --arch default --gpu-arch cuda --show-petsc-configure-options | \
          xargs -L1 ./configure --with-make-np=4
        make
        make check
        : # Export the PETSc/SLEPc locations to all subsequent steps
        {
          echo "PETSC_DIR=/__w/firedrake/firedrake/petsc"
          echo "PETSC_ARCH=arch-firedrake-default-cuda"
          echo "SLEPC_DIR=/__w/firedrake/firedrake/petsc/arch-firedrake-default-cuda"
        } >> "$GITHUB_ENV"

    - name: Install Firedrake
      id: install
      env:
        # Pass the workflow input through the environment rather than
        # expanding it inside the script, so that it can be safely quoted
        TARGET_BRANCH: ${{ inputs.target_branch }}
      run: |
        export $(python3 ./firedrake-repo/scripts/firedrake-configure --arch default --gpu-arch cuda --show-env)
        python3 -m venv venv
        . venv/bin/activate

        : # Empty the pip cache to ensure that everything is compiled from scratch
        pip cache purge

        if [ "$TARGET_BRANCH" = 'release' ]; then
          EXTRA_PIP_FLAGS=''
        else
          : # Install build dependencies
          pip install "$PETSC_DIR"/src/binding/petsc4py
          pip install -r ./firedrake-repo/requirements-build.txt

          : # We have to pass '--no-build-isolation' to use a custom petsc4py
          EXTRA_PIP_FLAGS='--no-build-isolation'
        fi

        pip install --verbose $EXTRA_PIP_FLAGS \
          --no-binary h5py \
          './firedrake-repo[check]'

        firedrake-clean
        pip list

    - name: Run smoke tests
      run: |
        . venv/bin/activate
        firedrake-check
      timeout-minutes: 10

    - name: Post-run cleanup
      # Always empty the workspace, even when an earlier step failed
      if: always()
      run: |
        find . -delete

lint:
name: Lint codebase
runs-on: ubuntu-latest
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/pr.yml
Comment thread
connorjward marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ jobs:
target_branch: ${{ github.base_ref }}
# Only run macOS tests if the PR is labelled 'macOS'
test_macos: ${{ contains(github.event.pull_request.labels.*.name, 'macOS') }}
# Only run GPU tests if the PR is labelled 'gpu'
test_gpu: ${{ contains(github.event.pull_request.labels.*.name, 'gpu') }}
secrets: inherit
1 change: 1 addition & 0 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ jobs:
source_ref: ${{ github.ref_name }}
target_branch: ${{ github.ref_name }}
test_macos: true
test_gpu: true
deploy_website: true
secrets: inherit

Expand Down
6 changes: 6 additions & 0 deletions firedrake/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,9 @@ class NonUniqueMeshSequenceError(FiredrakeException):
"""Raised when calling `.unique()` on a MeshSequence which contains
non-unique meshes.
"""


class UnrecognisedDeviceError(FiredrakeException):
    """Signals that PETSc has initialised a GPU device of a type that
    Firedrake does not support.
    """
61 changes: 61 additions & 0 deletions firedrake/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
from pyop2.datatypes import IntType # noqa: F401
from pyop2.datatypes import as_ctypes # noqa: F401
from pyop2.mpi import MPI
from petsc4py import PETSc
from functools import cache
from firedrake.exceptions import UnrecognisedDeviceError
import petsctools


Expand All @@ -23,6 +26,64 @@
SLATE_SUPPORTS_COMPLEX = False


@cache
def device_matrix_type(warn: bool = True) -> str | None:
r"""Get device matrix type

Attempt to initialise a GPU device and return the PETSc mat_type
compatible with that device, or None if no device is detected.
Typical Usage Example:
mat_type = device_matrix_type(pc.comm.rank == 0)

Parameters
----------
warn
Emit a warning containing the reason a device mat_type
has not been returned. Defaults to False.

Raises
------
UnrecognisedDeviceError
Raised when PETSc initialises a GPU device that
Firedrake does not understand

Returns
-------
str | None
The PETSc mat_type compatible with the GPU device detected on
this system or None

"""
Comment thread
connorjward marked this conversation as resolved.
_device_mat_type_map = {"HOST": None, "CUDA": "aijcusparse"}
try:
dev = PETSc.Device.create()
except PETSc.Error:
# Could not initialise device - not a failure condition as this could
# be a GPU-enabled PETSc installation running on a CPU-only host.
if warn:
warnings.warn(
"This installation of Firedrake is GPU-enabled, but no GPU device has been detected"
)
return None
dev_type = dev.getDeviceType()
dev.destroy()
if dev_type not in _device_mat_type_map:
raise UnrecognisedDeviceError(
f"Unknown device type: {dev_type} initialised by PETSc. Firedrake "
f"currently understands {', '.join([k for k in _device_mat_type_map if k != 'HOST'])}"
"devices"
)

if warn:
if dev_type == "HOST":
warnings.warn(
"This installation of Firedrake is not GPU-enabled, to enable GPU functionality "
"PETSc will need to be rebuilt with some GPU capability appropriate for this system "
"(e.g. '--with-cuda=1')."
)
return _device_mat_type_map[dev_type]


def _new_uid(comm):
uid = comm.Get_attr(FIREDRAKE_UID)
if uid is None:
Expand Down
Loading
Loading