Skip to content

Commit 7bb53e8

Browse files
authored
Add cusparseSpMVOp backend with compile-time auto-detection (#88)
* working SpMVop * update * improve SpMVOp via cupdlpx_spmv_ctx_t * update feasibility polishing spmv * Apply formatter * fix spmvop version issue * add more CUDA versions to CI * CI: update cuda-toolkit setting * update build.yml * update build.yml * update SpMV backend in readme * update version to v0.2.8
1 parent f83dae2 commit 7bb53e8

13 files changed

Lines changed: 395 additions & 297 deletions

File tree

.github/workflows/build.yml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ jobs:
1313
fail-fast: false
1414
matrix:
1515
os: [ubuntu-latest, windows-latest]
16+
cuda: ["12.4.0", "12.5.0", "12.6.0", "12.8.0", "12.9.0", "13.0.0", "13.1.0"]
1617
runs-on: ${{ matrix.os }}
1718

1819
steps:
@@ -21,7 +22,8 @@ jobs:
2122
- uses: Jimver/cuda-toolkit@v0.2.30
2223
id: cuda-toolkit
2324
with:
24-
cuda: "13.1.0"
25+
cuda: ${{ matrix.cuda }}
26+
linux-local-args: '["--toolkit"]'
2527

2628
- name: CUDA info
2729
run: |

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,11 @@ project(cupdlpx LANGUAGES C CXX CUDA)
88

99
set(CUPDLPX_VERSION_MAJOR 0)
1010
set(CUPDLPX_VERSION_MINOR 2)
11-
set(CUPDLPX_VERSION_PATCH 7)
11+
set(CUPDLPX_VERSION_PATCH 8)
1212

1313
set(CUPDLPX_VERSION "${CUPDLPX_VERSION_MAJOR}.${CUPDLPX_VERSION_MINOR}.${CUPDLPX_VERSION_PATCH}")
1414
add_compile_definitions(CUPDLPX_VERSION="${CUPDLPX_VERSION}")
15+
add_compile_definitions(CUSPARSE_ENABLE_EXPERIMENTAL_API)
1516

1617
if (WIN32)
1718
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ Our work is presented in two papers:
2929
* **GPU:** NVIDIA GPU with CUDA 12.4+.
3030
* **Build Tools:** CMake (≥ 3.20), GCC, NVCC.
3131

32+
> **SpMV backend** is selected automatically at compile time based on the installed cuSPARSE version:
33+
> - `cusparseSpMV` — CUDA 12.4 through the CUDA 13.1 base release (cuSPARSE < 12.7.3)
34+
> - `cusparseSpMVOp` — CUDA 13.1 Update 1+ (cuSPARSE ≥ 12.7.3)
3235
3336
### Build from Source
3437
Clone the repository and compile the project using CMake.

internal/cusparse_compat.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#pragma once
2+
3+
#include <cusparse.h>
4+
5+
// cusparseSpMVOp_bufferSize was introduced in cuSPARSE 12.7.3 (CUDA 13.1 Update 1).
6+
// CUSPARSE_VERSION encoding: major*1000 + minor*100 + patch (e.g. 12.7.3 -> 12703).
7+
#if defined(CUSPARSE_VERSION) && CUSPARSE_VERSION >= 12703
8+
#define CUPDLPX_HAS_SPMVOP 1
9+
#else
10+
#define CUPDLPX_HAS_SPMVOP 0
11+
#endif
12+
13+
#if !CUPDLPX_HAS_SPMVOP
14+
// The SpMVOp types were added to cusparse.h before the functions
15+
// (e.g. CUDA 13.1 base has the types but not the functions).
16+
// Only provide fallback typedefs for cuSPARSE versions that lack them entirely.
17+
#if !defined(CUSPARSE_VERSION) || CUSPARSE_VERSION < 12700
18+
typedef void *cusparseSpMVOpDescr_t;
19+
typedef void *cusparseSpMVOpPlan_t;
20+
#endif
21+
#endif

internal/internal_types.h

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
#pragma once
1818

1919
#include "cupdlpx_types.h"
20+
#include "cusparse_compat.h"
2021
#include <cublas_v2.h>
2122
#include <cusparse.h>
2223
#include <stdbool.h>
@@ -114,19 +115,7 @@ typedef struct
114115

115116
cusparseHandle_t sparse_handle;
116117
cublasHandle_t blas_handle;
117-
size_t spmv_buffer_size;
118-
size_t primal_spmv_buffer_size;
119-
size_t dual_spmv_buffer_size;
120-
void *primal_spmv_buffer;
121-
void *dual_spmv_buffer;
122-
void *spmv_buffer;
123-
124-
cusparseSpMatDescr_t matA;
125-
cusparseSpMatDescr_t matAt;
126-
cusparseDnVecDescr_t vec_primal_sol;
127-
cusparseDnVecDescr_t vec_dual_sol;
128-
cusparseDnVecDescr_t vec_primal_prod;
129-
cusparseDnVecDescr_t vec_dual_prod;
118+
void *spmv_ctx;
130119

131120
double *ones_primal_d;
132121
double *ones_dual_d;

internal/utils.h

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ limitations under the License.
1616

1717
#pragma once
1818

19+
#include "cusparse_compat.h"
1920
#include "internal_types.h"
2021
#include <cublas_v2.h>
2122
#include <cuda_runtime.h>
@@ -63,9 +64,6 @@ extern "C"
6364

6465
#define THREADS_PER_BLOCK 256
6566

66-
extern const double HOST_ONE;
67-
extern const double HOST_ZERO;
68-
6967
void *safe_malloc(size_t size);
7068

7169
void *safe_calloc(size_t num, size_t size);
@@ -79,6 +77,45 @@ extern "C"
7977
int max_iterations,
8078
double tolerance);
8179

80+
bool cupdlpx_use_spmvop_by_default(void);
81+
82+
void cupdlpx_spmv_buffer_size(cusparseHandle_t sparse_handle,
83+
cusparseSpMatDescr_t mat,
84+
cusparseDnVecDescr_t vec_x,
85+
cusparseDnVecDescr_t vec_y,
86+
size_t *buffer_size);
87+
88+
void cupdlpx_spmv_prepare(cusparseHandle_t sparse_handle,
89+
cusparseSpMatDescr_t mat,
90+
cusparseDnVecDescr_t vec_x,
91+
cusparseDnVecDescr_t vec_y,
92+
void *buffer,
93+
void **descr,
94+
void **plan);
95+
96+
void cupdlpx_spmv_release(void *descr, void *plan);
97+
98+
void cupdlpx_spmv_execute(cusparseHandle_t sparse_handle,
99+
cusparseSpMatDescr_t mat,
100+
cusparseDnVecDescr_t vec_x,
101+
cusparseDnVecDescr_t vec_y,
102+
void *buffer,
103+
void *plan);
104+
105+
void *cupdlpx_spmv_ctx_create(cusparseHandle_t sparse_handle,
106+
const cu_sparse_matrix_csr_t *A,
107+
const cu_sparse_matrix_csr_t *AT,
108+
const double *ax_x_init,
109+
double *ax_y_init,
110+
const double *atx_x_init,
111+
double *atx_y_init);
112+
113+
void cupdlpx_spmv_ctx_destroy(void *ctx);
114+
115+
void cupdlpx_spmv_Ax(cusparseHandle_t sparse_handle, void *ctx, const double *x, double *y);
116+
117+
void cupdlpx_spmv_ATx(cusparseHandle_t sparse_handle, void *ctx, const double *x, double *y);
118+
82119
void compute_interaction_and_movement(pdhg_solver_state_t *solver_state, double *interaction, double *movement);
83120

84121
bool should_do_adaptive_restart(pdhg_solver_state_t *solver_state,

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"
44

55
[project]
66
name = "cupdlpx"
7-
version = "0.2.7"
7+
version = "0.2.8"
88
description = "Python bindings for cuPDLPx (GPU-accelerated first-order LP solver)"
99
readme = "README.md"
1010
license = { text = "Apache-2.0" }

python/README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ It provides a high-level, Pythonic API for constructing, modifying, and solving
1818
- An NVIDIA GPU with CUDA support (≥12.4 required)
1919
- A C/C++ toolchain with GCC and NVCC
2020

21+
> **SpMV backend** is selected automatically at compile time based on the installed cuSPARSE version:
22+
> - `cusparseSpMV` — CUDA 12.4 through the CUDA 13.1 base release (cuSPARSE < 12.7.3)
23+
> - `cusparseSpMVOp` — CUDA 13.1 Update 1+ (cuSPARSE ≥ 12.7.3)
24+
25+
2126
### Install
2227
Install from PyPI:
2328

@@ -262,4 +267,4 @@ or
262267

263268
```python
264269
m.setWarmStart(primal=None, dual=None)
265-
```
270+
```

python/cupdlpx/PDLP.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,4 +60,4 @@
6060
# presolve
6161
"Presolve": "presolve",
6262
"MatrixZeroTol": "matrix_zero_tol",
63-
}
63+
}

src/feasibility_polish.cu

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -341,6 +341,14 @@ static pdhg_solver_state_t *initialize_primal_feas_polish_state(const pdhg_solve
341341
primal_state->relative_objective_gap = 0.0;
342342
primal_state->objective_gap = 0.0;
343343

344+
primal_state->spmv_ctx = cupdlpx_spmv_ctx_create(primal_state->sparse_handle,
345+
primal_state->constraint_matrix,
346+
primal_state->constraint_matrix_t,
347+
primal_state->pdhg_primal_solution,
348+
primal_state->primal_product,
349+
primal_state->pdhg_dual_solution,
350+
primal_state->dual_product);
351+
344352
return primal_state;
345353
}
346354

@@ -372,6 +380,7 @@ void primal_feas_polish_state_free(pdhg_solver_state_t *state)
372380
SAFE_CUDA_FREE(state->dual_residual);
373381
SAFE_CUDA_FREE(state->delta_primal_solution);
374382
SAFE_CUDA_FREE(state->delta_dual_solution);
383+
cupdlpx_spmv_ctx_destroy(state->spmv_ctx);
375384
free(state);
376385
}
377386

@@ -473,6 +482,15 @@ static pdhg_solver_state_t *initialize_dual_feas_polish_state(const pdhg_solver_
473482
dual_state->absolute_primal_residual = 0.0;
474483
dual_state->relative_objective_gap = 0.0;
475484
dual_state->objective_gap = 0.0;
485+
486+
dual_state->spmv_ctx = cupdlpx_spmv_ctx_create(dual_state->sparse_handle,
487+
dual_state->constraint_matrix,
488+
dual_state->constraint_matrix_t,
489+
dual_state->pdhg_primal_solution,
490+
dual_state->primal_product,
491+
dual_state->pdhg_dual_solution,
492+
dual_state->dual_product);
493+
476494
return dual_state;
477495
}
478496

@@ -514,6 +532,7 @@ void dual_feas_polish_state_free(pdhg_solver_state_t *state)
514532
SAFE_CUDA_FREE(state->dual_residual);
515533
SAFE_CUDA_FREE(state->delta_primal_solution);
516534
SAFE_CUDA_FREE(state->delta_dual_solution);
535+
cupdlpx_spmv_ctx_destroy(state->spmv_ctx);
517536
free(state);
518537
}
519538

0 commit comments

Comments
 (0)