-
Notifications
You must be signed in to change notification settings - Fork 8
Add rocSPARSE CSR SpGEMM #51
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
405bb91
9263ec1
59cfabe
ed7d26f
e7ecada
4595c8d
eeb3a67
59c0b7d
28379cd
5f9c090
e36a516
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,319 @@ | ||
| #pragma once | ||
|
|
||
| #include <cstdint> | ||
| #include <functional> | ||
| #include <memory> | ||
| #include <type_traits> | ||
|
|
||
| #include <hip/hip_runtime.h> | ||
| #include <rocsparse/rocsparse.h> | ||
|
|
||
| #include <spblas/detail/ranges.hpp> | ||
| #include <spblas/detail/view_inspectors.hpp> | ||
|
|
||
| #include "exception.hpp" | ||
| #include "hip_allocator.hpp" | ||
| #include "types.hpp" | ||
|
|
||
| namespace spblas { | ||
| namespace __rocsparse { | ||
|
|
||
| /// Builds a T from three null pointers, a 0 x 0 `index<index_t>` and a | ||
| /// trailing 0, using a braced (copy-list) return initializer. | ||
| /// NOTE(review): presumably T is a CSR view whose constructor takes | ||
| /// (values, rowptr, colind, shape, nnz) -- confirm the argument order. | ||
| template <typename T> | ||
| T create_null_matrix() { | ||
| return {nullptr, nullptr, nullptr, index<index_t>{0, 0}, 0}; | ||
| } | ||
|
|
||
| } // namespace __rocsparse | ||
|
|
||
| class spgemm_state_t { | ||
| public: | ||
| /// Default state: delegates to the allocator-taking constructor with a | ||
| /// value-initialized hip_allocator<char>. | ||
| spgemm_state_t() : spgemm_state_t(rocsparse::hip_allocator<char>()) {} | ||
|
|
||
| /// Creates a state that owns a freshly created rocSPARSE handle. | ||
| /// | ||
| /// @param alloc allocator used for the workspace; when its stream() is | ||
| ///              non-null, the new handle is bound to that stream. | ||
| /// @throws whatever __rocsparse::throw_if_error raises when handle creation | ||
| ///         or stream binding fails. | ||
| spgemm_state_t(rocsparse::hip_allocator<char> alloc) | ||
|     : alloc_(alloc), buffer_size_(0), workspace_(nullptr), | ||
|       result_shape_(0, 0), result_nnz_(0) { | ||
|   rocsparse_handle handle; | ||
|   __rocsparse::throw_if_error(rocsparse_create_handle(&handle)); | ||
|   // Hand the handle to its owner before anything else can throw, so a | ||
|   // failure below cannot leak it. | ||
|   handle_ = handle_manager(handle, [](rocsparse_handle owned) { | ||
|     // The deleter runs inside unique_ptr's destructor, which must not | ||
|     // throw; the destroy status is therefore deliberately ignored. | ||
|     (void)rocsparse_destroy_handle(owned); | ||
|   }); | ||
|   if (auto stream = alloc.stream()) { | ||
|     __rocsparse::throw_if_error(rocsparse_set_stream(handle, stream)); | ||
|   } | ||
| } | ||
|
|
||
| /// Creates a state around a caller-supplied rocSPARSE handle. The deleter | ||
| /// installed here does nothing, so this object never destroys the handle. | ||
| spgemm_state_t(rocsparse::hip_allocator<char> alloc, rocsparse_handle handle) | ||
|     : alloc_(alloc), buffer_size_(0), workspace_(nullptr), | ||
|       result_shape_(0, 0), result_nnz_(0) { | ||
|   // The handle belongs to the caller; releasing it stays their job. | ||
|   const auto leave_untouched = [](rocsparse_handle) {}; | ||
|   handle_ = handle_manager(handle, leave_untouched); | ||
| } | ||
|
|
||
| /// Releases the workspace and the four matrix descriptors. | ||
| /// | ||
| /// Destructors are implicitly noexcept, so a failing rocSPARSE call must | ||
| /// not throw here (that would end in std::terminate). Descriptor teardown | ||
| /// is best effort and its status is ignored. | ||
| ~spgemm_state_t() { | ||
|   if (this->workspace_ != nullptr) { | ||
|     alloc_.deallocate(this->workspace_, this->buffer_size_); | ||
|   } | ||
|   // Null descriptors are skipped. | ||
|   // NOTE(review): this only protects a state on which multiply_compute was | ||
|   // never called if the descriptor members start out null -- confirm. | ||
|   const auto release = [](rocsparse_spmat_descr descr) { | ||
|     if (descr != nullptr) { | ||
|       (void)rocsparse_destroy_spmat_descr(descr); | ||
|     } | ||
|   }; | ||
|   release(this->mat_a_); | ||
|   release(this->mat_b_); | ||
|   release(this->mat_c_); | ||
|   release(this->mat_d_); | ||
| } | ||
|
|
||
| /// Returns a copy of the result shape recorded by the last | ||
| /// multiply_compute call (0 x 0 right after construction). | ||
| auto result_shape() const { | ||
|   return this->result_shape_; | ||
| } | ||
|
|
||
| /// Returns the number of stored entries of the result as recorded by the | ||
| /// last multiply_compute call (0 right after construction). | ||
| auto result_nnz() const { | ||
|   return this->result_nnz_; | ||
| } | ||
|
|
||
| /// Runs the buffer-size and nnz stages of rocsparse_spgemm for the product | ||
| /// of a and b combined with d, then records the size and entry count that | ||
| /// rocSPARSE reports for c in result_shape_ / result_nnz_. | ||
| /// | ||
| /// The scaling factors taken from (a, b) and from d default to 1 when the | ||
| /// views carry none. The workspace only ever grows: it is reallocated when | ||
| /// the requested size exceeds the current one. | ||
| /// | ||
| /// NOTE(review): the four descriptors are overwritten on every call without | ||
| /// destroying the previous ones -- looks like a leak when this is called | ||
| /// more than once on the same state; confirm. | ||
| /// | ||
| /// @throws whatever __rocsparse::throw_if_error raises on a failing call. | ||
| template <matrix A, matrix B, matrix C, matrix D> | ||
|   requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
|            __detail::is_csr_view_v<C> && __detail::has_csr_base<D> | ||
| void multiply_compute(A&& a, B&& b, C&& c, D&& d) { | ||
|   auto a_base = __detail::get_ultimate_base(a); | ||
|   auto b_base = __detail::get_ultimate_base(b); | ||
|   auto d_base = __detail::get_ultimate_base(d); | ||
|   using matrix_type = decltype(a_base); | ||
|   using value_type = typename matrix_type::scalar_type; | ||
|
|
||
|   size_t buffer_size = 0; | ||
|
|
||
|   auto alpha_optional = __detail::get_scaling_factor(a, b); | ||
|   value_type alpha = alpha_optional.value_or(1); | ||
|   auto beta_optional = __detail::get_scaling_factor(d); | ||
|   value_type beta = beta_optional.value_or(1); | ||
|   auto handle = this->handle_.get(); | ||
|   // Create sparse matrix descriptors in CSR format. | ||
|   this->mat_a_ = __rocsparse::create_rocsparse_handle(a_base); | ||
|   this->mat_b_ = __rocsparse::create_rocsparse_handle(b_base); | ||
|   this->mat_c_ = __rocsparse::create_rocsparse_handle(c); | ||
|   this->mat_d_ = __rocsparse::create_rocsparse_handle(d_base); | ||
|   // Ask how many bytes of external memory the operation needs. | ||
|   __rocsparse::throw_if_error(rocsparse_spgemm( | ||
|       handle, rocsparse_operation_none, rocsparse_operation_none, &alpha, | ||
|       this->mat_a_, this->mat_b_, &beta, this->mat_d_, this->mat_c_, | ||
|       detail::rocsparse_data_type_v<value_type>, rocsparse_spgemm_alg_default, | ||
|       rocsparse_spgemm_stage_buffer_size, &buffer_size, nullptr)); | ||
|   // Allocate a new buffer if more is required than the current buffer | ||
|   // holds. | ||
|   if (buffer_size > this->buffer_size_) { | ||
|     this->alloc_.deallocate(this->workspace_, this->buffer_size_); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's have a reallocate method in the allocator?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is close to the array more than the allocator? |
||
|     // Forget the freed block before allocating again: if allocate throws, | ||
|     // the state must not keep a dangling pointer or a size it does not own | ||
|     // (the destructor would otherwise free the old block a second time). | ||
|     this->workspace_ = nullptr; | ||
|     this->buffer_size_ = 0; | ||
|     this->workspace_ = this->alloc_.allocate(buffer_size); | ||
|     this->buffer_size_ = buffer_size; | ||
|   } | ||
|   __rocsparse::throw_if_error(rocsparse_spgemm( | ||
|       handle, rocsparse_operation_none, rocsparse_operation_none, &alpha, | ||
|       this->mat_a_, this->mat_b_, &beta, this->mat_d_, this->mat_c_, | ||
|       detail::rocsparse_data_type_v<value_type>, rocsparse_spgemm_alg_default, | ||
|       rocsparse_spgemm_stage_nnz, &this->buffer_size_, this->workspace_)); | ||
|   // Read back the size and the number of stored entries of matrix C. | ||
|   int64_t c_num_rows; | ||
|   int64_t c_num_cols; | ||
|   __rocsparse::throw_if_error(rocsparse_spmat_get_size( | ||
|       this->mat_c_, &c_num_rows, &c_num_cols, &this->result_nnz_)); | ||
|   // Form the shape. | ||
|   this->result_shape_ = index<index_t>(c_num_rows, c_num_cols); | ||
yhmtsai marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| /// Runs the compute stage of rocsparse_spgemm into the arrays of c, using | ||
| /// the descriptors and workspace already stored in this state. | ||
| /// | ||
| /// Only the descriptor of c is re-pointed (to c's rowptr / colind / values); | ||
| /// the descriptors of a, b and d are used as they are. The scaling factors | ||
| /// default to 1 when the views carry none. | ||
| /// | ||
| /// @throws whatever __rocsparse::throw_if_error raises on a failing call. | ||
| template <matrix A, matrix B, matrix C, matrix D> | ||
|   requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
|            __detail::is_csr_view_v<C> && __detail::has_csr_base<D> | ||
| void multiply_fill(A&& a, B&& b, C&& c, D&& d) { | ||
|   auto a_base = __detail::get_ultimate_base(a); | ||
|   using matrix_type = decltype(a_base); | ||
|   using value_type = typename matrix_type::scalar_type; | ||
|
|
||
|   auto alpha_optional = __detail::get_scaling_factor(a, b); | ||
|   tensor_scalar_t<A> alpha = alpha_optional.value_or(1); | ||
|   // rocSPARSE reads the scalars as rocsparse_data_type_v<value_type>, so | ||
|   // the pointer handed over must refer to a value_type object. | ||
|   value_type alpha_val = alpha; | ||
|   auto beta_optional = __detail::get_scaling_factor(d); | ||
|   value_type beta = beta_optional.value_or(1); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I am not sure that relying on a and d types for alpha and beta is the best approach. |
||
|
|
||
|   __rocsparse::throw_if_error(rocsparse_csr_set_pointers( | ||
|       this->mat_c_, c.rowptr().data(), c.colind().data(), c.values().data())); | ||
|
|
||
|   __rocsparse::throw_if_error(rocsparse_spgemm( | ||
|       this->handle_.get(), rocsparse_operation_none, rocsparse_operation_none, | ||
|       &alpha_val, this->mat_a_, this->mat_b_, &beta, this->mat_d_, | ||
|       this->mat_c_, detail::rocsparse_data_type_v<value_type>, | ||
|       rocsparse_spgemm_alg_default, rocsparse_spgemm_stage_compute, | ||
|       &this->buffer_size_, this->workspace_)); | ||
| } | ||
|
|
||
| /// Runs the symbolic stage of rocsparse_spgemm into the arrays of c, using | ||
| /// the descriptors and workspace already stored in this state. | ||
| /// | ||
| /// Only the descriptor of c is re-pointed (to c's rowptr / colind / values). | ||
| /// The scaling factors default to 1 when the views carry none. | ||
| /// | ||
| /// @throws whatever __rocsparse::throw_if_error raises on a failing call. | ||
| template <matrix A, matrix B, matrix C, matrix D> | ||
|   requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
|            __detail::is_csr_view_v<C> && __detail::has_csr_base<D> | ||
| void multiply_symbolic_fill(A&& a, B&& b, C&& c, D&& d) { | ||
|   auto a_base = __detail::get_ultimate_base(a); | ||
|   using matrix_type = decltype(a_base); | ||
|   using value_type = typename matrix_type::scalar_type; | ||
|
|
||
|   auto alpha_optional = __detail::get_scaling_factor(a, b); | ||
|   value_type alpha = alpha_optional.value_or(1); | ||
|   auto beta_optional = __detail::get_scaling_factor(d); | ||
|   value_type beta = beta_optional.value_or(1); | ||
|
|
||
|   __rocsparse::throw_if_error(rocsparse_csr_set_pointers( | ||
|       this->mat_c_, c.rowptr().data(), c.colind().data(), c.values().data())); | ||
|
|
||
|   __rocsparse::throw_if_error(rocsparse_spgemm( | ||
|       this->handle_.get(), rocsparse_operation_none, rocsparse_operation_none, | ||
|       &alpha, this->mat_a_, this->mat_b_, &beta, this->mat_d_, this->mat_c_, | ||
|       detail::rocsparse_data_type_v<value_type>, rocsparse_spgemm_alg_default, | ||
|       rocsparse_spgemm_stage_symbolic, &this->buffer_size_, | ||
|       this->workspace_)); | ||
| } | ||
|
|
||
| /// Runs the numeric stage of rocsparse_spgemm, after re-pointing the stored | ||
| /// descriptors at the arrays of the matrices passed in. | ||
| /// | ||
| /// The matrices must have the same sparsity pattern as in the previous | ||
| /// call. The descriptor of d is only re-pointed when d has a non-null values | ||
| /// array. The scaling factors default to 1 when the views carry none. | ||
| /// | ||
| /// @throws whatever __rocsparse::throw_if_error raises on a failing call. | ||
| template <matrix A, matrix B, matrix C, matrix D> | ||
|   requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
|            __detail::is_csr_view_v<C> && __detail::has_csr_base<D> | ||
| void multiply_numeric(A&& a, B&& b, C&& c, D&& d) { | ||
|   auto a_base = __detail::get_ultimate_base(a); | ||
|   auto b_base = __detail::get_ultimate_base(b); | ||
|   auto d_base = __detail::get_ultimate_base(d); | ||
|   using matrix_type = decltype(a_base); | ||
|   using value_type = typename matrix_type::scalar_type; | ||
|
|
||
|   auto alpha_optional = __detail::get_scaling_factor(a, b); | ||
|   tensor_scalar_t<A> alpha = alpha_optional.value_or(1); | ||
|   // rocSPARSE reads the scalars as rocsparse_data_type_v<value_type>, so | ||
|   // the pointer handed over must refer to a value_type object. | ||
|   value_type alpha_val = alpha; | ||
|   auto beta_optional = __detail::get_scaling_factor(d); | ||
|   value_type beta = beta_optional.value_or(1); | ||
|
|
||
|   // Refresh the array pointers held by the descriptors. | ||
|   __rocsparse::throw_if_error(rocsparse_csr_set_pointers( | ||
|       this->mat_a_, a_base.rowptr().data(), a_base.colind().data(), | ||
|       a_base.values().data())); | ||
|   __rocsparse::throw_if_error(rocsparse_csr_set_pointers( | ||
|       this->mat_b_, b_base.rowptr().data(), b_base.colind().data(), | ||
|       b_base.values().data())); | ||
|   __rocsparse::throw_if_error(rocsparse_csr_set_pointers( | ||
|       this->mat_c_, c.rowptr().data(), c.colind().data(), c.values().data())); | ||
|   if (d_base.values().data()) { | ||
|     // While d is still a null matrix the set-pointer function cannot be | ||
|     // used on it. | ||
|     __rocsparse::throw_if_error(rocsparse_csr_set_pointers( | ||
|         this->mat_d_, d_base.rowptr().data(), d_base.colind().data(), | ||
|         d_base.values().data())); | ||
|   } | ||
|   __rocsparse::throw_if_error(rocsparse_spgemm( | ||
|       this->handle_.get(), rocsparse_operation_none, rocsparse_operation_none, | ||
|       &alpha_val, this->mat_a_, this->mat_b_, &beta, this->mat_d_, | ||
|       this->mat_c_, detail::rocsparse_data_type_v<value_type>, | ||
|       rocsparse_spgemm_alg_default, rocsparse_spgemm_stage_numeric, | ||
|       &this->buffer_size_, this->workspace_)); | ||
| } | ||
|
|
||
| private: | ||
| // Owning wrapper for the rocSPARSE handle; the deleter is chosen by the | ||
| // constructor (destroy for an internally created handle, no-op for a | ||
| // caller-supplied one). | ||
| using handle_manager = | ||
|     std::unique_ptr<std::pointer_traits<rocsparse_handle>::element_type, | ||
|                     std::function<void(rocsparse_handle)>>; | ||
| handle_manager handle_; | ||
| // Allocator for the workspace, and the workspace it currently provides. | ||
| rocsparse::hip_allocator<char> alloc_; | ||
| std::uint64_t buffer_size_; | ||
| char* workspace_; | ||
| // Size and entry count of the result, filled in by multiply_compute. | ||
| index<index_t> result_shape_; | ||
| std::int64_t result_nnz_; | ||
| // Matrix descriptors, created by multiply_compute. They start out null so | ||
| // that they never hold an indeterminate value before that first call. | ||
| rocsparse_spmat_descr mat_a_ = nullptr; | ||
| rocsparse_spmat_descr mat_b_ = nullptr; | ||
| rocsparse_spmat_descr mat_c_ = nullptr; | ||
| rocsparse_spmat_descr mat_d_ = nullptr; | ||
| }; | ||
|
|
||
| /// Inspection step for CSR a, b and CSR view c. The body is empty: nothing | ||
| /// is read from or written to spgemm_handle or the matrices. | ||
| template <matrix A, matrix B, matrix C> | ||
| requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
| __detail::is_csr_view_v<C> | ||
| void multiply_inspect(spgemm_state_t& spgemm_handle, A&& a, B&& b, C&& c) {} | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to at least add a version that takes in three matrices and returns a state (see |
||
|
|
||
| /// Free-function form of the four-matrix compute step: forwards a, b, c and | ||
| /// d to spgemm_handle.multiply_compute. | ||
| template <matrix A, matrix B, matrix C, matrix D> | ||
| requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
| __detail::is_csr_view_v<C> && __detail::has_csr_base<D> | ||
| void multiply_compute(spgemm_state_t& spgemm_handle, A&& a, B&& b, C&& c, | ||
| D&& d) { | ||
| spgemm_handle.multiply_compute(a, b, c, d); | ||
| } | ||
|
|
||
| /// Free-function form of the four-matrix fill step: forwards a, b, c and d | ||
| /// to spgemm_handle.multiply_fill. | ||
| template <matrix A, matrix B, matrix C, matrix D> | ||
| requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
| __detail::is_csr_view_v<C> && __detail::has_csr_base<D> | ||
| void multiply_fill(spgemm_state_t& spgemm_handle, A&& a, B&& b, C&& c, D&& d) { | ||
| spgemm_handle.multiply_fill(a, b, c, d); | ||
| } | ||
|
|
||
| /// Four-matrix symbolic compute step. It calls the same member as | ||
| /// multiply_compute: spgemm_handle.multiply_compute(a, b, c, d). | ||
| template <matrix A, matrix B, matrix C, matrix D> | ||
| requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
| __detail::is_csr_view_v<C> && __detail::has_csr_base<D> | ||
| void multiply_symbolic_compute(spgemm_state_t& spgemm_handle, A&& a, B&& b, | ||
| C&& c, D&& d) { | ||
| spgemm_handle.multiply_compute(a, b, c, d); | ||
| } | ||
|
|
||
| /// Free-function form of the four-matrix symbolic fill step: forwards a, b, | ||
| /// c and d to spgemm_handle.multiply_symbolic_fill. | ||
| template <matrix A, matrix B, matrix C, matrix D> | ||
| requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
| __detail::is_csr_view_v<C> && __detail::has_csr_base<D> | ||
| void multiply_symbolic_fill(spgemm_state_t& spgemm_handle, A&& a, B&& b, C&& c, | ||
| D&& d) { | ||
| spgemm_handle.multiply_symbolic_fill(a, b, c, d); | ||
| } | ||
|
|
||
| /// Free-function form of the four-matrix numeric step: forwards a, b, c and | ||
| /// d to spgemm_handle.multiply_numeric. | ||
| template <matrix A, matrix B, matrix C, matrix D> | ||
| requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
| __detail::is_csr_view_v<C> && __detail::has_csr_base<D> | ||
| void multiply_numeric(spgemm_state_t& spgemm_handle, A&& a, B&& b, C&& c, | ||
| D&& d) { | ||
| spgemm_handle.multiply_numeric(a, b, c, d); | ||
| } | ||
|
|
||
| // The following overloads support C = A * B by passing a null D matrix. | ||
| /// Three-matrix compute step: builds a null matrix of C's view type and | ||
| /// passes it, scaled by 0.0, as the fourth operand of | ||
| /// spgemm_handle.multiply_compute. | ||
| template <matrix A, matrix B, matrix C> | ||
|   requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
|            __detail::is_csr_view_v<C> | ||
| void multiply_compute(spgemm_state_t& spgemm_handle, A&& a, B&& b, C&& c) { | ||
|   using null_view_type = std::remove_reference_t<C>; | ||
|   auto null_d = __rocsparse::create_null_matrix<null_view_type>(); | ||
|   spgemm_handle.multiply_compute(a, b, c, scaled(0.0, null_d)); | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we have either scaled(0.0, d) or d?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is to reuse the 4 arguments SpGEMM. It will give the same setup when using rocsparse_spgemm for C = A*B, null_matrix set the all pointer to nullptr. |
||
| } | ||
|
|
||
| /// Three-matrix fill step: builds a null matrix of C's view type and passes | ||
| /// it, scaled by 0.0, as the fourth operand of spgemm_handle.multiply_fill. | ||
| template <matrix A, matrix B, matrix C> | ||
|   requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
|            __detail::is_csr_view_v<C> | ||
| void multiply_fill(spgemm_state_t& spgemm_handle, A&& a, B&& b, C&& c) { | ||
|   using null_view_type = std::remove_reference_t<C>; | ||
|   auto null_d = __rocsparse::create_null_matrix<null_view_type>(); | ||
|   spgemm_handle.multiply_fill(a, b, c, scaled(0.0, null_d)); | ||
| } | ||
|
|
||
| /// Three-matrix symbolic compute step: builds a null matrix of C's view | ||
| /// type and passes it, scaled by 0.0, as the fourth operand of | ||
| /// spgemm_handle.multiply_compute. | ||
| template <matrix A, matrix B, matrix C> | ||
|   requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
|            __detail::is_csr_view_v<C> | ||
| void multiply_symbolic_compute(spgemm_state_t& spgemm_handle, A&& a, B&& b, | ||
|                                C&& c) { | ||
|   using null_view_type = std::remove_reference_t<C>; | ||
|   auto null_d = __rocsparse::create_null_matrix<null_view_type>(); | ||
|   spgemm_handle.multiply_compute(a, b, c, scaled(0.0, null_d)); | ||
| } | ||
|
|
||
| /// Three-matrix symbolic fill step: builds a null matrix of C's view type | ||
| /// and passes it, scaled by 0.0, as the fourth operand of | ||
| /// spgemm_handle.multiply_symbolic_fill. | ||
| template <matrix A, matrix B, matrix C> | ||
|   requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
|            __detail::is_csr_view_v<C> | ||
| void multiply_symbolic_fill(spgemm_state_t& spgemm_handle, A&& a, B&& b, | ||
|                             C&& c) { | ||
|   using null_view_type = std::remove_reference_t<C>; | ||
|   auto null_d = __rocsparse::create_null_matrix<null_view_type>(); | ||
|   spgemm_handle.multiply_symbolic_fill(a, b, c, scaled(0.0, null_d)); | ||
| } | ||
|
|
||
| /// Three-matrix numeric step: builds a null matrix of C's view type and | ||
| /// passes it, scaled by 0.0, as the fourth operand of | ||
| /// spgemm_handle.multiply_numeric. | ||
| template <matrix A, matrix B, matrix C> | ||
|   requires __detail::has_csr_base<A> && __detail::has_csr_base<B> && | ||
|            __detail::is_csr_view_v<C> | ||
| void multiply_numeric(spgemm_state_t& spgemm_handle, A&& a, B&& b, C&& c) { | ||
|   using null_view_type = std::remove_reference_t<C>; | ||
|   auto null_d = __rocsparse::create_null_matrix<null_view_type>(); | ||
|   spgemm_handle.multiply_numeric(a, b, c, scaled(0.0, null_d)); | ||
| } | ||
|
|
||
| } // namespace spblas | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| #pragma once | ||
|
|
||
| #include "multiply.hpp" | ||
| #include "multiply_spgemm.hpp" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is okay for now, but eventually we need to have one
`operation_state_t` that encapsulates this (if that's how you want to design it).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, if the `operation_state_t` is meant to be the user-friendly interface in the end. For now, I think keeping the states separate is clearer and makes future changes easier.