-
Notifications
You must be signed in to change notification settings - Fork 8
onemkl GPU version SpMV #52
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
26884c2
90cd049
ce922af
17d52b4
a69e41b
4bb2d5c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,70 @@ | ||||||||||||
| #pragma once | ||||||||||||
|
|
||||||||||||
| #include <sycl/sycl.hpp> | ||||||||||||
|
|
||||||||||||
| namespace spblas { | ||||||||||||
| namespace mkl { | ||||||||||||
|
|
||||||||||||
| template <typename T, std::size_t Alignment = 0> | ||||||||||||
| class mkl_allocator { | ||||||||||||
| public: | ||||||||||||
| using value_type = T; | ||||||||||||
| using pointer = T*; | ||||||||||||
| using const_pointer = const T*; | ||||||||||||
| using reference = T&; | ||||||||||||
| using const_reference = const T&; | ||||||||||||
| using size_type = std::size_t; | ||||||||||||
| using difference_type = std::ptrdiff_t; | ||||||||||||
|
Comment on lines
+8
to
+17
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why do we have a
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It currently follows the std::allocator design. In the operation, we will use |
||||||||||||
|
|
||||||||||||
| mkl_allocator() noexcept { | ||||||||||||
| auto* queue = new sycl::queue{sycl::default_selector_v}; | ||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. elsewhere we are using sycl::cpu_selector_v right now, but should probably switch to this default selector ...
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ahh i looked further and it seems you are handling that in spmv by using the queue that the state object is introduced with ...
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @yhmtsai @BenBrock @upsj @YvanMokwinski and any others who are interested in design of state/policy/allocator interactions here. We should probably huddle sometime soon and discuss ownership and interaction of queue (stream) indicating device intent between the two objects that will be input into each operation:
|
||||||||||||
| queue_manager_ = | ||||||||||||
| std::move(std::shared_ptr<sycl::queue>{queue, [](sycl::queue* q) { | ||||||||||||
| q->wait_and_throw(); | ||||||||||||
| delete q; | ||||||||||||
| }}); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| // taking a shallow copy of queue from elsewhere, so we don't own destruction | ||||||||||||
| mkl_allocator(sycl::queue* q) noexcept | ||||||||||||
| : queue_manager_(q, [](sycl::queue* q) {}) {} | ||||||||||||
|
Comment on lines
+29
to
+30
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||
|
|
||||||||||||
| template <typename U> | ||||||||||||
| mkl_allocator(const mkl_allocator<U, Alignment>& other) noexcept | ||||||||||||
| : queue_manager_(other.queue_) {} | ||||||||||||
|
|
||||||||||||
| mkl_allocator(const mkl_allocator&) = default; | ||||||||||||
| mkl_allocator& operator=(const mkl_allocator&) = default; | ||||||||||||
| ~mkl_allocator() = default; | ||||||||||||
|
|
||||||||||||
| using is_always_equal = std::false_type; | ||||||||||||
|
|
||||||||||||
| pointer allocate(std::size_t size) { | ||||||||||||
| return sycl::malloc_device<value_type>(size, *(this->queue())); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| void deallocate(pointer ptr, std::size_t n = 0) { | ||||||||||||
| if (ptr != nullptr) { | ||||||||||||
| sycl::free(ptr, *(this->queue())); | ||||||||||||
| } | ||||||||||||
| } | ||||||||||||
|
Comment on lines
+42
to
+50
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it seems that allocate/deallocate should be templated, and not the allocator class, right ? |
||||||||||||
|
|
||||||||||||
| bool operator==(const mkl_allocator&) const = default; | ||||||||||||
| bool operator!=(const mkl_allocator&) const = default; | ||||||||||||
|
|
||||||||||||
| template <typename U> | ||||||||||||
| struct rebind { | ||||||||||||
| using other = mkl_allocator<U, Alignment>; | ||||||||||||
| }; | ||||||||||||
|
|
||||||||||||
| sycl::queue* queue() const noexcept { | ||||||||||||
| return queue_manager_.get(); | ||||||||||||
| } | ||||||||||||
|
|
||||||||||||
| private: | ||||||||||||
| // using shared_ptr to support copy constructor | ||||||||||||
| std::shared_ptr<sycl::queue> queue_manager_; | ||||||||||||
| }; | ||||||||||||
|
|
||||||||||||
| } // namespace mkl | ||||||||||||
| } // namespace spblas | ||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,6 +2,7 @@ | |
|
|
||
| #include <oneapi/mkl.hpp> | ||
|
|
||
| #include "mkl_allocator.hpp" | ||
| #include <spblas/detail/log.hpp> | ||
| #include <spblas/detail/operation_info_t.hpp> | ||
| #include <spblas/detail/ranges.hpp> | ||
|
|
@@ -24,28 +25,53 @@ | |
|
|
||
| namespace spblas { | ||
|
|
||
| class spmv_state_t { | ||
| public: | ||
| spmv_state_t() : spmv_state_t(mkl::mkl_allocator<char>{}) {} | ||
|
|
||
| spmv_state_t(sycl::queue* q) : spmv_state_t(mkl::mkl_allocator<char>{q}) {} | ||
|
|
||
| spmv_state_t(mkl::mkl_allocator<char> alloc) : alloc_(alloc) {} | ||
|
|
||
| sycl::queue* queue() { | ||
| return alloc_.queue(); | ||
| } | ||
|
|
||
| private: | ||
| mkl::mkl_allocator<char> alloc_; | ||
| }; | ||
|
Comment on lines
+28
to
+42
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should we switch to using
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. again, I'm not sure we want a |
||
|
|
||
| template <matrix A, vector X, vector Y> | ||
| requires((__detail::has_csr_base<A> || __detail::has_csc_base<A>) && | ||
| __detail::has_contiguous_range_base<X> && | ||
| __ranges::contiguous_range<Y>) | ||
| void multiply(A&& a, X&& x, Y&& y) { | ||
| void multiply(spmv_state_t& state, A&& a, X&& x, Y&& y) { | ||
| log_trace(""); | ||
| auto a_base = __detail::get_ultimate_base(a); | ||
| auto x_base = __detail::get_ultimate_base(x); | ||
|
|
||
| auto alpha_optional = __detail::get_scaling_factor(a, x); | ||
| tensor_scalar_t<A> alpha = alpha_optional.value_or(1); | ||
|
|
||
| sycl::queue q(sycl::cpu_selector_v); | ||
| auto q_ptr = state.queue(); | ||
|
|
||
| auto a_handle = __mkl::create_matrix_handle(q, a_base); | ||
| auto a_handle = __mkl::create_matrix_handle(*q_ptr, a_base); | ||
| auto a_transpose = __mkl::get_transpose(a); | ||
|
|
||
| oneapi::mkl::sparse::gemv(q, a_transpose, alpha, a_handle, | ||
| oneapi::mkl::sparse::gemv(*q_ptr, a_transpose, alpha, a_handle, | ||
| __ranges::data(x_base), 0.0, __ranges::data(y)) | ||
| .wait(); | ||
|
|
||
| oneapi::mkl::sparse::release_matrix_handle(q, &a_handle).wait(); | ||
| oneapi::mkl::sparse::release_matrix_handle(*q_ptr, &a_handle).wait(); | ||
| } | ||
|
|
||
| template <matrix A, vector X, vector Y> | ||
| requires((__detail::has_csr_base<A> || __detail::has_csc_base<A>) && | ||
| __detail::has_contiguous_range_base<X> && | ||
| __ranges::contiguous_range<Y>) | ||
| void multiply(A&& a, X&& x, Y&& y) { | ||
| spmv_state_t state; | ||
| multiply(state, a, x, y); | ||
| } | ||
|
|
||
| } // namespace spblas | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,59 @@ | ||
| #pragma once | ||
| #include <iterator> | ||
| #include <memory> | ||
| #include <spblas/vendor/onemkl_sycl/mkl_allocator.hpp> | ||
| #include <sycl/sycl.hpp> | ||
| #include <vector> | ||
|
|
||
| namespace thrust { | ||
|
|
||
| template <typename InputIt, typename OutputIt> | ||
| requires(std::contiguous_iterator<InputIt> && | ||
| std::contiguous_iterator<OutputIt>) | ||
| OutputIt copy(InputIt first, InputIt last, OutputIt d_first) { | ||
| sycl::queue queue(sycl::default_selector_v); | ||
| using input_value_type = typename std::iterator_traits<InputIt>::value_type; | ||
| using output_value_type = typename std::iterator_traits<OutputIt>::value_type; | ||
| input_value_type* first_ptr = std::to_address(first); | ||
| output_value_type* d_first_ptr = std::to_address(d_first); | ||
| auto num = std::distance(first, last); | ||
| queue.memcpy(d_first_ptr, first_ptr, num * sizeof(input_value_type)) | ||
| .wait_and_throw(); | ||
| return d_first + num; | ||
| } | ||
|
|
||
| // incompleted impl for thrust vector in oneMKL just for test usage | ||
| template <typename ValueType> | ||
| class device_vector { | ||
| public: | ||
| device_vector(std::vector<ValueType> host_vector) | ||
| : alloc_{}, size_(host_vector.size()), ptr_(nullptr) { | ||
| ptr_ = alloc_.allocate(size_); | ||
| thrust::copy(host_vector.begin(), host_vector.end(), ptr_); | ||
| } | ||
|
|
||
| ~device_vector() { | ||
| alloc_.deallocate(ptr_, size_); | ||
| ptr_ = nullptr; | ||
| } | ||
|
|
||
| ValueType* begin() { | ||
| return ptr_; | ||
| } | ||
|
|
||
| ValueType* end() { | ||
| return ptr_ + size_; | ||
| } | ||
|
|
||
| // just to give data().get() | ||
| std::shared_ptr<ValueType> data() { | ||
| return std::shared_ptr<ValueType>(ptr_, [](ValueType* ptr) {}); | ||
| } | ||
|
|
||
| private: | ||
| spblas::mkl::mkl_allocator<ValueType> alloc_; | ||
| std::size_t size_; | ||
| ValueType* ptr_; | ||
| }; | ||
|
|
||
| } // namespace thrust |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -3,7 +3,12 @@ | |
| #include <spblas/spblas.hpp> | ||
|
|
||
| #include <gtest/gtest.h> | ||
|
|
||
| #ifdef SPBLAS_ENABLE_ONEMKL_SYCL | ||
| #include "onemkl/device_vector.hpp" | ||
| #else | ||
|
Comment on lines
+6
to
+9
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so this is generalizing the rocsparse device spmv_test to just a device/spmv_test.cpp for rocsparse/mkl_sycl and in the future others ? Should we change the name from
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I put it into #40 (change it to device). I tended to avoid the same changes in two prs because it might make the reviews hard to exchange the idea in the same pr. I can move that into here in case this pr moves more quickly.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can I suggest we shift it to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do different folders here contain the tests with different vector allocation or specific functions? |
||
| #include <thrust/device_vector.h> | ||
| #endif | ||
|
|
||
| using value_t = float; | ||
| using index_t = spblas::index_t; | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.