Skip to content
This repository was archived by the owner on Jan 26, 2026. It is now read-only.

Commit 883f594

Browse files
committed
supporting shifted ew binary ops (except scalar only)
1 parent 14a862a commit 883f594

File tree

18 files changed

+1058
-297
lines changed

18 files changed

+1058
-297
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ set(MyCppSources ${MyCppSources} ${P2C_HPP})
4242

4343
pybind11_add_module(_ddptensor MODULE ${MyCppSources})
4444

45-
target_compile_definitions(_ddptensor PRIVATE XTENSOR_USE_XSIMD=1 XTENSOR_USE_TBB=1 DDPT_2TYPES=1 USE_MKL=1)
45+
target_compile_definitions(_ddptensor PRIVATE XTENSOR_USE_XSIMD=1 XTENSOR_USE_TBB=1 USE_MKL=1 DDPT_2TYPES=1)
4646
target_include_directories(_ddptensor PRIVATE
4747
${PROJECT_SOURCE_DIR}/src/include
4848
${PROJECT_SOURCE_DIR}/third_party/xtl/include

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def build_cmake(self, ext):
2929
extdir.parent.mkdir(parents=True, exist_ok=True)
3030

3131
# example of cmake args
32-
config = 'Debug' if self.debug else 'RelWithDebInfo' #'Release'
32+
config = 'Debug'# if self.debug else 'RelWithDebInfo' #'Release'
3333
cmake_args = [
3434
'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + str(extdir.parent.absolute()),
3535
'-DCMAKE_BUILD_TYPE=' + config

src/CollComm.cpp

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
// SPDX-License-Identifier: BSD-3-Clause
2+
3+
#include "ddptensor/CollComm.hpp"
4+
5+
// Compute offset and displacements when mapping n_slc to o_slc. This is necessary when
6+
// slices are not equally partitioned.
7+
//
8+
// We assume we split in first dimension.
9+
// We also assume partitions are assigned to ranks in sequence from 0-N.
10+
// With this we know that our buffers (old and new) get data in the
11+
// same order. The only thing which might have changed is the tile-size.
12+
// Actually, the tile-size might change only if old or new shape does not evenly
13+
// distribute data (e.g. last partition is smaller).
14+
// In theory we could re-shape in-place when the norm-tile-size does not change.
15+
// This is not implemented: we need an extra mechanism to work with reshape-views or alike.
16+
std::vector<std::vector<int>> CollComm::map(const PVSlice & n_slc, const PVSlice & o_slc)
17+
{
18+
auto nr = theTransceiver->nranks();
19+
std::vector<int> counts_send(nr, 0);
20+
std::vector<int> disp_send(nr, 0);
21+
std::vector<int> counts_recv(nr, 0);
22+
std::vector<int> disp_recv(nr, 0);
23+
24+
// norm tile-size of orig array
25+
auto o_ntsz = o_slc.tile_size(0);
26+
// tilesize of my local partition of orig array
27+
auto o_tsz = o_slc.tile_size();
28+
// linearized local slice of orig array
29+
auto o_llslc = Slice(o_ntsz * theTransceiver->rank(), o_ntsz * theTransceiver->rank() + o_tsz);
30+
31+
// norm tile-size of new (reshaped) array
32+
auto n_ntsz = n_slc.tile_size(0);
33+
// tilesize of my local partition of new (reshaped) array
34+
auto n_tsz = n_slc.tile_size();
35+
// linearized/flattened/1d local slice of new (reshaped) array
36+
auto n_llslc = Slice(n_ntsz * theTransceiver->rank(), n_ntsz * theTransceiver->rank() + n_tsz);
37+
38+
for(auto r=0; r<nr; ++r) {
39+
// determine what I receive from rank r
40+
// e.g. which parts of my new slice overlap with rank r's old slice
41+
// Get local slice of rank r of orig array
42+
auto o_rslc = o_slc.tile_slice(r);
43+
// Flatten to 1d
44+
auto o_lrslc = Slice(o_ntsz * r, o_ntsz * r + o_rslc.size());
45+
// Determine overlap with local partition of linearized new array
46+
auto roverlap = n_llslc.overlap(o_lrslc);
47+
// number of elements to be received from rank r
48+
counts_recv[r] = roverlap.size();
49+
// displacement in new array where elements from rank r get copied to
50+
disp_recv[r] = roverlap._start - n_llslc._start;
51+
52+
// determine what I send to rank r
53+
// e.g. which parts of my old slice overlap with rank r's new slice
54+
// Get local slice of rank r of new array
55+
auto n_rslc = n_slc.tile_slice(r);
56+
// Flatten to 1d
57+
auto n_lrslc = Slice(n_ntsz * r, n_ntsz * r + n_rslc.size());
58+
// Determine overlap with local partition of linearized orig array
59+
auto soverlap = o_llslc.overlap(n_lrslc);
60+
// number of elements to be send to rank r
61+
counts_send[r] = soverlap.size();
62+
// displacement in orig array where elements from rank r get copied from
63+
disp_send[r] = soverlap._start - o_llslc._start;
64+
}
65+
return {counts_send, disp_send, counts_recv, disp_recv};
66+
}

src/Creator.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ namespace x {
5050
static ptr_type op(uint64_t start, uint64_t end, uint64_t step)
5151
{
5252
PVSlice pvslice({static_cast<uint64_t>(Slice(start, end, step).size())});
53-
auto lslc = pvslice.slice_of_rank();
53+
auto lslc = pvslice.local_slice();
5454
const auto & l1dslc = lslc.dim(0);
5555
auto a = xt::arange<T>(start + l1dslc._start*step, start + l1dslc._end * step, l1dslc._step);
5656
auto r = operatorx<T>::mk_tx(std::move(pvslice), std::move(a));

0 commit comments

Comments
 (0)