|
| 1 | +// SPDX-License-Identifier: BSD-3-Clause |
| 2 | + |
| 3 | +#include "ddptensor/CollComm.hpp" |
| 4 | + |
| 5 | +// Compute offset and displacements when mapping n_slc to o_slc. This is necessary when |
| 6 | +// slices are not equally partitioned. |
| 7 | +// |
| 8 | +// We assume we split in first dimension. |
| 9 | +// We also assume partitions are assigned to ranks in sequence from 0-N. |
| 10 | +// With this we know that our buffers (old and new) get data in the |
| 11 | +// same order. The only thing which might have changed is the tile-size. |
| 12 | +// Actually, the tile-size might change only if old or new shape does not evenly |
| 13 | +// distribute data (e.g. last partition is smaller). |
| 14 | +// In theory we could re-shape in-place when the norm-tile-size does not change. |
| 15 | +// This is not implemented: we need an extra mechanism to work with reshape-views or alike. |
| 16 | +std::vector<std::vector<int>> CollComm::map(const PVSlice & n_slc, const PVSlice & o_slc) |
| 17 | +{ |
| 18 | + auto nr = theTransceiver->nranks(); |
| 19 | + std::vector<int> counts_send(nr, 0); |
| 20 | + std::vector<int> disp_send(nr, 0); |
| 21 | + std::vector<int> counts_recv(nr, 0); |
| 22 | + std::vector<int> disp_recv(nr, 0); |
| 23 | + |
| 24 | + // norm tile-size of orig array |
| 25 | + auto o_ntsz = o_slc.tile_size(0); |
| 26 | + // tilesize of my local partition of orig array |
| 27 | + auto o_tsz = o_slc.tile_size(); |
| 28 | + // linearized local slice of orig array |
| 29 | + auto o_llslc = Slice(o_ntsz * theTransceiver->rank(), o_ntsz * theTransceiver->rank() + o_tsz); |
| 30 | + |
| 31 | + // norm tile-size of new (reshaped) array |
| 32 | + auto n_ntsz = n_slc.tile_size(0); |
| 33 | + // tilesize of my local partition of new (reshaped) array |
| 34 | + auto n_tsz = n_slc.tile_size(); |
| 35 | + // linearized/flattened/1d local slice of new (reshaped) array |
| 36 | + auto n_llslc = Slice(n_ntsz * theTransceiver->rank(), n_ntsz * theTransceiver->rank() + n_tsz); |
| 37 | + |
| 38 | + for(auto r=0; r<nr; ++r) { |
| 39 | + // determine what I receive from rank r |
| 40 | + // e.g. which parts of my new slice overlap with rank r's old slice |
| 41 | + // Get local slice of rank r of orig array |
| 42 | + auto o_rslc = o_slc.tile_slice(r); |
| 43 | + // Flatten to 1d |
| 44 | + auto o_lrslc = Slice(o_ntsz * r, o_ntsz * r + o_rslc.size()); |
| 45 | + // Determine overlap with local partition of linearized new array |
| 46 | + auto roverlap = n_llslc.overlap(o_lrslc); |
| 47 | + // number of elements to be received from rank r |
| 48 | + counts_recv[r] = roverlap.size(); |
| 49 | + // displacement in new array where elements from rank r get copied to |
| 50 | + disp_recv[r] = roverlap._start - n_llslc._start; |
| 51 | + |
| 52 | + // determine what I send to rank r |
| 53 | + // e.g. which parts of my old slice overlap with rank r's new slice |
| 54 | + // Get local slice of rank r of new array |
| 55 | + auto n_rslc = n_slc.tile_slice(r); |
| 56 | + // Flatten to 1d |
| 57 | + auto n_lrslc = Slice(n_ntsz * r, n_ntsz * r + n_rslc.size()); |
| 58 | + // Determine overlap with local partition of linearized orig array |
| 59 | + auto soverlap = o_llslc.overlap(n_lrslc); |
| 60 | + // number of elements to be send to rank r |
| 61 | + counts_send[r] = soverlap.size(); |
| 62 | + // displacement in orig array where elements from rank r get copied from |
| 63 | + disp_send[r] = soverlap._start - o_llslc._start; |
| 64 | + } |
| 65 | + return {counts_send, disp_send, counts_recv, disp_recv}; |
| 66 | +} |
0 commit comments