Skip to content
This repository was archived by the owner on Jan 26, 2026. It is now read-only.

Commit 78ac247

Browse files
committed
using latest imex; connecting idtr to enable proper distributed operation
1 parent eaa83b5 commit 78ac247

File tree

11 files changed

+174
-78
lines changed

11 files changed

+174
-78
lines changed

CMakeLists.txt

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,8 @@ add_custom_command(
8484
# ============
8585

8686
FILE(GLOB Hpps ${PROJECT_SOURCE_DIR}/src/include/ddptensor/*.hpp)
87-
set(Hpps ${Hpps} ${P2C_HPP})
87+
FILE(GLOB JitHpps ${PROJECT_SOURCE_DIR}/src/include/ddptensor/jit/*.hpp)
88+
set(Hpps ${Hpps} ${JitHpps} ${P2C_HPP})
8889

8990
set(DDPTSrcs
9091
${PROJECT_SOURCE_DIR}/src/ddptensor.cpp
@@ -153,7 +154,11 @@ target_link_libraries(idtr PRIVATE
153154
${MPI_C_LIBRARIES}
154155
# ${MKL_LIBRARIES}
155156
tbb
156-
${imex_all_libs}
157+
IMEXPTensorDialect
158+
IMEXPTensorTransforms
159+
IMEXPTensorToLinalg
160+
IMEXDistDialect
161+
IMEXDistToStandard
157162
MLIROptLib
158163
MLIRExecutionEngine
159164
MLIRIR

src/Creator.cpp

Lines changed: 15 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,13 @@
55
#include "ddptensor/DDPTensorImpl.hpp"
66

77
#include <imex/Dialect/PTensor/IR/PTensorOps.h>
8+
#include <imex/internal/PassUtils.h>
9+
810
#include <mlir/IR/Builders.h>
11+
#include <mlir/Dialect/Arithmetic/IR/Arithmetic.h>
12+
#include <mlir/Dialect/Shape/IR/Shape.h>
13+
#include <mlir/Dialect/Tensor/IR/Tensor.h>
14+
#include <mlir/Dialect/Linalg/IR/Linalg.h>
915

1016
#if 0
1117
namespace x {
@@ -161,17 +167,15 @@ struct DeferredArange : public Deferred
161167

162168
bool generate_mlir(::mlir::OpBuilder & builder, ::mlir::Location loc, jit::DepManager & dm) override
163169
{
164-
// create start, stop and step
165-
auto start = jit::createI64(loc, builder, _start);
166-
auto end = jit::createI64(loc, builder, _end);
167-
auto step = jit::createI64(loc, builder, _step);
168-
// create arange
169-
auto dtype = builder.getI64Type();
170-
assert(_dtype == INT64 || _dtype == UINT64); // FIXME
171-
llvm::SmallVector<int64_t> shape(1, -1); //::mlir::ShapedType::kDynamicSize);
172-
auto artype = ::imex::ptensor::PTensorType::get(builder.getContext(), ::mlir::RankedTensorType::get(shape, dtype), true);
173-
dm.addVal(guid(),
174-
builder.create<::imex::ptensor::ARangeOp>(loc, artype, start, end, step, true),
170+
auto start = ::imex::createInt(loc, builder, _start);
171+
auto stop = ::imex::createInt(loc, builder, _end);
172+
auto step = ::imex::createInt(loc, builder, _step);
173+
auto dtype = builder.getI64Type(); // FIXME
174+
auto artype = ::imex::ptensor::PTensorType::get(builder.getContext(), ::mlir::RankedTensorType::get({-1}, dtype), false, true);
175+
auto dmy = ::imex::createInt<1>(loc, builder, 0);
176+
auto team = ::imex::createInt(loc, builder, 1);
177+
dm.addVal(this->guid(),
178+
builder.create<::imex::ptensor::ARangeOp>(loc, artype, start, stop, step, dmy, team),
175179
[this](uint64_t rank, void *allocated, void *aligned, intptr_t offset, const intptr_t * sizes, const intptr_t * strides) {
176180
assert(rank == 1);
177181
assert(strides[0] == 1);

src/Deferred.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#include <oneapi/tbb/concurrent_queue.h>
99
#include <mlir/Dialect/Func/IR/FuncOps.h>
1010
#include <imex/Dialect/PTensor/IR/PTensorOps.h>
11+
#include <imex/Dialect/Dist/IR/DistOps.h>
1112
#include <mlir/Dialect/LLVMIR/LLVMDialect.h>
1213

1314
#include <iostream>
@@ -71,10 +72,11 @@ void process_promises()
7172

7273
// Create a MLIR module
7374
auto module = builder.create<::mlir::ModuleOp>(loc);
74-
// Create a func
75-
auto dtype = builder.getI64Type();
75+
auto protos = builder.create<::imex::dist::RuntimePrototypesOp>(loc);
76+
module.push_back(protos);
77+
// Create the jit func
7678
// create dummy type, we'll replace it with the actual type later
77-
auto dummyFuncType = builder.getFunctionType({}, dtype);
79+
auto dummyFuncType = builder.getFunctionType({}, {});
7880
std::string fname("ddpt_jit");
7981
auto function = builder.create<::mlir::func::FuncOp>(loc, fname, dummyFuncType);
8082
// create function entry block
@@ -109,11 +111,12 @@ void process_promises()
109111
uint64_t osz = dm.handleResult(builder);
110112
// also request generation of c-wrapper function
111113
function->setAttr(::mlir::LLVM::LLVMDialect::getEmitCWrapperAttrName(), ::mlir::UnitAttr::get(&jit._context));
114+
function.getFunctionType().dump();
112115
// add the function to the module
113116
module.push_back(function);
114117
module.dump();
115118

116-
// get input buffers (before rsults!)
119+
// get input buffers (before results!)
117120
auto input = std::move(dm.store_inputs());
118121

119122
// compile and run the module

src/EWBinOp.cpp

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313

1414
#include <imex/Dialect/PTensor/IR/PTensorOps.h>
1515
#include <mlir/IR/Builders.h>
16+
#include <mlir/Dialect/Shape/IR/Shape.h>
1617

1718
// #######################################################################################
1819
// The 2 operators/tensors can have shifted partitions, e.g. local data might not be the
@@ -32,7 +33,7 @@
3233
// 2. local data which does not need communication
3334
// 3. Trailing remote data
3435
//
35-
// We attempt to minize copies by treating each region explicitly, e.g. data
36+
// We attempt to minimize copies by treating each region explicitly, e.g. data
3637
// which is already local will not be copied or communicated.
3738
//
3839
// Additionally, to reduce generated code size we convert buffers to the result
@@ -459,10 +460,10 @@ struct DeferredEWBinOp : public Deferred
459460
bool generate_mlir(::mlir::OpBuilder & builder, ::mlir::Location loc, jit::DepManager & dm) override
460461
{
461462
// FIXME the type of the result is based on a only
462-
auto a = dm.getDependent(builder, _a);
463-
auto b = dm.getDependent(builder, _b);
464-
dm.addVal(guid(),
465-
builder.create<::imex::ptensor::EWBinOp>(loc, a.getType(), builder.getI32IntegerAttr(ddpt2mlir(_op)), a, b),
463+
auto av = dm.getDependent(builder, _a);
464+
auto bv = dm.getDependent(builder, _b);
465+
dm.addVal(this->guid(),
466+
builder.create<::imex::ptensor::EWBinOp>(loc, av.getType(), builder.getI32IntegerAttr(ddpt2mlir(_op)), av, bv),
466467
[this](uint64_t rank, void *allocated, void *aligned, intptr_t offset, const intptr_t * sizes, const intptr_t * strides) {
467468
this->set_value(std::move(mk_tnsr(_dtype, rank, allocated, aligned, offset, sizes, strides)));
468469
});

src/ReduceOp.cpp

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88

99
#include <imex/Dialect/PTensor/IR/PTensorOps.h>
1010
#include <mlir/IR/Builders.h>
11+
#include <mlir/Dialect/Shape/IR/Shape.h>
1112

1213
#if 0
1314
namespace x {
@@ -119,17 +120,17 @@ struct DeferredReduceOp : public Deferred
119120
bool generate_mlir(::mlir::OpBuilder & builder, ::mlir::Location loc, jit::DepManager & dm) override
120121
{
121122
// FIXME reduction over individual dimensions is not supported
122-
auto a = dm.getDependent(builder, _a);
123-
auto a_ptt = a.getType().dyn_cast<::imex::ptensor::PTensorType>();
124-
assert(a_ptt);
125-
126-
auto rtyp = ::imex::ptensor::PTensorType::get(
127-
builder.getContext(),
128-
::mlir::RankedTensorType::get(llvm::SmallVector<int64_t>(), a_ptt.getRtensor().getElementType()),
129-
true
130-
);
131-
dm.addVal(guid(),
132-
builder.create<::imex::ptensor::ReductionOp>(loc, rtyp, builder.getI32IntegerAttr(ddpt2mlir(_op)), a),
123+
auto av = dm.getDependent(builder, _a);
124+
auto aPtTyp = av.getType().dyn_cast<::imex::ptensor::PTensorType>();
125+
assert(aPtTyp);
126+
// return type 0d with same dtype as input
127+
auto dtype = aPtTyp.getRtensor().getElementType();
128+
auto retPtTyp = ::imex::ptensor::PTensorType::get(builder.getContext(), ::mlir::RankedTensorType::get({}, dtype), false, true);
129+
// reduction op
130+
auto mop = ddpt2mlir(_op);
131+
auto op = builder.getIntegerAttr(builder.getIntegerType(sizeof(mop)*8), mop);
132+
dm.addVal(this->guid(),
133+
builder.create<::imex::ptensor::ReductionOp>(loc, retPtTyp, op, av),
133134
[this](uint64_t rank, void *allocated, void *aligned, intptr_t offset, const intptr_t * sizes, const intptr_t * strides) {
134135
this->set_value(std::move(mk_tnsr(_dtype, rank, allocated, aligned, offset, sizes, strides)));
135136
});

src/Service.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -104,7 +104,8 @@ ddptensor * Service::replicate(const ddptensor & a)
104104

105105
void Service::run()
106106
{
107-
defer_lambda([](){ return true; });
107+
defer<DeferredService>(DeferredService::RUN);
108+
// defer_lambda([](){ return true; });
108109
}
109110

110111
bool inited = false;

src/idtr.cpp

Lines changed: 67 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,39 +4,64 @@
44
#include <ddptensor/DDPTensorImpl.hpp>
55
#include <ddptensor/MPITransceiver.hpp>
66

7+
#include <imex/Dialect/PTensor/IR/PTensorOps.h>
8+
79
#include <cassert>
810
#include <memory>
911

1012
using container_type = std::unordered_map<id_type, std::unique_ptr<DDPTensorImpl>>;
1113

1214
static container_type gtensors;
15+
static id_type _nguid = -1;
16+
inline id_type get_guid()
17+
{
18+
return ++_nguid;
19+
}
1320

1421
// Transceiver * theTransceiver = MPITransceiver();
1522

23+
template<typename T>
24+
T * mr_to_ptr(void * ptr, intptr_t offset)
25+
{
26+
auto mr = reinterpret_cast<intptr_t*>(ptr);
27+
return reinterpret_cast<T*>(ptr) + offset; // &mr.aligned[mr.offset]
28+
}
29+
1630
extern "C" {
1731

1832
// Register a global tensor of given shape.
19-
// Accepts a guid which might have been reserved before. Returns guid (reserved or new).
33+
// Returns guid.
2034
// The runtime does not own or manage any memory.
21-
id_t idtr_init_dtensor(const uint64_t * shape, uint64_t N, id_t guid)
35+
id_t idtr_init_dtensor(const uint64_t * shape, uint64_t nD)
2236
{
23-
assert(guid != UNKNOWN_GUID);
24-
gtensors[guid] = std::unique_ptr<DDPTensorImpl>(new DDPTensorImpl(shape, N));
37+
auto guid = get_guid();
38+
gtensors[guid] = std::unique_ptr<DDPTensorImpl>(nD ? new DDPTensorImpl(shape, nD) : new DDPTensorImpl);
2539
return guid;
2640
}
2741

42+
id_t _idtr_init_dtensor(void * alloced, void * aligned, intptr_t offset, intptr_t size, intptr_t stride, uint64_t nD)
43+
{
44+
return idtr_init_dtensor(mr_to_ptr<uint64_t>(aligned, offset), nD);
45+
}
46+
2847
// Get the offsets (one for each dimension) of the local partition of a distributed tensor in number of elements.
2948
// Result is stored in provided array.
30-
void idtr_local_offsets(id_t guid, uint64_t * offsets, uint64_t N)
49+
void idtr_local_offsets(id_t guid, uint64_t * offsets, uint64_t nD)
3150
{
3251
const auto & tnsr = gtensors.at(guid);
3352
auto slcs = tnsr->slice().local_slice().slices();
53+
assert(nD == slcs.size());
3454
int i = -1;
3555
for(auto s : slcs) {
3656
offsets[++i] = s._start;
3757
}
3858
}
3959

60+
void _idtr_local_offsets(id_t guid, void * alloced, void * aligned, intptr_t offset, intptr_t size, intptr_t stride, uint64_t nD)
61+
{
62+
idtr_local_offsets(guid, mr_to_ptr<uint64_t>(aligned, offset), nD);
63+
}
64+
4065
// Get the shape (one size for each dimension) of the local partition of a distributed tensor in number of elements.
4166
// Result is stored in provided array.
4267
void idtr_local_shape(id_t guid, uint64_t * lshape, uint64_t N)
@@ -46,10 +71,45 @@ void idtr_local_shape(id_t guid, uint64_t * lshape, uint64_t N)
4671
std::copy(shp.begin(), shp.end(), lshape);
4772
}
4873

74+
void _idtr_local_shape(id_t guid, void * alloced, void * aligned, intptr_t offset, intptr_t size, intptr_t stride, uint64_t nD)
75+
{
76+
idtr_local_shape(guid, mr_to_ptr<uint64_t>(aligned, offset), nD);
77+
}
78+
79+
// convert id of our reduction op to id of imex::ptensor reduction op
80+
static ReduceOpId mlir2ddpt(const ::imex::ptensor::ReduceOpId rop)
81+
{
82+
switch(rop) {
83+
case ::imex::ptensor::MEAN:
84+
return MEAN;
85+
case ::imex::ptensor::PROD:
86+
return PROD;
87+
case ::imex::ptensor::SUM:
88+
return SUM;
89+
case ::imex::ptensor::STD:
90+
return STD;
91+
case ::imex::ptensor::VAR:
92+
return VAR;
93+
case ::imex::ptensor::MAX:
94+
return MAX;
95+
case MIN:
96+
return MIN;
97+
default:
98+
throw std::runtime_error("Unknown reduction operation");
99+
}
100+
}
101+
49102
// Elementwise inplace allreduce
50-
void idtr_reduce_all(void * inout, DTypeId dtype, size_t N, RedOpType op)
103+
void idtr_reduce_all(void * inout, DTypeId dtype, uint64_t N, int op)
104+
{
105+
106+
getTransceiver()->reduce_all(inout, dtype, N, mlir2ddpt(static_cast<imex::ptensor::ReduceOpId>(op)));
107+
}
108+
109+
// FIXME hard-coded 0d tensor
110+
void _idtr_reduce_all(uint64_t * allocated, uint64_t * aligned, uint64_t offset, DTypeId dtype, int op)
51111
{
52-
getTransceiver()->reduce_all(inout, dtype, N, op);
112+
idtr_reduce_all(aligned + offset, dtype, 1, op);
53113
}
54114

55115
} // extern "C"

src/include/ddptensor/DDPTensorImpl.hpp

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -21,12 +21,12 @@ class DDPTensorImpl : public tensor_i
2121
{
2222
mutable rank_type _owner;
2323
PVSlice _slice;
24-
void * _allocated;
25-
void * _aligned;
26-
intptr_t * _sizes;
27-
intptr_t * _strides;
28-
uint64_t _offset;
29-
DTypeId _dtype;
24+
void * _allocated = nullptr;
25+
void * _aligned = nullptr;
26+
intptr_t * _sizes = nullptr;
27+
intptr_t * _strides = nullptr;
28+
uint64_t _offset = 0;
29+
DTypeId _dtype = DTYPE_LAST;
3030

3131
public:
3232
using ptr_type = std::shared_ptr<DDPTensorImpl>;
@@ -54,9 +54,6 @@ class DDPTensorImpl : public tensor_i
5454
DDPTensorImpl(DTypeId dtype, const shape_type & shp, rank_type owner=NOOWNER)
5555
: _owner(owner),
5656
_slice(shp, static_cast<int>(owner==REPLICATED ? NOSPLIT : 0)),
57-
_allocated(nullptr),
58-
_aligned(nullptr),
59-
_offset(0),
6057
_dtype(dtype)
6158
{
6259
alloc();
@@ -73,11 +70,14 @@ class DDPTensorImpl : public tensor_i
7370
// incomplete, useful for computing meta information
7471
DDPTensorImpl(const uint64_t * shape, uint64_t N, rank_type owner=NOOWNER)
7572
: _owner(owner),
76-
_slice(shape_type(shape, shape+N), static_cast<int>(owner==REPLICATED ? NOSPLIT : 0)),
77-
_allocated(nullptr),
78-
_aligned(nullptr),
79-
_offset(0),
80-
_dtype(DTYPE_LAST)
73+
_slice(shape_type(shape, shape+N), static_cast<int>(owner==REPLICATED ? NOSPLIT : 0))
74+
{
75+
}
76+
77+
// incomplete, useful for computing meta information
78+
DDPTensorImpl()
79+
: _owner(REPLICATED),
80+
_slice(shape_type(), static_cast<int>(NOSPLIT))
8181
{
8282
}
8383

src/include/ddptensor/idtr.hpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,7 @@ extern "C" {
1111
// Register a global tensor of given shape.
1212
// Accepts a guid which might have been reserved before. Returns guid (reserved or new).
1313
// The runtime does not own or manage any memory.
14-
id_t idtr_nit_dtensor(const uint64_t * shape, uint64_t N, id_t guid = UNKNOWN_GUID);
14+
id_t idtr_init_dtensor(const uint64_t * shape, uint64_t N);
1515

1616
// Get the offsets (one for each dimension) of the local partition of a distributed tensor in number of elements.
1717
// Result is stored in provided array.
@@ -22,6 +22,6 @@ extern "C" {
2222
void idtr_local_shape(id_t guid, uint64_t * lshape, uint64_t N);
2323

2424
// Elementwise inplace allreduce
25-
void idtr_reduce_all(void * inout, DTypeId T, size_t N, RedOpType op);
26-
25+
void idtr_reduce_all(void * inout, DTypeId dtype, uint64_t N, int op);
26+
2727
} // extern "C"

0 commit comments

Comments (0)