enabling non-compiled operations, adding sort, disabling PVSlice

fschlimb · fschlimb · commit 38dd6f9f6dcd · 2022-11-09T04:52:34.000-06:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -100,6 +100,7 @@ set(DDPTSrcs
     ${PROJECT_SOURCE_DIR}/src/ReduceOp.cpp
     ${PROJECT_SOURCE_DIR}/src/Service.cpp
     ${PROJECT_SOURCE_DIR}/src/SetGetItem.cpp
+    ${PROJECT_SOURCE_DIR}/src/Sorting.cpp
 )
 set(IDTRSrcs
     ${PROJECT_SOURCE_DIR}/src/idtr.cpp
diff --git a/ddptensor/__init__.py b/ddptensor/__init__.py
@@ -102,3 +102,8 @@ def to_numpy(a):
         exec(
             f"{func} = lambda this: dtensor(_cdt.LinAlgOp.{func}(this._t))"
         )
+
+for func in api.api_categories["SortOp"]:
+    exec(
+        f"{func} = lambda this, axis=-1, descending=False, stable=True: dtensor(_cdt.SortOp.{func}(this._t, descending))"
+    )
diff --git a/ddptensor/array_api.py b/ddptensor/array_api.py
@@ -190,6 +190,11 @@
         "tensordot",  # (x1, x2, /, *, axes=2)
         "vecdot",  # (x1, x2, /, *, axis=-1)
     ],
+
+    "SortOp" : [
+        "argsort",  # (x, /, *, axis=-1, descending=False, stable=True)
+        "sort",     #(x, /, *, axis=-1, descending=False, stable=True)
+    ],
 })
 
 misc_methods = [
diff --git a/src/Deferred.cpp b/src/Deferred.cpp
@@ -88,12 +88,11 @@ void process_promises()
 
         jit::DepManager dm(function);
 
+        Runable::ptr_type d;
         while(true) {
-            Runable::ptr_type d;
             _deferred.pop(d);
             if(d) {
                 if(d->generate_mlir(builder, loc, dm)) {
-                    d.reset();
                     break;
                 };
                 // keep alive for later set_value
@@ -105,28 +104,30 @@ void process_promises()
             }
         }
 
-        if(runables.empty()) continue;
+        if(!runables.empty()) {
+            // create return statement and adjust function type
+            uint64_t osz = dm.handleResult(builder);
+            // also request generation of c-wrapper function
+            function->setAttr(::mlir::LLVM::LLVMDialect::getEmitCWrapperAttrName(), ::mlir::UnitAttr::get(&jit._context));
+            function.getFunctionType().dump(); std::cout << std::endl;
+            // add the function to the module
+            module.push_back(function);
 
-        // create return statement and adjust function type
-        uint64_t osz = dm.handleResult(builder);
-        // also request generation of c-wrapper function
-        function->setAttr(::mlir::LLVM::LLVMDialect::getEmitCWrapperAttrName(), ::mlir::UnitAttr::get(&jit._context));
-        function.getFunctionType().dump();
-        // add the function to the module
-        module.push_back(function);
-        module.dump();
+            // get input buffers (before results!)
+            auto input = std::move(dm.store_inputs());
 
-        // get input buffers (before results!)
-        auto input = std::move(dm.store_inputs());
+            // compile and run the module
+            intptr_t * output = new intptr_t[osz];
+            if(jit.run(module, fname, input, output)) throw std::runtime_error("failed running jit");
 
-        // compile and run the module
-        intptr_t * output = new intptr_t[osz];
-        if(jit.run(module, fname, input, output)) throw std::runtime_error("failed running jit");
+            // push results to deliver promises
+            dm.deliver(output, osz);
 
-        // push results to deliver promises
-        dm.deliver(output, osz);
+            delete [] output;
+        } // no else needed
 
-        delete [] output;
+        // now we execute the deferred action which could not be compiled
+        if(d) d->run();
     } while(!done);
 }
 
diff --git a/src/EWBinOp.cpp b/src/EWBinOp.cpp
@@ -12,6 +12,7 @@
 #include "ddptensor/DDPTensorImpl.hpp"
 
 #include <imex/Dialect/PTensor/IR/PTensorOps.h>
+#include <imex/Dialect/Dist/IR/DistOps.h>
 #include <mlir/IR/Builders.h>
 #include <mlir/Dialect/Shape/IR/Shape.h>
 
@@ -462,8 +463,12 @@ struct DeferredEWBinOp : public Deferred
         // FIXME the type of the result is based on a only
         auto av = dm.getDependent(builder, _a);
         auto bv = dm.getDependent(builder, _b);
+
+        auto aPtTyp = ::imex::dist::getPTensorType(av);
+        assert(aPtTyp);
+
         dm.addVal(this->guid(),
-                  builder.create<::imex::ptensor::EWBinOp>(loc, av.getType(), builder.getI32IntegerAttr(ddpt2mlir(_op)), av, bv),
+                  builder.create<::imex::ptensor::EWBinOp>(loc, aPtTyp, builder.getI32IntegerAttr(ddpt2mlir(_op)), av, bv),
                   [this](uint64_t rank, void *allocated, void *aligned, intptr_t offset, const intptr_t * sizes, const intptr_t * strides,
                          uint64_t * gs_allocated, uint64_t * gs_aligned, uint64_t * lo_allocated, uint64_t * lo_aligned) {
             this->set_value(std::move(mk_tnsr(_dtype, rank, allocated, aligned, offset, sizes, strides,
diff --git a/src/ReduceOp.cpp b/src/ReduceOp.cpp
@@ -122,14 +122,9 @@ struct DeferredReduceOp : public Deferred
     {
         // FIXME reduction over individual dimensions is not supported
         auto av = dm.getDependent(builder, _a);
-        auto aDtTyp = av.getType().dyn_cast<::imex::dist::DistTensorType>();
-        ::mlir::Type dtype;
-        if(aDtTyp) {
-            dtype = aDtTyp.getPTensorType().getRtensor().getElementType();
-        } else {
-            auto aPtTyp = av.getType().dyn_cast<::imex::ptensor::PTensorType>();
-            dtype = aPtTyp.getRtensor().getElementType();
-        }
+        auto aPtTyp = ::imex::dist::getPTensorType(av);
+        assert(aPtTyp);
+        ::mlir::Type dtype = aPtTyp.getRtensor().getElementType();
         // return type 0d with same dtype as input
         auto retPtTyp = ::imex::ptensor::PTensorType::get(builder.getContext(), ::mlir::RankedTensorType::get({}, dtype), false);
         // reduction op
diff --git a/src/ddptensor.cpp b/src/ddptensor.cpp
@@ -23,22 +23,23 @@ using namespace pybind11::literals; // to bring _a
 
 #define DEF_PY11_ENUMS // used in p2c_types.hpp
 
-#include "ddptensor/MPITransceiver.hpp"
-#include "ddptensor/MPIMediator.hpp"
-#include "ddptensor/Deferred.hpp"
 #include "ddptensor/Creator.hpp"
-#include "ddptensor/IEWBinOp.hpp"
+#include "ddptensor/Deferred.hpp"
 #include "ddptensor/EWBinOp.hpp"
 #include "ddptensor/EWUnyOp.hpp"
-#include "ddptensor/ReduceOp.hpp"
-#include "ddptensor/ManipOp.hpp"
-#include "ddptensor/SetGetItem.hpp"
-#include "ddptensor/Random.hpp"
-#include "ddptensor/LinAlgOp.hpp"
-#include "ddptensor/Service.hpp"
 #include "ddptensor/Factory.hpp"
+#include "ddptensor/IEWBinOp.hpp"
 #include "ddptensor/IO.hpp"
 #include "ddptensor/jit/mlir.hpp"
+#include "ddptensor/LinAlgOp.hpp"
+#include "ddptensor/ManipOp.hpp"
+#include "ddptensor/MPIMediator.hpp"
+#include "ddptensor/MPITransceiver.hpp"
+#include "ddptensor/Random.hpp"
+#include "ddptensor/ReduceOp.hpp"
+#include "ddptensor/Service.hpp"
+#include "ddptensor/SetGetItem.hpp"
+#include "ddptensor/Sorting.hpp"
 
 // #########################################################################
 // The following classes are wrappers bridging pybind11 defs to TypeDispatch
@@ -92,20 +93,21 @@ void init(bool cw)
 // #########################################################################
 // Finally our Python module
 PYBIND11_MODULE(_ddptensor, m) {
-    Factory::init<F_ARANGE>();
-    Factory::init<F_FULL>();
-    Factory::init<F_FROMSHAPE>();
     // Factory::init<F_UNYOP>();
+    Factory::init<F_ARANGE>();
+    Factory::init<F_EWBINOP>();
     Factory::init<F_EWUNYOP>();
+    Factory::init<F_FROMSHAPE>();
+    Factory::init<F_FULL>();
+    Factory::init<F_GETITEM>();
     Factory::init<F_IEWBINOP>();
-    Factory::init<F_EWBINOP>();
-    Factory::init<F_REDUCEOP>();
-    Factory::init<F_MANIPOP>();
     Factory::init<F_LINALGOP>();
-    Factory::init<F_GETITEM>();
-    Factory::init<F_SETITEM>();
+    Factory::init<F_MANIPOP>();
     Factory::init<F_RANDOM>();
+    Factory::init<F_REDUCEOP>();
     Factory::init<F_SERVICE>();
+    Factory::init<F_SETITEM>();
+    Factory::init<F_SORTOP>();
     Factory::init<F_TONUMPY>();
 
     jit::init();
@@ -148,6 +150,9 @@ PYBIND11_MODULE(_ddptensor, m) {
     py::class_<LinAlgOp>(m, "LinAlgOp")
         .def("vecdot", &LinAlgOp::vecdot);
 
+    py::class_<SortOp>(m, "SortOp")
+        .def("sort", &SortOp::sort);
+
 /// trigger compile&run and return given attribute _x
 #define SYNC_RETURN(_f, _a) Service::run(); return (_f).get().get()->_a()
 /// Replicate ddptensor/future and SYNC_RETURN attribute _a
diff --git a/src/idtr.cpp b/src/idtr.cpp
@@ -49,7 +49,7 @@ uint64_t idtr_prank(int64_t team)
 id_t idtr_init_dtensor(const uint64_t * shape, uint64_t nD)
 {
     auto guid = get_guid();
-    gtensors[guid] = std::unique_ptr<DDPTensorImpl>(nD ? new DDPTensorImpl(shape, nD) : new DDPTensorImpl);
+    // gtensors[guid] = std::unique_ptr<DDPTensorImpl>(nD ? new DDPTensorImpl(shape, nD) : new DDPTensorImpl);
     return guid;
 }
 
@@ -62,13 +62,15 @@ id_t _idtr_init_dtensor(void * alloced, void * aligned, intptr_t offset, intptr_
 // Result is stored in provided array.
 void idtr_local_offsets(id_t guid, uint64_t * offsets, uint64_t nD)
 {
+#if 0
     const auto & tnsr = gtensors.at(guid);
     auto slcs = tnsr->slice().local_slice().slices();
     assert(nD == slcs.size());
     int i = -1;
     for(auto s : slcs) {
         offsets[++i] = s._start;
     }
+#endif
 }
 
 void _idtr_local_offsets(id_t guid, void * alloced, void * aligned, intptr_t offset, intptr_t size, intptr_t stride, uint64_t nD)
@@ -80,9 +82,11 @@ void _idtr_local_offsets(id_t guid, void * alloced, void * aligned, intptr_t off
 // Result is stored in provided array.
 void idtr_local_shape(id_t guid, uint64_t * lshape, uint64_t N)
 {
+#if 0
     const auto & tnsr = gtensors.at(guid);
     auto shp = tnsr->slice().local_slice().shape();
     std::copy(shp.begin(), shp.end(), lshape);
+#endif
 }
 
 void _idtr_local_shape(id_t guid, void * alloced, void * aligned, intptr_t offset, intptr_t size, intptr_t stride, uint64_t nD)
diff --git a/src/include/ddptensor/CollComm.hpp b/src/include/ddptensor/CollComm.hpp
@@ -26,6 +26,7 @@ struct CollComm
     template<typename T, typename U>
     static tensor_i::ptr_type coll_copy(std::shared_ptr<DDPTensorImpl> b_ptr, const std::shared_ptr<DDPTensorImpl> & a_ptr)
     {
+#if 0
         assert(! a_ptr->is_sliced() && ! b_ptr->is_sliced());
         auto info = CollComm::map(b_ptr->slice(), a_ptr->slice());
 
@@ -38,13 +39,15 @@ struct CollComm
                                  info[2].data(),
                                  info[3].data(),
                                  DTYPE<T>::value);
+#endif
             
         return b_ptr;
     }
 
     template<typename T, typename U>
     static std::array<int, 4> coll_map(const std::shared_ptr<DDPTensorImpl> & b_ptr, const std::shared_ptr<DDPTensorImpl> & a_ptr, std::vector<U> & rbuff)
     {
+#if 0
         auto info = CollComm::map(b_ptr->slice(), a_ptr->slice());
         
         auto nr = getTransceiver()->nranks();
@@ -83,11 +86,14 @@ struct CollComm
                                  DTYPE<U>::value);
             
         return {my_cnt_send, info[1][r], my_cnt_recv, info[3][r]};
+#endif
+        return {-1,-1,-1,-1};
     }
 
     template<typename A, typename B>
     static std::array<uint64_t, 2> coll_copy(const std::shared_ptr<DDPTensorImpl> & a_ptr, const std::array<std::vector<NDSlice>, 2> & a_overlap, std::vector<B> & rbuff)
     {
+#if 0
         if(a_overlap[0].empty()) return {0, 0};
 
         auto nr = getTransceiver()->nranks();
@@ -120,5 +126,7 @@ struct CollComm
                                  &disp_recv[0],
                                  DTYPE<B>::value);
         return {(uint64_t)disp_send[rank], (uint64_t)disp_recv[rank]};
+#endif
+        return {-1,-1};
     }
 };
diff --git a/src/include/ddptensor/CppTypes.hpp b/src/include/ddptensor/CppTypes.hpp
@@ -132,20 +132,21 @@ using id_type = uint64_t;
 
 enum FactoryId : int {
     F_ARANGE,
+    F_EWBINOP,
+    F_EWUNYOP,
     F_FROMSHAPE,
     F_FULL,
-    F_UNYOP,
-    F_EWUNYOP,
+    F_GETITEM,
     F_IEWBINOP,
-    F_EWBINOP,
-    F_REDUCEOP,
-    F_MANIPOP,
     F_LINALGOP,
-    F_GETITEM,
-    F_SETITEM,
+    F_MANIPOP,
     F_RANDOM,
+    F_REDUCEOP,
     F_SERVICE,
+    F_SETITEM,
+    F_SORTOP,
     F_TONUMPY,
+    F_UNYOP,
     FACTORY_LAST
 };
 
diff --git a/src/include/ddptensor/DDPTensorImpl.hpp b/src/include/ddptensor/DDPTensorImpl.hpp
@@ -18,7 +18,6 @@
 class DDPTensorImpl : public tensor_i
 {
     mutable rank_type _owner;
-    PVSlice _slice;
     void * _allocated = nullptr;
     void * _aligned = nullptr;
     intptr_t * _sizes = nullptr;
@@ -47,18 +46,18 @@ class DDPTensorImpl : public tensor_i
     // incomplete, useful for computing meta information
     DDPTensorImpl(const uint64_t * shape, uint64_t N, rank_type owner=NOOWNER)
         : _owner(owner),
-          _slice(shape_type(shape, shape+N), static_cast<int>(owner==REPLICATED ? NOSPLIT : 0)),
           _ndims(N)
     {
     }
 
     // incomplete, useful for computing meta information
     DDPTensorImpl()
-        : _owner(REPLICATED),
-          _slice(shape_type(), static_cast<int>(NOSPLIT))
+        : _owner(REPLICATED)
     {
     }
 
+    DDPTensorImpl::ptr_type clone(bool copy = true);
+
     void alloc();
 
     ~DDPTensorImpl()
@@ -71,7 +70,8 @@ class DDPTensorImpl : public tensor_i
 
     bool is_sliced() const
     {
-        return _slice.is_sliced();
+        assert(false);
+        return false;
     }
 
     virtual std::string __repr__() const;
@@ -83,17 +83,20 @@ class DDPTensorImpl : public tensor_i
 
     virtual const shape_type & shape() const
     {
-        return _slice.shape();
+        assert(false);
+        static shape_type dmy;
+        return dmy;
     }
 
-    virtual int ndim() const
+    virtual int ndims() const
     {
-        return _slice.ndims();
+        return _ndims;
     }
 
     virtual uint64_t size() const
     {
-        return _slice.size();
+        assert(ndims() == 1);
+        return *_sizes;
     }
 
     friend struct Service;
@@ -104,12 +107,7 @@ class DDPTensorImpl : public tensor_i
 
     virtual uint64_t __len__() const
     {
-        return _slice.slice().dim(0).size();
-    }
-
-    const PVSlice & slice() const
-    {
-        return _slice;
+        return ndims() ? *_sizes : 0;
     }
 
     bool has_owner() const
diff --git a/src/include/ddptensor/ddptensor.hpp b/src/include/ddptensor/ddptensor.hpp
diff --git a/src/include/ddptensor/tensor_i.hpp b/src/include/ddptensor/tensor_i.hpp

Original file line number	Diff line number	Diff line change
`@@ -100,6 +100,7 @@ set(DDPTSrcs`
`100`	`100`	`${PROJECT_SOURCE_DIR}/src/ReduceOp.cpp`
`101`	`101`	`${PROJECT_SOURCE_DIR}/src/Service.cpp`
`102`	`102`	`${PROJECT_SOURCE_DIR}/src/SetGetItem.cpp`
	`103`	`+ ${PROJECT_SOURCE_DIR}/src/Sorting.cpp`
`103`	`104`	`)`
`104`	`105`	`set(IDTRSrcs`
`105`	`106`	`${PROJECT_SOURCE_DIR}/src/idtr.cpp`
Original file line number	Diff line number	Diff line change
`@@ -102,3 +102,8 @@ def to_numpy(a):`
`102`	`102`	`exec(`
`103`	`103`	`f"{func} = lambda this: dtensor(_cdt.LinAlgOp.{func}(this._t))"`
`104`	`104`	`)`
	`105`	`+`
	`106`	`+for func in api.api_categories["SortOp"]:`
	`107`	`+ exec(`
	`108`	`+ f"{func} = lambda this, axis=-1, descending=False, stable=True: dtensor(_cdt.SortOp.{func}(this._t, descending))"`
	`109`	`+ )`
Original file line number	Diff line number	Diff line change
`@@ -49,7 +49,7 @@ uint64_t idtr_prank(int64_t team)`
`49`	`49`	`id_t idtr_init_dtensor(const uint64_t * shape, uint64_t nD)`
`50`	`50`	`{`
`51`	`51`	`auto guid = get_guid();`
`52`		`- gtensors[guid] = std::unique_ptr<DDPTensorImpl>(nD ? new DDPTensorImpl(shape, nD) : new DDPTensorImpl);`
	`52`	`+ // gtensors[guid] = std::unique_ptr<DDPTensorImpl>(nD ? new DDPTensorImpl(shape, nD) : new DDPTensorImpl);`
`53`	`53`	`return guid;`
`54`	`54`	`}`
`55`	`55`
`@@ -62,13 +62,15 @@ id_t _idtr_init_dtensor(void * alloced, void * aligned, intptr_t offset, intptr_`
`62`	`62`	`// Result is stored in provided array.`
`63`	`63`	`void idtr_local_offsets(id_t guid, uint64_t * offsets, uint64_t nD)`
`64`	`64`	`{`
	`65`	`+#if 0`
`65`	`66`	`const auto & tnsr = gtensors.at(guid);`
`66`	`67`	`auto slcs = tnsr->slice().local_slice().slices();`
`67`	`68`	`assert(nD == slcs.size());`
`68`	`69`	`int i = -1;`
`69`	`70`	`for(auto s : slcs) {`
`70`	`71`	`offsets[++i] = s._start;`
`71`	`72`	`}`
	`73`	`+#endif`
`72`	`74`	`}`
`73`	`75`
`74`	`76`	`void _idtr_local_offsets(id_t guid, void * alloced, void * aligned, intptr_t offset, intptr_t size, intptr_t stride, uint64_t nD)`
`@@ -80,9 +82,11 @@ void _idtr_local_offsets(id_t guid, void * alloced, void * aligned, intptr_t off`
`80`	`82`	`// Result is stored in provided array.`
`81`	`83`	`void idtr_local_shape(id_t guid, uint64_t * lshape, uint64_t N)`
`82`	`84`	`{`
	`85`	`+#if 0`
`83`	`86`	`const auto & tnsr = gtensors.at(guid);`
`84`	`87`	`auto shp = tnsr->slice().local_slice().shape();`
`85`	`88`	`std::copy(shp.begin(), shp.end(), lshape);`
	`89`	`+#endif`
`86`	`90`	`}`
`87`	`91`
`88`	`92`	`void _idtr_local_shape(id_t guid, void * alloced, void * aligned, intptr_t offset, intptr_t size, intptr_t stride, uint64_t nD)`