|
| 1 | +// SPDX-License-Identifier: BSD-3-Clause |
| 2 | + |
| 3 | +// Concrete implementation of tensor_i. |
| 4 | +// Interfaces are based on shared_ptr<tensor_i>. |
| 5 | + |
#include <ddptensor/DDPTensorImpl.hpp>
#include <ddptensor/CppTypes.hpp>

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <memory>
#include <new>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
| 10 | + |
| 11 | + |
| 12 | +DDPTensorImpl::DDPTensorImpl(DTypeId dtype, uint64_t ndims, |
| 13 | + void * allocated, void * aligned, intptr_t offset, const intptr_t * sizes, const intptr_t * strides, |
| 14 | + uint64_t * gs_allocated, uint64_t * gs_aligned, uint64_t * lo_allocated, uint64_t * lo_aligned, |
| 15 | + rank_type owner) |
| 16 | + : _owner(owner), |
| 17 | + _allocated(allocated), |
| 18 | + _aligned(aligned), |
| 19 | + _sizes(new intptr_t[ndims]), |
| 20 | + _strides(new intptr_t[ndims]), |
| 21 | + _gs_allocated(gs_allocated), |
| 22 | + _gs_aligned(gs_aligned), |
| 23 | + _lo_allocated(lo_allocated), |
| 24 | + _lo_aligned(lo_aligned), |
| 25 | + _offset(offset), |
| 26 | + _ndims(ndims), |
| 27 | + _dtype(dtype) |
| 28 | +{ |
| 29 | + memcpy(_sizes, sizes, ndims*sizeof(*_sizes)); |
| 30 | + memcpy(_strides, strides, ndims*sizeof(*_strides)); |
| 31 | +} |
| 32 | + |
| 33 | +DDPTensorImpl::DDPTensorImpl(DTypeId dtype, const shape_type & shp, rank_type owner) |
| 34 | + : _owner(owner), |
| 35 | + _ndims(shp.size()), |
| 36 | + _dtype(dtype) |
| 37 | +{ |
| 38 | + alloc(); |
| 39 | + |
| 40 | + intptr_t stride = 1; |
| 41 | + auto ndims = shp.size(); |
| 42 | + assert(ndims <= 1); |
| 43 | + for(auto i=0; i<ndims; ++i) { |
| 44 | + _sizes[i] = shp[i]; |
| 45 | + _strides[ndims-i-1] = stride; |
| 46 | + stride *= shp[i]; |
| 47 | + } |
| 48 | +} |
| 49 | + |
| 50 | +DDPTensorImpl::ptr_type DDPTensorImpl::clone(bool copy) |
| 51 | +{ |
| 52 | + // FIXME memory leak |
| 53 | + auto nd = ndims(); |
| 54 | + auto sz = size(); |
| 55 | + auto esz = sizeof_dtype(dtype()); |
| 56 | + auto bsz = sz * esz; |
| 57 | + auto allocated = new (std::align_val_t(esz)) char[bsz]; |
| 58 | + auto aligned = allocated; |
| 59 | + if(copy) memcpy(aligned, _aligned, bsz); |
| 60 | + // FIXME jit returns private mem |
| 61 | + // memcpy(gs_aligned, _gs_aligned, nd*sizeof(*gs_aligned)); |
| 62 | + // auto gs_allocated = new uint64_t[nd]; |
| 63 | + // auto gs_aligned = gs_allocated; |
| 64 | + auto gs_allocated = _gs_allocated; |
| 65 | + auto gs_aligned = _gs_aligned; |
| 66 | + auto lo_allocated = new uint64_t[nd]; |
| 67 | + auto lo_aligned = lo_allocated; |
| 68 | + memcpy(lo_aligned, _lo_aligned, nd*sizeof(*lo_aligned)); |
| 69 | + |
| 70 | + // strides and sizes are allocated/copied in constructor |
| 71 | + return std::make_shared<DDPTensorImpl>(dtype(), nd, allocated, aligned, _offset, _sizes, _strides, |
| 72 | + gs_allocated, gs_aligned, lo_allocated, lo_aligned, owner()); |
| 73 | +} |
| 74 | + |
| 75 | +void DDPTensorImpl::alloc() |
| 76 | +{ |
| 77 | + auto esz = sizeof_dtype(_dtype); |
| 78 | + _allocated = new (std::align_val_t(esz)) char[esz*size()]; |
| 79 | + _aligned = _allocated; |
| 80 | + auto nds = ndims(); |
| 81 | + _sizes = new intptr_t[nds]; |
| 82 | + _strides = new intptr_t[nds]; |
| 83 | + _offset = 0; |
| 84 | +} |
| 85 | + |
| 86 | +void * DDPTensorImpl::data() |
| 87 | +{ |
| 88 | + void * ret; |
| 89 | + dispatch(_dtype, _aligned, [this, &ret](auto * ptr) { ret = ptr + this->_offset; }); |
| 90 | + return ret; |
| 91 | +} |
| 92 | + |
| 93 | +std::string DDPTensorImpl::__repr__() const |
| 94 | +{ |
| 95 | + const auto nd = ndims(); |
| 96 | + std::ostringstream oss; |
| 97 | + oss << "ddptensor{gs=("; |
| 98 | + for(auto i=0; i<nd; ++i) oss << _gs_aligned[i] << ", "; |
| 99 | + oss << "), loff=("; |
| 100 | + for(auto i=0; i<nd; ++i) oss << _lo_aligned[i] << ", "; |
| 101 | + oss << "), lsz=("; |
| 102 | + for(auto i=0; i<nd; ++i) oss << _sizes[i] << ", "; |
| 103 | + oss << "), str=("; |
| 104 | + for(auto i=0; i<nd; ++i) oss << _strides[i] << ", "; |
| 105 | + oss << "), p=" << _allocated << ", poff=" << _offset << "}\n"; |
| 106 | + |
| 107 | + dispatch(_dtype, _aligned, [this, nd, &oss](auto * ptr) { |
| 108 | + auto cptr = ptr + this->_offset; |
| 109 | + printit(oss, 0, cptr); |
| 110 | + }); |
| 111 | + return oss.str(); |
| 112 | +} |
| 113 | + |
| 114 | +bool DDPTensorImpl::__bool__() const |
| 115 | +{ |
| 116 | + if(! is_replicated()) |
| 117 | + throw(std::runtime_error("Cast to scalar bool: tensor is not replicated")); |
| 118 | + |
| 119 | + bool res; |
| 120 | + dispatch(_dtype, _aligned, [this, &res](auto * ptr) { res = static_cast<bool>(ptr[this->_offset]); }); |
| 121 | + return res; |
| 122 | +} |
| 123 | + |
| 124 | +double DDPTensorImpl::__float__() const |
| 125 | +{ |
| 126 | + if(! is_replicated()) |
| 127 | + throw(std::runtime_error("Cast to scalar float: tensor is not replicated")); |
| 128 | + |
| 129 | + double res; |
| 130 | + dispatch(_dtype, _aligned, [this, &res](auto * ptr) { res = static_cast<double>(ptr[this->_offset]); }); |
| 131 | + return res; |
| 132 | +} |
| 133 | + |
| 134 | +int64_t DDPTensorImpl::__int__() const |
| 135 | +{ |
| 136 | + if(! is_replicated()) |
| 137 | + throw(std::runtime_error("Cast to scalar int: tensor is not replicated")); |
| 138 | + |
| 139 | + float res; |
| 140 | + dispatch(_dtype, _aligned, [this, &res](auto * ptr) { res = static_cast<float>(ptr[this->_offset]); }); |
| 141 | + return res; |
| 142 | +} |
| 143 | + |
// Append this tensor's local data to buff as raw bytes.
// NOTE(review): the slice argument slc is currently ignored (see FIXME and
// the disabled code below) — the whole contiguous local buffer is copied.
void DDPTensorImpl::bufferize(const NDSlice & slc, Buffer & buff) const
{
    // FIXME slices/strides
#if 0
    if(slc.size() <= 0) return;
    NDSlice lslice = NDSlice(slice().tile_shape()).slice(slc);
#endif
    // Presumably guards for a unit-stride (contiguous) layout, which the
    // bulk memcpy below requires — TODO confirm which axis should carry
    // stride 1 for this project's layout convention.
    assert(_strides[0] == 1);
    auto pos = buff.size();
    auto sz = size()*item_size();
    // Grow buff and copy the local data right behind its existing content.
    buff.resize(pos + sz);
    void * out = buff.data() + pos;
    dispatch(_dtype, _aligned, [this, sz, out](auto * ptr) { memcpy(out, ptr + this->_offset, sz); });
}
| 158 | + |
// Push three descriptors plus a team handle onto args for an external call.
// Each descriptor appears to follow a memref-style layout
// {allocated, aligned, offset, sizes..., strides...} — TODO confirm against
// the dtensor_sz/memref_sz definitions and the callee's ABI.
// NOTE(review): the descriptor buffers are handed to args with no owner
// recorded here — presumably the consumer frees them; verify, otherwise
// every call leaks three allocations.
void DDPTensorImpl::add_to_args(std::vector<void*> & args, int ndims)
{
    // A 0-arg request is accepted for 1d tensors (scalar-like case).
    assert(ndims == this->ndims() || (ndims == 0 && this->ndims() == 1));
    // global shape first
    intptr_t * buff = new intptr_t[dtensor_sz(1)];
    buff[0] = reinterpret_cast<intptr_t>(_gs_allocated);
    buff[1] = reinterpret_cast<intptr_t>(_gs_aligned);
    buff[2] = 0;       // offset
    buff[3] = ndims;   // size of the 1d shape array
    buff[4] = 1;       // stride
    args.push_back(buff);
    assert(5 == memref_sz(1));
    // local tensor
    buff = new intptr_t[dtensor_sz(ndims)];
    buff[0] = reinterpret_cast<intptr_t>(_allocated);
    buff[1] = reinterpret_cast<intptr_t>(_aligned);
    buff[2] = static_cast<intptr_t>(_offset);
    memcpy(buff+3, _sizes, ndims*sizeof(intptr_t));
    memcpy(buff+3+ndims, _strides, ndims*sizeof(intptr_t));
    args.push_back(buff);
    // local offsets
    buff = new intptr_t[dtensor_sz(1)];
    buff[0] = reinterpret_cast<intptr_t>(_lo_allocated);
    buff[1] = reinterpret_cast<intptr_t>(_lo_aligned);
    buff[2] = 0;       // offset
    buff[3] = ndims;   // size of the 1d offsets array
    buff[4] = 1;       // stride
    args.push_back(buff);
    // finally the team
    args.push_back(reinterpret_cast<void*>(1));
}
0 commit comments