Skip to content
This repository was archived by the owner on Jan 26, 2026. It is now read-only.

Commit c8e106b

Browse files
committed
base for deferred execution
1 parent 2a9da9c commit c8e106b

23 files changed

+383
-83
lines changed

src/Creator.cpp

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
#include "ddptensor/Creator.hpp"
22
#include "ddptensor/TypeDispatch.hpp"
33
#include "ddptensor/x.hpp"
4+
#include "ddptensor/Deferred.hpp"
45

56
namespace x {
67

78
template<typename T>
89
class Creator
910
{
1011
public:
11-
using ptr_type = DPTensorBaseX::ptr_type;
12+
using ptr_type = typename tensor_i::ptr_type;
1213
using typed_ptr_type = typename DPTensorX<T>::typed_ptr_type;
1314

1415
static ptr_type op(CreatorId c, const shape_type & shp)
@@ -51,18 +52,65 @@ namespace x {
5152
}; // class creatorx
5253
} // namespace x
5354

54-
tensor_i::ptr_type Creator::create_from_shape(CreatorId op, const shape_type & shape, DTypeId dtype)
55+
struct DeferredFromShape : public Deferred
5556
{
56-
return TypeDispatch<x::Creator>(dtype, op, shape);
57+
CreatorId _op;
58+
shape_type _shape;
59+
DTypeId _dtype;
60+
61+
DeferredFromShape(CreatorId op, const shape_type & shape, DTypeId dtype)
62+
: _op(op), _shape(shape), _dtype(dtype)
63+
{}
64+
65+
void run()
66+
{
67+
set_value(TypeDispatch<x::Creator>(_dtype, _op, _shape));
68+
}
69+
};
70+
71+
tensor_i::future_type Creator::create_from_shape(CreatorId op, const shape_type & shape, DTypeId dtype)
72+
{
73+
return defer<DeferredFromShape>(op, shape, dtype);
5774
}
5875

59-
tensor_i::ptr_type Creator::full(const shape_type & shape, const py::object & val, DTypeId dtype)
76+
struct DeferredFull : public Deferred
6077
{
61-
auto op = FULL;
62-
return TypeDispatch<x::Creator>(dtype, op, shape, val);
78+
shape_type _shape;
79+
const py::object & _val;
80+
DTypeId _dtype;
81+
82+
DeferredFull(const shape_type & shape, const py::object & val, DTypeId dtype)
83+
: _shape(shape), _val(val), _dtype(dtype)
84+
{}
85+
86+
void run()
87+
{
88+
auto op = FULL;
89+
set_value(TypeDispatch<x::Creator>(_dtype, op, _shape, _val));
90+
}
91+
};
92+
93+
tensor_i::future_type Creator::full(const shape_type & shape, const py::object & val, DTypeId dtype)
94+
{
95+
return defer<DeferredFull>(shape, val, dtype);
6396
}
6497

65-
tensor_i::ptr_type Creator::arange(uint64_t start, uint64_t end, uint64_t step, DTypeId dtype)
98+
struct DeferredArange : public Deferred
99+
{
100+
uint64_t _start, _end, _step;
101+
DTypeId _dtype;
102+
103+
DeferredArange(uint64_t start, uint64_t end, uint64_t step, DTypeId dtype)
104+
: _start(start), _end(end), _step(step), _dtype(dtype)
105+
{}
106+
107+
void run()
108+
{
109+
set_value(TypeDispatch<x::Creator>(_dtype, _start, _end, _step));
110+
};
111+
};
112+
113+
tensor_i::future_type Creator::arange(uint64_t start, uint64_t end, uint64_t step, DTypeId dtype)
66114
{
67-
return TypeDispatch<x::Creator>(dtype, start, end, step);
115+
return defer<DeferredArange>(start, end, step, dtype);
68116
}

src/Deferred.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
#include "include/ddptensor/Deferred.hpp"
#include <queue>

static std::queue<Deferred::ptr_type> _deferred;

Deferred::future_type Deferred::defer(Deferred::ptr_type && d)
{
    //auto f = d->get_future();
    _deferred.push(std::move(d));
    // return f;
    auto aa = Deferred::undefer_next();
    aa->run();
    return aa->get_future();
}

Deferred::ptr_type Deferred::undefer_next()
{
    auto r = std::move(_deferred.front());
    _deferred.pop();
    return r;
}

src/EWBinOp.cpp

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ namespace x {
5353
case __LT__:
5454
case LESS:
5555
return operatorx<A>::mk_tx_(a_ptr, a < b);
56+
// __MATMUL__ is handled before dispatching, see below
5657
case __MUL__:
5758
case MULTIPLY:
5859
return operatorx<A>::mk_tx_(a_ptr, a * b);
@@ -73,8 +74,6 @@ namespace x {
7374
return operatorx<A>::mk_tx_(a_ptr, b - a);
7475
case __RTRUEDIV__:
7576
return operatorx<A>::mk_tx_(a_ptr, b / a);
76-
case __MATMUL__:
77-
return LinAlgOp::vecdot(a_ptr, b_ptr, 0);
7877
case __POW__:
7978
case POW:
8079
return operatorx<A>::mk_tx_(a_ptr, xt::pow(a, b));
@@ -133,9 +132,30 @@ namespace x {
133132

134133
};
135134
} // namespace x
136-
137-
tensor_i::ptr_type EWBinOp::op(EWBinOpId op, x::DPTensorBaseX::ptr_type a, py::object & b)
135+
136+
struct DeferredEWBinOp : public Deferred
137+
{
138+
tensor_i::future_type _a;
139+
tensor_i::future_type _b;
140+
EWBinOpId _op;
141+
142+
DeferredEWBinOp(EWBinOpId op, tensor_i::future_type & a, tensor_i::future_type & b)
143+
: _a(a), _b(b), _op(op)
144+
{}
145+
146+
void run()
147+
{
148+
auto a = std::move(_a.get());
149+
auto b = std::move(_b.get());
150+
set_value(TypeDispatch<x::EWBinOp>(a, b, _op));
151+
}
152+
};
153+
154+
tensor_i::future_type EWBinOp::op(EWBinOpId op, tensor_i::future_type & a, py::object & b)
138155
{
139-
auto bb = x::mk_tx(b);
140-
return TypeDispatch<x::EWBinOp>(a, bb, op);
156+
if(op == __MATMUL__) {
157+
auto bb = x::mk_ftx(b);
158+
return LinAlgOp::vecdot(a, bb, 0);
159+
}
160+
return defer<DeferredEWBinOp>(op, a, x::mk_ftx(b));
141161
}

src/EWUnyOp.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,23 @@ namespace x {
108108
};
109109
} //namespace x
110110

111-
tensor_i::ptr_type EWUnyOp::op(EWUnyOpId op, x::DPTensorBaseX::ptr_type a)
111+
struct DeferredEWUnyOp : public Deferred
112112
{
113-
return TypeDispatch<x::EWUnyOp>(a, op);
113+
tensor_i::future_type _a;
114+
EWUnyOpId _op;
115+
116+
DeferredEWUnyOp(EWUnyOpId op, tensor_i::future_type & a)
117+
: _a(a), _op(op)
118+
{}
119+
120+
void run()
121+
{
122+
auto a = std::move(_a.get());
123+
set_value(TypeDispatch<x::EWUnyOp>(a, _op));
124+
}
125+
};
126+
127+
tensor_i::future_type EWUnyOp::op(EWUnyOpId op, tensor_i::future_type & a)
128+
{
129+
return defer<DeferredEWUnyOp>(op, a);
114130
}

src/IEWBinOp.cpp

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -10,61 +10,60 @@ namespace x {
1010
using ptr_type = DPTensorBaseX::ptr_type;
1111

1212
template<typename A, typename B>
13-
static void op(IEWBinOpId iop, std::shared_ptr<DPTensorX<A>> a_ptr, const std::shared_ptr<DPTensorX<B>> & b_ptr)
13+
static ptr_type op(IEWBinOpId iop, std::shared_ptr<DPTensorX<A>> a_ptr, const std::shared_ptr<DPTensorX<B>> & b_ptr)
1414
{
1515
auto & ax = a_ptr->xarray();
1616
const auto & bx = b_ptr->xarray();
1717
if(a_ptr->is_sliced() || b_ptr->is_sliced()) {
1818
auto av = xt::strided_view(ax, a_ptr->lslice());
1919
const auto & bv = xt::strided_view(bx, b_ptr->lslice());
20-
do_op(iop, av, bv);
21-
} else {
22-
do_op(iop, ax, bx);
20+
return do_op(iop, av, bv, a_ptr);
2321
}
22+
return do_op(iop, ax, bx, a_ptr);
2423
}
2524

2625
#pragma GCC diagnostic ignored "-Wswitch"
27-
template<typename T1, typename T2>
28-
static void do_op(IEWBinOpId iop, T1 & a, const T2 & b)
26+
template<typename A, typename T1, typename T2>
27+
static ptr_type do_op(IEWBinOpId iop, T1 & a, const T2 & b, std::shared_ptr<DPTensorX<A>> a_ptr)
2928
{
3029
switch(iop) {
3130
case __IADD__:
3231
a += b;
33-
return;
32+
return a_ptr;
3433
case __IFLOORDIV__:
3534
a = xt::floor(a / b);
36-
return;
35+
return a_ptr;
3736
case __IMUL__:
3837
a *= b;
39-
return;
38+
return a_ptr;
4039
case __ISUB__:
4140
a -= b;
42-
return;
41+
return a_ptr;
4342
case __ITRUEDIV__:
4443
a /= b;
45-
return;
44+
return a_ptr;
4645
case __IPOW__:
4746
throw std::runtime_error("Binary inplace operation not implemented");
4847
}
4948
if constexpr (std::is_integral<typename T1::value_type>::value && std::is_integral<typename T2::value_type>::value) {
5049
switch(iop) {
5150
case __IMOD__:
5251
a %= b;
53-
return;
52+
return a_ptr;
5453
case __IOR__:
5554
a |= b;
56-
return;
55+
return a_ptr;
5756
case __IAND__:
5857
a &= b;
59-
return;
58+
return a_ptr;
6059
case __IXOR__:
6160
a ^= b;
6261
case __ILSHIFT__:
6362
a = xt::left_shift(a, b);
64-
return;
63+
return a_ptr;
6564
case __IRSHIFT__:
6665
a = xt::right_shift(a, b);
67-
return;
66+
return a_ptr;
6867
}
6968
}
7069
throw std::runtime_error("Unknown/invalid inplace elementwise binary operation");
@@ -74,8 +73,25 @@ namespace x {
7473
};
7574
} // namespace x
7675

77-
void IEWBinOp::op(IEWBinOpId op, x::DPTensorBaseX::ptr_type a, py::object & b)
76+
struct DeferredIEWBinOp : public Deferred
7877
{
79-
auto bb = x::mk_tx(b);
80-
TypeDispatch<x::IEWBinOp>(a, bb, op);
78+
tensor_i::future_type _a;
79+
tensor_i::future_type _b;
80+
IEWBinOpId _op;
81+
82+
DeferredIEWBinOp(IEWBinOpId op, tensor_i::future_type & a, tensor_i::future_type & b)
83+
: _a(a), _b(b), _op(op)
84+
{}
85+
86+
void run()
87+
{
88+
auto a = std::move(_a.get());
89+
auto b = std::move(_b.get());
90+
set_value(TypeDispatch<x::IEWBinOp>(a, b, _op));
91+
}
92+
};
93+
94+
tensor_i::future_type IEWBinOp::op(IEWBinOpId op, tensor_i::future_type & a, py::object & b)
95+
{
96+
return defer<DeferredIEWBinOp>(op, a, x::mk_ftx(b));
8197
}

src/LinAlgOp.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,25 @@ namespace x {
109109
};
110110
}
111111

112-
tensor_i::ptr_type LinAlgOp::vecdot(tensor_i::ptr_type a, tensor_i::ptr_type b, int axis)
112+
struct DeferredLinAlgOp : public Deferred
113113
{
114-
return TypeDispatch<x::LinAlgOp>(a, b, axis);
114+
tensor_i::future_type _a;
115+
tensor_i::future_type _b;
116+
int _axis;
117+
118+
DeferredLinAlgOp(tensor_i::future_type & a, tensor_i::future_type & b, int axis)
119+
: _a(a), _b(b), _axis(axis)
120+
{}
121+
122+
void run()
123+
{
124+
auto a = std::move(_a.get());
125+
auto b = std::move(_b.get());
126+
set_value(TypeDispatch<x::LinAlgOp>(a, b, _axis));
127+
}
128+
};
129+
130+
tensor_i::future_type LinAlgOp::vecdot(tensor_i::future_type & a, tensor_i::future_type & b, int axis)
131+
{
132+
return defer<DeferredLinAlgOp>(a, b, axis);
115133
}

src/ManipOp.cpp

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,23 @@ namespace x {
2323
};
2424
}
2525

26-
tensor_i::ptr_type ManipOp::reshape(x::DPTensorBaseX::ptr_type a, const shape_type & shape)
26+
struct DeferredManipOp : public Deferred
2727
{
28-
return TypeDispatch<x::ManipOp>(a, shape);
28+
tensor_i::future_type _a;
29+
shape_type _shape;
30+
31+
DeferredManipOp(tensor_i::future_type & a, const shape_type & shape)
32+
: _a(a), _shape(shape)
33+
{}
34+
35+
void run()
36+
{
37+
auto a = std::move(_a.get());
38+
set_value(TypeDispatch<x::ManipOp>(a, _shape));
39+
}
40+
};
41+
42+
tensor_i::future_type ManipOp::reshape(tensor_i::future_type & a, const shape_type & shape)
43+
{
44+
return defer<DeferredManipOp>(a, shape);
2945
}

src/Random.cpp

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,31 @@ namespace x {
2121
};
2222
}
2323

24-
ptr_type Random::rand(DTypeId dtype, const shape_type & shape, const py::object & lower, const py::object & upper)
24+
struct DeferredRandomOp : public Deferred
2525
{
26-
switch(dtype) {
27-
case FLOAT64:
28-
return x::Rand<double>::op(shape, lower, upper);
29-
case FLOAT32:
30-
return x::Rand<double>::op(shape, lower, upper);
26+
shape_type _shape;
27+
py::object _lower, _upper;
28+
DTypeId _dtype;
29+
30+
DeferredRandomOp(DTypeId dtype, const shape_type & shape, const py::object & lower, const py::object & upper)
31+
: _shape(shape), _lower(lower), _upper(upper), _dtype(dtype)
32+
{}
33+
34+
void run()
35+
{
36+
switch(_dtype) {
37+
case FLOAT64:
38+
set_value(x::Rand<double>::op(_shape, _lower, _upper));
39+
case FLOAT32:
40+
set_value(x::Rand<float>::op(_shape, _lower, _upper));
41+
}
42+
throw std::runtime_error("rand: dtype must be a floating point type");
3143
}
32-
throw std::runtime_error("rand: dtype must be a floating point type");
44+
};
45+
46+
Random::future_type Random::rand(DTypeId dtype, const shape_type & shape, const py::object & lower, const py::object & upper)
47+
{
48+
return defer<DeferredRandomOp>(dtype, shape, lower, upper);
3349
}
3450

3551
void Random::seed(uint64_t s)

0 commit comments

Comments (0)