IntelPython
diff --git a/‎README.md‎
Lines changed: 6 additions & 0 deletions b/‎README.md‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/Creator.cpp‎
Lines changed: 4 additions & 0 deletions b/‎src/Creator.cpp‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎src/Deferred.cpp‎
Lines changed: 26 additions & 0 deletions b/‎src/Deferred.cpp‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎src/EWBinOp.cpp‎
Lines changed: 5 additions & 1 deletion b/‎src/EWBinOp.cpp‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎src/EWUnyOp.cpp‎
Lines changed: 6 additions & 0 deletions b/‎src/EWUnyOp.cpp‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/Factory.cpp‎
Lines changed: 7 additions & 0 deletions b/‎src/Factory.cpp‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎src/IEWBinOp.cpp‎
Lines changed: 6 additions & 0 deletions b/‎src/IEWBinOp.cpp‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/IO.cpp‎
Lines changed: 6 additions & 0 deletions b/‎src/IO.cpp‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/LinAlgOp.cpp‎
Lines changed: 6 additions & 0 deletions b/‎src/LinAlgOp.cpp‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/MPIMediator.cpp‎
Lines changed: 4 additions & 0 deletions b/‎src/MPIMediator.cpp‎
Lines changed: 4 additions & 0 deletions
@@ -35,6 +35,12 @@ mpirun -n $N python -m pytest test
 DDPT_CW=0 mpirun -n $N python -m pytest test
 ```
 
+If DDPT_MPI_SPAWN is set, ddptensor spawns the given number of MPI processes.
+By default, each new process launches python executing a worker loop.
+This requires PYTHON_EXE to be set.
+Alternatively, DDPT_MPI_EXECUTABLE and DDPT_MPI_EXE_ARGS can be set to specify the executable and its arguments.
+Additionally, DDPT_MPI_HOSTS can be used to control the host(s) used for spawning processes.
+
 ## Running
 ```python
 import ddptensor as dt
 
@@ -1,3 +1,7 @@
+/*
+  C++ representation of the array-API's creation functions.
+*/
+
 #include "ddptensor/Creator.hpp"
 #include "ddptensor/DDPTensorImpl.hpp"
 #include "ddptensor/Deferred.hpp"
 
@@ -1,5 +1,12 @@
 // SPDX-License-Identifier: BSD-3-Clause
 
+/*
+  Creation/destruction of Deferreds.
+  Implementation of worker loop processing deferred objects.
+  This worker loop is executed in a separate thread until the system
+  gets shut down.
+*/
+
 #include "include/ddptensor/Deferred.hpp"
 #include "include/ddptensor/Mediator.hpp"
 #include "include/ddptensor/Registry.hpp"
@@ -14,20 +21,29 @@
 #include <iostream>
 #include <unordered_set>
 
+// thread-safe FIFO queue holding deferred objects
 static tbb::concurrent_bounded_queue<Runable::ptr_type> _deferred;
 
+// add a deferred object to the queue
 void push_runable(Runable::ptr_type &&r) { _deferred.push(std::move(r)); }
 
+// if needed, the object/promise is broadcast to worker processes
+// (for controller/worker mode)
 void _dist(const Runable *p) {
   if (getTransceiver()->is_cw() && getTransceiver()->rank() == 0)
     getMediator()->to_workers(p);
 }
 
+// create an enriched future (shared future plus guid, dtype, rank, balanced)
 Deferred::future_type Deferred::get_future() {
   return {std::move(promise_type::get_future().share()), _guid, _dtype, _rank,
           _balanced};
 }
 
+// defer a tensor-producing computation by adding it to the queue.
+// return a future for the resulting tensor.
+// set is_global to false if result is a local temporary which does not need a
+// guid
 Deferred::future_type defer_tensor(Runable::ptr_type &&_d, bool is_global) {
   Deferred *d = dynamic_cast<Deferred *>(_d.get());
   if (!d)
@@ -42,6 +58,7 @@ Deferred::future_type defer_tensor(Runable::ptr_type &&_d, bool is_global) {
   return f;
 }
 
+// defer a global tensor producer
 void Deferred::defer(Runable::ptr_type &&p) {
   defer_tensor(std::move(p), true);
 }
@@ -50,6 +67,15 @@ void Runable::defer(Runable::ptr_type &&p) { push_runable(std::move(p)); }
 
 void Runable::fini() { _deferred.clear(); }
 
+// process promises as they arrive through calls to defer
+// This is run in a separate thread until shutdown is requested.
+// Shutdown is indicated by a Deferred object which evaluates to false.
+// The loop repeatedly creates MLIR functions for jit-compilation by letting
+// Deferred objects add their MLIR code until an object cannot produce MLIR
+// but wants immediate execution (indicated by generate_mlir returning true).
+// When execution is needed, the function signature (input args, return
+// statement) is finalized, the function gets compiled and executed. Each
+// iteration completes by calling run() on the requesting object.
 void process_promises() {
   bool done = false;
   jit::JIT jit;
 
@@ -1,5 +1,9 @@
 // SPDX-License-Identifier: BSD-3-Clause
 
+/*
+  Elementwise binary ops.
+*/
+
 #include "ddptensor/EWBinOp.hpp"
 #include "ddptensor/CollComm.hpp"
 #include "ddptensor/Creator.hpp"
@@ -15,6 +19,7 @@
 #include <mlir/Dialect/Shape/IR/Shape.h>
 #include <mlir/IR/Builders.h>
 
+#if 0
 // #######################################################################################
 // The 2 operators/tensors can have shifted partitions, e.g. local data might
 // not be the same on a and b. This means we need to copy/communicate to bring
@@ -43,7 +48,6 @@
 // above regions and apply the op. All buffers/views get linearized/flattened.
 // #######################################################################################
 
-#if 0
 namespace x {
 
     // @return true if operation returns bool, false otherwise
 
@@ -1,3 +1,9 @@
+// SPDX-License-Identifier: BSD-3-Clause
+
+/*
+  Elementwise unary ops.
+*/
+
 #include "ddptensor/EWUnyOp.hpp"
 #include "ddptensor/DDPTensorImpl.hpp"
 #include "ddptensor/Factory.hpp"
 
@@ -1,3 +1,10 @@
+// SPDX-License-Identifier: BSD-3-Clause
+
+/*
+  A factory producing runnable objects.
+  Implementation for registering factories.
+*/
+
 #include "ddptensor/Factory.hpp"
 
 std::vector<Factory::ptr_type> s_factories(FACTORY_LAST);
 
@@ -1,3 +1,9 @@
+// SPDX-License-Identifier: BSD-3-Clause
+
+/*
+  Inplace elementwise binary ops.
+*/
+
 #include "ddptensor/IEWBinOp.hpp"
 #include "ddptensor/Creator.hpp"
 #include "ddptensor/DDPTensorImpl.hpp"
 
@@ -1,3 +1,9 @@
+// SPDX-License-Identifier: BSD-3-Clause
+
+/*
+  I/O ops.
+*/
+
 #include "ddptensor/IO.hpp"
 #include "ddptensor/Factory.hpp"
 #include "ddptensor/SetGetItem.hpp"
 
@@ -1,3 +1,9 @@
+// SPDX-License-Identifier: BSD-3-Clause
+
+/*
+  Linear algebra ops.
+*/
+
 #include <mpi.h>
 //#include <mkl.h>
 #include "ddptensor/DDPTensorImpl.hpp"
 
@@ -1,5 +1,9 @@
 // SPDX-License-Identifier: BSD-3-Clause
 
+/*
+  A high-level mediation between processes/ranks implemented on top of MPI.
+*/
+
 #include <iostream>
 #include <mpi.h>
 #include <mutex>