fixing empty(), getting closer to working with PRK stencil

fschlimb · fschlimb · commit 015829d4be88 · 2023-03-03T11:22:12.000-06:00
diff --git a/ddptensor/ddptensor.py b/ddptensor/ddptensor.py
@@ -55,4 +55,4 @@ def __getitem__(self, key):
     def __setitem__(self, key, value):
         key = key if isinstance(key, tuple) else (key,)
         key = [x if isinstance(x, slice) else slice(x, x + 1, 1) for x in key]
-        self._t.__setitem__(key, value._t)  # if isinstance(value, dtensor) else value)
+        self._t.__setitem__(key, value._t if isinstance(value, dtensor) else value)
diff --git a/src/Creator.cpp b/src/Creator.cpp
@@ -127,12 +127,15 @@ struct DeferredFull : public Deferred {
                             const PyScalar &val, ::imex::ptensor::DType &dtyp) {
       dtyp = jit::PT_DTYPE<T>::value;
 
-      if constexpr (std::is_floating_point_v<T>)
+      if (is_none(val)) {
+        return {};
+      } else if constexpr (std::is_floating_point_v<T>) {
         return ::imex::createFloat<sizeof(T) * 8>(loc, builder, val._float);
-      else if constexpr (std::is_same_v<bool, T>)
+      } else if constexpr (std::is_same_v<bool, T>) {
         return ::imex::createInt<1>(loc, builder, val._int);
-      else if constexpr (std::is_integral_v<T>)
+      } else if constexpr (std::is_integral_v<T>) {
         return ::imex::createInt<sizeof(T) * 8>(loc, builder, val._int);
+      }
       assert("Unsupported dtype in dispatch");
       return {};
     };
diff --git a/src/SetGetItem.cpp b/src/SetGetItem.cpp
@@ -6,6 +6,7 @@
 */
 
 #include "ddptensor/SetGetItem.hpp"
+#include "ddptensor/Creator.hpp"
 #include "ddptensor/DDPTensorImpl.hpp"
 #include "ddptensor/Factory.hpp"
 #include "ddptensor/Mediator.hpp"
@@ -258,8 +259,13 @@ struct DeferredSetItem : public Deferred {
 };
 
 ddptensor *SetItem::__setitem__(ddptensor &a, const std::vector<py::slice> &v,
-                                const ddptensor &b) {
-  return new ddptensor(defer<DeferredSetItem>(a.get(), b.get(), v));
+                                const py::object &b) {
+
+  auto bb = Creator::mk_future(b);
+  auto res = new ddptensor(defer<DeferredSetItem>(a.get(), bb.first->get(), v));
+  if (bb.second)
+    delete bb.first;
+  return res;
 }
 
 struct DeferredGetItem : public Deferred {
diff --git a/src/include/ddptensor/CppTypes.hpp b/src/include/ddptensor/CppTypes.hpp
@@ -8,6 +8,7 @@
 
 #include "p2c_ids.hpp"
 
+#include <cmath>
 #include <cstring>
 #include <numeric>
 #include <vector>
@@ -30,6 +31,8 @@ union PyScalar {
   double _float;
 };
 
+inline bool is_none(PyScalar s) { return std::isnan(s._float); }
+
 enum _RANKS : rank_type {
   NOOWNER = std::numeric_limits<rank_type>::max(),
   REPLICATED = std::numeric_limits<rank_type>::max() - 1,
diff --git a/src/include/ddptensor/PyTypes.hpp b/src/include/ddptensor/PyTypes.hpp
@@ -15,6 +15,10 @@ template <typename T> py::object get_impl_dtype() {
 
 inline PyScalar mk_scalar(const py::object &b, DTypeId dtype) {
   PyScalar s;
+  if (b.is_none()) {
+    s._float = std::numeric_limits<double>::quiet_NaN();
+    return s;
+  }
   switch (dtype) {
   case FLOAT64:
   case FLOAT32:
diff --git a/src/include/ddptensor/SetGetItem.hpp b/src/include/ddptensor/SetGetItem.hpp
@@ -23,5 +23,5 @@ struct GetItem {
 
 struct SetItem {
   static ddptensor *__setitem__(ddptensor &a, const std::vector<py::slice> &v,
-                                const ddptensor &b);
+                                const py::object &b);
 };
diff --git a/test/stencil-2d.py b/test/stencil-2d.py
@@ -1,44 +1,221 @@
-import ddptensor as aa
-
-# import numpy as aa
-import numpy as np
-
-aa.init(False)
-
-n = 16
-r = 2
-A = aa.ones((n, n), dtype=aa.int64)
-B = aa.zeros((n, n), dtype=aa.int64)
-W = aa.ones(((2 * r + 1), (2 * r + 1)), dtype=aa.int64)
-for i in range(3):
-    #     if i:
-    #         C = B[2:n-2,2:n-2]
-    #         B[2:n-2,2:n-2] = C
-    #     else:
-    # D = B[2:n-2,2:n-2]
-    B[2 : n - 2, 2 : n - 2] = (
-        B[2 : n - 2, 2 : n - 2]
-        + W[2, 2] * A[2 : n - 2, 2 : n - 2]
-        + W[2, 0] * A[2 : n - 2, 0 : n - 4]
-        + W[2, 1] * A[2 : n - 2, 1 : n - 3]
-        + W[2, 3] * A[2 : n - 2, 3 : n - 1]
-        + W[2, 4] * A[2 : n - 2, 4 : n - 0]
-        + W[0, 2] * A[0 : n - 4, 2 : n - 2]
-        + W[1, 2] * A[1 : n - 3, 2 : n - 2]
-        + W[3, 2] * A[3 : n - 1, 2 : n - 2]
-        + W[4, 2] * A[4 : n - 0, 2 : n - 2]
-    )
-    # A[2:n-2,2:n-2] \
-    #    + A[2:n-2,2:n-2] \
-    #     + A[2:n-2,0:n-4] \
-    #     + A[2:n-2,1:n-3] \
-    #     + A[2:n-2,3:n-1] \
-    #     + A[2:n-2,4:n-0] \
-    #     + A[0:n-4,2:n-2] \
-    #     + A[1:n-3,2:n-2] \
-    #     + A[3:n-1,2:n-2] \
-    #     + A[4:n-0,2:n-2]
-    A[0:n, 0:n] = A + 1
-print(B)
-
-aa.fini()
+#!/usr/bin/env python3
+#
+# Copyright (c) 2015, Intel Corporation
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+#      copyright notice, this list of conditions and the following
+#      disclaimer in the documentation and/or other materials provided
+#      with the distribution.
+# * Neither the name of Intel Corporation nor the names of its
+#      contributors may be used to endorse or promote products
+#      derived from this software without specific prior written
+#      permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+#
+#
+# *******************************************************************
+#
+# NAME:    Stencil
+#
+# PURPOSE: This program tests the efficiency with which a space-invariant,
+#          linear, symmetric filter (stencil) can be applied to a square
+#          grid or image.
+#
+# USAGE:   The program takes as input the number of iterations, the linear
+#          dimension of the grid, and optionally the stencil type and radius
+#
+#                <progname> <iterations> <grid size> [<star/stencil> <radius>]
+#
+#          The output consists of diagnostics to make sure the
+#          algorithm worked, and of timing statistics.
+#
+# HISTORY: - Written by Rob Van der Wijngaart, February 2009.
+#          - RvdW: Removed unrolling pragmas for clarity;
+#            added constant to array "in" at end of each iteration to force
+#            refreshing of neighbor data in parallel versions; August 2013
+#          - Converted to Python by Jeff Hammond, February 2016.
+#
+# *******************************************************************
+
+import sys
+
+print(
+    "Python version = ", str(sys.version_info.major) + "." + str(sys.version_info.minor)
+)
+if sys.version_info >= (3, 3):
+    from time import process_time as timer
+else:
+    from timeit import default_timer as timer
+
+import ddptensor as numpy
+
+# print('Numpy version  = ', numpy.version.version)
+
+
+def main():
+    # ********************************************************************
+    # read and test input parameters
+    # ********************************************************************
+
+    print("Parallel Research Kernels")
+    print("Python stencil execution on 2D grid")
+
+    if len(sys.argv) < 3:
+        print("argument count = ", len(sys.argv))
+        sys.exit(
+            "Usage: ./stencil <# iterations> <array dimension> [<star/stencil> <radius>]"
+        )
+
+    iterations = int(sys.argv[1])
+    if iterations < 1:
+        sys.exit("ERROR: iterations must be >= 1")
+
+    n = int(sys.argv[2])
+    if n < 1:
+        sys.exit("ERROR: array dimension must be >= 1")
+
+    if len(sys.argv) > 3:
+        pattern = sys.argv[3]
+    else:
+        pattern = "star"
+
+    if len(sys.argv) > 4:
+        r = int(sys.argv[4])
+        if r < 1:
+            sys.exit("ERROR: Stencil radius should be positive")
+        if (2 * r + 1) > n:
+            sys.exit("ERROR: Stencil radius exceeds grid size")
+    else:
+        r = 2
+
+    print("Number of iterations = ", iterations)
+    print("Grid size            = ", n)
+    print("Radius of stencil    = ", r)
+    if pattern == "star":
+        print("Type of stencil      = ", "star")
+    else:
+        print("Type of stencil      = ", "stencil")
+    print("Data type            = double precision")
+    print("Compact representation of stencil loop body")
+
+    # there is certainly a more Pythonic way to initialize W,
+    # but it will have no impact on performance.
+    W = numpy.zeros(((2 * r + 1), (2 * r + 1)), dtype=numpy.float64)
+    if pattern == "star":
+        stencil_size = 4 * r + 1
+        for i in range(1, r + 1):
+            W[r, r + i] = +1.0 / (2 * i * r)
+            W[r + i, r] = +1.0 / (2 * i * r)
+            W[r, r - i] = -1.0 / (2 * i * r)
+            W[r - i, r] = -1.0 / (2 * i * r)
+
+    else:
+        stencil_size = (2 * r + 1) ** 2.0
+        for j in range(1, r + 1):
+            for i in range(-j + 1, j):
+                W[r + i, r + j] = +1.0 / (4 * j * (2 * j - 1) * r)
+                W[r + i, r - j] = -1.0 / (4 * j * (2 * j - 1) * r)
+                W[r + j, r + i] = +1.0 / (4 * j * (2 * j - 1) * r)
+                W[r - j, r + i] = -1.0 / (4 * j * (2 * j - 1) * r)
+
+            W[r + j, r + j] = +1.0 / (4 * j * r)
+            W[r - j, r - j] = -1.0 / (4 * j * r)
+
+    # A = numpy.fromfunction(lambda i,j: i+j, (n,n), dtype=float)
+    A = numpy.empty((n, n), dtype=numpy.float64)
+    for i in range(n):
+        for j in range(n):
+            A[i, j] = float(i + j)
+    print(A.dtype)
+    B = numpy.zeros((n, n), dtype=numpy.float64)
+
+    for k in range(iterations + 1):
+        # NOTE: t0 is taken at k == 0, so the warmup iteration is included in the timing
+        if k < 1:
+            t0 = timer()
+
+        if pattern == "star":
+            if r == 2:
+                B[2 : n - 2, 2 : n - 2] = (
+                    B[2 : n - 2, 2 : n - 2]
+                    + W[2, 2] * A[2 : n - 2, 2 : n - 2]
+                    + W[2, 0] * A[2 : n - 2, 0 : n - 4]
+                    + W[2, 1] * A[2 : n - 2, 1 : n - 3]
+                    + W[2, 3] * A[2 : n - 2, 3 : n - 1]
+                    + W[2, 4] * A[2 : n - 2, 4 : n - 0]
+                    + W[0, 2] * A[0 : n - 4, 2 : n - 2]
+                    + W[1, 2] * A[1 : n - 3, 2 : n - 2]
+                    + W[3, 2] * A[3 : n - 1, 2 : n - 2]
+                    + W[4, 2] * A[4 : n - 0, 2 : n - 2]
+                )
+            else:
+                b = n - r
+                B[r:b, r:b] = B[r:b, r:b] + W[r, r] * A[r:b, r:b]
+                for s in range(1, r + 1):
+                    B[r:b, r:b] = (
+                        B[r:b, r:b]
+                        + W[r, r - s] * A[r:b, r - s : b - s]
+                        + W[r, r + s] * A[r:b, r + s : b + s]
+                        + W[r - s, r] * A[r - s : b - s, r:b]
+                        + W[r + s, r] * A[r + s : b + s, r:b]
+                    )
+        else:  # stencil
+            if r > 0:
+                b = n - r
+                for s in range(-r, r + 1):
+                    for t in range(-r, r + 1):
+                        B[r:b, r:b] = (
+                            B[r:b, r:b]
+                            + W[r + t, r + s] * A[r + t : b + t, r + s : b + s]
+                        )
+
+        A = A + 1.0
+
+    t1 = timer()
+    stencil_time = t1 - t0
+
+    # ******************************************************************************
+    # * Analyze and output results.
+    # ******************************************************************************
+
+    print(W, B)
+    # norm = numpy.linalg.norm(numpy.reshape(B,n*n),ord=1)
+    # active_points = (n-2*r)**2
+    # norm /= active_points
+
+    # epsilon=1.e-8
+
+    # # verify correctness
+    # reference_norm = 2*(iterations+1)
+    # if abs(norm-reference_norm) < epsilon:
+    #     print('Solution validates')
+    #     flops = (2*stencil_size+1) * active_points
+    #     avgtime = stencil_time/iterations
+    #     print('Rate (MFlops/s): ',1.e-6*flops/avgtime, ' Avg time (s): ',avgtime)
+    # else:
+    #     print('ERROR: L1 norm = ', norm,' Reference L1 norm = ', reference_norm)
+    #     sys.exit()
+
+
+if __name__ == "__main__":
+    numpy.init(False)
+    main()
+    numpy.fini()