IntelPython
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎CMakeLists.txt‎
Lines changed: 15 additions & 12 deletions b/‎CMakeLists.txt‎
Lines changed: 15 additions & 12 deletions
diff --git a/‎ddptensor/__init__.py‎
Lines changed: 2 additions & 1 deletion b/‎ddptensor/__init__.py‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎setup.py‎
Lines changed: 53 additions & 30 deletions b/‎setup.py‎
Lines changed: 53 additions & 30 deletions
diff --git a/‎src/Creator.cpp‎
Lines changed: 1 addition & 0 deletions b/‎src/Creator.cpp‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/EWBinOp.cpp‎
Lines changed: 57 additions & 74 deletions b/‎src/EWBinOp.cpp‎
Lines changed: 57 additions & 74 deletions
@@ -127,3 +127,6 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# emacs
+*~
@@ -10,6 +10,18 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
 # Common installation directories
 #include(GNUInstallDirs)
 
+# ===============
+# Deps
+# ===============
+
+# Find Python3 and NumPy
+find_package(Python3 COMPONENTS Interpreter Development.Module NumPy REQUIRED)
+# pybind11 is REQUIRED below: pybind11_add_module() is called unconditionally later.
+find_package(Python COMPONENTS Interpreter Development)
+find_package(pybind11 CONFIG REQUIRED)
+find_package(MPI REQUIRED)
+include_directories(SYSTEM ${MPI_INCLUDE_PATH} ${pybind11_INCLUDE_DIRS})
+
 # Use -fPIC even if statically compiled
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
@@ -19,18 +31,9 @@ set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 FILE(GLOB MyCppSources ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src/include/ddptensor/*.hpp)
 
 # Create the mymath library
-add_library(_ddptensor SHARED ${MyCppSources})
+#add_library(_ddptensor MODULE ${MyCppSources})
+pybind11_add_module(_ddptensor THIN_LTO ${MyCppSources})
 
 target_include_directories(_ddptensor PRIVATE ${PROJECT_SOURCE_DIR}/src/include ${PROJECT_SOURCE_DIR}/third_party/xtl/include ${PROJECT_SOURCE_DIR}/third_party/xsimd/include ${PROJECT_SOURCE_DIR}/third_party/xtensor-blas/include ${PROJECT_SOURCE_DIR}/third_party/xtensor/include ${PROJECT_SOURCE_DIR}/third_party/bitsery/include)
 
-# ===============
-# Deps
-# ===============
-
-# Find Python3 and NumPy
-find_package(Python3 COMPONENTS Interpreter Development.Module NumPy REQUIRED)
-
-find_package(MPI REQUIRED)
-find_package(pybind11 CONFIG)
-include_directories(SYSTEM ${MPI_INCLUDE_PATH} ${pybind11_INCLUDE_DIRS})
-target_link_libraries(_ddptensor ${MPI_C_LIBRARIES})
+target_link_libraries(_ddptensor PRIVATE ${MPI_C_LIBRARIES})
@@ -14,7 +14,8 @@
 # are simply forwarded as-is.
 
 from . import _ddptensor as _cdt
-from .ddptensor import float64, int64, fini, dtensor
+from ._ddptensor import float64, float32, int64, int32, int16, uint64, uint32, uint16, fini
+from .ddptensor import dtensor
 from os import getenv
 from . import array_api as api
 from . import spmd
 
@@ -1,39 +1,62 @@
-import cmake_build_extension
-from setuptools import setup
-from pathlib import Path
-
-ext_modules = [
-        cmake_build_extension.CMakeExtension(
-            name="_ddptensor",
-            # Name of the resulting package name (import mymath_pybind11)
-            install_prefix="ddptensor",
-            # Note: pybind11 is a build-system requirement specified in pyproject.toml,
-            #       therefore pypa/pip or pypa/build will install it in the virtual
-            #       environment created in /tmp during packaging.
-            #       This cmake_depends_on option adds the pybind11 installation path
-            #       to CMAKE_PREFIX_PATH so that the example finds the pybind11 targets
-            #       even if it is not installed in the system.
-            cmake_depends_on=["pybind11"],
-            # Exposes the binary print_answer to the environment.
-            # It requires also adding a new entry point in setup.cfg.
-            # expose_binaries=["bin/print_answer"],
-            # Writes the content to the top-level __init__.py
-            #write_top_level_init=init_py,
-            # Selects the folder where the main CMakeLists.txt is stored
-            # (it could be a subfolder)
-            source_dir=str(Path(__file__).parent.absolute()),
-            cmake_configure_options=[
-            ]
-        ),
-    ]
+import os
+import pathlib
+from setuptools import setup, Extension
+from setuptools.command.build_ext import build_ext as build_ext_orig
+
+
+class CMakeExtension(Extension):
+    # Extension declared with an empty source list; build_ext.build_cmake builds it with CMake.
+    def __init__(self, name):
+        # don't invoke the original build_ext for this special extension
+        super().__init__(name, sources=[])
+
+
+class build_ext(build_ext_orig):
+
+    def run(self):
+        for ext in self.extensions:
+            self.build_cmake(ext)
+        super().run()
+
+    def build_cmake(self, ext):
+        cwd = pathlib.Path().absolute()
+
+        # these dirs will be created in build_py, so if you don't have
+        # any python sources to bundle, the dirs will be missing
+        build_temp = pathlib.Path(self.build_temp)
+        build_temp.mkdir(parents=True, exist_ok=True)
+        extdir = pathlib.Path(self.get_ext_fullpath(ext.name))
+        extdir.parent.mkdir(parents=True, exist_ok=True)
+
+        # example of cmake args
+        config = 'Debug' if self.debug else 'Release'
+        cmake_args = [
+            '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + str(extdir.parent.absolute()),
+            '-DCMAKE_BUILD_TYPE=' + config
+        ]
+
+        # example of build args
+        build_args = [
+            '--config', config,
+            '--', '-j8'
+        ]
+
+        os.chdir(str(build_temp))
+        self.spawn(['cmake', str(cwd)] + cmake_args)
+        if not self.dry_run:
+            self.spawn(['cmake', '--build', '.'] + build_args)
+        # Troubleshooting: if fail on line above then delete all possible 
+        # temporary CMake files including "CMakeCache.txt" in top level dir.
+        os.chdir(str(cwd))
+
 
 setup(name="ddptensor",
       version="0.1",
       description="Distributed Tensor and more",
       packages=["ddptensor", "ddptensor.numpy", "ddptensor.torch"],
-      ext_modules=ext_modules,
+      ext_modules=[CMakeExtension('ddptensor/_ddptensor')],
       cmdclass=dict(
           # Enable the CMakeExtension entries defined above
-          build_ext=cmake_build_extension.BuildExtension,
+          build_ext=build_ext  #cmake_build_extension.BuildExtension,
       ),
 )
@@ -8,6 +8,7 @@ namespace x {
     {
     public:
         using ptr_type = DPTensorBaseX::ptr_type;
+        using typed_ptr_type = typename DPTensorX<T>::typed_ptr_type;
 
         static ptr_type op(CreatorId c, shape_type && shp)
         {
 
@@ -3,114 +3,64 @@
 
 namespace x {
 
-    template<typename T>
     class EWBinOp
     {
     public:
         using ptr_type = DPTensorBaseX::ptr_type;
 
 #pragma GCC diagnostic ignored "-Wswitch"
-
-        template<typename A, typename B, typename U = T, std::enable_if_t<std::is_floating_point<U>::value, bool> = true>
-        static ptr_type integral_op(EWBinOpId iop, const DPTensorX<T> & tx, A && a, B && b)
-        {
-            throw std::runtime_error("Illegal or unknown inplace elementwise binary operation");
-        }
-
-        template<typename A, typename B, typename U = T, std::enable_if_t<std::is_integral<U>::value, bool> = true>
-        static ptr_type integral_op(EWBinOpId iop, const DPTensorX<T> & tx, A && a, B && b)
-        {
-            switch(iop) {
-            case __AND__:
-            case BITWISE_AND:
-                return operatorx<T>::mk_tx_(tx, a & b);
-            case __RAND__:
-                return operatorx<T>::mk_tx_(tx, b & a);
-            case __LSHIFT__:
-            case BITWISE_LEFT_SHIFT:
-                return operatorx<T>::mk_tx_(tx, a << b);
-            case __MOD__:
-            case REMAINDER:
-                return operatorx<T>::mk_tx_(tx, a % b);
-            case __OR__:
-            case BITWISE_OR:
-                return operatorx<T>::mk_tx_(tx, a | b);
-            case __ROR__:
-                return operatorx<T>::mk_tx_(tx, b | a);
-            case __RSHIFT__:
-            case BITWISE_RIGHT_SHIFT:
-                return operatorx<T>::mk_tx_(tx, a >> b);
-            case __XOR__:
-            case BITWISE_XOR:
-                return operatorx<T>::mk_tx_(tx, a ^ b);
-            case __RXOR__:
-                return operatorx<T>::mk_tx_(tx, b ^ a);
-            case __RLSHIFT__:
-                return operatorx<T>::mk_tx_(tx, b << a);
-            case __RMOD__:
-                return operatorx<T>::mk_tx_(tx, b % a);
-            case __RRSHIFT__:
-                return operatorx<T>::mk_tx_(tx, b >> a);
-            default:
-                throw std::runtime_error("Unknown elementwise binary operation");
-            }
-        }
-
-        static ptr_type op(EWBinOpId bop, const ptr_type & a_ptr, const ptr_type & b_ptr)
+        template<typename A, typename B>
+        static ptr_type op(EWBinOpId bop, const std::shared_ptr<DPTensorX<A>> & a_ptr, const std::shared_ptr<DPTensorX<B>> & b_ptr)
         {
-            const auto _a = dynamic_cast<DPTensorX<T>*>(a_ptr.get());
-            const auto _b = dynamic_cast<DPTensorX<T>*>(b_ptr.get());
-            if(!_a || !_b)
-                throw std::runtime_error("Invalid array object: could not dynamically cast");
-            const auto & a = xt::strided_view(_a->xarray(), _a->lslice());
-            const auto & b = xt::strided_view(_b->xarray(), _b->lslice());
+            const auto & a = xt::strided_view(a_ptr->xarray(), a_ptr->lslice());
+            const auto & b = xt::strided_view(b_ptr->xarray(), b_ptr->lslice());
 
             switch(bop) {
             case __ADD__:
             case ADD:
-                return operatorx<T>::mk_tx_(*_a, a + b);
+                return operatorx<A>::mk_tx_(a_ptr, a + b);
             case __RADD__:
-                return operatorx<T>::mk_tx_(*_a, b + a);
+                return operatorx<A>::mk_tx_(a_ptr, b + a);
             case ATAN2:
-                return  operatorx<T>::mk_tx_(*_a, xt::atan2(a, b));
+                return  operatorx<A>::mk_tx_(a_ptr, xt::atan2(a, b));
             case __EQ__:
             case EQUAL:
-                return  operatorx<T>::mk_tx_(*_a, xt::equal(a, b));
+                return  operatorx<A>::mk_tx_(a_ptr, xt::equal(a, b));
             case __FLOORDIV__:
             case FLOOR_DIVIDE:
-                return operatorx<T>::mk_tx_(*_a, xt::floor(a / b));
+                return operatorx<A>::mk_tx_(a_ptr, xt::floor(a / b));
             case __GE__:
             case GREATER_EQUAL:
-                return operatorx<T>::mk_tx_(*_a, a >= b);
+                return operatorx<A>::mk_tx_(a_ptr, a >= b);
             case __GT__:
             case GREATER:
-                return operatorx<T>::mk_tx_(*_a, a > b);
+                return operatorx<A>::mk_tx_(a_ptr, a > b);
             case __LE__:
             case LESS_EQUAL:
-                return operatorx<T>::mk_tx_(*_a, a <= b);
+                return operatorx<A>::mk_tx_(a_ptr, a <= b);
             case __LT__:
             case LESS:
-                return operatorx<T>::mk_tx_(*_a, a < b);
+                return operatorx<A>::mk_tx_(a_ptr, a < b);
             case __MUL__:
             case MULTIPLY:
-                return operatorx<T>::mk_tx_(*_a, a * b);
+                return operatorx<A>::mk_tx_(a_ptr, a * b);
             case __RMUL__:
-                return operatorx<T>::mk_tx_(*_a, b * a);
+                return operatorx<A>::mk_tx_(a_ptr, b * a);
             case __NE__:
             case NOT_EQUAL:
-                return operatorx<T>::mk_tx_(*_a, xt::not_equal(a, b));
+                return operatorx<A>::mk_tx_(a_ptr, xt::not_equal(a, b));
             case __SUB__:
             case SUBTRACT:
-                return operatorx<T>::mk_tx_(*_a, a - b);
+                return operatorx<A>::mk_tx_(a_ptr, a - b);
             case __TRUEDIV__:
             case DIVIDE:
-                return operatorx<T>::mk_tx_(*_a, a / b);
+                return operatorx<A>::mk_tx_(a_ptr, a / b);
             case __RFLOORDIV__:
-                return operatorx<T>::mk_tx_(*_a, xt::floor(b / a));
+                return operatorx<A>::mk_tx_(a_ptr, xt::floor(b / a));
             case __RSUB__:
-                return operatorx<T>::mk_tx_(*_a, b - a);
+                return operatorx<A>::mk_tx_(a_ptr, b - a);
             case __RTRUEDIV__:
-                return operatorx<T>::mk_tx_(*_a, b / a);
+                return operatorx<A>::mk_tx_(a_ptr, b / a);
             case __MATMUL__:
             case __POW__:
             case POW:
@@ -122,15 +72,48 @@ namespace x {
                 // FIXME
                 throw std::runtime_error("Binary operation not implemented");
             }
-            return integral_op(bop, *_a, a, b);
+            if constexpr (std::is_integral<A>::value && std::is_integral<B>::value) {
+                switch(bop) {
+                case __AND__:
+                case BITWISE_AND:
+                    return operatorx<A>::mk_tx_(a_ptr, a & b);
+                case __RAND__:
+                    return operatorx<A>::mk_tx_(a_ptr, b & a);
+                case __LSHIFT__:
+                case BITWISE_LEFT_SHIFT:
+                    return operatorx<A>::mk_tx_(a_ptr, a << b);
+                case __MOD__:
+                case REMAINDER:
+                    return operatorx<A>::mk_tx_(a_ptr, a % b);
+                case __OR__:
+                case BITWISE_OR:
+                    return operatorx<A>::mk_tx_(a_ptr, a | b);
+                case __ROR__:
+                    return operatorx<A>::mk_tx_(a_ptr, b | a);
+                case __RSHIFT__:
+                case BITWISE_RIGHT_SHIFT:
+                    return operatorx<A>::mk_tx_(a_ptr, a >> b);
+                case __XOR__:
+                case BITWISE_XOR:
+                    return operatorx<A>::mk_tx_(a_ptr, a ^ b);
+                case __RXOR__:
+                    return operatorx<A>::mk_tx_(a_ptr, b ^ a);
+                case __RLSHIFT__:
+                    return operatorx<A>::mk_tx_(a_ptr, b << a);
+                case __RMOD__:
+                    return operatorx<A>::mk_tx_(a_ptr, b % a);
+                case __RRSHIFT__:
+                    return operatorx<A>::mk_tx_(a_ptr, b >> a);
+                }
+            }
+            throw std::runtime_error("Unknown/invalid elementwise binary operation");
         }
-
 #pragma GCC diagnostic pop
 
     };
 } // namespace x
 
 tensor_i::ptr_type EWBinOp::op(EWBinOpId op, x::DPTensorBaseX::ptr_type a, x::DPTensorBaseX::ptr_type b)
 {
-    return TypeDispatch<x::EWBinOp>(a->dtype(), op, a, b);
+    return TypeDispatch2<x::EWBinOp>(a, b, op);
 }
-Original file line number
+Diff line change
 # Pyre type checker
 .pyre/
++
 +# emacs
 +*~
Original file line number	Diff line number	Diff line change
`@@ -8,6 +8,7 @@ namespace x {`
`8`	`8`	`{`
`9`	`9`	`public:`
`10`	`10`	`using ptr_type = DPTensorBaseX::ptr_type;`
	`11`	`+ using typed_ptr_type = typename DPTensorX<T>::typed_ptr_type;`
`11`	`12`
`12`	`13`	`static ptr_type op(CreatorId c, shape_type && shp)`
`13`	`14`	`{`