Skip to content
This repository was archived by the owner on Jan 26, 2026. It is now read-only.

Commit e5b97f0

Browse files
committed
parallel loops for non-strided arrays
1 parent 530e4e2 commit e5b97f0

File tree

10 files changed: +95 -31 lines changed

10 files changed: +95 -31 lines changed

CMakeLists.txt

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
1818
find_package(Python3 COMPONENTS Interpreter Development.Module NumPy REQUIRED)
1919
find_package(pybind11 CONFIG)
2020
find_package(MPI REQUIRED)
21+
#find_package(OpenMP)
2122

2223
set(MKL_LIBRARIES -L$ENV{MKLROOT}/lib -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lrt -ldl -lm)
2324
# Use -fPIC even if statically compiled
@@ -30,8 +31,9 @@ FILE(GLOB MyCppSources ${PROJECT_SOURCE_DIR}/src/*.cpp ${PROJECT_SOURCE_DIR}/src
3031

3132
# Create the mymath library
3233
#add_library(_ddptensor MODULE ${MyCppSources})
33-
pybind11_add_module(_ddptensor THIN_LTO ${MyCppSources})
34+
pybind11_add_module(_ddptensor MODULE ${MyCppSources})
3435

35-
target_compile_definitions(_ddptensor PRIVATE USE_MKL=1 XTENSOR_USE_XSIMD=1 XTENSOR_USE_OPENMP=1) # DDPT_2TYPES=1)
36+
target_compile_options(_ddptensor PRIVATE -fopenmp)
37+
target_compile_definitions(_ddptensor PRIVATE USE_MKL=1 XTENSOR_USE_XSIMD=1 XTENSOR_USE_OPENMP=1 DDPT_2TYPES=1)
3638
target_include_directories(_ddptensor PRIVATE ${PROJECT_SOURCE_DIR}/src/include ${PROJECT_SOURCE_DIR}/third_party/xtl/include ${PROJECT_SOURCE_DIR}/third_party/xsimd/include ${PROJECT_SOURCE_DIR}/third_party/xtensor-blas/include ${PROJECT_SOURCE_DIR}/third_party/xtensor/include ${PROJECT_SOURCE_DIR}/third_party/bitsery/include ${MPI_INCLUDE_PATH} $ENV{MKLROOT}/include ${pybind11_INCLUDE_DIRS})
3739
target_link_libraries(_ddptensor PRIVATE ${MPI_C_LIBRARIES} ${MKL_LIBRARIES})

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ def build_cmake(self, ext):
2929
extdir.parent.mkdir(parents=True, exist_ok=True)
3030

3131
# example of cmake args
32-
config = 'Debug' # if self.debug else 'Release'
32+
config = 'Debug' if self.debug else 'Release'
3333
cmake_args = [
3434
'-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + str(extdir.parent.absolute()),
3535
'-DCMAKE_BUILD_TYPE=' + config

src/Creator.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,11 @@ namespace x {
1616
shape_type shape(std::move(pvslice.tile_shape()));
1717
switch(c) {
1818
case EMPTY:
19-
return operatorx<T>::mk_tx(std::move(pvslice), std::move(xt::empty<T>(shape)));
19+
return operatorx<T>::mk_tx(std::move(pvslice), std::move(xt::empty<T>(std::move(shape))));
2020
case ONES:
21-
return operatorx<T>::mk_tx(std::move(pvslice), std::move(xt::ones<T>(shape)));
21+
return operatorx<T>::mk_tx(std::move(pvslice), std::move(xt::ones<T>(std::move(shape))));
2222
case ZEROS:
23-
return operatorx<T>::mk_tx(std::move(pvslice), std::move(xt::zeros<T>(shape)));
23+
return operatorx<T>::mk_tx(std::move(pvslice), std::move(xt::zeros<T>(std::move(shape))));
2424
default:
2525
throw std::runtime_error("Unknown creator");
2626
};

src/EWBinOp.cpp

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,23 @@ namespace x {
88
public:
99
using ptr_type = DPTensorBaseX::ptr_type;
1010

11-
#pragma GCC diagnostic ignored "-Wswitch"
1211
template<typename A, typename B>
1312
static ptr_type op(EWBinOpId bop, const std::shared_ptr<DPTensorX<A>> & a_ptr, const std::shared_ptr<DPTensorX<B>> & b_ptr)
1413
{
15-
const auto & a = xt::strided_view(a_ptr->xarray(), a_ptr->lslice());
16-
const auto & b = xt::strided_view(b_ptr->xarray(), b_ptr->lslice());
17-
14+
const auto & ax = a_ptr->xarray();
15+
const auto & bx = b_ptr->xarray();
16+
if(a_ptr->is_sliced() || b_ptr->is_sliced()) {
17+
const auto & av = xt::strided_view(ax, a_ptr->lslice());
18+
const auto & bv = xt::strided_view(bx, b_ptr->lslice());
19+
return do_op(bop, av, bv, a_ptr);
20+
}
21+
return do_op(bop, ax, bx, a_ptr);
22+
}
23+
24+
#pragma GCC diagnostic ignored "-Wswitch"
25+
template<typename T1, typename T2, typename A>
26+
static ptr_type do_op(EWBinOpId bop, const T1 & a, const T2 & b, const std::shared_ptr<DPTensorX<A>> & a_ptr)
27+
{
1828
switch(bop) {
1929
case __ADD__:
2030
case ADD:
@@ -72,7 +82,7 @@ namespace x {
7282
// FIXME
7383
throw std::runtime_error("Binary operation not implemented");
7484
}
75-
if constexpr (std::is_integral<A>::value && std::is_integral<B>::value) {
85+
if constexpr (std::is_integral<A>::value && std::is_integral<typename T2::value_type>::value) {
7686
switch(bop) {
7787
case __AND__:
7888
case BITWISE_AND:

src/EWUnyOp.cpp

Lines changed: 12 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,12 +8,21 @@ namespace x {
88
public:
99
using ptr_type = DPTensorBaseX::ptr_type;
1010

11-
#pragma GCC diagnostic ignored "-Wswitch"
1211
template<typename T>
1312
static ptr_type op(EWUnyOpId uop, const std::shared_ptr<DPTensorX<T>> & a_ptr)
1413
{
15-
const auto & a = xt::strided_view(a_ptr->xarray(), a_ptr->lslice());
16-
14+
const auto & ax = a_ptr->xarray();
15+
if(a_ptr->is_sliced()) {
16+
const auto & av = xt::strided_view(ax, a_ptr->lslice());
17+
return do_op(uop, av, a_ptr);
18+
}
19+
return do_op(uop, ax, a_ptr);
20+
}
21+
22+
#pragma GCC diagnostic ignored "-Wswitch"
23+
template<typename T1, typename T>
24+
static ptr_type do_op(EWUnyOpId uop, const T1 & a, const std::shared_ptr<DPTensorX<T>> & a_ptr)
25+
{
1726
switch(uop) {
1827
case __ABS__:
1928
case ABS:

src/IEWBinOp.cpp

Lines changed: 16 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8,13 +8,24 @@ namespace x {
88
public:
99
using ptr_type = DPTensorBaseX::ptr_type;
1010

11-
#pragma GCC diagnostic ignored "-Wswitch"
1211
template<typename A, typename B>
1312
static void op(IEWBinOpId iop, std::shared_ptr<DPTensorX<A>> a_ptr, const std::shared_ptr<DPTensorX<B>> & b_ptr)
1413
{
15-
auto a = xt::strided_view(a_ptr->xarray(), a_ptr->lslice());
16-
const auto b = xt::strided_view(b_ptr->xarray(), b_ptr->lslice());
17-
14+
auto & ax = a_ptr->xarray();
15+
const auto & bx = b_ptr->xarray();
16+
if(a_ptr->is_sliced() || b_ptr->is_sliced()) {
17+
auto av = xt::strided_view(ax, a_ptr->lslice());
18+
const auto & bv = xt::strided_view(bx, b_ptr->lslice());
19+
do_op(iop, av, bv);
20+
} else {
21+
do_op(iop, ax, bx);
22+
}
23+
}
24+
25+
#pragma GCC diagnostic ignored "-Wswitch"
26+
template<typename T1, typename T2>
27+
static void do_op(IEWBinOpId iop, T1 & a, const T2 & b)
28+
{
1829
switch(iop) {
1930
case __IADD__:
2031
a += b;
@@ -34,7 +45,7 @@ namespace x {
3445
case __IPOW__:
3546
throw std::runtime_error("Binary inplace operation not implemented");
3647
}
37-
if constexpr (std::is_integral<A>::value && std::is_integral<B>::value) {
48+
if constexpr (std::is_integral<typename T1::value_type>::value && std::is_integral<typename T2::value_type>::value) {
3849
switch(iop) {
3950
case __IMOD__:
4051
a %= b;

src/Random.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,12 @@ namespace x {
1111
struct Rand
1212
{
1313
template<typename L, typename U>
14-
static ptr_type op(const shape_type & shape, const L & lower, const U & upper)
14+
static ptr_type op(const shape_type & shp, const L & lower, const U & upper)
1515
{
1616
if constexpr (std::is_floating_point<T>::value) {
17-
PVSlice pvslice(shape);
18-
return operatorx<T>::mk_tx(std::move(pvslice), std::move(xt::random::rand(shape, to_native<T>(lower), to_native<T>(upper))));
17+
PVSlice pvslice(shp);
18+
shape_type shape(std::move(pvslice.tile_shape()));
19+
return operatorx<T>::mk_tx(std::move(pvslice), std::move(xt::random::rand(std::move(shape), to_native<T>(lower), to_native<T>(upper))));
1920
}
2021
}
2122
};

src/ReduceOp.cpp

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,8 +8,6 @@ namespace x {
88
public:
99
using ptr_type = DPTensorBaseX::ptr_type;
1010

11-
#pragma GCC diagnostic ignored "-Wswitch"
12-
1311
template<typename X>
1412
static ptr_type dist_reduce(ReduceOpId rop, const PVSlice & slice, const dim_vec_type & dims, X && x)
1513
{
@@ -21,14 +19,24 @@ namespace x {
2119
theTransceiver->reduce_all(a.data(), DTYPE<typename X::value_type>::value, len, rop);
2220
owner = REPLICATED;
2321
}
24-
return operatorx<typename X::value_type>::mk_tx(new_shape, a, owner);
22+
return operatorx<typename X::value_type>::mk_tx(std::move(new_shape), a, owner);
2523
}
2624

2725
template<typename T>
2826
static ptr_type op(ReduceOpId rop, const dim_vec_type & dims, const std::shared_ptr<DPTensorX<T>> & a_ptr)
2927
{
30-
const auto & a = xt::strided_view(a_ptr->xarray(), a_ptr->lslice());
28+
const auto & ax = a_ptr->xarray();
29+
if(a_ptr->is_sliced()) {
30+
const auto & av = xt::strided_view(ax, a_ptr->lslice());
31+
return do_op(rop, dims, av, a_ptr);
32+
}
33+
return do_op(rop, dims, ax, a_ptr);
34+
}
3135

36+
#pragma GCC diagnostic ignored "-Wswitch"
37+
template<typename T1, typename T>
38+
static ptr_type do_op(ReduceOpId rop, const dim_vec_type & dims, const T1 & a, const std::shared_ptr<DPTensorX<T>> & a_ptr)
39+
{
3240
switch(rop) {
3341
case MEAN:
3442
return dist_reduce(rop, a_ptr->slice(), dims, xt::mean(a, dims));

src/include/ddptensor/x.hpp

Lines changed: 27 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ namespace x
4848
xt::xstrided_slice_vector _lslice;
4949
std::shared_ptr<xt::xarray<T>> _xarray;
5050
mutable T _replica = 0;
51+
bool _issliced = false;
5152

5253
public:
5354
using typed_ptr_type = std::shared_ptr<DPTensorX<T>>;
@@ -56,7 +57,22 @@ namespace x
5657
DPTensorX(PVSlice && slc, I && ax, rank_type owner=NOOWNER)
5758
: _owner(owner),
5859
_slice(std::move(slc)),
59-
_lslice(to_xt(_slice.local_slice_of_rank())),
60+
_xarray(std::make_shared<xt::xarray<T>>(std::forward<I>(ax)))
61+
{
62+
}
63+
64+
template<typename I>
65+
DPTensorX(const shape_type & slc, I && ax, rank_type owner=NOOWNER)
66+
: _owner(owner),
67+
_slice(slc),
68+
_xarray(std::make_shared<xt::xarray<T>>(std::forward<I>(ax)))
69+
{
70+
}
71+
72+
template<typename I>
73+
DPTensorX(shape_type && slc, I && ax, rank_type owner=NOOWNER)
74+
: _owner(owner),
75+
_slice(std::move(slc)),
6076
_xarray(std::make_shared<xt::xarray<T>>(std::forward<I>(ax)))
6177
{
6278
}
@@ -66,7 +82,8 @@ namespace x
6682
: _owner(owner),
6783
_slice(org._slice, slc),
6884
_lslice(to_xt(_slice.local_slice_of_rank())),
69-
_xarray(org._xarray)
85+
_xarray(org._xarray),
86+
_issliced(true)
7087
{
7188
if(owner == NOOWNER && slice().size() <= 1) {
7289
set_owner(org.slice().owner(slc));
@@ -80,21 +97,27 @@ namespace x
8097
: _owner(theTransceiver->rank()),
8198
_slice(std::forward<PVSlice>(slc)),
8299
_lslice(to_xt(_slice.slice())),
83-
_xarray()
100+
_xarray(),
101+
_issliced(true)
84102
{
85103
_xarray = org;
86104
}
87105

88106
DPTensorX(const T & v)
89107
: _owner(theTransceiver->rank()),
90108
_slice(shape_type{1}),
91-
_lslice({xt::newaxis()}), //to_xt(_slice.slice())),
109+
// _lslice({xt::newaxis()}), //to_xt(_slice.slice())),
92110
_xarray(std::make_shared<xt::xarray<T>>(1)),
93111
_replica(v)
94112
{
95113
*_xarray = v;
96114
}
97115

116+
bool is_sliced() const
117+
{
118+
return _issliced;
119+
}
120+
98121
virtual std::string __repr__() const
99122
{
100123
auto v = xt::strided_view(xarray(), lslice());

test/test_red.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import ddptensor as dt
2-
a = dt.ones((8,8))
2+
a = dt.ones((8,8), dtype=dt.float64)
33
s = dt.sum(a, axis=None)
44
b = dt.ones((8,8))
55
c = a + b

0 commit comments

Comments
 (0)