Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions .github/workflows/c-cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,24 @@ jobs:
run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }}
- name: make test
run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test
CPU_ARM:
  # Build and test the C++ backends on a Linux ARM (aarch64) hosted runner
  runs-on: ubuntu-24.04-arm
  strategy:
    matrix:
      folder: [ epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum , epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg ]
      precision: [ d , f , m ]
      # Only 'cppnone' and 'cppsse4' are accepted by the makefiles on aarch64
      backend: [ cppnone, cppsse4 ]
    # Let the remaining matrix combinations run even if one of them fails
    fail-fast: false
  steps:
  # Use a maintained checkout release (v2 targets the retired Node.js 12 runtime)
  - uses: actions/checkout@v4
  - name: github PR info
    run: date; echo github.event.pull_request.head.sha='${{ github.event.pull_request.head.sha }}'
  - name: make info
    run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk info
  - name: make
    run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }}
  - name: make test
    run: make BACKEND=${{ matrix.backend }} FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} -f cudacpp.mk test
CPU_MAC:
runs-on: macos-latest
env:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s)
# Detect architecture (x86_64, ppc64le...)
UNAME_P := $(shell uname -p)
###$(info UNAME_P='$(UNAME_P)')
# Detect machine hardware name (arm64, aarch64...): this is what the ARM checks in this makefile test
UNAME_M := $(shell uname -m)

#-------------------------------------------------------------------------------

Expand All @@ -60,7 +61,7 @@ endif
ifeq ($(BACKEND),cppauto)
ifeq ($(UNAME_P),ppc64le)
override BACKEND = cppsse4
else ifeq ($(UNAME_P),arm)
else ifneq (,$(filter $(UNAME_M),arm64 aarch64))
override BACKEND = cppsse4
else ifeq ($(wildcard /proc/cpuinfo),)
override BACKEND = cppnone
Expand Down Expand Up @@ -415,6 +416,7 @@ CXXFLAGS += $(OMPFLAGS)

# Set the build flags appropriate to each BACKEND choice (example: "make BACKEND=cppnone")
# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro]
# [Use 'g++ <buildflags> -E -dM - < /dev/null' to check which #define's are enabled]
# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476]
ifeq ($(UNAME_P),ppc64le)
ifeq ($(BACKEND),cppsse4)
Expand All @@ -426,16 +428,30 @@ ifeq ($(UNAME_P),ppc64le)
else ifeq ($(BACKEND),cpp512z)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment)
endif
else ifeq ($(UNAME_P),arm)
ifeq ($(BACKEND),cppsse4)
override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers)
else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon
ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON
override AVXFLAGS = -DMGONGPU_NOARMNEON
else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon
override AVXFLAGS =
else ifeq ($(BACKEND),cppavx2)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
else ifeq ($(BACKEND),cpp512y)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
else ifeq ($(BACKEND),cpp512z)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
endif
else ifeq ($(UNAME_M),aarch64) # ARM on Linux
ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent
override AVXFLAGS = -march=armv8-a+nosimd
else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers)
override AVXFLAGS = -march=armv8-a+simd
else ifeq ($(BACKEND),cppavx2)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
else ifeq ($(BACKEND),cpp512y)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
else ifeq ($(BACKEND),cpp512z)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
endif
else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531
ifeq ($(BACKEND),cppnone)
override AVXFLAGS = -mno-sse3 # no SIMD
Expand Down Expand Up @@ -713,7 +729,7 @@ bld512z:
ifeq ($(UNAME_P),ppc64le)
###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4
bldavxs: bldnone bldsse4
else ifeq ($(UNAME_P),arm)
else ifneq (,$(filter $(UNAME_M),arm64 aarch64))
###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4
bldavxs: bldnone bldsse4
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s)
# Detect architecture (x86_64, ppc64le...)
UNAME_P := $(shell uname -p)
###$(info UNAME_P='$(UNAME_P)')
# Detect machine hardware name (arm64, aarch64...): this is what the ARM checks in this makefile test
UNAME_M := $(shell uname -m)

#-------------------------------------------------------------------------------

Expand All @@ -60,7 +61,7 @@ endif
ifeq ($(BACKEND),cppauto)
ifeq ($(UNAME_P),ppc64le)
override BACKEND = cppsse4
else ifeq ($(UNAME_P),arm)
else ifneq (,$(filter $(UNAME_M),arm64 aarch64))
override BACKEND = cppsse4
else ifeq ($(wildcard /proc/cpuinfo),)
override BACKEND = cppnone
Expand Down Expand Up @@ -259,6 +260,7 @@ CXXFLAGS += $(OMPFLAGS)

# Set the build flags appropriate to each BACKEND choice (example: "make BACKEND=cppnone")
# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro]
# [Use 'g++ <buildflags> -E -dM - < /dev/null' to check which #define's are enabled]
# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476]
ifeq ($(UNAME_P),ppc64le)
ifeq ($(BACKEND),cppsse4)
Expand All @@ -270,16 +272,30 @@ ifeq ($(UNAME_P),ppc64le)
else ifeq ($(BACKEND),cpp512z)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment)
endif
else ifeq ($(UNAME_P),arm)
ifeq ($(BACKEND),cppsse4)
override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers)
else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon
ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON
override AVXFLAGS = -DMGONGPU_NOARMNEON
else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon
override AVXFLAGS =
else ifeq ($(BACKEND),cppavx2)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
else ifeq ($(BACKEND),cpp512y)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
else ifeq ($(BACKEND),cpp512z)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
endif
else ifeq ($(UNAME_M),aarch64) # ARM on Linux
ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent
override AVXFLAGS = -march=armv8-a+nosimd
else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers)
override AVXFLAGS = -march=armv8-a+simd
else ifeq ($(BACKEND),cppavx2)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
else ifeq ($(BACKEND),cpp512y)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
else ifeq ($(BACKEND),cpp512z)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
endif
else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531
ifeq ($(BACKEND),cppnone)
override AVXFLAGS = -mno-sse3 # no SIMD
Expand Down Expand Up @@ -729,7 +745,7 @@ bld512z:
ifeq ($(UNAME_P),ppc64le)
###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4
bldavxs: bldnone bldsse4
else ifeq ($(UNAME_P),arm)
else ifneq (,$(filter $(UNAME_M),arm64 aarch64))
###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4
bldavxs: bldnone bldsse4
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,25 +250,22 @@ namespace mg5amcCpu
bool known = true;
bool ok = __builtin_cpu_supports( "vsx" );
const std::string tag = "powerpc vsx (128bit as in SSE4.2)";
#elif defined __ARM_NEON__ // consider using __BUILTIN_CPU_SUPPORTS__
bool known = false; // __builtin_cpu_supports is not supported
// See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html
// See https://stackoverflow.com/q/62783908
// See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
bool ok = true; // this is just an assumption!
const std::string tag = "arm neon (128bit as in SSE4.2)";
#elif defined( __x86_64__ ) || defined( __i386__ )
bool known = true;
bool ok = __builtin_cpu_supports( "sse4.2" );
const std::string tag = "nehalem (SSE4.2)";
#else // AV FIXME! Added by OM for Mac, should identify the correct __xxx__ flag that should be targeted
// DM now we have an explicit NEON target for ARM
bool known = false; // __builtin_cpu_supports is not supported
bool ok = true; // this is just an assumption!
const std::string tag = "simd arch not defined";
#endif
#elif defined __ARM_NEON // consider using __BUILTIN_CPU_SUPPORTS__
bool known = false; // __builtin_cpu_supports is not supported
// See https://gcc.gnu.org/onlinedocs/gcc/Basic-PowerPC-Built-in-Functions-Available-on-all-Configurations.html
// See https://stackoverflow.com/q/62783908
// See https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
bool ok = true; // this is just an assumption!
const std::string tag = "arm neon (128bit as in SSE4.2)";
#endif
#else
bool known = true;
bool ok = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -912,13 +912,13 @@ main( int argc, char** argv )
#elif defined __SSE4_2__
#ifdef __PPC__
wrkflwtxt += "/ppcv";
#elif defined __ARM_NEON__
wrkflwtxt += "/neon";
#else
wrkflwtxt += "/sse4";
#endif
#elif defined __ARM_NEON
wrkflwtxt += "/neon";
#else
wrkflwtxt += "/????"; // no path to this statement
wrkflwtxt += "/????"; // no path to this statement
#endif
// -- Has cxtype_v::operator[] bracket with non-const reference?
#if defined MGONGPU_CPPSIMD
Expand Down Expand Up @@ -1028,11 +1028,12 @@ main( int argc, char** argv )
<< "Internal loops fptype_sv = VECTOR[" << neppV
#ifdef __PPC__
<< "] ('sse4': PPC VSX, 128bit)" << cxtref << std::endl
#elif defined __ARM_NEON__
<< "] ('sse4': ARM NEON, 128bit)" << cxtref << std::endl
#else
<< "] ('sse4': SSE4.2, 128bit)" << cxtref << std::endl
#endif
#elif defined __ARM_NEON
<< "Internal loops fptype_sv = VECTOR[" << neppV
<< "] ('sse4': ARM NEON, 128bit)" << cxtref << std::endl
#else
#error Internal error: unknown SIMD build configuration
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ UNAME_S := $(shell uname -s)
# Detect architecture (x86_64, ppc64le...)
UNAME_P := $(shell uname -p)
###$(info UNAME_P='$(UNAME_P)')
# Detect machine hardware name (arm64, aarch64...): this is what the ARM checks in this makefile test
UNAME_M := $(shell uname -m)

#-------------------------------------------------------------------------------

Expand All @@ -60,7 +61,7 @@ endif
ifeq ($(BACKEND),cppauto)
ifeq ($(UNAME_P),ppc64le)
override BACKEND = cppsse4
else ifeq ($(UNAME_P),arm)
else ifneq (,$(filter $(UNAME_M),arm64 aarch64))
override BACKEND = cppsse4
else ifeq ($(wildcard /proc/cpuinfo),)
override BACKEND = cppnone
Expand Down Expand Up @@ -516,6 +517,7 @@ CXXFLAGS += $(OMPFLAGS)
# Set the build flags appropriate to each BACKEND choice (example: "make BACKEND=cppnone")
# [NB MGONGPU_PVW512 is needed because "-mprefer-vector-width=256" is not exposed in a macro]
# [See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96476]
# [Use 'g++ <buildflags> -E -dM - < /dev/null' to check which #define's are enabled]
ifeq ($(UNAME_P),ppc64le)
ifeq ($(BACKEND),cppsse4)
override AVXFLAGS = -D__SSE4_2__ # Power9 VSX with 128 width (VSR registers)
Expand All @@ -526,16 +528,30 @@ ifeq ($(UNAME_P),ppc64le)
else ifeq ($(BACKEND),cpp512z)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on PowerPC for the moment)
endif
else ifeq ($(UNAME_P),arm)
ifeq ($(BACKEND),cppsse4)
override AVXFLAGS = -D__SSE4_2__ # ARM NEON with 128 width (Q/quadword registers)
else ifeq ($(UNAME_M),arm64) # ARM on Apple silicon
ifeq ($(BACKEND),cppnone) # this internally undefines __ARM_NEON
override AVXFLAGS = -DMGONGPU_NOARMNEON
else ifeq ($(BACKEND),cppsse4) # __ARM_NEON is always defined on Apple silicon
override AVXFLAGS =
else ifeq ($(BACKEND),cppavx2)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
else ifeq ($(BACKEND),cpp512y)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
else ifeq ($(BACKEND),cpp512z)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on ARM for the moment)
endif
else ifeq ($(UNAME_M),aarch64) # ARM on Linux
ifeq ($(BACKEND),cppnone) # +nosimd ensures __ARM_NEON is absent
override AVXFLAGS = -march=armv8-a+nosimd
else ifeq ($(BACKEND),cppsse4) # +simd ensures __ARM_NEON is present (128 width Q/quadword registers)
override AVXFLAGS = -march=armv8-a+simd
else ifeq ($(BACKEND),cppavx2)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
else ifeq ($(BACKEND),cpp512y)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
else ifeq ($(BACKEND),cpp512z)
$(error Invalid SIMD BACKEND='$(BACKEND)': only 'cppnone' and 'cppsse4' are supported on aarch64 for the moment)
endif
else ifneq ($(shell $(CXX) --version | grep ^nvc++),) # support nvc++ #531
ifeq ($(BACKEND),cppnone)
override AVXFLAGS = -mno-sse3 # no SIMD
Expand Down Expand Up @@ -1092,7 +1108,7 @@ bld512z:
ifeq ($(UNAME_P),ppc64le)
###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4
bldavxs: bldnone bldsse4
else ifeq ($(UNAME_P),arm)
else ifneq (,$(filter $(UNAME_M),arm64 aarch64))
###bldavxs: $(INCDIR)/fbridge.inc bldnone bldsse4
bldavxs: bldnone bldsse4
else
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ endif
# Basic uname helpers (if not already set)
# [Each helper is assigned with ':=' behind an $(origin) guard instead of '?=':
#  '?=' creates a recursively expanded variable, so 'uname' would be re-run in a
#  new shell every time the variable is referenced. As with '?=', a value coming
#  from the environment, the command line or an earlier makefile is left untouched]
ifeq ($(origin UNAME_S),undefined)
UNAME_S := $(shell uname -s)
endif
ifeq ($(origin UNAME_P),undefined)
UNAME_P := $(shell uname -p)
endif
ifeq ($(origin UNAME_M),undefined)
UNAME_M := $(shell uname -m)
endif

# Enable the C preprocessor https://gcc.gnu.org/onlinedocs/gfortran/Preprocessing-Options.html
FFLAGS+= -cpp
Expand Down Expand Up @@ -114,7 +115,7 @@ $(LIBS): .libs
touch $@

$(CUDACPP_BUILDDIR)/.cudacpplibs:
$(MAKE) -f $(CUDACPP_MAKEFILE)
$(MAKE) VERBOSE=1 -f $(CUDACPP_MAKEFILE)
touch $@

# Remove per-library recipes from makefile to avoid duplicate sub-makes
Expand Down Expand Up @@ -225,7 +226,7 @@ madevent_%_link:
# Cudacpp bldall targets
ifeq ($(UNAME_P),ppc64le)
bldavxs: bldnone bldsse4
else ifeq ($(UNAME_P),arm)
else ifneq (,$(filter $(UNAME_M),arm64 aarch64))
bldavxs: bldnone bldsse4
else
bldavxs: bldnone bldsse4 bldavx2 bld512y bld512z
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@ THISDIR = $(dir $(abspath $(lastword $(MAKEFILE_LIST))))

# Host detection
UNAME_S := $(shell uname -s)
UNAME_P := $(shell uname -p)
UNAME_M := $(shell uname -m)

# Extra CMake flags (presumably for the googletest build - confirm against the rule using them)
# Only add AVX2/FMA on non-mac x86 hosts: -mavx2 and -mfma are x86-only compiler
# options, so they must not be passed on any other architecture (aarch64, ppc64le...)
ifeq ($(UNAME_S),Darwin)
GTEST_CMAKE_FLAGS :=
else ifneq (,$(filter x86_64 amd64 i%86,$(UNAME_M)))
GTEST_CMAKE_FLAGS := -DCMAKE_CXX_FLAGS="-mavx2 -mfma"
else
GTEST_CMAKE_FLAGS :=
endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,11 @@ namespace mgOnGpu
using mgOnGpu::fptype;
using mgOnGpu::fptype2;

// Undefine ARM_NEON (hack for cppnone on Apple silicon ARM)
// [The makefile passes -DMGONGPU_NOARMNEON for BACKEND=cppnone on arm64, where __ARM_NEON is always defined]
// [This must come before the MGONGPU_CPPSIMD selection below, which tests __ARM_NEON to enable the 128-bit SIMD build]
#ifdef MGONGPU_NOARMNEON
#undef __ARM_NEON
#endif

// C++ SIMD vectorization width (this will be used to set neppV)
#ifdef MGONGPUCPP_GPUIMPL // CUDA and HIP implementations have no SIMD
#undef MGONGPU_CPPSIMD
Expand All @@ -235,7 +240,13 @@ using mgOnGpu::fptype2;
#else
#define MGONGPU_CPPSIMD 8
#endif
#elif defined __SSE4_2__ // C++ "sse4" SSE4.2 (128-bit ie 16-byte): 2 (DOUBLE) or 4 (FLOAT) [Power9 and ARM default]
#elif defined __SSE4_2__ // C++ "sse4" SSE4.2 (128-bit ie 16-byte): 2 (DOUBLE) or 4 (FLOAT) [Power9 default]
#ifdef MGONGPU_FPTYPE_DOUBLE
#define MGONGPU_CPPSIMD 2
#else
#define MGONGPU_CPPSIMD 4
#endif
#elif defined __ARM_NEON // C++ "sse4" ARM NEON (128-bit ie 16-byte): 2 (DOUBLE) or 4 (FLOAT) [ARM default]
#ifdef MGONGPU_FPTYPE_DOUBLE
#define MGONGPU_CPPSIMD 2
#else
Expand Down
Loading