Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions common_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@
typedef struct {
int dtb_entries;
int switch_ratio;
int divide_rate;
int divide_limit;
int preferred_size;
int offsetA, offsetB, align;
#if BUILD_HFLOAT16 == 1
int shgemm_p, shgemm_q, shgemm_r;
Expand Down
16 changes: 12 additions & 4 deletions driver/level3/gemm.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,21 @@
#define GEMM_Q 128
#endif

#ifdef GEMM_DIVIDE_RATE
#ifdef DYNAMIC_ARCH
#define DIVIDE_LIMIT gotoblas->divide_limit
#define DIVIDE_RATE gotoblas->divide_rate
#else
#define DIVIDE_LIMIT GEMM_DIVIDE_LIMIT
#define DIVIDE_RATE GEMM_DIVIDE_RATE
#endif

#ifdef GEMM_DIVIDE_LIMIT
#define DIVIDE_LIMIT GEMM_DIVIDE_LIMIT
#endif
//#ifdef GEMM_DIVIDE_RATE
//#define DIVIDE_RATE GEMM_DIVIDE_RATE
//#endif

//#ifdef GEMM_DIVIDE_LIMIT
//#define DIVIDE_LIMIT GEMM_DIVIDE_LIMIT
//#endif

#ifdef THREADED_LEVEL3
#include "level3_thread.c"
Expand Down
5 changes: 3 additions & 2 deletions driver/level3/level3_gemm3m_thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
#define CACHE_LINE_SIZE 8
#endif

/* Compile-time upper bound used in this file to size the per-thread
 * `working` flag array in job_t and the local `buffer` pointer array in
 * inner_thread(). It is a literal so those array dimensions stay constant
 * expressions regardless of how DIVIDE_RATE itself is defined.
 * NOTE(review): presumably must be >= every value DIVIDE_RATE can take at
 * run time; confirm against all targets. */
#define DIVIDE_RATE_MAX 2
/* Default used only when DIVIDE_RATE has not been defined earlier. */
#ifndef DIVIDE_RATE
#define DIVIDE_RATE 2
#endif
Expand Down Expand Up @@ -93,7 +94,7 @@ typedef struct {
#else
volatile
#endif
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE_MAX];
} job_t;


Expand Down Expand Up @@ -294,7 +295,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
FLOAT *a, *b, *c;
job_t *job = (job_t *)args -> common;
BLASLONG xxx, bufferside;
FLOAT *buffer[DIVIDE_RATE];
FLOAT *buffer[DIVIDE_RATE_MAX];

BLASLONG ls, min_l, jjs, min_jj;
BLASLONG is, min_i, div_n;
Expand Down
6 changes: 4 additions & 2 deletions driver/level3/level3_syrk_threaded.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
#define CACHE_LINE_SIZE 8
#endif

/* Compile-time upper bound used in this file to size the per-thread
 * `working` flag array in job_t and the local `buffer` pointer array in
 * inner_thread(). It is a literal so those array dimensions stay constant
 * expressions regardless of how DIVIDE_RATE itself is defined.
 * NOTE(review): presumably must be >= every value DIVIDE_RATE can take at
 * run time; confirm against all targets. */
#define DIVIDE_RATE_MAX 2

/* Default used only when DIVIDE_RATE has not been defined earlier. */
#ifndef DIVIDE_RATE
#define DIVIDE_RATE 2
#endif
Expand Down Expand Up @@ -69,7 +71,7 @@ _Atomic
#else
volatile
#endif
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE_MAX];
} job_t;


Expand Down Expand Up @@ -133,7 +135,7 @@ _Atomic

static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos){

FLOAT *buffer[DIVIDE_RATE];
FLOAT *buffer[DIVIDE_RATE_MAX];

BLASLONG k, lda, ldc;
BLASLONG m_from, m_to, n_from, n_to;
Expand Down
17 changes: 11 additions & 6 deletions driver/level3/level3_thread.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,17 @@
#define CACHE_LINE_SIZE 8
#endif

/* Compile-time upper bound used in this file to size the per-thread
 * `working` flag array in job_t and the local `buffer` pointer array in
 * inner_thread(). It is a literal so those array dimensions stay constant
 * expressions even when DIVIDE_RATE expands to a run-time lookup such as
 * gotoblas->divide_rate (see the DYNAMIC_ARCH branch in gemm.c).
 * NOTE(review): presumably must be >= every value DIVIDE_RATE can take at
 * run time; confirm against all targets. */
#define DIVIDE_RATE_MAX 2

/* Default used only when the file that includes this one (e.g. gemm.c) has
 * not already defined DIVIDE_RATE. */
#ifndef DIVIDE_RATE
#define DIVIDE_RATE 2
#endif

#ifndef GEMM_PREFERED_SIZE
#define GEMM_PREFERED_SIZE 1
#ifdef DYNAMIC_ARCH
#define GEMM_PREFERRED_SIZE gotoblas->preferred_size
#endif
#ifndef GEMM_PREFERRED_SIZE
#define GEMM_PREFERRED_SIZE 1
#endif

//The array of job_t may overflow the stack.
Expand Down Expand Up @@ -93,7 +98,7 @@

typedef struct {
volatile
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE_MAX];
} job_t;


Expand Down Expand Up @@ -234,7 +239,7 @@ typedef struct {

static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IFLOAT *sb, BLASLONG mypos){

IFLOAT *buffer[DIVIDE_RATE];
IFLOAT *buffer[DIVIDE_RATE_MAX];

BLASLONG k, lda, ldb, ldc;
BLASLONG m_from, m_to, n_from, n_to;
Expand Down Expand Up @@ -707,7 +712,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
while (m > 0){
width = blas_quickdivide(m + nthreads_m - num_parts - 1, nthreads_m - num_parts);

width = round_up(m, width, GEMM_PREFERED_SIZE);
width = round_up(m, width, GEMM_PREFERRED_SIZE);

m -= width;

Expand Down Expand Up @@ -758,7 +763,7 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
if (width < switch_ratio) {
width = switch_ratio;
}
width = round_up(width_n, width, GEMM_PREFERED_SIZE);
width = round_up(width_n, width, GEMM_PREFERRED_SIZE);

width_n -= width;
if (width_n < 0) {
Expand Down
6 changes: 6 additions & 0 deletions kernel/setparam-ref.c
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ gotoblas_t TABLE_NAME = {

SWITCH_RATIO,

GEMM_DIVIDE_RATE,

GEMM_DIVIDE_LIMIT,

GEMM_PREFERRED_SIZE,

GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN,

#ifdef BUILD_HFLOAT16
Expand Down
46 changes: 28 additions & 18 deletions param.h
Original file line number Diff line number Diff line change
Expand Up @@ -630,10 +630,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 4
#define GEMM_PREFERED_SIZE 4
#define GEMM_PREFERRED_SIZE 4
#else
#define SWITCH_RATIO 8
#define GEMM_PREFERED_SIZE 8
#define GEMM_PREFERRED_SIZE 8
#endif

#ifdef ARCH_X86
Expand Down Expand Up @@ -1539,10 +1539,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 4
#define GEMM_PREFERED_SIZE 4
#define GEMM_PREFERRED_SIZE 4
#else
#define SWITCH_RATIO 8
#define GEMM_PREFERED_SIZE 8
#define GEMM_PREFERRED_SIZE 8
#endif

#ifdef ARCH_X86
Expand Down Expand Up @@ -1665,10 +1665,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 8
#define GEMM_PREFERED_SIZE 8
#define GEMM_PREFERRED_SIZE 8
#else
#define SWITCH_RATIO 16
#define GEMM_PREFERED_SIZE 16
#define GEMM_PREFERRED_SIZE 16
#endif
#define USE_SGEMM_KERNEL_DIRECT 1

Expand Down Expand Up @@ -1786,10 +1786,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 8
#define GEMM_PREFERED_SIZE 8
#define GEMM_PREFERRED_SIZE 8
#else
#define SWITCH_RATIO 16
#define GEMM_PREFERED_SIZE 16
#define GEMM_PREFERRED_SIZE 16
#endif
#define USE_SGEMM_KERNEL_DIRECT 1

Expand Down Expand Up @@ -1919,10 +1919,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 8
#define GEMM_PREFERED_SIZE 8
#define GEMM_PREFERRED_SIZE 8
#else
#define SWITCH_RATIO 16
#define GEMM_PREFERED_SIZE 16
#define GEMM_PREFERRED_SIZE 16
#endif
#define USE_SGEMM_KERNEL_DIRECT 1

Expand Down Expand Up @@ -2577,7 +2577,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_ALIGN 0x0ffffUL

#define SWITCH_RATIO 16
#define GEMM_PREFERED_SIZE 16
#define GEMM_PREFERRED_SIZE 16

#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 8
Expand Down Expand Up @@ -2616,7 +2616,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_ALIGN 0x0ffffUL

#define SWITCH_RATIO 16
#define GEMM_PREFERED_SIZE 16
#define GEMM_PREFERRED_SIZE 16

#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 8
Expand Down Expand Up @@ -3611,10 +3611,10 @@ is a big desktop or server with abundant cache rather than a phone or embedded d

#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 8
#define GEMM_PREFERED_SIZE 4
#define GEMM_PREFERRED_SIZE 4
#else
#define SWITCH_RATIO 16
#define GEMM_PREFERED_SIZE 8
#define GEMM_PREFERRED_SIZE 8
#endif

#undef BGEMM_ALIGN_K
Expand Down Expand Up @@ -3662,8 +3662,6 @@ is a big desktop or server with abundant cache rather than a phone or embedded d

#elif defined(NEOVERSEN2) || defined(NEOVERSEV2)

#define GEMM_DIVIDE_LIMIT 3

#if defined(XDOUBLE) || defined(DOUBLE)
#define SWITCH_RATIO 8
#else
Expand Down Expand Up @@ -3751,9 +3749,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define GEMM_DIVIDE_RATE 1

#if defined(XDOUBLE) || defined(DOUBLE)
#define GEMM_PREFERED_SIZE 8
#define GEMM_PREFERRED_SIZE 8
#else
#define GEMM_PREFERED_SIZE 16
#define GEMM_PREFERRED_SIZE 16
#endif

/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
Expand Down Expand Up @@ -4260,6 +4258,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
#define SWITCH_RATIO 2
#endif

/* Fallback values for targets that did not define these parameters in
 * their own section above. Having a definition for every target lets
 * kernel/setparam-ref.c use the three macros unconditionally as
 * initializers for the divide_rate, divide_limit and preferred_size fields
 * of the gotoblas_t parameter table. */

/* Default GEMM divide rate; matches the DIVIDE_RATE fallback of 2 used in
 * the driver/level3 threaded sources. */
#ifndef GEMM_DIVIDE_RATE
#define GEMM_DIVIDE_RATE 2
#endif

/* Default GEMM divide limit.
 * NOTE(review): 0 presumably means "no limit"; confirm against the code
 * that consumes DIVIDE_LIMIT. */
#ifndef GEMM_DIVIDE_LIMIT
#define GEMM_DIVIDE_LIMIT 0
#endif

/* Default preferred size, passed as the third argument to round_up() when
 * gemm_driver() partitions work across threads.
 * NOTE(review): 1 presumably leaves the computed width unrounded; confirm
 * against round_up(). */
#ifndef GEMM_PREFERRED_SIZE
#define GEMM_PREFERRED_SIZE 1
#endif

#ifndef QGEMM_DEFAULT_UNROLL_M
#define QGEMM_DEFAULT_UNROLL_M 2
#endif
Expand Down
Loading