Skip to content

Commit 9075dce

Browse files
committed
feat: moved implementation to template
1 parent bd68475 commit 9075dce

3 files changed

Lines changed: 14 additions & 50 deletions

File tree

Source/kronecker/GB_kron.c

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,9 +167,7 @@ GrB_Info GB_kron // C<M> = accum (C, kron(A,B))
167167
GB_RETURN_IF_QUICK_MASK (C, C_replace, M, Mask_comp, Mask_struct) ;
168168

169169
// check if it's possible to apply mask immediately in kron
170-
// TODO: make MT of same CSR/CSC format as C
171170
// TODO: MT should have its own 32/64 bitness controls
172-
// TODO: clear MT header
173171

174172
bool Mask_is_applicable = M != NULL && !Mask_comp ;
175173
if (Mask_is_applicable) {
@@ -189,8 +187,6 @@ GrB_Info GB_kron // C<M> = accum (C, kron(A,B))
189187
return masked_kroner_info ;
190188
}
191189

192-
GxB_Matrix_fprint(MT, "MT", GxB_COMPLETE, stdout) ;
193-
194190
if (MT->is_csc != C->is_csc) {
195191
GrB_Info MTtranspose = GB_transpose_in_place (MT, true, Werk) ;
196192
if (MTtranspose != GrB_SUCCESS)

Source/kronecker/GB_kroner.c

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
// hypersparse, full if both A and B are full, or sparse otherwise. C is never
1313
// constructed as bitmap.
1414

15-
#include <stdio.h>
16-
1715
#define GB_FREE_WORKSPACE \
1816
{ \
1917
GB_Matrix_free (&Awork) ; \
@@ -38,7 +36,7 @@
3836
#include "jitifyer/GB_stringify.h"
3937

4038
//------------------------------------------------------------------------------
41-
// GB_lookup_xoffset: find the offset of (row,col) in a matrix
39+
// GB_lookup_xoffset: find the offset of entry (row,col) in Ax, if present
4240
//------------------------------------------------------------------------------
4341

4442
static bool GB_lookup_xoffset
@@ -97,7 +95,7 @@ static bool GB_lookup_xoffset
9795
}
9896

9997
//------------------------------------------------------------------------------
100-
// GB_kroner_generic: generic fallback kernel for both masked and default paths
98+
// GB_kroner_generic: generic kernel for masked and default paths
10199
//------------------------------------------------------------------------------
102100

103101
static GrB_Info GB_kroner_generic
@@ -121,7 +119,6 @@ static GrB_Info GB_kroner_generic
121119
GrB_Type ctype = op->ztype ;
122120
const size_t csize = ctype->size ;
123121

124-
// declare everything the template needs
125122
GB_Ap_DECLARE (Ap, const) ; GB_Ap_PTR (Ap, A) ;
126123
GB_Ah_DECLARE (Ah, const) ; GB_Ah_PTR (Ah, A) ;
127124
const int64_t avlen = A->vlen ;
@@ -199,7 +196,6 @@ static GrB_Info GB_kroner_generic
199196
#define GB_GENERIC
200197
#include "ewise/include/GB_ewise_shared_definitions.h"
201198
#include "kronecker/template/GB_kroner_template.c"
202-
203199

204200
return GrB_SUCCESS ;
205201
}
@@ -261,7 +257,7 @@ GrB_Info GB_kroner
261257
GrB_Matrix B = B_in ;
262258

263259
//--------------------------------------------------------------------------
264-
// masked case: apply mask to avoid computing entries outside the mask
260+
// apply mask immediately if possible
265261
//--------------------------------------------------------------------------
266262

267263
if (Mask != NULL && !Mask_comp)
@@ -305,7 +301,6 @@ GrB_Info GB_kroner
305301
int nthreads_max = GB_Context_nthreads_max ( ) ;
306302
double chunk = GB_Context_chunk ( ) ;
307303
int nthreads = GB_nthreads (work, chunk, nthreads_max) ;
308-
printf ("threads initially: %d\n", nthreads) ;
309304

310305
int64_t vlen = Mask->vlen ;
311306

@@ -466,23 +461,22 @@ GrB_Info GB_kroner
466461
C->p = Mask->p_is_32 ? (void *) Cp32 : (void *) Cp64 ;
467462
C->i = Mask->i_is_32 ? (void *) Ci32 : (void *) Ci64 ;
468463
C->x = Cx ;
469-
C->p_size = (Mask->p_is_32 ? sizeof (uint32_t)
470-
: sizeof (uint64_t))
471-
* (Mask->vdim + 1) ;
472-
C->i_size = (Mask->i_is_32 ? sizeof (uint32_t)
473-
: sizeof (uint64_t)) * centries ;
474-
C->x_size = C->iso ? op->ztype->size
475-
: op->ztype->size * centries ;
476-
C->magic = GB_MAGIC ;
477-
C->nvals = centries ;
464+
C->p_size = (Mask->p_is_32 ? sizeof (uint32_t)
465+
: sizeof (uint64_t))
466+
* (Mask->vdim + 1) ;
467+
C->i_size = (Mask->i_is_32 ? sizeof (uint32_t)
468+
: sizeof (uint64_t)) * centries ;
469+
C->x_size = C->iso ? op->ztype->size
470+
: op->ztype->size * centries ;
471+
C->magic = GB_MAGIC ;
472+
C->nvals = centries ;
478473
C->nvec_nonempty = (int64_t) nvecs ;
479474

480475
//----------------------------------------------------------------------
481-
// evaluate: JIT then generic fallback
476+
// evaluate
482477
//----------------------------------------------------------------------
483478

484479
info = GB_kroner_jit (C, op, false, A, A_transpose, B, B_transpose, Mask, Mask_struct, Mask_comp, nthreads) ;
485-
printf ("info: %d\n", info) ;
486480

487481
if (info != GrB_SUCCESS)
488482
{
@@ -649,11 +643,10 @@ GrB_Info GB_kroner
649643
}
650644

651645
//--------------------------------------------------------------------------
652-
// evaluate: JIT then generic fallback
646+
// evaluate: JIT or generic
653647
//--------------------------------------------------------------------------
654648

655649
info = GB_kroner_jit (C, op, flipij, A, A_transpose, B, B_transpose, Mask, Mask_struct, Mask_comp, nthreads) ;
656-
printf("default jit info: %d\n", info) ;
657650

658651
if (info != GrB_SUCCESS)
659652
{

Source/kronecker/template/GB_kroner_template.c

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,6 @@
4949

5050
if (!GB_NO_MASK && !GB_MASK_COMP)
5151
{
52-
#include <stdio.h>
53-
//printf("got mask here\n");
54-
//printf("A_transpose, B_transpose: %d, %d\n", A_transpose, B_transpose) ;
55-
//GxB_Matrix_fprint (Mask, "Mask inside template", GxB_COMPLETE, stdout) ;
56-
57-
int64_t a = 0 ;
58-
int64_t b = Mask->plen - 1 ;
59-
60-
bool search = GB_binary_search (1, Mask->p, Mask->p_is_32, &a, &b) ;
61-
62-
//printf("search: %d\n", search) ;
63-
6452
#define GB_GETVECTOR(A, row, col) GrB_Index vector_ = (A)->is_csc ? (col) : (row)
6553

6654
#define GB_GETCOORD(A, row, col) GrB_Index coord_ = (A)->is_csc ? (row) : (col)
@@ -178,27 +166,21 @@
178166
GB_DECLAREB (b_elem) ;
179167
int64_t vlen = Mask->vlen ;
180168

181-
//printf("inside parallel fine\n") ;
182169
#pragma omp for schedule(static)
183170
for (GrB_Index k = 0 ; k < Mask->nvec ; k++)
184171
{
185-
//printf("inside fors, k: %d\n", k) ;
186172
GrB_Index j = GBh_M (Mh, k) ;
187173

188174
int64_t pA_start = GBp_M (Mp, k, vlen) ;
189175
int64_t pA_end = GBp_M (Mp, k+1, vlen) ;
190176
GrB_Index pos = Mask->p_is_32 ? ((int32_t *)C->p)[j] : ((int64_t *)C->p)[j] ;
191-
//printf("pA_start = %d, pA_end = %d\n", pA_start, pA_end) ;
192177
for (GrB_Index p = pA_start ; p < pA_end ; p++)
193178
{
194-
//printf("inside eval loop, p: %d\n", p) ;
195179
if (!GBb_M (Mask->b, p)) continue ;
196180

197181
int64_t i = GBi_M (Mi, p, vlen) ;
198182
GrB_Index Mrow = Mask->is_csc ? i : j ; GrB_Index Mcol = Mask->is_csc ? j : i ;
199183

200-
//printf("Mask row: %d, Mask col: %d\n", Mrow, Mcol) ;
201-
202184
// extract elements from A and B,
203185
// initialize offset in MTi and MTx,
204186
// get result of op, place it in MTx
@@ -211,26 +193,20 @@
211193
GrB_Index brow = B_transpose ? (Mcol % bncols) : (Mrow % bnrows);
212194
GrB_Index bcol = B_transpose ? (Mrow % bnrows) : (Mcol % bncols);
213195

214-
//printf("arow: %d, acol: %d\n", arow, acol) ;
215196
GB_LOOKUP_XOFFSET (offset, A, arow, acol) ;
216197
if (offset == -1)
217198
{
218199
continue;
219200
}
220-
//printf("A search result ok\n") ;
221201
// TODO: confirm whether the iso flag used here should be that of A or of C
222202
GB_GETA (a_elem, Ax, offset, GB_A_ISO) ;
223-
//printf("a_elem: %d\n", *((int32_t *)a_elem)) ;
224203

225-
//printf("brow: %d, bcol: %d\n", brow, bcol) ;
226204
GB_LOOKUP_XOFFSET (offset, B, brow, bcol) ;
227205
if (offset == -1)
228206
{
229207
continue;
230208
}
231-
//printf("B search result ok\n") ;
232209
GB_GETB (b_elem, Bx, offset, GB_B_ISO) ;
233-
//printf("b_elem: %d\n", *((int32_t *)b_elem)) ;
234210

235211
GrB_Index ix, jx, iy, jy ;
236212
ix = A_transpose ? acol : arow ;
@@ -248,7 +224,6 @@
248224
}
249225

250226
GB_KRONECKER_OP (Cx, pos, a_elem, ix, jx, b_elem, iy, jy) ;
251-
//printf("C result: %d\n", ((int32_t *)Cx)[pos]) ;
252227

253228
pos++ ;
254229
}

0 commit comments

Comments
 (0)