Skip to content

Commit 131c923

Browse files
committed
feat: C now has its own 32/64-bit integer controls for its arrays (Cp, Cj, Ci) instead of inheriting them from Mask, for cases when Mask uses 64-bit integers unnecessarily
1 parent 9aed2ec commit 131c923

3 files changed

Lines changed: 95 additions & 59 deletions

File tree

Source/kronecker/GB_kron.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,6 @@ GrB_Info GB_kron // C<M> = accum (C, kron(A,B))
167167
GB_RETURN_IF_QUICK_MASK (C, C_replace, M, Mask_comp, Mask_struct) ;
168168

169169
// check if it's possible to apply mask immediately in kron
170-
// TODO: MT should have its own 32/64 bitness controls
171170

172171
bool Mask_is_applicable = M != NULL && !Mask_comp ;
173172
if (Mask_is_applicable) {

Source/kronecker/GB_kroner.c

Lines changed: 92 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -274,8 +274,13 @@ GrB_Info GB_kroner
274274
// allocate Cp
275275
//----------------------------------------------------------------------
276276

277+
int64_t mnzmax = GB_nnz_max (Mask) ;
278+
bool Cp_is_32, Cj_is_32, Ci_is_32;
279+
GB_determine_pji_is_32 (&Cp_is_32, &Cj_is_32, &Ci_is_32,
280+
GxB_SPARSE, mnzmax, (int64_t) Mask->vlen, (int64_t) Mask->vdim, Werk) ;
281+
277282
uint32_t *Cp32 = NULL ; uint64_t *Cp64 = NULL ;
278-
if (Mask->p_is_32)
283+
if (Cp_is_32)
279284
Cp32 = GB_calloc_memory (Mask->vdim + 1, sizeof (uint32_t),
280285
&allocated) ;
281286
else
@@ -349,8 +354,14 @@ GrB_Info GB_kroner
349354
if (!GB_lookup_xoffset (&offset, B, brow, bcol))
350355
continue ;
351356

352-
if (Mask->p_is_32) { (Cp32 [j])++ ; }
353-
else { (Cp64 [j])++ ; }
357+
if (Cp_is_32)
358+
{
359+
(Cp32 [j])++ ;
360+
}
361+
else
362+
{
363+
(Cp64 [j])++ ;
364+
}
354365
nonempty = true ;
355366
}
356367
}
@@ -362,30 +373,34 @@ GrB_Info GB_kroner
362373
// prefix sum to get centries
363374
//----------------------------------------------------------------------
364375

365-
if (Mask->p_is_32)
366-
GB_cumsum (Cp32, Mask->p_is_32, Mask->vdim, NULL, nthreads, Werk) ;
376+
if (Cp_is_32)
377+
GB_cumsum (Cp32, Cp_is_32, Mask->vdim, NULL, nthreads, Werk) ;
367378
else
368-
GB_cumsum (Cp64, Mask->p_is_32, Mask->vdim, NULL, nthreads, Werk) ;
379+
GB_cumsum (Cp64, Cp_is_32, Mask->vdim, NULL, nthreads, Werk) ;
369380

370-
centries = Mask->p_is_32 ? (int64_t) Cp32 [Mask->vdim]
371-
: (int64_t) Cp64 [Mask->vdim] ;
381+
centries = Cp_is_32 ? (int64_t) Cp32 [Mask->vdim]
382+
: (int64_t) Cp64 [Mask->vdim] ;
372383

373384
//----------------------------------------------------------------------
374385
// allocate Ci
375386
//----------------------------------------------------------------------
376387

377388
uint32_t *Ci32 = NULL ; uint64_t *Ci64 = NULL ;
378-
if (Mask->i_is_32)
389+
if (Ci_is_32)
379390
Ci32 = GB_malloc_memory (centries, sizeof (uint32_t), &allocated) ;
380391
else
381392
Ci64 = GB_malloc_memory (centries, sizeof (uint64_t), &allocated) ;
382393

383394
if (centries > 0 && Ci32 == NULL && Ci64 == NULL)
384395
{
385-
if (Mask->p_is_32) GB_free_memory (&Cp32,
386-
(Mask->vdim + 1) * sizeof (uint32_t)) ;
387-
else GB_free_memory (&Cp64,
388-
(Mask->vdim + 1) * sizeof (uint64_t)) ;
396+
if (Cp_is_32)
397+
{
398+
GB_free_memory (&Cp32, (Mask->vdim + 1) * sizeof (uint32_t)) ;
399+
}
400+
else
401+
{
402+
GB_free_memory (&Cp64, (Mask->vdim + 1) * sizeof (uint64_t)) ;
403+
}
389404
GB_FREE_WORKSPACE ;
390405
return GrB_OUT_OF_MEMORY ;
391406
}
@@ -400,14 +415,22 @@ GrB_Info GB_kroner
400415
Cx = GB_malloc_memory (1, op->ztype->size, &allocated) ;
401416
if (Cx == NULL)
402417
{
403-
if (Mask->i_is_32) GB_free_memory (&Ci32,
404-
centries * sizeof (uint32_t)) ;
405-
else GB_free_memory (&Ci64,
406-
centries * sizeof (uint64_t)) ;
407-
if (Mask->p_is_32) GB_free_memory (&Cp32,
408-
(Mask->vdim + 1) * sizeof (uint32_t)) ;
409-
else GB_free_memory (&Cp64,
410-
(Mask->vdim + 1) * sizeof (uint64_t)) ;
418+
if (Ci_is_32)
419+
{
420+
GB_free_memory (&Ci32, centries * sizeof (uint32_t)) ;
421+
}
422+
else
423+
{
424+
GB_free_memory (&Ci64, centries * sizeof (uint64_t)) ;
425+
}
426+
if (Cp_is_32)
427+
{
428+
GB_free_memory (&Cp32, (Mask->vdim + 1) * sizeof (uint32_t)) ;
429+
}
430+
else
431+
{
432+
GB_free_memory (&Cp64, (Mask->vdim + 1) * sizeof (uint64_t)) ;
433+
}
411434
GB_FREE_WORKSPACE ;
412435
return GrB_OUT_OF_MEMORY ;
413436
}
@@ -418,14 +441,22 @@ GrB_Info GB_kroner
418441
Cx = GB_malloc_memory (centries, op->ztype->size, &allocated) ;
419442
if (centries > 0 && Cx == NULL)
420443
{
421-
if (Mask->i_is_32) GB_free_memory (&Ci32,
422-
centries * sizeof (uint32_t)) ;
423-
else GB_free_memory (&Ci64,
424-
centries * sizeof (uint64_t)) ;
425-
if (Mask->p_is_32) GB_free_memory (&Cp32,
426-
(Mask->vdim + 1) * sizeof (uint32_t)) ;
427-
else GB_free_memory (&Cp64,
428-
(Mask->vdim + 1) * sizeof (uint64_t)) ;
444+
if (Ci_is_32)
445+
{
446+
GB_free_memory (&Ci32, centries * sizeof (uint32_t)) ;
447+
}
448+
else
449+
{
450+
GB_free_memory (&Ci64, centries * sizeof (uint64_t)) ;
451+
}
452+
if (Cp_is_32)
453+
{
454+
GB_free_memory (&Cp32, (Mask->vdim + 1) * sizeof (uint32_t)) ;
455+
}
456+
else
457+
{
458+
GB_free_memory (&Cp64, (Mask->vdim + 1) * sizeof (uint64_t)) ;
459+
}
429460
GB_FREE_WORKSPACE ;
430461
return GrB_OUT_OF_MEMORY ;
431462
}
@@ -438,36 +469,49 @@ GrB_Info GB_kroner
438469
GrB_Info Calloc = GB_new_bix (&C, op->ztype, vlen, Mask->vdim,
439470
GB_ph_null, Mask->is_csc, GxB_SPARSE, false, Mask->hyper_switch,
440471
Mask->vdim, centries, false, C_iso,
441-
Mask->p_is_32, Mask->j_is_32, Mask->i_is_32) ;
472+
Cp_is_32, Cj_is_32, Ci_is_32) ;
442473
if (Calloc != GrB_SUCCESS)
443474
{
444-
if (C_iso) GB_free_memory (&Cx, op->ztype->size) ;
445-
else GB_free_memory (&Cx, centries * op->ztype->size) ;
446-
if (Mask->i_is_32) GB_free_memory (&Ci32,
447-
centries * sizeof (uint32_t)) ;
448-
else GB_free_memory (&Ci64,
449-
centries * sizeof (uint64_t)) ;
450-
if (Mask->p_is_32) GB_free_memory (&Cp32,
451-
(Mask->vdim + 1) * sizeof (uint32_t)) ;
452-
else GB_free_memory (&Cp64,
453-
(Mask->vdim + 1) * sizeof (uint64_t)) ;
475+
if (C_iso)
476+
{
477+
GB_free_memory (&Cx, op->ztype->size) ;
478+
}
479+
else
480+
{
481+
GB_free_memory (&Cx, centries * op->ztype->size) ;
482+
}
483+
if (Ci_is_32)
484+
{
485+
GB_free_memory (&Ci32, centries * sizeof (uint32_t)) ;
486+
}
487+
else
488+
{
489+
GB_free_memory (&Ci64, centries * sizeof (uint64_t)) ;
490+
}
491+
if (Cp_is_32)
492+
{
493+
GB_free_memory (&Cp32, (Mask->vdim + 1) * sizeof (uint32_t)) ;
494+
}
495+
else
496+
{
497+
GB_free_memory (&Cp64, (Mask->vdim + 1) * sizeof (uint64_t)) ;
498+
}
454499
GB_FREE_WORKSPACE ;
455500
return Calloc ;
456501
}
457502

458503
GB_free_memory (&C->i, C->i_size) ;
459504
GB_free_memory (&C->x, C->x_size) ;
460505

461-
C->p = Mask->p_is_32 ? (void *) Cp32 : (void *) Cp64 ;
462-
C->i = Mask->i_is_32 ? (void *) Ci32 : (void *) Ci64 ;
506+
C->p = Cp_is_32 ? (void *) Cp32 : (void *) Cp64 ;
507+
C->i = Ci_is_32 ? (void *) Ci32 : (void *) Ci64 ;
463508
C->x = Cx ;
464-
C->p_size = (Mask->p_is_32 ? sizeof (uint32_t)
465-
: sizeof (uint64_t))
466-
* (Mask->vdim + 1) ;
467-
C->i_size = (Mask->i_is_32 ? sizeof (uint32_t)
468-
: sizeof (uint64_t)) * centries ;
509+
C->p_size = (Cp_is_32 ? sizeof (uint32_t)
510+
: sizeof (uint64_t)) * (Mask->vdim + 1) ;
511+
C->i_size = (Ci_is_32 ? sizeof (uint32_t)
512+
: sizeof (uint64_t)) * centries ;
469513
C->x_size = C->iso ? op->ztype->size
470-
: op->ztype->size * centries ;
514+
: op->ztype->size * centries ;
471515
C->magic = GB_MAGIC ;
472516
C->nvals = centries ;
473517
C->nvec_nonempty = (int64_t) nvecs ;

Source/kronecker/template/GB_kroner_template.c

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,8 @@
186186

187187
int64_t pA_start = GBp_M (Mp, k, vlen) ;
188188
int64_t pA_end = GBp_M (Mp, k+1, vlen) ;
189-
GrB_Index pos = Mask->p_is_32 ? ((int32_t *)C->p)[j] : ((int64_t *)C->p)[j] ;
190-
for (GrB_Index p = pA_start ; p < pA_end ; p++)
189+
GrB_Index pos = GB_IGET(Cp, j);
190+
for (GrB_Index p = pA_start; p < pA_end; p++)
191191
{
192192
if (!GBb_M (Mask->b, p)) continue ;
193193

@@ -227,14 +227,7 @@
227227
iy = B_transpose ? bcol : brow ;
228228
jy = B_transpose ? brow : bcol ;
229229

230-
if (Mask->i_is_32)
231-
{
232-
((int32_t *)C->i)[pos] = i ;
233-
}
234-
else
235-
{
236-
((int64_t *)C->i)[pos] = i ;
237-
}
230+
GB_ISET (Ci, pos, i) ;
238231

239232
GB_KRONECKER_OP (Cx, pos, a_elem, ix, jx, b_elem, iy, jy) ;
240233

0 commit comments

Comments
 (0)