Skip to content

Commit 02e42dc

Browse files
committed
Replace lambdas with named functors in container_concat
Lambdas create unique types per call site, causing duplicate template instantiations. Named functors are shared across call sites. Results: - container_concat: 186 → 93 instantiations (50% reduction) - Wall-clock: 518ms → 309ms (40% reduction)
1 parent 0a1e1cc commit 02e42dc

1 file changed

Lines changed: 22 additions & 4 deletions

File tree

include/ck/utility/container_helper.hpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,26 @@ container_reverse_inclusive_scan(const Tuple<Xs...>& x, Reduce f, TData init)
324324
return y;
325325
}
326326

327+
// Named functors for container_concat to reduce template instantiations
328+
// (lambdas create unique types per call site, functors are shared)
329+
struct make_tuple_functor
330+
{
331+
template <typename... Ts>
332+
__host__ __device__ constexpr auto operator()(Ts&&... xs) const
333+
{
334+
return make_tuple(ck::forward<Ts>(xs)...);
335+
}
336+
};
337+
338+
struct make_array_functor
339+
{
340+
template <typename T, typename... Ts>
341+
__host__ __device__ constexpr auto operator()(T&& x, Ts&&... xs) const
342+
{
343+
return make_array(ck::forward<T>(x), ck::forward<Ts>(xs)...);
344+
}
345+
};
346+
327347
template <typename X, typename... Ys>
328348
__host__ __device__ constexpr auto container_concat(const X& x, const Ys&... ys)
329349
{
@@ -333,15 +353,13 @@ __host__ __device__ constexpr auto container_concat(const X& x, const Ys&... ys)
333353
template <typename T, index_t NX, index_t NY>
334354
__host__ __device__ constexpr auto container_concat(const Array<T, NX>& ax, const Array<T, NY>& ay)
335355
{
336-
return unpack2(
337-
[&](auto&&... zs) { return make_array(ck::forward<decltype(zs)>(zs)...); }, ax, ay);
356+
return unpack2(make_array_functor{}, ax, ay);
338357
}
339358

340359
template <typename... X, typename... Y>
341360
__host__ __device__ constexpr auto container_concat(const Tuple<X...>& tx, const Tuple<Y...>& ty)
342361
{
343-
return unpack2(
344-
[&](auto&&... zs) { return make_tuple(ck::forward<decltype(zs)>(zs)...); }, tx, ty);
362+
return unpack2(make_tuple_functor{}, tx, ty);
345363
}
346364

347365
template <typename Container>

0 commit comments

Comments
 (0)