Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 91 additions & 0 deletions src/TiledArray/dist_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@
#include "TiledArray/util/random.h"

#include <madness/world/parallel_archive.h>
#include <array>
#include <cstdlib>
#include <tuple>
#include <vector>

namespace TiledArray {

Expand Down Expand Up @@ -2015,6 +2017,95 @@ std::size_t size_of(const DistArray<Tile, Policy>& da) {
return result;
}

/// \return the number of bytes the locally-owned tiles of \p storage occupy
/// in memory space `S`.
///
/// This is the *tile-data* footprint of a `DistArray`'s storage object only.
/// It deliberately does **not** include the `DistArray`-level metadata --
/// `TiledRange`, `Shape`, and `Pmap` -- because those live in the owning
/// `ArrayImpl`/`TensorImpl`, not in the `DistributedStorage`. For
/// `SparsePolicy` the `Shape` (a per-tile Frobenius-norm table) can be
/// sizeable, so this undercounts the full per-array footprint that
/// `size_of(const DistArray&)` reports. Counts only tiles whose futures are
/// set; pending and remote-cached tiles are skipped.
/// \tparam S the memory space to report
template <MemorySpace S, typename T>
std::size_t size_of(const detail::DistributedStorage<T>& storage) {
  // Running byte total in memory space S. Which tiles get visited is decided
  // entirely by `for_each_local_tile`; this function only sums their sizes.
  std::size_t total_bytes = 0;
  auto add_tile_bytes = [&total_bytes](const auto& tile) {
    total_bytes += size_of<S>(tile);
  };
  storage.for_each_local_tile(add_tile_bytes);
  return total_bytes;
}

/// \return the per-rank tile-data bytes (in memory space `S`) of the
/// `DistributedStorage` of *all* live `DistArray<Tile,Policy>` of the
/// requested type currently registered in \p world, discovered by walking
/// the World's `WorldObject` registry.
///
/// Each array's tile storage is a single `detail::DistributedStorage`
/// `WorldObject`, so an array referenced by N shallow-copy handles is counted
/// exactly once — unlike summing `size_of` over a set of handles, which
/// double-counts shared storage. This makes the result suitable as ground
/// truth for validating handle-based tile-data accounting.
///
/// Discovery is type-safe: each registered pointer is recovered as the common
/// polymorphic base `madness::WorldObjectBase` and `dynamic_cast` to the
/// `DistributedStorage` matching `DistArrayT`'s tile type; non-matching
/// objects (other tile types, MADNESS containers) are skipped. Matching is by
/// tile type only: the storage type is `DistributedStorage<tile_type>` and
/// carries no `Policy`, so every live array that shares `DistArrayT`'s tile
/// type is counted, whatever its policy. Assumes the
/// registered `WorldObject`s place `WorldObjectBase` at offset 0 (true for
/// the single-inheritance `class X : public WorldObject<X>` idiom TA uses).
///
/// \warning This reports the `DistributedStorage` (tile-data) footprint only.
/// It excludes the `DistArray`-level `TiledRange`, `Shape`, and `Pmap`; the
/// `Shape` can be large under `SparsePolicy`. It is therefore **not**
/// comparable term-for-term with a sum of `size_of(const DistArray&)` over
/// handles (which includes the shape). Use it for tile-data accounting, not
/// total-DistArray-footprint accounting.
/// \note Counts only locally-owned tiles whose futures are set. Excludes
/// remote-tile caches. Call at a quiescent point (after a fence).
/// \tparam DistArrayT the `DistArray` specialization to look for
/// \tparam S the memory space to report (default `Host`)
template <typename DistArrayT, MemorySpace S = MemorySpace::Host>
std::size_t size_of_live_distarray_storage(World& world) {
  // The storage type searched for is derived from the tile type alone.
  using storage_type =
      detail::DistributedStorage<typename DistArrayT::value_type>;

  std::size_t total_bytes = 0;
  for (const auto& object_id : world.get_object_ids()) {
    auto maybe_base =
        world.template ptr_from_id<madness::WorldObjectBase>(object_id);
    // Skip ids whose lookup yields no value, or yields a null pointer.
    if (!maybe_base) continue;
    auto* const base = *maybe_base;
    if (!base) continue;
    // Registered objects of any other dynamic type fail the cast and are
    // ignored.
    auto* const storage = dynamic_cast<storage_type*>(base);
    if (storage == nullptr) continue;
    total_bytes += size_of<S>(*storage);
  }
  return total_bytes;
}

/// \return a matrix of per-rank live-storage tile-data byte totals indexed
/// `[world_index][type_index]`: for each `World` in \p worlds (rows) and each
/// `DistArray` type in the pack `DistArrayTs` (columns), the value of
/// `size_of_live_distarray_storage<DistArrayT, S>(world)`. Lets a caller
/// inventory which array types hold how much tile data in which world at a
/// checkpoint, deduplicated across shallow-copy handles.
///
/// \warning Tile-data only; see `size_of_live_distarray_storage` for the
/// excluded-metadata caveat (no `TiledRange`/`Shape`/`Pmap`).
/// \note `S` is the leading template argument (it has a default but precedes
/// the type pack), so callers must spell it out:
/// `size_of_live_distarrays_storage<MemorySpace::Host, ArrayA,
/// ArrayB>(worlds)`.
/// \pre every pointer in \p worlds is non-null
template <MemorySpace S = MemorySpace::Host, typename... DistArrayTs>
std::vector<std::array<std::size_t, sizeof...(DistArrayTs)>>
size_of_live_distarrays_storage(const std::vector<World*>& worlds) {
  // One row per world; one column per requested array type, in pack order.
  using row_type = std::array<std::size_t, sizeof...(DistArrayTs)>;

  std::vector<row_type> matrix;
  matrix.reserve(worlds.size());
  for (World* w : worlds) {
    TA_ASSERT(w != nullptr);
    // Each column is the single-type registry walk for this world.
    const row_type row{size_of_live_distarray_storage<DistArrayTs, S>(*w)...};
    matrix.push_back(row);
  }
  return matrix;
}

#ifndef TILEDARRAY_HEADER_ONLY

extern template class DistArray<Tensor<double>, DensePolicy>;
Expand Down
20 changes: 20 additions & 0 deletions src/TiledArray/distributed_storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#ifndef TILEDARRAY_DISTRIBUTED_STORAGE_H__INCLUDED
#define TILEDARRAY_DISTRIBUTED_STORAGE_H__INCLUDED

#include <TiledArray/platform.h>
#include <TiledArray/pmap/pmap.h>

namespace TiledArray {
Expand Down Expand Up @@ -360,6 +361,25 @@ class DistributedStorage : public madness::WorldObject<DistributedStorage<T>> {
/// \throw nothing
size_type size() const {
  // Delegates to the underlying `data_` container.
  return data_.size();
}

/// Apply \p op to each locally-owned tile whose future is already set.

/// Pending (unset) and remote-cached elements are skipped. No
/// communication; intended to be called at a quiescent point (e.g. after a
/// fence). This is the per-rank local tile set, the same one
/// `size_of(DistArray)` iterates. Any summation it enables (e.g. of
/// `size_of<S>(tile)`) is left to the caller, which sees the tile-type
/// overloads -- those need not be visible where this low-level header is
/// parsed.
/// \tparam Op a callable invocable as `op(const value_type&)`
/// \param op the callable to apply to each set local tile
template <typename Op>
void for_each_local_tile(Op&& op) const {
for (auto it = data_.begin(); it != data_.end(); ++it) {
const future& f = it->second;
if (f.probe()) op(f.get());
}
}

/// Max size accessor

/// The maximum size is the total number of elements that can be held by
Expand Down
65 changes: 65 additions & 0 deletions tests/dist_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1020,6 +1020,71 @@ BOOST_AUTO_TEST_CASE(size_of) {
BOOST_REQUIRE(sz0 == sz0_expected);
}

// Exercises the registry-walking size queries:
//  - size_of_live_distarray_storage must count each distinct storage object
//    once, even when several array handles (shallow copies) refer to it;
//  - a walk for one tile type must not pick up arrays of another tile type;
//  - the variadic matrix form must agree with the single-type form.
BOOST_AUTO_TEST_CASE(live_storage_size_in_world) {
using T = Tensor<double>;
using ToT = Tensor<T>;
using Policy = SparsePolicy;
using ArrayT = DistArray<T, Policy>;
using ArrayToT = DistArray<ToT, Policy>;

auto& world = get_default_world();
// fence first so the baseline below is taken at a quiescent point
world.gop.fence();

// arrays from earlier test cases may still be registered (destruction is
// deferred to the next fence), so measure a baseline and compare deltas
auto const base_T = TiledArray::size_of_live_distarray_storage<ArrayT>(world);
auto const base_ToT =
TiledArray::size_of_live_distarray_storage<ArrayToT>(world);

TiledRange const trange({{0, 2, 5, 7}, {0, 5, 7, 10, 12}});

// two distinct regular arrays
auto a1 = make_array<ArrayT>(world, trange, [](T& tile, Range const& rng) {
tile = T(rng, 1.0);
return tile.norm();
});
auto a2 = make_array<ArrayT>(world, trange, [](T& tile, Range const& rng) {
tile = T(rng, 2.0);
return tile.norm();
});
// shallow copy: shares a1's storage WorldObject, must NOT be double-counted
ArrayT a1_copy = a1;
BOOST_REQUIRE(a1_copy.trange() == a1.trange()); // keep a1_copy alive & used

// fence again so the measurements below are taken at a quiescent point
world.gop.fence();

// per-array local tile-data bytes = size_of(array) - size_of(shape); the
// storage walk reports tile data only, so subtract the shape from the
// handle-based full-array size_of
auto tiles_only = [](ArrayT const& a) {
return TiledArray::size_of<MemorySpace::Host>(a) -
TiledArray::size_of<MemorySpace::Host>(a.shape());
};

// the storage walk counts each distinct DistributedStorage once: a1 + a2,
// NOT a1 + a2 + a1_copy
// (got_T is an unsigned difference against the baseline; it would wrap if
// the live total ever dropped below base_T between the two measurements)
auto const expected_T = tiles_only(a1) + tiles_only(a2);
auto const got_T =
TiledArray::size_of_live_distarray_storage<ArrayT>(world) - base_T;
BOOST_CHECK_EQUAL(got_T, expected_T);

// the ToT-typed walk must not pick up the regular (T) arrays
auto const got_ToT_delta =
TiledArray::size_of_live_distarray_storage<ArrayToT>(world) - base_ToT;
BOOST_CHECK_EQUAL(got_ToT_delta, 0u);

// variadic matrix: one world (one row), two types (two columns)
// (compared against absolute totals, not baseline deltas, since both sides
// walk the same registry state)
auto const mat = TiledArray::size_of_live_distarrays_storage<
MemorySpace::Host, ArrayT, ArrayToT>(std::vector<World*>{&world});
BOOST_REQUIRE_EQUAL(mat.size(), 1u);
BOOST_CHECK_EQUAL(mat[0][0],
TiledArray::size_of_live_distarray_storage<ArrayT>(world));
BOOST_CHECK_EQUAL(
mat[0][1], TiledArray::size_of_live_distarray_storage<ArrayToT>(world));

// NOTE(review): presumably lets outstanding work settle before a1/a2/a1_copy
// are destroyed at scope exit -- confirm
world.gop.fence();
}

BOOST_FIXTURE_TEST_CASE(fill_zero_sparse, ArrayFixture) {
// construct a sparse array with some non-zero tiles and fill it
SpArrayN as(world, tr, TiledArray::SparseShape<float>(shape_tensor, tr));
Expand Down
Loading