Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/rebuild/rebuild_internal.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/**
* (C) Copyright 2017-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025-2026 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -247,6 +247,7 @@ struct rebuild_pool_tls {
d_list_t rebuild_pool_list;
uint64_t rebuild_pool_obj_count;
uint64_t rebuild_pool_reclaim_obj_count;
uint64_t rebuild_pool_reclaim_skipped;
unsigned int rebuild_pool_ver;
uint32_t rebuild_pool_gen;
uint64_t rebuild_pool_leader_term;
Expand Down
35 changes: 32 additions & 3 deletions src/rebuild/scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,10 @@ find_rebuild_shards(struct pl_map *map, uint32_t gl_layout_ver, struct daos_obj_
return rc;
}

/*
 * Tunables for the busy-retry loop in obj_reclaim().
 *
 * RECLAIM_SKIPPED_MAX: upper bound on the per-pool count of busy objects
 * that reclaim is allowed to skip (compared against
 * rebuild_pool_reclaim_skipped). Skipping too many objects may cause
 * out-of-space (ENOSPACE) errors, so no further objects are skipped once
 * this limit is reached.
 */
#define RECLAIM_SKIPPED_MAX 50
/*
 * RECLAIM_LOG_INTERVAL: minimum gap between "reclaim skipped total" summary
 * messages; 30 minutes. NOTE(review): presumably expressed in seconds to
 * match daos_gettime_coarse() - confirm.
 */
#define RECLAIM_LOG_INTERVAL 1800
/*
 * RECLAIM_BUSY_THRESHOLD: number of busy retries for one object before it
 * becomes a candidate for skipping. This is a retry count, not a duration:
 * the retry loop sleeps about 1 ms per attempt, so 300 * 1000 retries
 * correspond to roughly 300 seconds (5 minutes) at the least.
 */
#define RECLAIM_BUSY_THRESHOLD (300 * 1000)

static int
obj_reclaim(struct pl_map *map, uint32_t layout_ver, uint32_t new_layout_ver,
struct daos_obj_md *md, struct rebuild_tgt_pool_tracker *rpt,
Expand All @@ -499,6 +503,8 @@ obj_reclaim(struct pl_map *map, uint32_t layout_ver, uint32_t new_layout_ver,
struct rebuild_pool_tls *tls;
daos_epoch_range_t discard_epr;
bool still_needed;
unsigned int busy_tried = 0;
uint64_t log_since = 0;
int rc;

/*
Expand Down Expand Up @@ -550,17 +556,40 @@ obj_reclaim(struct pl_map *map, uint32_t layout_ver, uint32_t new_layout_ver,
* to delete
*/
do {
uint64_t now;

/* Inform the iterator and delete the object */
*acts |= VOS_ITER_CB_DELETE;
rc = vos_discard(param->ip_hdl, &oid, &discard_epr, NULL, NULL);
if (rc != -DER_BUSY && rc != -DER_INPROGRESS)
break;

D_DEBUG(DB_REBUILD, "retry by "DF_RC"/"DF_UOID"\n",
DP_RC(rc), DP_UOID(oid));
busy_tried++;
if (busy_tried >= RECLAIM_BUSY_THRESHOLD) { /* too many retries, time to skip */
busy_tried = 0;
if (tls->rebuild_pool_reclaim_skipped < RECLAIM_SKIPPED_MAX) {
tls->rebuild_pool_reclaim_skipped++;
D_ERROR(DF_RB " stop retrying reclaim after 5 minutes (rc=%d), "
"skip busy object=" DF_UOID ", total skipped=" DF_U64
"\n",
DP_RB_RPT(rpt), rc, DP_UOID(oid),
tls->rebuild_pool_reclaim_skipped);
rc = 0;
break;
}
}
D_DEBUG(DB_REBUILD, "retry by " DF_RC "/" DF_UOID "\n", DP_RC(rc), DP_UOID(oid));
/* Busy - inform iterator and yield */
*acts |= VOS_ITER_CB_YIELD;
dss_sleep(0);
dss_sleep(1); /* 1 ms */

now = daos_gettime_coarse();
if (now - log_since >= RECLAIM_LOG_INTERVAL &&
tls->rebuild_pool_reclaim_skipped > 0) {
D_INFO(DF_RB " reclaim skipped total " DF_U64 " busy objects\n",
DP_RB_RPT(rpt), tls->rebuild_pool_reclaim_skipped);
log_since = now;
}
} while (1);

if (rc != 0)
Expand Down
1 change: 1 addition & 0 deletions src/rebuild/srv.c
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ rebuild_pool_tls_create(uuid_t pool_uuid, uuid_t poh_uuid, uuid_t coh_uuid,
rebuild_pool_tls->rebuild_pool_scanning = 1;
rebuild_pool_tls->rebuild_pool_scan_done = 0;
rebuild_pool_tls->rebuild_pool_obj_count = 0;
rebuild_pool_tls->rebuild_pool_reclaim_skipped = 0;
rebuild_pool_tls->rebuild_pool_reclaim_obj_count = 0;
rebuild_pool_tls->rebuild_tree_hdl = DAOS_HDL_INVAL;
/* Only 1 thread will access the list, no need lock */
Expand Down
Loading