Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ $ ./pgremapper cancel-backfill --pgs-including bucket:data10

### drain

Remap PGs off of the given source OSD spec(s), up to the given maximum number of scheduled backfills. No attempt is made to balance the fullness of the target OSDs; rather, the least busy target OSDs and PGs will be selected.
Remap PGs off of the given source OSD spec(s), up to the given maximum number of scheduled backfills. For each PG, among CRUSH-valid targets that still fit reservation limits, the tool prefers OSDs with fewer PGs in the current `up` set (from `pg dump`), then OSDs with lower backfill reservation load on the target—remote reservations count more than local—and finally breaks remaining ties at random. That spreads drain load toward targets with fewer PGs (the preference is by PG count, not by disk utilization) but does not perform whole-cluster balancing.
If a source OSD is included among target OSDs, it will be removed from the targets.

```
Expand Down Expand Up @@ -255,7 +255,7 @@ $ ./pgremapper remap <pg ID> <source osd ID> <target osd ID>

### undo-upmaps

Given a list of OSDs, remove (or modify) upmap items such that the OSDs become the source (or target if `--target` is specified) of backfill operations (i.e. they are currently the "To" ("From") of the upmap items) up to the backfill limits specified. Backfill is spread across target and primary OSDs in a best-effort manner.
Given a list of OSDs, remove (or modify) upmap items such that the OSDs become the source (or target if `--target` is specified) of backfill operations (i.e. they are currently the "To" ("From") of the upmap items) up to the backfill limits specified. Among eligible undos that still fit those limits, each scheduled remap picks among its candidates using the same preference order as [`drain`](#drain): targets with fewer PGs in the current `up` set (from `pg dump`), then lower backfill reservation load on the target—remote reservations count more than local—and random tie-breaks. See [`drain`](#drain) for the exact behavior and caveats (this spreads load toward targets with fewer PGs, judged by PG count rather than disk utilization, but does not perform whole-cluster balancing).

This is useful for cases where the upmap rebalancer won't do this for us, e.g., performing a swap-bucket where we want the source OSDs to totally drain (vs. balance with the rest of the cluster). It also achieves a much higher level of concurrency than the balancer generally will.

Expand Down
11 changes: 11 additions & 0 deletions backfillstate.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,17 @@ func (bs *backfillState) getMaxBackfillReservations(osd int) int {
return bs.maxBackfillReservations
}

// pgCountsByOsd walks every entry of bs.pgbs and tallies how often each OSD ID
// occurs in the entries' Up lists. The result maps OSD ID to that tally; an
// OSD that appears in no Up list has no key, so indexing the map for it yields
// zero.
func (bs *backfillState) pgCountsByOsd() map[int]int {
	perOsd := map[int]int{}
	for _, item := range bs.pgbs {
		for _, id := range item.Up {
			perOsd[id]++
		}
	}
	return perOsd
}

func computeBackfillSrcsTgts(pgb *pgBriefItem) ([]int, []int) {
srcs := []int{}
tgts := []int{}
Expand Down
21 changes: 21 additions & 0 deletions backfillstate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,24 @@ func TestBackfillState(t *testing.T) {
require.Equal(t, 1, bs.osd(77).remoteReservations)
require.Equal(t, 1, bs.osd(77).backfillsFrom)
}

// TestPgCountsByOsd stubs the pg dump output with two PGs and checks the
// per-OSD tallies returned by pgCountsByOsd. OSD 77 is in both "up" lists and
// must be counted twice; PG 1.02 has differing "up" and "acting" lists, and
// the expectation for OSD 4 (present only in "up") is one.
func TestPgCountsByOsd(t *testing.T) {
	setupTest(t)
	defer teardownTest(t)

	const pgDump = `
[
{ "pgid": "1.01", "up": [ 77, 1, 2 ], "acting": [ 77, 1, 2 ] },
{ "pgid": "1.02", "up": [ 77, 3, 4 ], "acting": [ 77, 3, 5 ] }
]
`
	runOsdDump = func() (string, error) { return "{}", nil }
	runPgDumpPgsBrief = func() (string, error) { return pgDump, nil }

	state := mustGetCurrentBackfillState()
	got := state.pgCountsByOsd()

	// Expected tally per OSD ID, checked in a fixed order.
	want := []struct {
		osd int
		pgs int
	}{
		{osd: 77, pgs: 2},
		{osd: 1, pgs: 1},
		{osd: 2, pgs: 1},
		{osd: 3, pgs: 1},
		{osd: 4, pgs: 1},
	}
	for _, w := range want {
		require.Equal(t, w.pgs, got[w.osd])
	}
}
42 changes: 29 additions & 13 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,16 @@ import (
"strconv"
"strings"
"sync"
"time"

"github.com/pkg/errors"
"github.com/spf13/cobra"
)

// remapRand is the random source remapLeastBusyPg uses to break ties between
// otherwise equal candidates; do not use it elsewhere. The fixed seed below is
// only a placeholder: init reseeds it from the clock, and tests replace it in
// setupTest with a fixed seed so expectations are stable.
// NOTE(review): a *rand.Rand created with rand.New is not safe for concurrent
// use per the math/rand docs — confirm remapLeastBusyPg is never called from
// multiple goroutines.
var remapRand = rand.New(rand.NewSource(1))

var (
concurrency int
yes bool
Expand Down Expand Up @@ -147,8 +152,8 @@ has been made so far.
Long: `Drain PGs from one or more source OSDs to the target OSDs.

Remap PGs off of the given source OSD, up to the given maximum number of
scheduled backfills. No attempt is made to balance the fullness of the target
OSDs; rather, the least busy target OSDs and PGs will be selected.
scheduled backfills. Among valid targets, those with fewer PGs in the up set are
preferred, then lower backfill reservation load on the target (ties broken at random).
`,
Args: func(cmd *cobra.Command, args []string) error {
if len(args) == 0 {
Expand Down Expand Up @@ -690,6 +695,7 @@ func mustParseMaxBackfillReservations(cmd *cobra.Command) {
}

func init() {
remapRand = rand.New(rand.NewSource(time.Now().UnixNano()))
rootCmd.PersistentFlags().IntVar(&concurrency, "concurrency", 5, "number of commands to issue in parallel")
rootCmd.PersistentFlags().BoolVar(&yes, "yes", false, "skip confirmations and dry-run output")
rootCmd.PersistentFlags().BoolVar(&verbose, "verbose", false, "display Ceph commands being run")
Expand Down Expand Up @@ -1016,34 +1022,44 @@ func calcPgMappingsToUndoUpmaps(osds []int, osdsAreTargets bool) {
}

func remapLeastBusyPg(candidateMappings []pgMapping) (string, bool) {
// Pick a remap target in three steps. First, among candidates that still
// have room for backfill (hasRoomForRemap), prefer the target OSD with
// the fewest PGs in the current up set, using live pg brief state. Second,
// break ties using reservation load on that OSD: remote reservations (this
// OSD as a backfill target) weigh more than local reservations (this OSD as
// primary), via remote*10 + local. Third, if still tied, choose uniformly at
// random among those mappings (remapRand).
pgCounts := M.bs.pgCountsByOsd()
var (
found bool
bestScore = int(math.MaxInt32)
bestMapping pgMapping
found bool
bestPgCount = int(math.MaxInt32)
bestResScore = int(math.MaxInt32)
ties []pgMapping
)
// Look for a candidate OSD to remap to that has the lowest reservation
// score. We consider the remote reservation count (the count of
// backfills in which this OSD is the target) to be more important than
// the local reservation count (the count of backfills for which this
// OSD is primary), and thus apply a weight to it.
for _, m := range candidateMappings {
if !M.bs.hasRoomForRemap(m.PgID, m.Mapping.From, m.Mapping.To) {
M.changeState = updateChangeState(NoReservationAvailable)
continue
}

pgC := pgCounts[m.Mapping.To]
obs := M.bs.osd(m.Mapping.To)
score := obs.remoteReservations*10 + obs.localReservations
if score < bestScore {
if !found || pgC < bestPgCount || (pgC == bestPgCount && score < bestResScore) {
found = true
bestScore = score
bestMapping = m
bestPgCount = pgC
bestResScore = score
ties = []pgMapping{m}
} else if pgC == bestPgCount && score == bestResScore {
ties = append(ties, m)
}
}
if !found {
return "", false
}

// Uniform choice among candidates tied on PG count and reservation score.
bestMapping := ties[remapRand.Intn(len(ties))]
M.mustRemap(bestMapping.PgID, bestMapping.Mapping.From, bestMapping.Mapping.To)

return bestMapping.PgID, true
Expand Down
14 changes: 9 additions & 5 deletions main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package main

import (
"fmt"
"math/rand"
"testing"

"github.com/spf13/cobra"
Expand Down Expand Up @@ -500,8 +501,8 @@ func TestCalcPgMappingsToUndoUpmaps(t *testing.T) {
targetOsds := []int{0}
expected := []expectedMapping{
{ID: "1.33", Mappings: nil},
{ID: "1.34", Mappings: nil},
{ID: "1.8a", Mappings: nil},
{ID: "1.46", Mappings: nil},
{ID: "1.8b", Mappings: []mapping{{From: 1, To: 7}}},
}

M = mustGetCurrentMappingState()
Expand Down Expand Up @@ -776,7 +777,7 @@ func TestCalcPgMappingsToDrainOsd(t *testing.T) {
expected: []expectedMapping{
{ID: "1.32", Mappings: []mapping{{From: 0, To: 2, dirty: true}}},
{ID: "1.33", Mappings: []mapping{{From: 0, To: 2, dirty: true}}},
{ID: "1.34", Mappings: []mapping{{From: 0, To: 3, dirty: true}}},
{ID: "1.35", Mappings: []mapping{{From: 0, To: 3, dirty: true}}},
},
},
{
Expand All @@ -786,7 +787,7 @@ func TestCalcPgMappingsToDrainOsd(t *testing.T) {
expected: []expectedMapping{
{ID: "1.32", Mappings: []mapping{{From: 0, To: 2, dirty: true}}},
{ID: "1.33", Mappings: []mapping{{From: 0, To: 2, dirty: true}}},
{ID: "1.35", Mappings: []mapping{{From: 0, To: 5, dirty: true}}},
{ID: "1.35", Mappings: []mapping{{From: 0, To: 3, dirty: true}}},
},
},
// Movements allowed across racks - weird case enabled by PGs
Expand All @@ -797,7 +798,7 @@ func TestCalcPgMappingsToDrainOsd(t *testing.T) {
targetOsds: []int{1, 2, 3, 5, 8, 12, 16},
expected: []expectedMapping{
{ID: "1.32", Mappings: []mapping{{From: 0, To: 2, dirty: true}}},
{ID: "1.33", Mappings: []mapping{{From: 0, To: 8, dirty: true}}},
{ID: "1.33", Mappings: []mapping{{From: 0, To: 12, dirty: true}}},
{ID: "1.34", Mappings: []mapping{{From: 0, To: 12, dirty: true}}},
},
},
Expand Down Expand Up @@ -954,6 +955,9 @@ func setupTest(t *testing.T) {

// We only need the upmap items from this; default to empty.
runOsdDump = func() (string, error) { return "{}", nil }

// Matches remapRand doc: fixed stream for remapLeastBusyPg tie-breaks.
remapRand = rand.New(rand.NewSource(42))
}

func teardownTest(t *testing.T) {
Expand Down
Loading