Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 28 additions & 15 deletions pkg/daemon/managed.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,26 +344,39 @@ func (me *ManagedEngine) fill(members []uint32) int {
}

// fetchMembers calls list_nodes on the registry for this network.
// Retries with exponential backoff on transient failures.
func (me *ManagedEngine) fetchMembers() ([]uint32, error) {
resp, err := me.daemon.regConn.ListNodes(me.netID, me.daemon.config.AdminToken)
if err != nil {
return nil, err
}

nodesRaw, ok := resp["nodes"].([]interface{})
if !ok {
return nil, fmt.Errorf("unexpected list_nodes response")
}
const maxAttempts = 5
var lastErr error
backoff := 1 * time.Second

for attempt := 0; attempt < maxAttempts; attempt++ {
resp, err := me.daemon.regConn.ListNodes(me.netID, me.daemon.config.AdminToken)
if err == nil {
nodesRaw, ok := resp["nodes"].([]interface{})
if !ok {
return nil, fmt.Errorf("unexpected list_nodes response")
}

var members []uint32
for _, n := range nodesRaw {
if m, ok := n.(map[string]interface{}); ok {
if id, ok := m["node_id"].(float64); ok {
members = append(members, uint32(id))
var members []uint32
for _, n := range nodesRaw {
if m, ok := n.(map[string]interface{}); ok {
if id, ok := m["node_id"].(float64); ok {
members = append(members, uint32(id))
}
}
}
return members, nil
}

lastErr = err
if attempt < maxAttempts-1 {
time.Sleep(backoff)
backoff *= 2
}
}
return members, nil

return nil, fmt.Errorf("fetchMembers: failed after %d attempts: %w", maxAttempts, lastErr)
}

// persist saves the managed state to disk.
Expand Down
Loading