Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cachelib/allocator/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ add_library (cachelib_allocator
LruTailAgeStrategy.cpp
MarginalHitsOptimizeStrategy.cpp
MarginalHitsStrategy.cpp
MarginalHitsStrategyNew.cpp
memory/AllocationClass.cpp
memory/MemoryAllocator.cpp
memory/MemoryPool.cpp
Expand Down
2 changes: 1 addition & 1 deletion cachelib/allocator/CacheAllocatorConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -1237,7 +1237,7 @@ bool CacheAllocatorConfig<T>::validateStrategy(

auto type = strategy->getType();
return type != RebalanceStrategy::NumTypes &&
(type != RebalanceStrategy::MarginalHits || trackTailHits);
((type != RebalanceStrategy::MarginalHits && type != RebalanceStrategy::MarginalHitsNew) || trackTailHits);
}

template <typename T>
Expand Down
223 changes: 223 additions & 0 deletions cachelib/allocator/MarginalHitsStrategyNew.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "cachelib/allocator/MarginalHitsStrategyNew.h"

#include <folly/logging/xlog.h>

#include <algorithm>
#include <functional>

namespace facebook::cachelib {

// Registers this strategy under the MarginalHitsNew type tag. The original
// code passed MarginalHits, which made the new
// `type != RebalanceStrategy::MarginalHitsNew` branch added to
// CacheAllocatorConfig::validateStrategy unreachable (a strategy of this
// class would never report the MarginalHitsNew type).
MarginalHitsStrategyNew::MarginalHitsStrategyNew(Config config)
    : RebalanceStrategy(MarginalHitsNew), config_(std::move(config)) {}

RebalanceContext MarginalHitsStrategyNew::pickVictimAndReceiverImpl(
    const CacheBase& cache, PoolId pid, const PoolStats& poolStats) {
  // Regular rebalancing round: run candidate selection without forcing, so
  // all gating thresholds (minimum requests observed, minimum improvement)
  // stay in effect.
  constexpr bool kForcePick = false;
  return pickVictimAndReceiverCandidates(cache, pid, poolStats, kForcePick);
}

RebalanceContext MarginalHitsStrategyNew::pickVictimAndReceiverCandidates(
    const CacheBase& cache, PoolId pid, const PoolStats& poolStats, bool force) {
  // Core decision routine shared by pickVictimAndReceiverImpl (force=false)
  // and pickVictimImpl (force=true). When `force` is true, the request-count
  // and improvement-threshold gates below are skipped and the best available
  // pick is returned unconditionally.
  const auto config = getConfigCopy();
  // Until every slab is handed out, a class that needs memory can take a
  // fresh slab instead of stealing one - no rebalancing needed.
  if (!cache.getPool(pid).allSlabsAllocated()) {
    XLOGF(DBG,
          "Pool Id: {} does not have all its slabs allocated"
          " and does not need rebalancing.",
          static_cast<int>(pid));
    return kNoOpContext;
  }

  // Decayed marginal-hits score per class (delta of tail-slab hits).
  auto scores =
      computeClassMarginalHits(pid, poolStats, config.movingAverageParam);
  auto classesSet = poolStats.getClassIds();
  std::vector<ClassId> classes(classesSet.begin(), classesSet.end());
  std::unordered_map<ClassId, bool> validVictim;
  std::unordered_map<ClassId, bool> validReceiver;
  // Hoisted out of the loop: the original code copied the entire acStats map
  // once per class on every iteration.
  const auto& acStats = poolStats.mpStats.acStats;
  for (const auto cid : classes) {
    // a class can be a victim only if it has more than config.minSlabs slabs
    validVictim[cid] = acStats.at(cid).totalSlabs() > config.minSlabs;
    // a class can be a receiver only if its free memory (free allocs, free
    // slabs, etc) is small
    validReceiver[cid] = acStats.at(cid).getTotalFreeMemory() <
                         config.maxFreeMemSlabs * Slab::kSize;
  }
  if (classStates_[pid].entities.empty()) {
    // initialization: every class starts with a zero smoothed rank
    classStates_[pid].entities = classes;
    for (auto cid : classes) {
      classStates_[pid].smoothedRanks[cid] = 0;
    }
  }
  // we don't rely on this decay anymore, hence decay factor 0.0
  classStates_[pid].updateRankings(scores, 0.0);
  RebalanceContext ctx =
      pickVictimAndReceiverFromRankings(pid, validVictim, validReceiver);

  // Gate 1: require enough traffic since the last snapshot before acting.
  auto numRequestObserved = computeNumRequests(pid, poolStats);
  if (!force && numRequestObserved < config.minRequestsObserved) {
    XLOGF(DBG, "haven't observed enough requests: {}/{}, wait until next round",
          numRequestObserved, config.minRequestsObserved);
    ctx = kNoOpContext;
  }
  // Gate 2: require the receiver's score to beat the victim's by at least the
  // (auto-tuned) absolute diff and the relative diff ratio.
  if (!force && ctx.isEffective()) {
    // extra filterings
    auto receiverScore = scores.at(ctx.receiverClassId);
    auto victimScore = scores.at(ctx.victimClassId);
    auto improvement = receiverScore - victimScore;
    // guard against division by zero when the victim has no score
    auto improvementRatio = improvement / (victimScore == 0 ? 1 : victimScore);
    ctx.diffValue = improvement;
    if ((config.minDiff > 0 && improvement < config.minDiff) ||
        (config.minDiffRatio > 0 && improvementRatio < config.minDiffRatio)) {
      XLOGF(DBG, "Not enough to trigger rebalancing, receiver id: {}, victim id: {}, receiver score: {}, victim score: {}, improvement: {}, improvement ratio: {}, thresh1: {}, thresh2: {}",
            ctx.receiverClassId, ctx.victimClassId, receiverScore, victimScore, improvement, improvementRatio, config.minDiff, config.minDiffRatio);
      ctx = kNoOpContext;
    } else {
      XLOGF(DBG, "rebalancing, receiver id: {}, victim id: {}, receiver score: {}, victim score: {}, improvement: {}, improvement ratio: {}",
            ctx.receiverClassId, ctx.victimClassId, receiverScore, victimScore, improvement, improvementRatio);
    }
  }

  // Normalize: any non-effective context collapses to the canonical no-op.
  if (!ctx.isEffective()) {
    ctx = kNoOpContext;
  }
  auto& poolState = getPoolState(pid);
  auto deltaRequestsSinceLastDecay = computeRequestsSinceLastDecay(pid, poolStats);
  // Refresh tail-hit counters either unconditionally, or (when
  // onlyUpdateHitIfRebalance is set) only when a rebalance fired or the decay
  // interval has elapsed since the last decay.
  if ((ctx.isEffective() || !config.onlyUpdateHitIfRebalance) ||
      deltaRequestsSinceLastDecay >= config.minDecayInterval) {
    for (const auto i : poolStats.getClassIds()) {
      poolState[i].updateTailHits(poolStats, config.movingAverageParam);
    }
  }

  // Snapshot request counters once enough traffic has been observed so the
  // next round measures a fresh window.
  if (numRequestObserved >= config.minRequestsObserved) {
    for (const auto i : poolStats.getClassIds()) {
      poolState[i].updateRequests(poolStats);
    }
  }

  // self-tuning threshold for the next round.
  if (ctx.isEffective()) {
    // max window size: 2 * n_classes

    size_t classWithHits = 0;
    for (const auto& cid : classes) {
      if (poolState.at(cid).deltaHits(poolStats) > 0) {
        ++classWithHits;
      }
    }
    recordRebalanceEvent(pid, ctx, classWithHits * 2);
    auto effectiveMoveRate = queryEffectiveMoveRate(pid);
    auto windowSize = getRebalanceEventQueueSize(pid);
    XLOGF(DBG,
          "Rebalancing: effective move rate = {}, window size = {}, diff = {}, threshold = {}, ({}->{})",
          effectiveMoveRate,
          windowSize, ctx.diffValue, config.minDiff,
          static_cast<int>(ctx.victimClassId),
          static_cast<int>(ctx.receiverClassId));

    // Low effective move rate => moves are being wasted: raise the threshold
    // (additively or multiplicatively) once the event window is large enough.
    if (effectiveMoveRate <= config.emrLow &&
        windowSize >= config.thresholdIncMinWindowSize) {
      if (config.thresholdAI) {
        auto currentMin = getMinDiffValueFromRebalanceEvents(pid);
        if (updateMinDff(currentMin + config.thresholdAIADStep)) {
          clearPoolRebalanceEvent(pid);
        }
      } else if (config.thresholdMI) {
        if (updateMinDff(config.minDiff * config.thresholdMIMDFactor)) {
          clearPoolRebalanceEvent(pid);
        }
      }

    } else if (effectiveMoveRate >= config.emrHigh &&
               windowSize >= classWithHits) {
      // High effective move rate => possibly too conservative: lower the
      // threshold, but never below 2.0.
      if (config.thresholdAD) {
        if (updateMinDff(std::max(2.0, config.minDiff - config.thresholdAIADStep))) {
          clearPoolRebalanceEvent(pid);
        }
      } else if (config.thresholdMD) {
        if (updateMinDff(std::max(2.0, config.minDiff / config.thresholdMIMDFactor))) {
          clearPoolRebalanceEvent(pid);
        }
      }
    }
  }

  return ctx;
}

ClassId MarginalHitsStrategyNew::pickVictimImpl(const CacheBase& cache,
                                                PoolId pid,
                                                const PoolStats& stats) {
  // Shrinking a pool must always yield a victim, so run candidate selection
  // in forced mode and discard the receiver half of the context.
  const auto context =
      pickVictimAndReceiverCandidates(cache, pid, stats, /* force */ true);
  return context.victimClassId;
}

// Computes the decayed marginal-hits score (latest delta of tail-slab hits)
// for every valid allocation class in the pool.
//
// @param pid          pool whose classes are scored
// @param poolStats    current stats snapshot for the pool
// @param decayFactor  moving-average parameter used to decay the raw delta
// @return map from class id to its decayed marginal-hits score
std::unordered_map<ClassId, double>
MarginalHitsStrategyNew::computeClassMarginalHits(PoolId pid,
                                                  const PoolStats& poolStats,
                                                  double decayFactor) {
  const auto& poolState = getPoolState(pid);
  std::unordered_map<ClassId, double> scores;
  // const reference: the original `for (auto info : ...)` copied the whole
  // per-class state object on every iteration.
  for (const auto& info : poolState) {
    if (info.id != Slab::kInvalidClassId) {
      // this score is the latest delta.
      scores[info.id] = info.getDecayedMarginalHits(poolStats, decayFactor);
    }
  }
  return scores;
}

size_t MarginalHitsStrategyNew::computeNumRequests(
    PoolId pid, const PoolStats& poolStats) const {
  // Sums the per-class request deltas since the last snapshot for this pool.
  const auto& poolState = getPoolState(pid);
  size_t requestCount = 0;
  for (const auto& classId : poolStats.getClassIds()) {
    requestCount += poolState.at(classId).deltaRequests(poolStats);
  }
  return requestCount;
}

size_t MarginalHitsStrategyNew::computeRequestsSinceLastDecay(
    PoolId pid, const PoolStats& poolStats) const {
  // Sums, over all classes in the pool, the requests accumulated since the
  // tail-hit counters were last decayed.
  const auto& poolState = getPoolState(pid);
  size_t accumulated = 0;
  for (const auto& classId : poolStats.getClassIds()) {
    accumulated += poolState.at(classId).deltaRequestsSinceLastDecay(poolStats);
  }
  return accumulated;
}

RebalanceContext MarginalHitsStrategyNew::pickVictimAndReceiverFromRankings(
    PoolId pid,
    const std::unordered_map<ClassId, bool>& validVictim,
    const std::unordered_map<ClassId, bool>& validReceiver) {
  // Ask the smoothed-ranking state for the best (victim, receiver) pair.
  auto& state = classStates_[pid];
  const auto [victim, receiver] = state.pickVictimAndReceiverFromRankings(
      validVictim, validReceiver, Slab::kInvalidClassId);
  RebalanceContext ctx{victim, receiver};

  // Reject degenerate picks: either side missing, or both sides identical.
  const bool invalidPick = ctx.victimClassId == Slab::kInvalidClassId ||
                           ctx.receiverClassId == Slab::kInvalidClassId ||
                           ctx.victimClassId == ctx.receiverClassId;
  if (invalidPick) {
    return kNoOpContext;
  }

  XLOGF(DBG,
        "Rebalancing: receiver = {}, smoothed rank = {}, victim = {}, smoothed "
        "rank = {}",
        static_cast<int>(ctx.receiverClassId),
        state.smoothedRanks[ctx.receiverClassId],
        static_cast<int>(ctx.victimClassId),
        state.smoothedRanks[ctx.victimClassId]);
  return ctx;
}
} // namespace facebook::cachelib
142 changes: 142 additions & 0 deletions cachelib/allocator/MarginalHitsStrategyNew.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "cachelib/allocator/MarginalHitsState.h"
#include "cachelib/allocator/RebalanceStrategy.h"

namespace facebook {
namespace cachelib {

// This strategy computes number of hits in the tail slab of LRU to estimate
// the potential (given one more slab, how many more hits can this LRU serve).
// And use a smoothed ranking of those potentials to decide victim and receiver.
class MarginalHitsStrategyNew : public RebalanceStrategy {
 public:
  // Config class for marginal hits strategy
  struct Config : public BaseConfig {
    // parameter for moving average, to smooth the ranking
    double movingAverageParam{0.3};

    // minimum number of slabs to retain in every allocation class.
    unsigned int minSlabs{1};

    // maximum free memory (equivalent to this many slabs) in every allocation
    // class
    unsigned int maxFreeMemSlabs{1};

    // if true, tail-hit counters are refreshed only when a rebalance fired or
    // the decay interval elapsed; otherwise they refresh every round
    bool onlyUpdateHitIfRebalance{true};

    // enforcing thresholds between the victim and receiver class:
    // minimum absolute and relative score improvement required to rebalance
    double minDiff{2.0};
    double minDiffRatio{0.00};

    ////// these parameters are for controlling the threshold auto-tuning
    // minimum rebalance-event window before the threshold may be raised
    unsigned int thresholdIncMinWindowSize{5};
    // increase mode: additive (AI) or multiplicative (MI);
    // decrease mode: additive (AD) or multiplicative (MD)
    bool thresholdAI{true};
    bool thresholdMI{false};
    bool thresholdAD{false};
    bool thresholdMD{true};

    // effective-move-rate bounds: at or below emrLow the threshold is raised,
    // at or above emrHigh it is lowered
    double emrLow{0.5};
    double emrHigh{0.95};
    // step for additive tuning; factor for multiplicative tuning
    double thresholdAIADStep{2.0};
    double thresholdMIMDFactor{2.0};
    ///////////////////////////

    // minimum requests that must be observed before a pick is allowed, and
    // minimum requests between forced tail-hit decays
    uint64_t minRequestsObserved{50000};
    uint64_t minDecayInterval{50000};

    Config() noexcept {}
    explicit Config(double param) noexcept : Config(param, 1, 1) {}
    Config(double param, unsigned int minSlab, unsigned int maxFree) noexcept
        : movingAverageParam(param),
          minSlabs(minSlab),
          maxFreeMemSlabs(maxFree) {}
  };

  // Update the config. This will not affect the current rebalancing, but
  // will take effect in the next round
  void updateConfig(const BaseConfig& baseConfig) override final {
    std::lock_guard<std::mutex> l(configLock_);
    config_ = static_cast<const Config&>(baseConfig);
  }

  // Sets a new minDiff threshold (used by the threshold auto-tuner).
  // Returns true iff the stored value actually changed.
  bool updateMinDff(double newValue) {
    // Acquire the lock BEFORE reading config_.minDiff: the original code
    // compared against config_.minDiff unlocked, racing with updateConfig()
    // replacing config_ on another thread.
    std::lock_guard<std::mutex> l(configLock_);
    if (config_.minDiff == newValue) {
      return false;
    }
    XLOGF(DBG, "marginal-hits, threshold auto-tuning, updating from {} to {}",
          config_.minDiff, newValue);
    config_.minDiff = newValue;
    return true;
  }

  explicit MarginalHitsStrategyNew(Config config = {});

 protected:
  // This returns a copy of the current config.
  // This ensures that we're always looking at the same config even though
  // someone else may have updated the config during rebalancing
  Config getConfigCopy() const {
    std::lock_guard<std::mutex> l(configLock_);
    return config_;
  }

  // pick victim and receiver classes from a pool
  RebalanceContext pickVictimAndReceiverImpl(
      const CacheBase& cache,
      PoolId pid,
      const PoolStats& poolStats) override final;

  // pick victim class from a pool to shrink
  ClassId pickVictimImpl(const CacheBase& cache,
                         PoolId pid,
                         const PoolStats& poolStats) override final;

  // total request delta across all classes in the pool since last snapshot
  size_t computeNumRequests(PoolId pid, const PoolStats& poolStats) const;

  // total requests across all classes since the last tail-hit decay
  size_t computeRequestsSinceLastDecay(PoolId pid, const PoolStats& poolStats) const;

 private:
  // compute delta of tail hits for every class in this pool
  std::unordered_map<ClassId, double> computeClassMarginalHits(
      PoolId pid, const PoolStats& poolStats, double decayFactor);

  // pick victim and receiver according to smoothed rankings
  RebalanceContext pickVictimAndReceiverFromRankings(
      PoolId pid,
      const std::unordered_map<ClassId, bool>& validVictim,
      const std::unordered_map<ClassId, bool>& validReceiver);

  // shared worker behind both Impl entry points; `force` skips the
  // request-count and improvement-threshold gates
  RebalanceContext pickVictimAndReceiverCandidates(
      const CacheBase& cache,
      PoolId pid,
      const PoolStats& poolStats,
      bool force);

  // marginal hits states for classes in each pools
  std::unordered_map<PoolId, MarginalHitsState<ClassId>> classStates_;

  // Config for this strategy, this can be updated anytime.
  // Do not access this directly, always use `getConfig()` to
  // obtain a copy first
  Config config_;
  // guards config_ (updateConfig / updateMinDff / getConfigCopy)
  mutable std::mutex configLock_;
};
} // namespace cachelib
} // namespace facebook
Loading