Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 41 additions & 4 deletions sqlx-core/src/pool/inner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ pub(crate) struct PoolInner<DB: Database> {
pub(super) options: PoolOptions<DB>,
pub(crate) acquire_time_level: Option<Level>,
pub(crate) acquire_slow_level: Option<Level>,
/// Counter for phantom-permit retries, used by tests to verify backoff behavior.
#[cfg(test)]
pub(super) phantom_retries: AtomicU32,
}

impl<DB: Database> PoolInner<DB> {
Expand Down Expand Up @@ -62,6 +65,8 @@ impl<DB: Database> PoolInner<DB> {
acquire_time_level: private_level_filter_to_trace_level(options.acquire_time_level),
acquire_slow_level: private_level_filter_to_trace_level(options.acquire_slow_level),
options,
#[cfg(test)]
phantom_retries: AtomicU32::new(0),
};

let pool = Arc::new(pool);
Expand Down Expand Up @@ -254,6 +259,8 @@ impl<DB: Database> PoolInner<DB> {
let acquired = crate::rt::timeout(
self.options.acquire_timeout,
async {
let mut backoff_count: u32 = 0;

loop {
// Handles the close-event internally
let permit = self.acquire_permit().await?;
Expand All @@ -279,11 +286,23 @@ impl<DB: Database> PoolInner<DB> {
// This can happen for a child pool that's at its connection limit,
// or if the pool was closed between `acquire_permit()` and
// `try_increment_size()`.
//
// It can also happen due to "phantom permits" — the semaphore
// has permits available but no idle connections exist and the
// pool is at max_connections. This occurs when a connection is
// discarded (expired/errored) while checked out: the
// DecrementSizeGuard releases a permit but the connection was
// never returned to the idle queue.
//
// Use exponential backoff to avoid spinning at 100% CPU on
// repeated phantom permit acquisitions. The backoff also gives
// time for checked-out connections to be returned to the pool.
tracing::debug!("woke but was unable to acquire idle connection or open new one; retrying");
// If so, we're likely in the current-thread runtime if it's Tokio,
// and so we should yield to let any spawned return_to_pool() tasks
// execute.
crate::rt::yield_now().await;
#[cfg(test)]
self.phantom_retries.fetch_add(1, Ordering::Relaxed);
let backoff = Duration::from_millis(1 << cmp::min(backoff_count, 8));
crate::rt::sleep(backoff).await;
backoff_count += 1;
continue;
}
};
Expand Down Expand Up @@ -621,3 +640,21 @@ impl<DB: Database> Drop for DecrementSizeGuard<DB> {
}
}
}

// Test-only helpers for reproducing pool pathologies in a controlled way.
#[cfg(test)]
impl<DB: Database> PoolInner<DB> {
    /// Simulate the "phantom permit" condition that can occur on ARM/aarch64
    /// due to weak memory ordering between `idle_conns.push()` and `semaphore.release()`
    /// in the `release()` path.
    ///
    /// Sets `size` to `max_connections` and injects extra semaphore permits that
    /// don't correspond to any idle connection, reproducing the state where
    /// `acquire()` would spin in a tight loop with the old `yield_now()` code.
    ///
    /// `count` is the number of phantom permits to add; callers typically pass
    /// `max_connections` to saturate the semaphore.
    pub(super) fn inject_phantom_permits(&self, count: usize) {
        // Pin the pool size at the cap so `try_increment_size()` always fails:
        // acquire() can neither pop an idle connection nor open a new one.
        //
        // NOTE(review): `Release` ordering here presumably pairs with an
        // `Acquire` load in `try_increment_size()` — confirm against that code.
        self.size
            .store(self.options.max_connections, Ordering::Release);
        // Release `count` permits that have no matching idle connection — the
        // exact "phantom" state the acquire loop must tolerate without spinning.
        self.semaphore.release(count);
    }
}
97 changes: 97 additions & 0 deletions sqlx-core/src/pool/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -666,3 +666,100 @@ fn assert_pool_traits() {
assert_clone::<Pool<DB>>();
}
}

// Regression tests for the acquire-loop backoff on "phantom" semaphore permits.
#[cfg(all(test, feature = "any", feature = "_rt-tokio"))]
mod phantom_permit_tests {
    use super::*;
    use crate::any::{Any, AnyConnectOptions};
    use std::sync::atomic::Ordering as AtomicOrdering;
    use std::time::{Duration, Instant};

    /// Regression test for the phantom-permit spin loop seen on ARM/aarch64.
    ///
    /// A "phantom permit" is a semaphore permit with no corresponding idle
    /// connection. When the pool is also at `max_connections`, the acquire
    /// loop repeatedly wins a permit, finds no idle connection, fails to grow
    /// the pool, and retries. With the old `yield_now()`-only retry this loop
    /// ran millions of times per second at 100% CPU; with exponential backoff
    /// (1ms doubling up to a 256ms cap) only on the order of ~15 retries fit
    /// inside a 2-second `acquire_timeout`.
    ///
    /// The test injects the phantom state synthetically via
    /// `inject_phantom_permits()` and asserts on the retry counter to prove
    /// the backoff is in effect.
    ///
    /// Run with: cargo test -p sqlx-core --features _rt-tokio,any phantom_permit
    #[test]
    fn test_phantom_permit_backoff_limits_retries() {
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .unwrap();

        runtime.block_on(async {
            let acquire_timeout = Duration::from_secs(2);
            let connect_opts: AnyConnectOptions = "sqlite::memory:".parse().unwrap();

            let pool = PoolOptions::<Any>::new()
                .max_connections(4)
                .acquire_timeout(acquire_timeout)
                .connect_lazy_with(connect_opts);

            // Force the pathological state: 4 permits available, zero idle
            // connections, and size pinned at max_connections — exactly what
            // was observed on aarch64 under heavy contention.
            pool.0.inject_phantom_permits(4);

            // Confirm the injected state before exercising acquire().
            assert_eq!(pool.0.size(), pool.0.options.max_connections);
            assert_eq!(pool.0.num_idle(), 0);
            assert!(pool.0.semaphore.permits() >= 4);

            let started_at = Instant::now();
            let outcome = pool.0.acquire().await;
            let elapsed = started_at.elapsed();

            // acquire() must give up with PoolTimedOut rather than spin forever.
            assert!(
                matches!(outcome, Err(Error::PoolTimedOut)),
                "expected PoolTimedOut, got: {:?}",
                outcome.err()
            );

            // Wall-clock time should be roughly the configured timeout.
            assert!(
                elapsed >= acquire_timeout - Duration::from_millis(100),
                "acquire returned too quickly ({elapsed:?}), expected ~{acquire_timeout:?}"
            );

            // KEY ASSERTION: the backoff schedule (1+2+4+8+16+32+64+128+256+256+...ms)
            // permits only ~12-16 retries within the 2-second window, whereas the
            // old yield_now() loop would have produced millions. The bound of 100
            // is deliberately generous but still distinguishes backoff from spinning.
            let retries = pool.0.phantom_retries.load(AtomicOrdering::Relaxed);
            assert!(
                retries < 100,
                "too many phantom permit retries ({retries}): \
                acquire loop is spinning instead of backing off. \
                With yield_now() this would be millions; with backoff, ~15."
            );

            // Lower bound: confirm the loop actually retried at all, i.e. the
            // phantom state was hit rather than timing out for another reason.
            assert!(
                retries >= 5,
                "too few retries ({retries}): backoff may be too aggressive \
                or the test setup is wrong"
            );
        });
    }
}
Loading