Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions mssql_python/pybind/connection/connection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,16 @@ void Connection::connect(const py::dict& attrs_before) {
#else
connStrPtr = const_cast<SQLWCHAR*>(_connStr.c_str());
#endif
SQLRETURN ret = SQLDriverConnect_ptr(_dbcHandle->get(), nullptr, connStrPtr, SQL_NTS, nullptr,
0, nullptr, SQL_DRIVER_NOPROMPT);
SQLRETURN ret;
{
// Release the GIL during the blocking ODBC connect call.
// SQLDriverConnect involves DNS resolution, TCP handshake, TLS negotiation,
// and SQL Server authentication — all pure I/O that doesn't need the GIL.
// This allows other Python threads to run concurrently.
py::gil_scoped_release release;
ret = SQLDriverConnect_ptr(_dbcHandle->get(), nullptr, connStrPtr, SQL_NTS, nullptr,
0, nullptr, SQL_DRIVER_NOPROMPT);
}
checkError(ret);
updateLastUsed();
}
Expand All @@ -95,6 +103,11 @@ void Connection::disconnect() {
if (_dbcHandle) {
LOG("Disconnecting from database");

// Check if we hold the GIL so we can conditionally release it.
// The GIL is held when called from pybind11-bound methods but may NOT
// be held in destructor paths (C++ shared_ptr ref-count drop, shutdown).
bool hasGil = PyGILState_Check() != 0;

// CRITICAL FIX: Mark all child statement handles as implicitly freed
// When we free the DBC handle below, the ODBC driver will automatically free
// all child STMT handles. We need to tell the SqlHandle objects about this
Expand Down Expand Up @@ -135,8 +148,24 @@ void Connection::disconnect() {
_allocationsSinceCompaction = 0;
} // Release lock before potentially slow SQLDisconnect call

SQLRETURN ret = SQLDisconnect_ptr(_dbcHandle->get());
checkError(ret);
SQLRETURN ret;
if (hasGil) {
// Release the GIL during the blocking ODBC disconnect call.
// This allows other Python threads to run while the network
// round-trip completes.
py::gil_scoped_release release;
ret = SQLDisconnect_ptr(_dbcHandle->get());
} else {
// Destructor / shutdown path — GIL is not held, call directly.
ret = SQLDisconnect_ptr(_dbcHandle->get());
}
// In destructor/shutdown paths, suppress errors to avoid
// std::terminate() if this throws during stack unwinding.
if (hasGil) {
checkError(ret);
} else if (!SQL_SUCCEEDED(ret)) {
LOG("SQLDisconnect failed in destructor/shutdown path; ignoring error");
}
// triggers SQLFreeHandle via destructor, if last owner
_dbcHandle.reset();
} else {
Expand Down Expand Up @@ -375,7 +404,6 @@ bool Connection::reset() {
(SQLPOINTER)SQL_RESET_CONNECTION_YES, SQL_IS_INTEGER);
if (!SQL_SUCCEEDED(ret)) {
LOG("Failed to reset connection (ret=%d). Marking as dead.", ret);
disconnect();
return false;
}

Expand All @@ -387,7 +415,6 @@ bool Connection::reset() {
(SQLPOINTER)SQL_TXN_READ_COMMITTED, SQL_IS_INTEGER);
if (!SQL_SUCCEEDED(ret)) {
LOG("Failed to reset transaction isolation level (ret=%d). Marking as dead.", ret);
disconnect();
return false;
}

Expand Down
82 changes: 65 additions & 17 deletions mssql_python/pybind/connection/connection_pool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ std::shared_ptr<Connection> ConnectionPool::acquire(const std::wstring& connStr,
const py::dict& attrs_before) {
std::vector<std::shared_ptr<Connection>> to_disconnect;
std::shared_ptr<Connection> valid_conn = nullptr;
bool needs_connect = false;
{
std::lock_guard<std::mutex> lock(_mutex);
auto now = std::chrono::steady_clock::now();
Expand Down Expand Up @@ -57,16 +58,33 @@ std::shared_ptr<Connection> ConnectionPool::acquire(const std::wstring& connStr,
}
}

// Create new connection if none reusable
// Reserve a slot for a new connection if none reusable.
// The actual connect() call happens outside the mutex to avoid
// holding the mutex during the blocking ODBC call (which releases
// the GIL and could otherwise cause a mutex/GIL deadlock).
if (!valid_conn && _current_size < _max_size) {
valid_conn = std::make_shared<Connection>(connStr, true);
valid_conn->connect(attrs_before);
++_current_size;
needs_connect = true;
} else if (!valid_conn) {
throw std::runtime_error("ConnectionPool::acquire: pool size limit reached");
}
}

// Phase 2.5: Connect the new connection outside the mutex.
if (needs_connect) {
try {
valid_conn->connect(attrs_before);
} catch (...) {
// Connect failed — release the reserved slot
{
std::lock_guard<std::mutex> lock(_mutex);
if (_current_size > 0) --_current_size;
}
throw;
}
}

// Phase 3: Disconnect expired/bad connections outside lock
for (auto& conn : to_disconnect) {
try {
Expand All @@ -79,12 +97,25 @@ std::shared_ptr<Connection> ConnectionPool::acquire(const std::wstring& connStr,
}

void ConnectionPool::release(std::shared_ptr<Connection> conn) {
std::lock_guard<std::mutex> lock(_mutex);
if (_pool.size() < _max_size) {
conn->updateLastUsed();
_pool.push_back(conn);
} else {
conn->disconnect();
bool should_disconnect = false;
{
std::lock_guard<std::mutex> lock(_mutex);
if (_pool.size() < _max_size) {
conn->updateLastUsed();
_pool.push_back(conn);
} else {
should_disconnect = true;
}
}
// Disconnect outside the mutex to avoid holding it during the
// blocking ODBC call (which releases the GIL).
if (should_disconnect) {
try {
conn->disconnect();
} catch (const std::exception& ex) {
LOG("ConnectionPool::release: disconnect failed: %s", ex.what());
}
std::lock_guard<std::mutex> lock(_mutex);
if (_current_size > 0)
--_current_size;
}
Expand Down Expand Up @@ -116,21 +147,35 @@ ConnectionPoolManager& ConnectionPoolManager::getInstance() {

std::shared_ptr<Connection> ConnectionPoolManager::acquireConnection(const std::wstring& connStr,
const py::dict& attrs_before) {
std::lock_guard<std::mutex> lock(_manager_mutex);

auto& pool = _pools[connStr];
if (!pool) {
LOG("Creating new connection pool");
pool = std::make_shared<ConnectionPool>(_default_max_size, _default_idle_secs);
std::shared_ptr<ConnectionPool> pool;
{
std::lock_guard<std::mutex> lock(_manager_mutex);
auto& pool_ref = _pools[connStr];
if (!pool_ref) {
LOG("Creating new connection pool");
pool_ref = std::make_shared<ConnectionPool>(_default_max_size, _default_idle_secs);
}
pool = pool_ref;
}
// Call acquire() outside _manager_mutex. acquire() may release the GIL
// during the ODBC connect call; holding _manager_mutex across that would
// create a mutex/GIL lock-ordering deadlock.
return pool->acquire(connStr, attrs_before);
}

void ConnectionPoolManager::returnConnection(const std::wstring& conn_str,
const std::shared_ptr<Connection> conn) {
std::lock_guard<std::mutex> lock(_manager_mutex);
if (_pools.find(conn_str) != _pools.end()) {
_pools[conn_str]->release((conn));
std::shared_ptr<ConnectionPool> pool;
{
std::lock_guard<std::mutex> lock(_manager_mutex);
auto it = _pools.find(conn_str);
if (it != _pools.end()) {
pool = it->second;
}
}
// Call release() outside _manager_mutex to avoid deadlock.
if (pool) {
pool->release(conn);
}
}

Expand All @@ -142,6 +187,9 @@ void ConnectionPoolManager::configure(int max_size, int idle_timeout_secs) {

void ConnectionPoolManager::closePools() {
std::lock_guard<std::mutex> lock(_manager_mutex);
// Keep _manager_mutex held for the full close operation so that
// acquireConnection()/returnConnection() cannot create or use pools
// while closePools() is in progress.
for (auto& [conn_str, pool] : _pools) {
if (pool) {
pool->close();
Expand Down
36 changes: 36 additions & 0 deletions tests/test_009_pooling.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,42 @@ def try_overflow():
c.close()


def test_pool_release_overflow_disconnects_outside_mutex(conn_str):
    """Releasing a connection into a full pool must disconnect it cleanly.

    Exercises the overflow branch of ConnectionPool::release()
    (connection_pool.cpp): when the idle pool already holds max_size
    entries, the returned connection is disconnected — with the
    disconnect performed outside the pool mutex — instead of cached.

    Strategy: max_size limits total concurrent connections, so we open
    two connections under max_size=2, shrink the pool to max_size=1,
    then close both. The first close fills the single idle slot; the
    second close hits the overflow/disconnect path.
    """
    pooling(max_size=2, idle_timeout=30)

    first = connect(conn_str)
    second = connect(conn_str)

    # Reduce idle capacity: one close can be pooled, the next overflows.
    pooling(max_size=1, idle_timeout=30)

    # Returned to the pool — the single idle slot is now occupied.
    first.close()

    # Pool is already full, so this connection must take the overflow
    # path and be disconnected rather than pooled.
    second.close()

    # The pool should remain fully usable after the overflow disconnect.
    fresh = connect(conn_str)
    cursor = fresh.cursor()
    cursor.execute("SELECT 1")
    assert cursor.fetchone()[0] == 1
    fresh.close()


@pytest.mark.skip("Flaky test - idle timeout behavior needs investigation")
def test_pool_idle_timeout_removes_connections(conn_str):
"""Test that idle_timeout removes connections from the pool after the timeout."""
Expand Down
Loading
Loading