Skip to content

Commit b03ac72

Browse files
committed
Put the thread state in PyCriticalSection structs
GH-141406 improved performance by fetching the thread state only once and storing it in a variable on the stack. This commit instead stores the thread state in the PyCriticalSection struct (also a temporary variable on the stack), bringing the public and private implementations closer together.
1 parent 99e2c5e commit b03ac72

File tree

3 files changed

+41
-62
lines changed

3 files changed

+41
-62
lines changed

Include/cpython/critical_section.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ struct PyCriticalSection {
111111

112112
// Mutex used to protect critical section
113113
PyMutex *_cs_mutex;
114+
115+
PyThreadState *_cs_tstate;
114116
};
115117

116118
// A critical section protected by two mutexes. Use

Include/internal/pycore_critical_section.h

Lines changed: 29 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ _PyCriticalSection_Resume(PyThreadState *tstate);
8686

8787
// (private) slow path for locking the mutex
8888
PyAPI_FUNC(void)
89-
_PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m);
89+
_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m);
9090

9191
PyAPI_FUNC(void)
92-
_PyCriticalSection2_BeginSlow(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
92+
_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
9393
int is_m1_locked);
9494

9595
PyAPI_FUNC(void)
@@ -104,30 +104,33 @@ _PyCriticalSection_IsActive(uintptr_t tag)
104104
}
105105

106106
static inline void
107-
_PyCriticalSection_BeginMutex(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m)
107+
_PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m)
108108
{
109+
PyThreadState *tstate = PyThreadState_GET();
110+
c->_cs_tstate = tstate;
109111
if (PyMutex_LockFast(m)) {
110112
c->_cs_mutex = m;
111113
c->_cs_prev = tstate->critical_section;
112114
tstate->critical_section = (uintptr_t)c;
113115
}
114116
else {
115-
_PyCriticalSection_BeginSlow(tstate, c, m);
117+
_PyCriticalSection_BeginSlow(c, m);
116118
}
117119
}
118120

119121
static inline void
120-
_PyCriticalSection_Begin(PyThreadState *tstate, PyCriticalSection *c, PyObject *op)
122+
_PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op)
121123
{
122-
_PyCriticalSection_BeginMutex(tstate, c, &op->ob_mutex);
124+
_PyCriticalSection_BeginMutex(c, &op->ob_mutex);
123125
}
124126

125127
// Removes the top-most critical section from the thread's stack of critical
126128
// sections. If the new top-most critical section is inactive, then it is
127129
// resumed.
128130
static inline void
129-
_PyCriticalSection_Pop(PyThreadState *tstate, PyCriticalSection *c)
131+
_PyCriticalSection_Pop(PyCriticalSection *c)
130132
{
133+
PyThreadState *tstate = c->_cs_tstate;
131134
uintptr_t prev = c->_cs_prev;
132135
tstate->critical_section = prev;
133136

@@ -137,7 +140,7 @@ _PyCriticalSection_Pop(PyThreadState *tstate, PyCriticalSection *c)
137140
}
138141

139142
static inline void
140-
_PyCriticalSection_End(PyThreadState *tstate, PyCriticalSection *c)
143+
_PyCriticalSection_End(PyCriticalSection *c)
141144
{
142145
// If the mutex is NULL, we used the fast path in
143146
// _PyCriticalSection_BeginSlow for locks already held in the top-most
@@ -146,17 +149,17 @@ _PyCriticalSection_End(PyThreadState *tstate, PyCriticalSection *c)
146149
return;
147150
}
148151
PyMutex_Unlock(c->_cs_mutex);
149-
_PyCriticalSection_Pop(tstate, c);
152+
_PyCriticalSection_Pop(c);
150153
}
151154

152155
static inline void
153-
_PyCriticalSection2_BeginMutex(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
156+
_PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
154157
{
155158
if (m1 == m2) {
156159
// If the two mutex arguments are the same, treat this as a critical
157160
// section with a single mutex.
158161
c->_cs_mutex2 = NULL;
159-
_PyCriticalSection_BeginMutex(tstate, &c->_cs_base, m1);
162+
_PyCriticalSection_BeginMutex(&c->_cs_base, m1);
160163
return;
161164
}
162165

@@ -169,6 +172,9 @@ _PyCriticalSection2_BeginMutex(PyThreadState *tstate, PyCriticalSection2 *c, PyM
169172
m2 = tmp;
170173
}
171174

175+
PyThreadState *tstate = PyThreadState_GET();
176+
c->_cs_base._cs_tstate = tstate;
177+
172178
if (PyMutex_LockFast(m1)) {
173179
if (PyMutex_LockFast(m2)) {
174180
c->_cs_base._cs_mutex = m1;
@@ -179,22 +185,22 @@ _PyCriticalSection2_BeginMutex(PyThreadState *tstate, PyCriticalSection2 *c, PyM
179185
tstate->critical_section = p;
180186
}
181187
else {
182-
_PyCriticalSection2_BeginSlow(tstate, c, m1, m2, 1);
188+
_PyCriticalSection2_BeginSlow(c, m1, m2, 1);
183189
}
184190
}
185191
else {
186-
_PyCriticalSection2_BeginSlow(tstate, c, m1, m2, 0);
192+
_PyCriticalSection2_BeginSlow(c, m1, m2, 0);
187193
}
188194
}
189195

190196
static inline void
191-
_PyCriticalSection2_Begin(PyThreadState *tstate, PyCriticalSection2 *c, PyObject *a, PyObject *b)
197+
_PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b)
192198
{
193-
_PyCriticalSection2_BeginMutex(tstate, c, &a->ob_mutex, &b->ob_mutex);
199+
_PyCriticalSection2_BeginMutex(c, &a->ob_mutex, &b->ob_mutex);
194200
}
195201

196202
static inline void
197-
_PyCriticalSection2_End(PyThreadState *tstate, PyCriticalSection2 *c)
203+
_PyCriticalSection2_End(PyCriticalSection2 *c)
198204
{
199205
// if mutex1 is NULL, we used the fast path in
200206
// _PyCriticalSection_BeginSlow for mutexes that are already held,
@@ -208,7 +214,7 @@ _PyCriticalSection2_End(PyThreadState *tstate, PyCriticalSection2 *c)
208214
PyMutex_Unlock(c->_cs_mutex2);
209215
}
210216
PyMutex_Unlock(c->_cs_base._cs_mutex);
211-
_PyCriticalSection_Pop(tstate, &c->_cs_base);
217+
_PyCriticalSection_Pop(&c->_cs_base);
212218
}
213219

214220
static inline void
@@ -252,43 +258,12 @@ _PyCriticalSection_AssertHeldObj(PyObject *op)
252258
#endif
253259
}
254260

255-
#undef Py_BEGIN_CRITICAL_SECTION
256-
# define Py_BEGIN_CRITICAL_SECTION(op) \
257-
{ \
258-
PyCriticalSection _py_cs; \
259-
PyThreadState *_cs_tstate = _PyThreadState_GET(); \
260-
_PyCriticalSection_Begin(_cs_tstate, &_py_cs, _PyObject_CAST(op))
261-
262-
#undef Py_BEGIN_CRITICAL_SECTION_MUTEX
263-
# define Py_BEGIN_CRITICAL_SECTION_MUTEX(mutex) \
264-
{ \
265-
PyCriticalSection _py_cs; \
266-
PyThreadState *_cs_tstate = _PyThreadState_GET(); \
267-
_PyCriticalSection_BeginMutex(_cs_tstate, &_py_cs, mutex)
268-
269-
#undef Py_END_CRITICAL_SECTION
270-
# define Py_END_CRITICAL_SECTION() \
271-
_PyCriticalSection_End(_cs_tstate, &_py_cs); \
272-
}
273-
274-
#undef Py_BEGIN_CRITICAL_SECTION2
275-
# define Py_BEGIN_CRITICAL_SECTION2(a, b) \
276-
{ \
277-
PyCriticalSection2 _py_cs2; \
278-
PyThreadState *_cs_tstate = _PyThreadState_GET(); \
279-
_PyCriticalSection2_Begin(_cs_tstate, &_py_cs2, _PyObject_CAST(a), _PyObject_CAST(b))
280-
281-
#undef Py_BEGIN_CRITICAL_SECTION2_MUTEX
282-
# define Py_BEGIN_CRITICAL_SECTION2_MUTEX(m1, m2) \
283-
{ \
284-
PyCriticalSection2 _py_cs2; \
285-
PyThreadState *_cs_tstate = _PyThreadState_GET(); \
286-
_PyCriticalSection2_BeginMutex(_cs_tstate, &_py_cs2, m1, m2)
287-
288-
#undef Py_END_CRITICAL_SECTION2
289-
# define Py_END_CRITICAL_SECTION2() \
290-
_PyCriticalSection2_End(_cs_tstate, &_py_cs2); \
291-
}
261+
#define PyCriticalSection_Begin _PyCriticalSection_Begin
262+
#define PyCriticalSection_BeginMutex _PyCriticalSection_BeginMutex
263+
#define PyCriticalSection_End _PyCriticalSection_End
264+
#define PyCriticalSection2_Begin _PyCriticalSection2_Begin
265+
#define PyCriticalSection2_BeginMutex _PyCriticalSection2_BeginMutex
266+
#define PyCriticalSection2_End _PyCriticalSection2_End
292267

293268
#endif /* Py_GIL_DISABLED */
294269

Python/critical_section.c

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,15 @@ untag_critical_section(uintptr_t tag)
1818
#endif
1919

2020
void
21-
_PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMutex *m)
21+
_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m)
2222
{
2323
#ifdef Py_GIL_DISABLED
2424
// As an optimisation for locking the same object recursively, skip
2525
// locking if the mutex is currently locked by the top-most critical
2626
// section.
2727
// If the top-most critical section is a two-mutex critical section,
2828
// then locking is skipped if either mutex is m.
29+
PyThreadState *tstate = c->_cs_tstate;
2930
if (tstate->critical_section) {
3031
PyCriticalSection *prev = untag_critical_section(tstate->critical_section);
3132
if (prev->_cs_mutex == m) {
@@ -62,10 +63,11 @@ _PyCriticalSection_BeginSlow(PyThreadState *tstate, PyCriticalSection *c, PyMute
6263
}
6364

6465
void
65-
_PyCriticalSection2_BeginSlow(PyThreadState *tstate, PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
66+
_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
6667
int is_m1_locked)
6768
{
6869
#ifdef Py_GIL_DISABLED
70+
PyThreadState *tstate = c->_cs_base._cs_tstate;
6971
if (tstate->interp->stoptheworld.world_stopped) {
7072
c->_cs_base._cs_mutex = NULL;
7173
c->_cs_mutex2 = NULL;
@@ -153,7 +155,7 @@ void
153155
PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op)
154156
{
155157
#ifdef Py_GIL_DISABLED
156-
_PyCriticalSection_Begin(_PyThreadState_GET(), c, op);
158+
_PyCriticalSection_Begin(c, op);
157159
#endif
158160
}
159161

@@ -162,7 +164,7 @@ void
162164
PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m)
163165
{
164166
#ifdef Py_GIL_DISABLED
165-
_PyCriticalSection_BeginMutex(_PyThreadState_GET(), c, m);
167+
_PyCriticalSection_BeginMutex(c, m);
166168
#endif
167169
}
168170

@@ -171,7 +173,7 @@ void
171173
PyCriticalSection_End(PyCriticalSection *c)
172174
{
173175
#ifdef Py_GIL_DISABLED
174-
_PyCriticalSection_End(_PyThreadState_GET(), c);
176+
_PyCriticalSection_End(c);
175177
#endif
176178
}
177179

@@ -180,7 +182,7 @@ void
180182
PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b)
181183
{
182184
#ifdef Py_GIL_DISABLED
183-
_PyCriticalSection2_Begin(_PyThreadState_GET(), c, a, b);
185+
_PyCriticalSection2_Begin(c, a, b);
184186
#endif
185187
}
186188

@@ -189,7 +191,7 @@ void
189191
PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2)
190192
{
191193
#ifdef Py_GIL_DISABLED
192-
_PyCriticalSection2_BeginMutex(_PyThreadState_GET(), c, m1, m2);
194+
_PyCriticalSection2_BeginMutex(c, m1, m2);
193195
#endif
194196
}
195197

@@ -198,6 +200,6 @@ void
198200
PyCriticalSection2_End(PyCriticalSection2 *c)
199201
{
200202
#ifdef Py_GIL_DISABLED
201-
_PyCriticalSection2_End(_PyThreadState_GET(), c);
203+
_PyCriticalSection2_End(c);
202204
#endif
203205
}

0 commit comments

Comments
 (0)