@@ -1379,7 +1379,10 @@ make_executor_from_uops(_PyThreadStateImpl *tstate, _PyUOpInstruction *buffer, i
13791379 // linking of executor. Otherwise, the GC tries to untrack a
13801380 // still untracked object during dealloc.
13811381 _PyObject_GC_TRACK (executor );
1382- _Py_ExecutorInit (executor , dependencies );
1382+ if (_Py_ExecutorInit (executor , dependencies ) < 0 ) {
1383+ Py_DECREF (executor );
1384+ return NULL ;
1385+ }
13831386#ifdef Py_DEBUG
13841387 char * python_lltrace = Py_GETENV ("PYTHON_LLTRACE" );
13851388 int lltrace = 0 ;
@@ -1646,59 +1649,63 @@ bloom_filter_may_contain(_PyBloomFilter *bloom, _PyBloomFilter *hashes)
16461649 return true;
16471650}
16481651
/* Patch hunk for link_executor(): registers `executor` with the current
 * interpreter (obtained via _PyInterpreterState_GET()).
 *
 * '-' lines are the old implementation, '+' lines the new one.
 *
 * Old: returned void and pushed `executor` onto the front of the doubly
 * linked list rooted at interp->executor_list_head.
 *
 * New: returns int. Appends `executor` to interp->executor_ptrs and a copy
 * of *bloom to the same slot of interp->executor_blooms, then stores the slot
 * number in executor->vm_data.bloom_array_idx.
 *   Returns 0 on success, -1 if growing either array fails.
 *
 * NOTE(review): both -1 paths return without setting a Python exception in
 * this function -- confirm what the callers' error contract expects.
 * NOTE(review): on failure bloom_array_idx is never assigned here -- confirm
 * that later teardown of the executor does not read it.
 */
1649- static void
1650- link_executor (_PyExecutorObject * executor )
1652+ static int
1653+ link_executor (_PyExecutorObject * executor , const _PyBloomFilter * bloom )
16511654{
16521655 PyInterpreterState * interp = _PyInterpreterState_GET ();
/* Removed below: insertion at the head of the executor linked list. */
1653- _PyExecutorLinkListNode * links = & executor -> vm_data .links ;
1654- _PyExecutorObject * head = interp -> executor_list_head ;
1655- if (head == NULL ) {
1656- interp -> executor_list_head = executor ;
1657- links -> previous = NULL ;
1658- links -> next = NULL ;
1659- }
1660- else {
1661- assert (head -> vm_data .links .previous == NULL );
1662- links -> previous = NULL ;
1663- links -> next = head ;
1664- head -> vm_data .links .previous = executor ;
1665- interp -> executor_list_head = executor ;
1666- }
1667- /* executor_list_head must be first in list */
1668- assert (interp -> executor_list_head -> vm_data .links .previous == NULL );
/* Added below: grow both arrays when full. Capacity starts at 64 and doubles
 * afterwards.
 * NOTE(review): new_cap * sizeof(...) is not checked for overflow. */
1656+ if (interp -> executor_count == interp -> executor_capacity ) {
1657+ size_t new_cap = interp -> executor_capacity ? interp -> executor_capacity * 2 : 64 ;
1658+ _PyBloomFilter * new_blooms = PyMem_Realloc (
1659+ interp -> executor_blooms , new_cap * sizeof (_PyBloomFilter ));
1660+ if (new_blooms == NULL ) {
/* Nothing has been modified yet; the existing arrays are still in place. */
1661+ return -1 ;
1662+ }
1663+ _PyExecutorObject * * new_ptrs = PyMem_Realloc (
1664+ interp -> executor_ptrs , new_cap * sizeof (_PyExecutorObject * ));
1665+ if (new_ptrs == NULL ) {
1666+ /* Keep the grown blooms buffer: the first realloc succeeded and may have
1667+ * freed the old block, so new_blooms is the only valid pointer. */
1668+ interp -> executor_blooms = new_blooms ;
/* executor_capacity is deliberately left at its old value on this path, so
 * the next call will attempt the growth again. */
1669+ return -1 ;
1670+ }
1671+ interp -> executor_blooms = new_blooms ;
1672+ interp -> executor_ptrs = new_ptrs ;
1673+ interp -> executor_capacity = new_cap ;
1674+ }
/* Append at the end; the same index addresses both arrays. */
1675+ size_t idx = interp -> executor_count ++ ;
1676+ interp -> executor_blooms [idx ] = * bloom ;
1677+ interp -> executor_ptrs [idx ] = executor ;
/* NOTE(review): size_t is narrowed to int32_t here with no range check. */
1678+ executor -> vm_data .bloom_array_idx = (int32_t )idx ;
1679+ return 0 ;
16691680}
16701681
/* Patch hunk for unlink_executor(): removes `executor` from the interpreter's
 * executor bookkeeping.
 *
 * '-' lines are the old implementation, '+' lines the new one.
 *
 * Old: spliced the node out of the doubly linked list and, when it had no
 * predecessor, advanced interp->executor_list_head.
 *
 * New: swap-remove from the parallel arrays. executor_count is decremented,
 * the last entry of executor_blooms / executor_ptrs is copied into the
 * vacated slot, the moved executor's bloom_array_idx is updated, and the
 * removed executor's bloom_array_idx is set to -1.
 *
 * NOTE(review): the index precondition is enforced only by assert(). If this
 * can be reached for an executor that was never linked (or already unlinked,
 * idx == -1), builds without assertions would index the arrays with that
 * value -- confirm against the callers.
 */
16711682static void
16721683unlink_executor (_PyExecutorObject * executor )
16731684{
1674- _PyExecutorLinkListNode * links = & executor -> vm_data .links ;
1675- _PyExecutorObject * next = links -> next ;
1676- _PyExecutorObject * prev = links -> previous ;
1677- if (next != NULL ) {
1678- next -> vm_data .links .previous = prev ;
1679- }
1680- if (prev != NULL ) {
1681- prev -> vm_data .links .next = next ;
1682- }
1683- else {
1684- // prev == NULL implies that executor is the list head
1685- PyInterpreterState * interp = PyInterpreterState_Get ();
1686- assert (interp -> executor_list_head == executor );
1687- interp -> executor_list_head = next ;
1685+ PyInterpreterState * interp = PyInterpreterState_Get ();
1686+ int32_t idx = executor -> vm_data .bloom_array_idx ;
1687+ assert (idx >= 0 && (size_t )idx < interp -> executor_count );
/* After the decrement, `last` is the index of the final live entry. */
1688+ size_t last = -- interp -> executor_count ;
1689+ if ((size_t )idx != last ) {
1690+ /* Swap-remove: move the last element into the vacated slot */
1691+ interp -> executor_blooms [idx ] = interp -> executor_blooms [last ];
1692+ interp -> executor_ptrs [idx ] = interp -> executor_ptrs [last ];
/* Keep the moved executor's back-reference in sync with its new slot. */
1693+ interp -> executor_ptrs [idx ]-> vm_data .bloom_array_idx = idx ;
16881694 }
/* Mark the removed executor as no longer present in the arrays. */
1695+ executor -> vm_data .bloom_array_idx = -1 ;
16891696}
16901697
/* Patch hunk for _Py_ExecutorInit().
 *
 * '-' lines are the old implementation, '+' lines the new one.
 *
 * Old: returned void, copied dependency_set->bits word by word into
 * executor->vm_data.bloom, then called link_executor(executor).
 *
 * New: returns int. The dependency set is handed to link_executor(), which
 * stores it; the result is 0 on success and -1 when link_executor() fails.
 *
 * NOTE(review): vm_data.valid is set to true before linking is attempted and
 * is not reset on the -1 path. The visible caller drops its reference with
 * Py_DECREF on failure -- confirm that executor teardown copes with an
 * executor that is marked valid but was never linked.
 */
16911698/* This must be called by optimizers before using the executor */
1692- void
1699+ int
16931700_Py_ExecutorInit (_PyExecutorObject * executor , const _PyBloomFilter * dependency_set )
16941701{
16951702 executor -> vm_data .valid = true;
16961703 executor -> vm_data .pending_deletion = 0 ;
16971704 executor -> vm_data .code = NULL ;
1698- for ( int i = 0 ; i < _Py_BLOOM_FILTER_WORDS ; i ++ ) {
1699- executor -> vm_data . bloom . bits [ i ] = dependency_set -> bits [ i ] ;
/* Registration can now fail (array growth), so the error is propagated. */
1705+ if ( link_executor ( executor , dependency_set ) < 0 ) {
1706+ return -1 ;
17001707 }
1701- link_executor ( executor ) ;
1708+ return 0 ;
17021709}
17031710
17041711static _PyExecutorObject *
@@ -1809,35 +1816,36 @@ void
18091816_Py_Executor_DependsOn (_PyExecutorObject * executor , void * obj )
18101817{
/* Records `obj` in the bloom filter associated with `executor`.
 *
 * '-' line: the old code added to the filter embedded in the executor
 * (executor->vm_data.bloom).
 * '+' lines: the new code adds to the filter stored in the interpreter's
 * executor_blooms array, at the slot given by vm_data.bloom_array_idx.
 *
 * The executor must be valid, and its index must lie inside
 * [0, executor_count); both conditions are checked with assert() only.
 */
18111818 assert (executor -> vm_data .valid );
1812- _Py_BloomFilter_Add (& executor -> vm_data .bloom , obj );
1819+ PyInterpreterState * interp = _PyInterpreterState_GET ();
1820+ int32_t idx = executor -> vm_data .bloom_array_idx ;
1821+ assert (idx >= 0 && (size_t )idx < interp -> executor_count );
1822+ _Py_BloomFilter_Add (& interp -> executor_blooms [idx ], obj );
18131823}
18141824
18151825/* Invalidate all executors that depend on `obj`
1816- * May cause other executors to be invalidated as well
1826+ * May cause other executors to be invalidated as well.
1827+ * Uses contiguous bloom filter array for cache-friendly scanning.
18171828 */
18181829void
18191830_Py_Executors_InvalidateDependency (PyInterpreterState * interp , void * obj , int is_invalidation )
18201831{
18211832 _PyBloomFilter obj_filter ;
18221833 _Py_BloomFilter_Init (& obj_filter );
18231834 _Py_BloomFilter_Add (& obj_filter , obj );
1824- /* Walk the list of executors */
1825- /* TO DO -- Use a tree to avoid traversing as many objects */
1835+ /* Scan contiguous bloom filter array */
18261836 PyObject * invalidate = PyList_New (0 );
18271837 if (invalidate == NULL ) {
18281838 goto error ;
18291839 }
18301840 /* Clearing an executor can clear others, so we need to make a list of
18311841 * executors to invalidate first */
1832- for (_PyExecutorObject * exec = interp -> executor_list_head ; exec != NULL ;) {
1833- assert (exec -> vm_data .valid );
1834- _PyExecutorObject * next = exec -> vm_data .links .next ;
1835- if (bloom_filter_may_contain (& exec -> vm_data .bloom , & obj_filter ) &&
1836- PyList_Append (invalidate , (PyObject * )exec ))
1842+ for (size_t i = 0 ; i < interp -> executor_count ; i ++ ) {
1843+ assert (interp -> executor_ptrs [i ]-> vm_data .valid );
1844+ if (bloom_filter_may_contain (& interp -> executor_blooms [i ], & obj_filter ) &&
1845+ PyList_Append (invalidate , (PyObject * )interp -> executor_ptrs [i ]))
18371846 {
18381847 goto error ;
18391848 }
1840- exec = next ;
18411849 }
18421850 for (Py_ssize_t i = 0 ; i < PyList_GET_SIZE (invalidate ); i ++ ) {
18431851 PyObject * exec = PyList_GET_ITEM (invalidate , i );
@@ -1859,8 +1867,9 @@ _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is
18591867void
18601868_Py_Executors_InvalidateAll (PyInterpreterState * interp , int is_invalidation )
18611869{
1862- while (interp -> executor_list_head ) {
1863- _PyExecutorObject * executor = interp -> executor_list_head ;
1870+ while (interp -> executor_count > 0 ) {
1871+ /* Invalidate from the end to avoid repeated swap-remove shifts */
1872+ _PyExecutorObject * executor = interp -> executor_ptrs [interp -> executor_count - 1 ];
18641873 assert (executor -> vm_data .valid );
18651874 if (executor -> vm_data .code ) {
18661875 // Clear the entire code object so its co_executors array be freed:
@@ -1878,27 +1887,24 @@ _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation)
18781887void
18791888_Py_Executors_InvalidateCold (PyInterpreterState * interp )
18801889{
1881- /* Walk the list of executors */
1882- /* TO DO -- Use a tree to avoid traversing as many objects */
1890+ /* Scan contiguous executor array */
18831891 PyObject * invalidate = PyList_New (0 );
18841892 if (invalidate == NULL ) {
18851893 goto error ;
18861894 }
18871895
18881896 /* Clearing an executor can deallocate others, so we need to make a list of
18891897 * executors to invalidate first */
1890- for (_PyExecutorObject * exec = interp -> executor_list_head ; exec != NULL ;) {
1898+ for (size_t i = 0 ; i < interp -> executor_count ; i ++ ) {
1899+ _PyExecutorObject * exec = interp -> executor_ptrs [i ];
18911900 assert (exec -> vm_data .valid );
1892- _PyExecutorObject * next = exec -> vm_data .links .next ;
18931901
18941902 if (exec -> vm_data .cold && PyList_Append (invalidate , (PyObject * )exec ) < 0 ) {
18951903 goto error ;
18961904 }
18971905 else {
18981906 exec -> vm_data .cold = true;
18991907 }
1900-
1901- exec = next ;
19021908 }
19031909 for (Py_ssize_t i = 0 ; i < PyList_GET_SIZE (invalidate ); i ++ ) {
19041910 PyObject * exec = PyList_GET_ITEM (invalidate , i );
@@ -2142,9 +2148,8 @@ _PyDumpExecutors(FILE *out)
21422148 fprintf (out , " rankdir = \"LR\"\n\n" );
21432149 fprintf (out , " node [colorscheme=greys9]\n" );
21442150 PyInterpreterState * interp = PyInterpreterState_Get ();
2145- for (_PyExecutorObject * exec = interp -> executor_list_head ; exec != NULL ;) {
2146- executor_to_gv (exec , out );
2147- exec = exec -> vm_data .links .next ;
2151+ for (size_t i = 0 ; i < interp -> executor_count ; i ++ ) {
2152+ executor_to_gv (interp -> executor_ptrs [i ], out );
21482153 }
21492154 fprintf (out , "}\n\n" );
21502155 return 0 ;
0 commit comments