Skip to content

Commit 5ec4779

Browse files
gh-135573: Make pickled lists, sets and dicts a tiny bit smaller
Ensure that APPENDS and SETITEMS are never used for a batch of size 1, and that ADDITEMS and SETITEMS are never used for a batch of size 0. This harmonizes the C implementation with the Python implementation, which already guarantees this, and makes the pickle a tiny bit smaller in rare cases (about 0.1% of container sizes). It saves 1 byte for lists and dicts of size 1001, 2001, ... (the lone trailing item is written with APPEND/SETITEM instead of MARK plus APPENDS/SETITEMS), and 2 bytes for sets and dicts of size 1000, 2000, ... (no empty trailing MARK plus ADDITEMS/SETITEMS batch is written).
1 parent c5cfcdf commit 5ec4779

File tree

1 file changed

+39
-40
lines changed

1 file changed

+39
-40
lines changed

Modules/_pickle.c

Lines changed: 39 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -3034,11 +3034,6 @@ batch_list(PickleState *state, PicklerObject *self, PyObject *iter, PyObject *or
30343034

30353035
assert(iter != NULL);
30363036

3037-
/* XXX: I think this function could be made faster by avoiding the
3038-
iterator interface and fetching objects directly from list using
3039-
PyList_GET_ITEM.
3040-
*/
3041-
30423037
if (self->proto == 0) {
30433038
/* APPENDS isn't available; do one at a time. */
30443039
for (;; total++) {
@@ -3160,24 +3155,24 @@ batch_list_exact(PickleState *state, PicklerObject *self, PyObject *obj)
31603155
assert(obj != NULL);
31613156
assert(self->proto > 0);
31623157
assert(PyList_CheckExact(obj));
3163-
3164-
if (PyList_GET_SIZE(obj) == 1) {
3165-
item = PyList_GET_ITEM(obj, 0);
3166-
Py_INCREF(item);
3167-
int err = save(state, self, item, 0);
3168-
Py_DECREF(item);
3169-
if (err < 0) {
3170-
_PyErr_FormatNote("when serializing %T item 0", obj);
3171-
return -1;
3172-
}
3173-
if (_Pickler_Write(self, &append_op, 1) < 0)
3174-
return -1;
3175-
return 0;
3176-
}
3158+
assert(PyList_GET_SIZE(obj));
31773159

31783160
/* Write in batches of BATCHSIZE. */
31793161
total = 0;
31803162
do {
3163+
if (PyList_GET_SIZE(obj) - total == 1) {
3164+
item = PyList_GET_ITEM(obj, total);
3165+
Py_INCREF(item);
3166+
int err = save(state, self, item, 0);
3167+
Py_DECREF(item);
3168+
if (err < 0) {
3169+
_PyErr_FormatNote("when serializing %T item %zd", obj, total);
3170+
return -1;
3171+
}
3172+
if (_Pickler_Write(self, &append_op, 1) < 0)
3173+
return -1;
3174+
return 0;
3175+
}
31813176
this_batch = 0;
31823177
if (_Pickler_Write(self, &mark_op, 1) < 0)
31833178
return -1;
@@ -3438,28 +3433,29 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
34383433
assert(self->proto > 0);
34393434

34403435
dict_size = PyDict_GET_SIZE(obj);
3441-
3442-
/* Special-case len(d) == 1 to save space. */
3443-
if (dict_size == 1) {
3444-
PyDict_Next(obj, &ppos, &key, &value);
3445-
Py_INCREF(key);
3446-
Py_INCREF(value);
3447-
if (save(state, self, key, 0) < 0) {
3448-
goto error;
3449-
}
3450-
if (save(state, self, value, 0) < 0) {
3451-
_PyErr_FormatNote("when serializing %T item %R", obj, key);
3452-
goto error;
3453-
}
3454-
Py_CLEAR(key);
3455-
Py_CLEAR(value);
3456-
if (_Pickler_Write(self, &setitem_op, 1) < 0)
3457-
return -1;
3458-
return 0;
3459-
}
3436+
assert(dict_size);
34603437

34613438
/* Write in batches of BATCHSIZE. */
3439+
Py_ssize_t total = 0;
34623440
do {
3441+
if (dict_size - total == 1) {
3442+
PyDict_Next(obj, &ppos, &key, &value);
3443+
Py_INCREF(key);
3444+
Py_INCREF(value);
3445+
if (save(state, self, key, 0) < 0) {
3446+
goto error;
3447+
}
3448+
if (save(state, self, value, 0) < 0) {
3449+
_PyErr_FormatNote("when serializing %T item %R", obj, key);
3450+
goto error;
3451+
}
3452+
Py_CLEAR(key);
3453+
Py_CLEAR(value);
3454+
if (_Pickler_Write(self, &setitem_op, 1) < 0)
3455+
return -1;
3456+
return 0;
3457+
}
3458+
34633459
i = 0;
34643460
if (_Pickler_Write(self, &mark_op, 1) < 0)
34653461
return -1;
@@ -3475,6 +3471,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
34753471
}
34763472
Py_CLEAR(key);
34773473
Py_CLEAR(value);
3474+
total++;
34783475
if (++i == BATCHSIZE)
34793476
break;
34803477
}
@@ -3487,7 +3484,7 @@ batch_dict_exact(PickleState *state, PicklerObject *self, PyObject *obj)
34873484
return -1;
34883485
}
34893486

3490-
} while (i == BATCHSIZE);
3487+
} while (total < dict_size);
34913488
return 0;
34923489
error:
34933490
Py_XDECREF(key);
@@ -3605,6 +3602,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj)
36053602
return 0; /* nothing to do */
36063603

36073604
/* Write in batches of BATCHSIZE. */
3605+
Py_ssize_t total = 0;
36083606
do {
36093607
i = 0;
36103608
if (_Pickler_Write(self, &mark_op, 1) < 0)
@@ -3619,6 +3617,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj)
36193617
_PyErr_FormatNote("when serializing %T element", obj);
36203618
break;
36213619
}
3620+
total++;
36223621
if (++i == BATCHSIZE)
36233622
break;
36243623
}
@@ -3634,7 +3633,7 @@ save_set(PickleState *state, PicklerObject *self, PyObject *obj)
36343633
"set changed size during iteration");
36353634
return -1;
36363635
}
3637-
} while (i == BATCHSIZE);
3636+
} while (total < set_size);
36383637

36393638
return 0;
36403639
}

0 commit comments

Comments
 (0)