Skip to content

Commit 166e538

Browse files
committed
Enhanced memory handling within the delta-stream parsing method. Removed the base delta chunk vector, a remnant of the old (Python) days, which are long gone.
1 parent a93363c commit 166e538

File tree

2 files changed

+60
-32
lines changed

2 files changed

+60
-32
lines changed

_fun.c

Lines changed: 59 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -198,46 +198,62 @@ typedef struct {
198198
Py_ssize_t reserved_size; // Reserve in DeltaChunks
199199
} DeltaChunkVector;
200200

201-
/*
202-
Grow the delta chunk list by the given amount of bytes.
203-
This may trigger a realloc, but will do nothing if the reserved size is already
204-
large enough.
205-
Return 1 on success, 0 on failure
206-
*/
201+
202+
203+
// Reserve enough memory to hold the given amount of delta chunks
204+
// Return 1 on success
207205
inline
208-
int DCV_grow(DeltaChunkVector* vec, uint num_dc)
206+
int DCV_reserve_memory(DeltaChunkVector* vec, uint num_dc)
209207
{
210-
const uint grow_by_chunks = (vec->size + num_dc) - vec->reserved_size;
211-
if (grow_by_chunks <= 0){
208+
if (num_dc <= vec->reserved_size){
212209
return 1;
213210
}
214211

212+
#ifdef DEBUG
213+
bool was_null = vec->mem == NULL;
214+
#endif
215+
215216
if (vec->mem == NULL){
216-
vec->mem = PyMem_Malloc(grow_by_chunks * sizeof(DeltaChunk));
217+
vec->mem = PyMem_Malloc(num_dc * sizeof(DeltaChunk));
217218
} else {
218-
vec->mem = PyMem_Realloc(vec->mem, (vec->reserved_size + grow_by_chunks) * sizeof(DeltaChunk));
219+
vec->mem = PyMem_Realloc(vec->mem, num_dc * sizeof(DeltaChunk));
219220
}
220221

221222
if (vec->mem == NULL){
222223
Py_FatalError("Could not allocate memory for append operation");
223224
}
224225

225-
vec->reserved_size = vec->reserved_size + grow_by_chunks;
226+
vec->reserved_size = num_dc;
226227

227228
#ifdef DEBUG
228-
fprintf(stderr, "Allocated %i bytes at %p, to hold up to %i chunks\n", (int)((vec->reserved_size + grow_by_chunks) * sizeof(DeltaChunk)), vec->mem, (int)(vec->reserved_size + grow_by_chunks));
229+
const char* format = "Allocated %i bytes at %p, to hold up to %i chunks\n";
230+
if (!was_null)
231+
format = "Re-allocated %i bytes at %p, to hold up to %i chunks\n";
232+
fprintf(stderr, format, (int)(vec->reserved_size * sizeof(DeltaChunk)), vec->mem, (int)vec->reserved_size);
229233
#endif
230234

231235
return vec->mem != NULL;
232236
}
233237

238+
/*
239+
Grow the delta chunk list by the given number of chunks.
240+
This may trigger a realloc, but will do nothing if the reserved size is already
241+
large enough.
242+
Return 1 on success, 0 on failure
243+
*/
244+
inline
245+
int DCV_grow_by(DeltaChunkVector* vec, uint num_dc)
246+
{
247+
return DCV_reserve_memory(vec, vec->reserved_size + num_dc);
248+
}
249+
234250
int DCV_init(DeltaChunkVector* vec, ull initial_size)
235251
{
236252
vec->mem = NULL;
237253
vec->size = 0;
238254
vec->reserved_size = 0;
239255

240-
return DCV_grow(vec, initial_size);
256+
return DCV_grow_by(vec, initial_size);
241257
}
242258

243259
inline
@@ -309,14 +325,32 @@ void DCV_forget_members(DeltaChunkVector* vec)
309325
vec->size = 0;
310326
}
311327

328+
// Reset the vector so that its size will be zero, and its members will
329+
// have been deallocated properly.
330+
// It will keep its memory though, and hence can be filled again
331+
inline
332+
void DCV_reset(DeltaChunkVector* vec)
333+
{
334+
if (vec->size == 0)
335+
return;
336+
337+
DeltaChunk* dc = vec->mem;
338+
DeltaChunk* dcend = DCV_end(vec);
339+
for(;dc < dcend; dc++){
340+
DC_destroy(dc);
341+
}
342+
343+
vec->size = 0;
344+
}
345+
312346
// Append num-chunks to the end of the list, possibly reallocating existing ones
313347
// Return a pointer to the first of the added items. They are already null initialized
314348
// If num-chunks == 0, it returns the end pointer of the allocated memory
315349
static inline
316350
DeltaChunk* DCV_append_multiple(DeltaChunkVector* vec, uint num_chunks)
317351
{
318352
if (vec->size + num_chunks > vec->reserved_size){
319-
DCV_grow(vec, (vec->size + num_chunks) - vec->reserved_size);
353+
DCV_grow_by(vec, (vec->size + num_chunks) - vec->reserved_size);
320354
}
321355
Py_FatalError("Could not allocate memory for append operation");
322356
Py_ssize_t old_size = vec->size;
@@ -337,7 +371,7 @@ static inline
337371
DeltaChunk* DCV_append(DeltaChunkVector* vec)
338372
{
339373
if (vec->size + 1 > vec->reserved_size){
340-
DCV_grow(vec, 1);
374+
DCV_grow_by(vec, 1);
341375
}
342376

343377
DeltaChunk* next = vec->mem + vec->size;
@@ -546,12 +580,10 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
546580
stream_iter = dstreams;
547581
}
548582

549-
DeltaChunkVector bdcv;
550-
DeltaChunkVector tdcv;
551583
DeltaChunkVector dcv;
584+
DeltaChunkVector tdcv;
552585
DeltaChunkVector tmpl;
553-
DCV_init(&bdcv, 0);
554-
DCV_init(&dcv, 0);
586+
DCV_init(&dcv, 100); // should be enough to keep the average text file
555587
DCV_init(&tdcv, 0);
556588
DCV_init(&tmpl, 200);
557589

@@ -578,7 +610,7 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
578610

579611
// estimate number of ops - assume one third adds, half two byte (size+offset) copies
580612
const uint approx_num_cmds = (dlen / 3) + (((dlen / 3) * 2) / (2+2+1));
581-
DCV_grow(&dcv, approx_num_cmds);
613+
DCV_reserve_memory(&dcv, approx_num_cmds);
582614

583615
// parse command stream
584616
ull tbw = 0; // Amount of target bytes written
@@ -632,16 +664,15 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
632664
if (!is_first_run){
633665
DCV_connect_with_base(&tdcv, &dcv, &tmpl);
634666
}
635-
// swap the vector
636-
// Skip the first vector, as it is also used as top chunk vector
637-
if (bdcv.mem != tdcv.mem){
638-
DCV_destroy(&bdcv);
639-
}
640-
bdcv = dcv;
667+
641668
if (is_first_run){
642669
tdcv = dcv;
670+
// wipe out dcv without destroying the members, get its own memory
671+
DCV_init(&dcv, tdcv.size);
672+
} else {
673+
// destroy members, but keep memory
674+
DCV_reset(&dcv);
643675
}
644-
DCV_init(&dcv, 0);
645676

646677
loop_end:
647678
// perform cleanup
@@ -662,7 +693,6 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
662693
}
663694

664695
DCV_destroy(&tmpl);
665-
DCV_destroy(&bdcv);
666696
if (dsi > 1){
667697
// otherwise dcv equals tcl
668698
DCV_destroy(&dcv);

fun.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -545,7 +545,6 @@ def connect_deltas(dstreams):
545545
:param dstreams: iterable of delta stream objects, the delta to be applied last
546546
comes first, then all its ancestors in order
547547
:return: DeltaChunkList, containing all operations to apply"""
548-
bdcl = None # data chunk list for initial base
549548
tdcl = None # topmost dcl
550549

551550
dcl = tdcl = TopdownDeltaChunkList()
@@ -611,13 +610,12 @@ def connect_deltas(dstreams):
611610
dcl.compress()
612611

613612
# merge the lists !
614-
if bdcl is not None:
613+
if dsi > 0:
615614
if not tdcl.connect_with_next_base(dcl):
616615
break
617616
# END handle merge
618617

619618
# prepare next base
620-
bdcl = dcl
621619
dcl = DeltaChunkList()
622620
# END for each delta stream
623621

0 commit comments

Comments
 (0)