Skip to content

Commit a63ee1d

Browse files
committed
Currently there is a weird memory bug: valgrind says it is writing one byte too many. Perhaps it's because of the use of PyMem.
1 parent 60b4f37 commit a63ee1d

File tree

1 file changed

+82
-33
lines changed

1 file changed

+82
-33
lines changed

_fun.c

Lines changed: 82 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@ typedef uchar bool;
9494
// Constants
9595
const ull gDVC_grow_by = 50;
9696

97+
#ifdef DEBUG
98+
#define DBG_check(vec) DCV_dbg_check_integrity(vec)
99+
#else
100+
#define DBG_check(vec)
101+
#endif
102+
97103
// DELTA CHUNK
98104
////////////////
99105
// Internal Delta Chunk Objects
@@ -154,34 +160,36 @@ void DC_set_data(DeltaChunk* dc, const uchar* data, Py_ssize_t dlen, bool shared
154160
}
155161

156162
inline
157-
ull DC_rbound(DeltaChunk* dc)
163+
ull DC_rbound(const DeltaChunk* dc)
158164
{
159165
return dc->to + dc->ts;
160166
}
161167

162168
// Copy all data from src to dest, the data pointer will be copied too
163169
inline
164-
void DC_copy_to(DeltaChunk* src, DeltaChunk* dest)
170+
void DC_copy_to(const DeltaChunk* src, DeltaChunk* dest)
165171
{
166172
dest->to = src->to;
167173
dest->ts = src->ts;
168174
dest->so = src->so;
169175
dest->data_shared = 0;
176+
dest->data = NULL;
170177

171178
DC_set_data(dest, src->data, src->ts, 0);
172179
}
173180

174181
// Copy all data with the given offset and size. The source offset, as well
175182
// as the data will be truncated accordingly
176183
inline
177-
void DC_offset_copy_to(DeltaChunk* src, DeltaChunk* dest, ull ofs, ull size)
184+
void DC_offset_copy_to(const DeltaChunk* src, DeltaChunk* dest, ull ofs, ull size)
178185
{
179186
assert(size <= src->ts);
180187
assert(src->to + ofs + size <= DC_rbound(src));
181188

182189
dest->to = src->to + ofs;
183190
dest->ts = size;
184191
dest->so = src->so + ofs;
192+
dest->data = NULL;
185193

186194
if (src->data){
187195
DC_set_data(dest, src->data + ofs, size, 0);
@@ -260,51 +268,51 @@ int DCV_init(DeltaChunkVector* vec, ull initial_size)
260268
}
261269

262270
inline
263-
ull DCV_len(DeltaChunkVector* vec)
271+
ull DCV_len(const DeltaChunkVector* vec)
264272
{
265273
return vec->size;
266274
}
267275

268276
inline
269-
ull DCV_lbound(DeltaChunkVector* vec)
277+
ull DCV_lbound(const DeltaChunkVector* vec)
270278
{
271279
assert(vec->size && vec->mem);
272280
return vec->mem->to;
273281
}
274282

275283
// Return item at index
276284
inline
277-
DeltaChunk* DCV_get(DeltaChunkVector* vec, Py_ssize_t i)
285+
DeltaChunk* DCV_get(const DeltaChunkVector* vec, Py_ssize_t i)
278286
{
279287
assert(i < vec->size && vec->mem);
280288
return &vec->mem[i];
281289
}
282290

283291
// Return last item
284292
inline
285-
DeltaChunk* DCV_last(DeltaChunkVector* vec)
293+
DeltaChunk* DCV_last(const DeltaChunkVector* vec)
286294
{
287295
return DCV_get(vec, vec->size-1);
288296
}
289297

290298
inline
291-
ull DCV_rbound(DeltaChunkVector* vec)
299+
ull DCV_rbound(const DeltaChunkVector* vec)
292300
{
293301
return DC_rbound(DCV_last(vec));
294302
}
295303

296304
inline
297-
int DCV_empty(DeltaChunkVector* vec)
305+
int DCV_empty(const DeltaChunkVector* vec)
298306
{
299307
return vec->size == 0;
300308
}
301309

302310
// Return end pointer of the vector
303311
inline
304-
DeltaChunk* DCV_end(DeltaChunkVector* vec)
312+
const DeltaChunk* DCV_end(const DeltaChunkVector* vec)
305313
{
306314
assert(!DCV_empty(vec));
307-
return &vec->mem[vec->size];
315+
return vec->mem + vec->size;
308316
}
309317

310318
void DCV_destroy(DeltaChunkVector* vec)
@@ -345,7 +353,7 @@ void DCV_reset(DeltaChunkVector* vec)
345353
return;
346354

347355
DeltaChunk* dc = vec->mem;
348-
DeltaChunk* dcend = DCV_end(vec);
356+
const DeltaChunk* dcend = DCV_end(vec);
349357
for(;dc < dcend; dc++){
350358
DC_destroy(dc);
351359
}
@@ -366,11 +374,9 @@ DeltaChunk* DCV_append_multiple(DeltaChunkVector* vec, uint num_chunks)
366374
Py_ssize_t old_size = vec->size;
367375
vec->size += num_chunks;
368376

369-
#ifdef DEBUG
370377
for(;old_size < vec->size; ++old_size){
371378
DC_init(DCV_get(vec, old_size), 0, 0, 0);
372379
}
373-
#endif
374380

375381
return &vec->mem[old_size];
376382
}
@@ -391,7 +397,7 @@ DeltaChunk* DCV_append(DeltaChunkVector* vec)
391397

392398
// Return the delta chunk closest to the given absolute offset
393399
inline
394-
DeltaChunk* DCV_closest_chunk(DeltaChunkVector* vec, ull ofs)
400+
DeltaChunk* DCV_closest_chunk(const DeltaChunkVector* vec, ull ofs)
395401
{
396402
assert(vec->mem);
397403

@@ -416,16 +422,43 @@ DeltaChunk* DCV_closest_chunk(DeltaChunkVector* vec, ull ofs)
416422
return DCV_last(vec);
417423
}
418424

425+
// Assert the given vector has correct datachunks
426+
void DCV_dbg_check_integrity(const DeltaChunkVector* vec)
427+
{
428+
assert(!DCV_empty(vec));
429+
const DeltaChunk* i = vec->mem;
430+
const DeltaChunk* end = DCV_end(vec);
431+
432+
ull aparent_size = DCV_rbound(vec) - DCV_lbound(vec);
433+
ull acc_size = 0;
434+
for(; i < end; i++){
435+
acc_size += i->ts;
436+
}
437+
assert(acc_size == aparent_size);
438+
439+
if (vec->size < 2){
440+
return;
441+
}
442+
443+
const DeltaChunk* endm1 = DCV_end(vec) - 1;
444+
for(i = vec->mem; i < endm1; i++){
445+
const DeltaChunk* n = i+1;
446+
assert(DC_rbound(i) == n->to);
447+
}
448+
449+
}
450+
419451
// Write a slice as defined by its absolute offset in bytes and its size into the given
420452
// destination. The individual chunks written will be a deep copy of the source
421453
// data chunks
422454
// TODO: this could trigger copying many smallish add-chunk pieces - maybe some sort
423455
// of append-only memory pool would improve performance
424456
inline
425-
void DCV_copy_slice_to(DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, ull size)
457+
void DCV_copy_slice_to(const DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, ull size)
426458
{
459+
//fprintf(stderr, "Copy Slice To: src->size = %i, ofs = %i, size=%i\n", (int)src->size, (int)ofs, (int)size);
427460
assert(DCV_lbound(src) <= ofs);
428-
assert(DCV_rbound(src) <= ofs + size);
461+
assert((ofs + size) <= DCV_rbound(src));
429462

430463
DeltaChunk* cdc = DCV_closest_chunk(src, ofs);
431464

@@ -442,7 +475,7 @@ void DCV_copy_slice_to(DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, u
442475
}
443476
}
444477

445-
DeltaChunk* vecend = DCV_end(src);
478+
const DeltaChunk* vecend = DCV_end(src);
446479
for( ;(cdc < vecend) && size; ++cdc)
447480
{
448481
if (cdc->ts < size) {
@@ -464,18 +497,22 @@ void DCV_copy_slice_to(DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, u
464497
// 'at' will be replaced by the items to insert ( special purpose )
465498
// 'at' will be properly destroyed, but all items will just be copied bytewise
466499
// using memcpy. Hence 'from' must just forget about them!
500+
// IMPORTANT: 'to' must already have enough reserved memory to hold the inserted items
467501
inline
468-
void DCV_replace_one_by_many(DeltaChunkVector* from, DeltaChunkVector* to, DeltaChunk* at)
502+
void DCV_replace_one_by_many(const DeltaChunkVector* from, DeltaChunkVector* to, DeltaChunk* at)
469503
{
504+
fprintf(stderr, "Replace one by many: from->size = %i, to->size = %i, to->reserved = %i\n", (int)from->size, (int)to->size, (int)to->reserved_size);
470505
assert(from->size > 1);
506+
assert(to->size + from->size - 1 <= to->reserved_size);
471507

472-
DCV_reserve_memory(to, to->size + from->size - 1); // -1 because we replace at
508+
// -1 because we replace 'at'
473509
DC_destroy(at);
474-
to->size -= 1 + from->size;
510+
to->size += from->size - 1;
475511

476512
// If we are somewhere in the middle, we have to make some space
477513
if (DCV_last(to) != at) {
478-
memmove((void*)at+from->size, (void*)(at+1), (size_t)(DCV_end(to) - (at+1)));
514+
fprintf(stderr, "moving to %p from %p, num bytes = %i\n", at+from->size, at+1, (int)((DCV_end(to) - (at+1)) * sizeof(DeltaChunk)));
515+
memmove((void*)(at+from->size), (void*)(at+1), (size_t)(DCV_end(to) - (at+1)) * sizeof(DeltaChunk));
479516
}
480517

481518
// Finally copy all the items in
@@ -485,22 +522,27 @@ void DCV_replace_one_by_many(DeltaChunkVector* from, DeltaChunkVector* to, Delta
485522
// Take slices of bdcv into the corresponding area of the tdcv, which is the topmost
486523
// delta to apply. tmpl is used as temporary space and must be initialized and destroyed by the
487524
// caller
488-
void DCV_connect_with_base(DeltaChunkVector* tdcv, DeltaChunkVector* bdcv, DeltaChunkVector* tmpl)
525+
void DCV_connect_with_base(DeltaChunkVector* tdcv, const DeltaChunkVector* bdcv, DeltaChunkVector* tmpl)
489526
{
490-
DeltaChunk* dc = tdcv->mem;
491-
DeltaChunk* end = tdcv->mem + tdcv->size;
492-
assert(dc);
527+
Py_ssize_t dci = 0;
528+
Py_ssize_t iend = tdcv->size;
529+
DeltaChunk* dc;
493530

494-
for (;dc < end; dc++)
531+
DBG_check(tdcv);
532+
DBG_check(bdcv);
533+
534+
for (;dci < iend; dci++)
495535
{
496536
// Data chunks don't need processing
537+
dc = DCV_get(tdcv, dci);
497538
if (dc->data){
498539
continue;
499540
}
500541

501542
// Copy Chunk Handling
502543
DCV_copy_slice_to(bdcv, tmpl, dc->so, dc->ts);
503-
// assert(tmpl->size);
544+
DBG_check(tmpl);
545+
assert(tmpl->size);
504546

505547
// move target bounds
506548
DeltaChunk* tdc = tmpl->mem;
@@ -516,8 +558,15 @@ void DCV_connect_with_base(DeltaChunkVector* tdcv, DeltaChunkVector* bdcv, Delta
516558
DC_destroy(dc);
517559
*dc = *DCV_get(tmpl, 0);
518560
} else {
561+
DCV_reserve_memory(tdcv, tdcv->size + tmpl->size - 1 + gDVC_grow_by);
562+
dc = DCV_get(tdcv, dci);
519563
DCV_replace_one_by_many(tmpl, tdcv, dc);
564+
// Compensate for us being replaced
565+
dci += tmpl->size-1;
566+
iend += tmpl->size-1;
520567
}
568+
569+
DBG_check(tdcv);
521570

522571
// make sure the members will not be deallocated by the list
523572
DCV_forget_members(tmpl);
@@ -679,8 +728,8 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
679728
DCV_init(&tdcv, 0);
680729
DCV_init(&tmpl, 200);
681730

682-
unsigned int dsi;
683-
PyObject* ds;
731+
unsigned int dsi = 0;
732+
PyObject* ds = 0;
684733
int error = 0;
685734
for (ds = PyIter_Next(stream_iter), dsi = 0; ds != NULL; ++dsi, ds = PyIter_Next(stream_iter))
686735
{
@@ -706,7 +755,7 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
706755

707756
// parse command stream
708757
ull tbw = 0; // Amount of target bytes written
709-
bool shared_data = dsi != 0;
758+
bool is_shared_data = dsi != 0;
710759
bool is_first_run = dsi == 0;
711760

712761
assert(data < dend);
@@ -742,10 +791,10 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
742791
// What's faster
743792
DeltaChunk* dc = DCV_append(&dcv);
744793
DC_init(dc, tbw, cmd, 0);
745-
DC_set_data(dc, data, cmd, shared_data);
794+
DC_set_data(dc, data, cmd, is_shared_data);
746795
tbw += cmd;
747796
data += cmd;
748-
} else {
797+
} else {
749798
error = 1;
750799
PyErr_SetString(PyExc_RuntimeError, "Encountered an unsupported delta cmd: 0");
751800
goto loop_end;

0 commit comments

Comments
 (0)