Skip to content

Commit fa03f74

Browse files
committed
Reverse delta application was a nice experiment, but it has one major flaw: currently it integrates chunks from its base into the topmost delta chunk list, which causes a very large number of memmove operations, and it gets worse the more deltas there are. The algorithm was supposed to reduce the amount of memory activity, but it fails at exactly this point, making it worse than before. It would probably be fastest to simply implement the previous Python algorithm, which swaps two buffers, in C.
1 parent 9c5672e commit fa03f74

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

_delta_apply.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,6 @@ int DCV_dbg_check_integrity(const DeltaChunkVector* vec)
398398
inline
399399
void DCV_copy_slice_to(const DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, ull size)
400400
{
401-
//fprintf(stderr, "Copy Slice To: src->size = %i, ofs = %i, size=%i\n", (int)src->size, (int)ofs, (int)size);
402401
assert(DCV_lbound(src) <= ofs);
403402
assert((ofs + size) <= DCV_rbound(src));
404403

@@ -443,7 +442,6 @@ void DCV_copy_slice_to(const DeltaChunkVector* src, DeltaChunkVector* dest, ull
443442
inline
444443
void DCV_replace_one_by_many(const DeltaChunkVector* from, DeltaChunkVector* to, DeltaChunk* at)
445444
{
446-
//fprintf(stderr, "Replace one by many: from->size = %i, to->size = %i, to->reserved = %i\n", (int)from->size, (int)to->size, (int)to->reserved_size);
447445
assert(from->size > 1);
448446
assert(to->size + from->size - 1 <= to->reserved_size);
449447

@@ -452,7 +450,6 @@ void DCV_replace_one_by_many(const DeltaChunkVector* from, DeltaChunkVector* to,
452450

453451
// If we are somewhere in the middle, we have to make some space
454452
if (DCV_last(to) != at) {
455-
//fprintf(stderr, "moving to %i from %i, num chunks = %i\n", (int)((at+from->size)-to->mem), (int)((at+1)-to->mem), (int)(DCV_end(to) - (at+1)));
456453
memmove((void*)(at+from->size), (void*)(at+1), (size_t)((DCV_end(to) - (at+1)) * sizeof(DeltaChunk)));
457454
}
458455

@@ -725,7 +722,8 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
725722
const ull target_size = msb_size(&data, dend);
726723

727724
// estimate number of ops - assume one third adds, half two byte (size+offset) copies
728-
const uint approx_num_cmds = (dlen / 3) + (((dlen / 3) * 2) / (2+2+1));
725+
// Assume good compression for the adds
726+
const uint approx_num_cmds = ((dlen / 3) / 10) + (((dlen / 3) * 2) / (2+2+1));
729727
DCV_reserve_memory(&dcv, approx_num_cmds);
730728

731729
// parse command stream
@@ -825,6 +823,11 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
825823
DCV_connect_with_base(&tdcv, &dcv, &tmpl);
826824
}
827825

826+
#ifdef DEBUG
827+
fprintf(stderr, "tdcv->size = %i, tdcv->reserved_size = %i\n", (int)tdcv.size, (int)tdcv.reserved_size);
828+
fprintf(stderr, "dcv->size = %i, dcv->reserved_size = %i\n", (int)dcv.size, (int)dcv.reserved_size);
829+
#endif
830+
828831
if (is_first_run){
829832
tdcv = dcv;
830833
// wipe out dcv without destroying the members, get its own memory

stream.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ def _set_cache_(self, attr):
337337
# Aggregate all deltas into one delta in reverse order. Hence we take
338338
# the last delta, and reverse-merge its ancestor delta, until we receive
339339
# the final delta data stream.
340+
# print "Handling %i delta streams, sizes: %s" % (len(self._dstreams), [ds.size for ds in self._dstreams])
340341
dcl = connect_deltas(self._dstreams)
341342

342343
# call len directly, as the (optional) c version doesn't implement the sequence

0 commit comments

Comments
 (0)