Skip to content

Commit f6bd67c

Browse files
committed
Reverse delta aggregation appears to be working
1 parent 2e19424 commit f6bd67c

File tree

2 files changed

+107
-50
lines changed

2 files changed

+107
-50
lines changed

fun.py

Lines changed: 106 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,46 @@ def delta_list_apply(dcl, bbuf, write, lbound_offset=0, size=0):
235235
# END for each dc
236236
# END handle application values
237237

238+
def delta_list_slice(dcl, absofs, size):
    """Copy out the part of a chunk list that covers a given byte window.

    :param dcl: chunk list to take the window from. Every chunk placed in the
        result is a duplicate made with ``delta_duplicate``; bound adjustments
        are applied to those duplicates only
    :param absofs: absolute offset at which the window starts; it is raised to
        ``dcl.lbound()`` if it lies below it
    :param size: window length in bytes; it is capped at
        ``dcl.rbound() - dcl.lbound()``
    :return: new list with the duplicated chunks. An empty ``dcl`` yields an empty
        ``DeltaChunkList``, otherwise the result is an instance of ``dcl``'s class"""
    if not len(dcl):
        return DeltaChunkList()

    # clamp the requested window to what the list can provide
    start = max(absofs, dcl.lbound())
    remaining = min(dcl.rbound() - dcl.lbound(), size)

    count = len(dcl)
    idx = _closest_index(dcl, start)    # index of the chunk the window starts in
    first = dcl[idx]
    result = dcl.__class__()

    # The window begins inside the first chunk: take a trimmed duplicate of it
    if first.to != start:
        head = delta_duplicate(first)
        _move_delta_lbound(head, start - first.to)
        _set_delta_rbound(head, min(head.ts, remaining))
        result.append(head)
        remaining -= head.ts
        idx += 1
    # END handle lower bound overlap

    while idx < count and remaining:
        source = dcl[idx]
        piece = delta_duplicate(source)
        if source.ts > remaining:
            # last chunk reaches past the window: cut it to the bytes still wanted
            _set_delta_rbound(piece, remaining)
            result.append(piece)
            remaining -= piece.ts
            break
        # END handle final, partially covered chunk

        # chunk fits entirely into what is left of the window
        result.append(piece)
        remaining -= source.ts
        idx += 1
    # END for each chunk

    return result
238278

239279
class DeltaChunkList(list):
240280
"""List with special functionality to deal with DeltaChunks.
@@ -270,7 +310,7 @@ def connect_with(self, bdcl):
270310
:param bdcl: DeltaChunkList to serve as base"""
271311
for dc in self:
272312
if not dc.has_data():
273-
dc.set_copy_chunklist(bdcl[dc.so:dc.ts])
313+
dc.set_copy_chunklist(delta_list_slice(bdcl, dc.so, dc.ts))
274314
# END handle overlap
275315
# END for each dc
276316

@@ -355,49 +395,7 @@ def check_integrity(self, target_size=-1):
355395
assert lft.to + lft.ts == rgt.to
356396
# END for each pair
357397

358-
def __getslice__(self, absofs, size):
359-
""":return: Subsection of this list at the given absolute offset, with the given
360-
size in bytes.
361-
:return: DeltaChunkList (copy) which represents the given chunk"""
362-
if len(self) == 0:
363-
return DeltaChunkList()
364-
365-
absofs = max(absofs, self.lbound())
366-
size = min(self.rbound() - self.lbound(), size)
367-
cdi = _closest_index(self, absofs) # delta start index
368-
cd = self[cdi]
369-
slen = len(self)
370-
ndcl = self.__class__()
371-
372-
if cd.to != absofs:
373-
tcd = delta_duplicate(cd)
374-
_move_delta_lbound(tcd, absofs - cd.to)
375-
_set_delta_rbound(tcd, min(tcd.ts, size))
376-
ndcl.append(tcd)
377-
size -= tcd.ts
378-
cdi += 1
379-
# END lbound overlap handling
380-
381-
while cdi < slen and size:
382-
# are we larger than the current block
383-
cd = self[cdi]
384-
if cd.ts <= size:
385-
ndcl.append(delta_duplicate(cd))
386-
size -= cd.ts
387-
else:
388-
tcd = delta_duplicate(cd)
389-
_set_delta_rbound(tcd, size)
390-
ndcl.append(tcd)
391-
size -= tcd.ts
392-
break
393-
# END hadle size
394-
cdi += 1
395-
# END for each chunk
396-
397-
# ndcl.check_integrity()
398-
return ndcl
399398

400-
401399
class TopdownDeltaChunkList(DeltaChunkList):
402400
"""Represents a list which is generated by feeding its ancestor streams one by
403401
one"""
@@ -416,15 +414,76 @@ def connect_with_next_base(self, bdcl):
416414
consecutively and in order, towards the earliest ancestor delta
417415
:return: True if processing was done. Use it to abort processing of
418416
remaining streams"""
417+
assert self is not bdcl
419418
if self.frozen == 1:
420419
# Can that ever be hit ?
421420
return False
422421
# END early abort
423-
# mark us so that the is_reversed method returns True, without us thinking
424-
# we are frozen
425-
self.frozen = -1
426422

427-
raise NotImplementedError("todo")
423+
nfc = 0 # number of frozen chunks
424+
dci = 0 # delta chunk index
425+
slen = len(self) # len of self
426+
sold = slen
427+
while dci < slen:
428+
dc = self[dci]
429+
dci += 1
430+
431+
if dc.flags:
432+
nfc += 1
433+
continue
434+
# END skip frozen chunks
435+
436+
# all data chunks must be frozen, we are topmost already
437+
# (Also if its a copy operation onto the lowest base, but we cannot
438+
# determine that without the number of deltas to come)
439+
if dc.has_data():
440+
dc.flags = True
441+
nfc += 1
442+
continue
443+
# END skip add chunks
444+
445+
# copy chunks
446+
# integrate the portion of the base list into ourselves. Lists
447+
# dont support efficient insertion ( just one at a time ), but for now
448+
# we live with it. Internally, its all just a 32/64bit pointer, and
449+
# the portions of moved memory should be smallish. Maybe we just rebuild
450+
# ourselves in order to reduce the amount of insertions ...
451+
ccl = delta_list_slice(bdcl, dc.so, dc.ts)
452+
453+
# move the target bounds into place to match with our chunk
454+
ofs = dc.to - dc.so
455+
for cdc in ccl:
456+
cdc.to += ofs
457+
# END update target bounds
458+
459+
460+
assert dc.to == ccl.lbound() and dc.rbound() == cdc.rbound()
461+
462+
if len(ccl) == 1:
463+
self[dci-1] = ccl[0]
464+
else:
465+
466+
# maybe try to compute the expenses here, and pick the right algorithm
467+
# It would normally be faster than copying everything physically though
468+
# TODO: Use a deque here, and decide by the index whether to extend
469+
# or extend left !
470+
post_dci = self[dci:]
471+
del(self[dci-1:]) # include deletion of dc
472+
self.extend(ccl)
473+
self.extend(post_dci)
474+
475+
slen = len(self)
476+
dci += len(ccl)-1 # deleted dc, added rest
477+
478+
# END handle chunk replacement
479+
480+
# END for each chunk
481+
482+
if nfc == slen:
483+
self.frozen = True
484+
return False
485+
# END handle completeness
486+
428487
return True
429488

430489

@@ -648,8 +707,6 @@ def connect_deltas(dstreams, reverse):
648707
dcl.connect_with(bdcl)
649708
# END handle merge
650709

651-
# dcl.check_integrity()
652-
653710
# prepare next base
654711
bdcl = dcl
655712
dcl = DeltaChunkList()

test/test_pack.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def test_pack(self):
130130
self._assert_pack_file(pack, version, size)
131131
# END for each pack to test
132132

133-
def _test_pack_entity(self):
133+
def test_pack_entity(self):
134134
for packinfo, indexinfo in ( (self.packfile_v2_1, self.packindexfile_v1),
135135
(self.packfile_v2_2, self.packindexfile_v2),
136136
(self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):

0 commit comments

Comments
 (0)