Skip to content

Commit fbf8f32

Browse files
committed
Filled in a first implementation of all missing methods. It's untested, and currently the lists are truncated physically, which would work, but it would be easier to just remember the changed bounds and apply them later with these bounds in mind
1 parent 4cf3eac commit fbf8f32

File tree

1 file changed

+168
-22
lines changed

1 file changed

+168
-22
lines changed

fun.py

Lines changed: 168 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
import mmap
1313
from itertools import islice, izip
1414

15+
from copy import copy
16+
1517
# INVARIANTS
1618
OFS_DELTA = 6
1719
REF_DELTA = 7
@@ -51,33 +53,48 @@
5153
def _set_delta_rbound(d, size):
5254
"""Truncate the given delta to the given size
5355
:param size: size relative to our target offset, may not be 0, must be smaller or equal
54-
to our size"""
56+
to our size
57+
:return: d"""
5558
if size == 0:
5659
raise ValueError("size to truncate to must not be 0")
5760
if d.ts == size:
5861
return
5962
if size > d.ts:
60-
raise ValueError("Cannot truncate delta 'larger'")
63+
raise ValueError("Cannot extend rbound")
6164

6265
d.ts = size
6366

6467
# NOTE: data is truncated automatically when applying the delta
6568
# MUST NOT DO THIS HERE, see _split_delta
69+
70+
if d.has_copy_chunklist():
71+
d.data.set_rbound(size)
72+
# END truncate chunklist
73+
74+
return d
6675

6776
def _move_delta_lbound(d, bytes):
6877
"""Move the delta by the given amount of bytes, reducing its size so that its
6978
right bound stays static
70-
:param bytes: amount of bytes to move, must be smaller than delta size"""
79+
:param bytes: amount of bytes to move, must be smaller than delta size
80+
:return: d"""
81+
if bytes == 0:
82+
return
7183
if bytes >= d.ts:
7284
raise ValueError("Cannot move offset that much")
7385

7486
d.to += bytes
7587
d.so += bytes
7688
d.ts -= bytes
77-
if d.data:
78-
d.data = d.data[bytes:]
89+
if d.data is not None:
90+
if isinstance(d.data, DeltaChunkList):
91+
d.data.move_lbound(bytes)
92+
else:
93+
d.data = d.data[bytes:]
94+
# END handle data type
7995
# END handle data
8096

97+
return d
8198

8299
class DeltaChunk(object):
83100
"""Represents a piece of a delta, it can either add new data, or copy existing
@@ -106,16 +123,22 @@ def rbound(self):
106123

107124
def has_data(self):
    """:return: True if the instance has data to add to the target stream,
        which is the case if ``data`` is set and is not a DeltaChunkList"""
    if self.data is None:
        return False
    return not isinstance(self.data, DeltaChunkList)
110127

111-
def apply(self, source, write):
128+
def has_copy_chunklist(self):
    """:return: True if we copy our data from a chunklist, which is the case
        if ``data`` is set and is a DeltaChunkList"""
    return self.data is not None and isinstance(self.data, DeltaChunkList)
131+
132+
def apply(self, bbuf, write):
112133
"""Apply own data to the target buffer
113-
:param source: buffer providing source bytes for copy operations
134+
:param bbuf: buffer providing source bytes for copy operations
114135
:param write: write method to call with data to write"""
115-
if self.has_data():
136+
if self.data is None:
116137
# COPY DATA FROM SOURCE
117-
assert len(source) - self.so - self.ts > 0
118-
write(buffer(source, self.so, self.ts))
138+
assert len(bbuf) - self.so - self.ts > 0
139+
write(buffer(bbuf, self.so, self.ts))
140+
elif isinstance(self.data, DeltaChunkList):
141+
self.data.apply(bbuf, write)
119142
else:
120143
# APPEND DATA
121144
# whats faster: if + 4 function calls or just a write with a slice ?
@@ -153,6 +176,8 @@ def _split_delta(dcl, d, di, relofs, insert_offset=0):
153176
:note: belongs to DeltaChunkList"""
154177
if relofs > d.ts:
155178
raise ValueError("Cannot split behinds a chunks rbound")
179+
if relofs < 1:
180+
raise ValueError("Cannot split delta with %i" % relofs)
156181

157182
osize = d.ts - relofs
158183
_set_delta_rbound(d, relofs)
@@ -295,31 +320,77 @@ def init(self, size):
295320

296321
return self
297322

298-
def set_rbound(self, relofs):
    """Chop the list at the given relative offset, truncating the chunk the
    new bound falls into and removing all chunks behind it

    :param relofs: offset relative to the start of the chain, must not be 0
    :return: self
    :raise AssertionError: if the list is empty
    :raise ValueError: if relofs is 0 or reaches beyond the current right bound"""
    if len(self) == 0:
        raise AssertionError("Cannot change bound of empty list")
    if relofs == 0:
        raise ValueError("Size to truncate to must not be 0")
    new_rbound = self.lbound() + relofs
    if new_rbound > self.rbound():
        raise ValueError("Cannot extend chunk list")

    last_index = _closest_index(self, new_rbound)
    last = self[last_index]
    keep = new_rbound - last.to
    if keep:
        # the bound falls into this chunk: shorten it and keep it
        _set_delta_rbound(last, keep)
        first_removed = last_index + 1
    else:
        # the bound is exactly at the chunk's start: drop it as well
        first_removed = last_index
    # END truncate last node if possible
    del self[first_removed:]

    ## DEBUG ##
    self.check_integrity(new_rbound)

    return self
314347

348+
def move_lbound(self, bytes):
    """Offset the left bound of the list by the given amount of bytes.
    This effectively truncates the list: the chunk the new bound falls into
    is shortened from the left, all chunks before it are removed

    :param bytes: amount of bytes to move the left bound by
    :return: self, also when there was nothing to move
    :raise AssertionError: if the list is empty
    :raise ValueError: if the new left bound would reach the right bound"""
    if len(self) == 0:
        raise AssertionError("Cannot change bound of empty list")
    if bytes == 0:
        # nothing to move, but still honour the documented return value
        return self
    abslbound = self.lbound() + bytes
    if abslbound >= self.rbound():
        raise ValueError("Cannot move lbound that much")

    dsi = _closest_index(self, abslbound)
    d = self[dsi]
    _move_delta_lbound(d, abslbound - d.to)

    if dsi:
        del self[:dsi]
    # END remove all skipped nodes

    return self
369+
370+
def rbound(self):
    """:return: rightmost extent in bytes, absolute, as reported by the last
        chunk's rbound(), or 0 if the list is empty"""
    if len(self) != 0:
        return self[-1].rbound()
    return 0
375+
376+
def lbound(self):
    """:return: leftmost byte at which this chunklist starts, which is the
        target offset ``to`` of the first chunk, or 0 if the list is empty"""
    if len(self) != 0:
        return self[0].to
    return 0
381+
315382
def connect_with(self, bdlc):
    """Connect this instance's delta chunks virtually with the given base.
    Every chunk which does not carry data of its own gets the matching
    region of the base, obtained as a chunk list slice, assigned as its data.

    :note: the base itself is not optimized or truncated by this method
    :param bdlc: DeltaChunkList to serve as base"""
    # NOTE(review): the region is requested at the chunk's target offset
    # (dc.to); DeltaChunk.apply reads copy data at dc.so - confirm which
    # offset addresses the base here
    for dc in self:
        if not dc.has_data():
            # call the slice method explicitly: it takes (absofs, size), not
            # the (start, stop) pair of the regular slicing syntax
            dc.data = bdlc.__getslice__(dc.to, dc.ts)
        # END handle overlap
    # END for each dc
323394

324395
def apply(self, bbuf, write):
325396
"""Apply the chain's changes and write the final result using the passed
@@ -328,7 +399,10 @@ def apply(self, bbuf, write):
328399
list. It will only be used if the chunk in question does not have a base
329400
chain.
330401
:param write: function taking a string of bytes to write to the output"""
331-
raise NotImplementedError("todo")
402+
dapply = DeltaChunk.apply
403+
for dc in self:
404+
dapply(dc, bbuf, write)
405+
# END for each dc
332406

333407
def check_integrity(self, target_size=-1):
334408
"""Verify the list has non-overlapping chunks only, and the total size matches
@@ -345,6 +419,7 @@ def check_integrity(self, target_size=-1):
345419

346420
# check data
347421
for dc in self:
422+
assert dc.ts > 0
348423
if dc.data:
349424
assert len(dc.data) >= dc.ts
350425
# END for each dc
@@ -359,6 +434,77 @@ def check_integrity(self, target_size=-1):
359434
assert lft.to + lft.ts == rgt.to
360435
# END for each pair
361436

437+
def __getslice__(self, absofs, size):
    """Build a new chunk list covering the given byte range of this list.
    Chunks are copied before they are adjusted, chunks crossing a boundary
    of the range are truncated to the part inside of it.

    :note: the arguments are (offset, size), not the (start, stop) pair of
        regular list slicing
    :param absofs: absolute offset at which the range starts
    :param size: size of the range in bytes
    :return: DeltaChunkList (copy) which represents the given chunk, it is
        empty if the range does not overlap any chunk"""
    # NOTE(review): copy() is shallow, so a chunk whose data is a chunk list
    # shares that list with its copy, and the bound helpers adjust such
    # lists in place - confirm this cannot alter the original chunk's data
    cdi = _closest_index(self, absofs)      # delta start index
    slen = len(self)
    ndcl = self.__class__()
    rbound = absofs + size

    while cdi < slen:
        cd = self[cdi]
        if absofs < cd.to:
            # the range started before this chunk
            if rbound >= cd.rbound():
                # xxx|xxx|x
                # cd is fully contained in the range
                ndcl.append(copy(cd))
            elif rbound > cd.to:
                # partially contained
                # xxx|x--|
                # keep only the bytes from the chunk's start up to rbound
                cd = copy(cd)
                _set_delta_rbound(cd, rbound - cd.to)
                ndcl.append(cd)
                break
            else:
                # xx.|---|
                # WE DON'T OVERLAP IT
                break
            # END rbound overlap handling
        else:
            if absofs >= cd.rbound():
                # happens if slice is out of bound
                #|---|xx
                break
            # END handle out of bounds

            if rbound >= cd.rbound():
                if absofs == cd.to:
                    #|xxx|x
                    # fully contained
                    ndcl.append(copy(cd))
                else:
                    # shift
                    #|-xx|
                    cd = copy(cd)
                    _move_delta_lbound(cd, absofs - cd.to)
                    ndcl.append(cd)
                # END handle offset special case
            elif absofs == cd.to:
                #|x--|
                # we truncate it to our size
                cd = copy(cd)
                _set_delta_rbound(cd, size)
                ndcl.append(cd)
                break
            else:
                #|-x-|
                # adjust both ends
                cd = copy(cd)
                _move_delta_lbound(cd, absofs - cd.to)
                _set_delta_rbound(cd, size)
                ndcl.append(cd)
                break
            # END handle rbound overlap
        # END handle overlap

        # the range reaches beyond this chunk, continue with the next one
        cdi += 1
    # END for each chunk
    return ndcl
505+
506+
507+
362508
#} END structures
363509

364510
#{ Routines
@@ -559,16 +705,16 @@ def merge_deltas(dstreams):
559705
# END while processing delta data
560706

561707
# merge the lists !
562-
if base is not None:
563-
dcl.connect_with(base)
708+
if bdcl is not None:
709+
dcl.connect_with(bdcl)
564710
# END handle merge
565711

566712
# prepare next base
567-
base = dcl
713+
bdcl = dcl
568714
dcl = DeltaChunkList()
569715
# END for each delta stream
570716

571-
# print dcl
717+
return base
572718

573719

574720
def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):

0 commit comments

Comments
 (0)