1212import mmap
1313from itertools import islice , izip
1414
15+ from copy import copy
16+
# INVARIANTS
# Type ids of the two delta flavours handled in this module.
# NOTE(review): presumably these mirror the object type ids of the git pack
# format - confirm against the pack reader before changing them.
OFS_DELTA = 6
REF_DELTA = 7
def _set_delta_rbound(d, size):
    """Truncate the given delta to the given size

    :param d: delta chunk to truncate in place; its ``ts`` attribute is the
        size that gets reduced
    :param size: size relative to our target offset, may not be 0, must be smaller or equal
        to our size
    :raise ValueError: if size is 0 or larger than the current size of d
    :return: d"""
    if size == 0:
        raise ValueError("size to truncate to must not be 0")
    if d.ts == size:
        # nothing to truncate, but callers are still promised d
        return d
    if size > d.ts:
        raise ValueError("Cannot extend rbound ")

    d.ts = size

    # NOTE: plain add-data is not sliced here, it is truncated automatically
    # when applying the delta. A chunklist we copy from has no such safety net
    # and needs to be cut to the new size explicitly.
    if d.has_copy_chunklist():
        d.data.set_rbound(size)
    # END truncate chunklist

    return d
6675
def _move_delta_lbound(d, bytes):
    """Move the delta by the given amount of bytes, reducing its size so that its
    right bound stays static

    :param d: delta chunk to adjust in place; its target offset ``to`` and source
        offset ``so`` grow by bytes while its size ``ts`` shrinks by bytes
    :param bytes: amount of bytes to move, must be smaller than delta size
    :raise ValueError: if bytes is equal to or larger than the size of d
    :return: d"""
    if bytes == 0:
        # nothing to move, but callers are still promised d
        return d
    if bytes >= d.ts:
        raise ValueError("Cannot move offset that much")

    d.to += bytes
    d.so += bytes
    d.ts -= bytes
    if d.data is not None:
        if isinstance(d.data, DeltaChunkList):
            # data is copied from another chunklist - let it drop its own head
            d.data.move_lbound(bytes)
        else:
            # plain add-data: drop the bytes which are now left of our bound
            d.data = d.data[bytes:]
        # END handle data type
    # END handle data

    return d
8198
8299class DeltaChunk (object ):
83100 """Represents a piece of a delta, it can either add new data, or copy existing
@@ -106,16 +123,22 @@ def rbound(self):
106123
def has_data(self):
    """:return: True if the instance has data to add to the target stream.
        This is the case if data is set and is not a chunklist to copy from"""
    if self.data is None:
        # pure copy operation from the base buffer
        return False
    return not isinstance(self.data, DeltaChunkList)
110127
111- def apply (self , source , write ):
def has_copy_chunklist(self):
    """:return: True if we copy our data from a chunklist"""
    return self.data is not None and isinstance(self.data, DeltaChunkList)
131+
132+ def apply (self , bbuf , write ):
112133 """Apply own data to the target buffer
113- :param source : buffer providing source bytes for copy operations
134+ :param bbuf : buffer providing source bytes for copy operations
114135 :param write: write method to call with data to write"""
115- if self .has_data () :
136+ if self .data is None :
116137 # COPY DATA FROM SOURCE
117- assert len (source ) - self .so - self .ts > 0
118- write (buffer (source , self .so , self .ts ))
138+ assert len (bbuf ) - self .so - self .ts > 0
139+ write (buffer (bbuf , self .so , self .ts ))
140+ elif isinstance (self .data , DeltaChunkList ):
141+ self .data .apply (bbuf , write )
119142 else :
120143 # APPEND DATA
121144 # whats faster: if + 4 function calls or just a write with a slice ?
@@ -153,6 +176,8 @@ def _split_delta(dcl, d, di, relofs, insert_offset=0):
153176 :note: belongs to DeltaChunkList"""
154177 if relofs > d .ts :
155178 raise ValueError ("Cannot split behinds a chunks rbound" )
179+ if relofs < 1 :
180+ raise ValueError ("Cannot split delta with %i" % relofs )
156181
157182 osize = d .ts - relofs
158183 _set_delta_rbound (d , relofs )
@@ -295,31 +320,77 @@ def init(self, size):
295320
296321 return self
297322
def set_rbound(self, relofs):
    """Chops the list at the given relative offset, splitting and removing DeltaNodes
    as required

    :param relofs: offset relative to the start of the chain, must not be 0 and
        must not reach beyond the current right bound
    :raise AssertionError: if the list is empty
    :raise ValueError: if relofs is 0 or would extend the list
    :return: self"""
    if len(self) == 0:
        raise AssertionError("Cannot change bound of empty list")
    if relofs == 0:
        raise ValueError("Size to truncate to must not be 0")
    absofs = self.lbound() + relofs
    if absofs > self.rbound():
        raise ValueError("Cannot extend chunk list")

    di = _closest_index(self, absofs)
    d = self[di]
    rsize = absofs - d.to
    if rsize:
        _set_delta_rbound(d, rsize)
    # END truncate last node if possible

    # keep the node at di only if it was truncated, i.e. it still contributes
    # bytes left of the new bound - otherwise it is dropped with the rest
    del self[di + (rsize != 0):]

    ## DEBUG ##
    self.check_integrity(absofs)

    return self
314347
def move_lbound(self, bytes):
    """Offset the left bound of the list by the given amount of bytes.
    This effectively truncates the list

    :param bytes: amount of bytes to drop from the start of the list, must be
        smaller than the amount of bytes the list spans
    :raise AssertionError: if the list is empty
    :raise ValueError: if bytes would move the left bound onto or past the right bound
    :return: self"""
    if len(self) == 0:
        raise AssertionError("Cannot change bound of empty list")
    if bytes == 0:
        # nothing to move, but callers are still promised self
        return self
    abslbound = self.lbound() + bytes
    if abslbound >= self.rbound():
        raise ValueError("Cannot move lbound that much")

    dsi = _closest_index(self, abslbound)
    d = self[dsi]
    _move_delta_lbound(d, abslbound - d.to)

    if dsi:
        del self[:dsi]
    # END remove all skipped nodes

    return self
369+
def rbound(self):
    """:return: rightmost extend in bytes, absolute. It is the right bound of
        the last chunk, or 0 if the list is empty"""
    if len(self) == 0:
        return 0
    return self[-1].rbound()
375+
def lbound(self):
    """:return: leftmost byte at which this chunklist starts. It is the target
        offset of the first chunk, or 0 if the list is empty"""
    if len(self) == 0:
        return 0
    return self[0].to
381+
def connect_with(self, bdlc):
    """Connect this instance's delta chunks virtually with the given base.
    This means that all copy deltas will simply apply to the given region
    of the given base.

    Every chunk which does not carry data to add gets its data replaced by
    the section of the base it copies from.

    :note: truncating or removing add-deltas of the base which are not used
        is not done here
    :param bdlc: DeltaChunkList to serve as base"""
    for dc in self:
        if not dc.has_data():
            # A copy chunk reads ts bytes starting at its source offset so,
            # hence this is the region of the base we need.
            # __getslice__ is called explicitly as it takes (offset, size):
            # a subscript with a tuple would end up in __getitem__, and slice
            # syntax would suggest (start, stop) semantics.
            dc.data = bdlc.__getslice__(dc.so, dc.ts)
        # END handle overlap
    # END for each dc
323394
def apply(self, bbuf, write):
    """Apply the chain's changes and write the final result using the passed
    write function.

    :param bbuf: base buffer handed on to every chunk; a chunk will only use it
        if it does not have a base chain
    :param write: function taking a string of bytes to write to the output"""
    # look the method up once instead of once per chunk
    dapply = DeltaChunk.apply
    for dc in self:
        dapply(dc, bbuf, write)
    # END for each dc
332406
333407 def check_integrity (self , target_size = - 1 ):
334408 """Verify the list has non-overlapping chunks only, and the total size matches
@@ -345,6 +419,7 @@ def check_integrity(self, target_size=-1):
345419
346420 # check data
347421 for dc in self :
422+ assert dc .ts > 0
348423 if dc .data :
349424 assert len (dc .data ) >= dc .ts
350425 # END for each dc
@@ -359,6 +434,77 @@ def check_integrity(self, target_size=-1):
359434 assert lft .to + lft .ts == rgt .to
360435 # END for each pair
361436
def __getslice__(self, absofs, size):
    """Build a new list covering a section of this one.

    :note: the arguments are an offset and a size, not the start and stop
        indices the slice protocol usually carries
    :note: chunks are copied with copy(), which is shallow.
        NOTE(review): a chunk whose data is a chunklist appears to share that
        chunklist with the copy, so adjusting the copy's bounds may alter the
        original - confirm whether a deeper copy is required.
    :param absofs: absolute offset at which the section starts
    :param size: size of the section in bytes; sizes smaller than 1 yield an
        empty list
    :return: DeltaChunkList (copy) which represents the given chunk. It is
        empty if the section does not overlap with this list"""
    ndcl = self.__class__()
    if size < 1:
        return ndcl
    # END handle empty section

    rbound = absofs + size
    slen = len(self)
    cdi = _closest_index(self, absofs)     # delta start index

    while cdi < slen:
        cd = self[cdi]
        cdi += 1
        cd_rbound = cd.rbound()

        if cd.to >= rbound or cd_rbound <= absofs:
            # xx.|---| or |---|xx
            # we don't overlap it, hence we are done or out of bounds
            break
        # END handle missing overlap

        cd = copy(cd)
        if absofs > cd.to:
            # |-xx|
            # section starts within the chunk, drop the chunk's head
            _move_delta_lbound(cd, absofs - cd.to)
        # END handle lbound

        if rbound < cd_rbound:
            # |x--| or |-x-|
            # section ends within the chunk: keep what is left of the
            # section's right bound - no later chunk can contribute
            _set_delta_rbound(cd, rbound - cd.to)
            ndcl.append(cd)
            break
        # END handle rbound

        # |xxx|x
        # chunk reaches up to the section's right bound or further
        ndcl.append(cd)
    # END for each chunk
    return ndcl
505+
506+
507+
362508#} END structures
363509
364510#{ Routines
@@ -559,16 +705,16 @@ def merge_deltas(dstreams):
559705 # END while processing delta data
560706
561707 # merge the lists !
562- if base is not None :
563- dcl .connect_with (base )
708+ if bdcl is not None :
709+ dcl .connect_with (bdcl )
564710 # END handle merge
565711
566712 # prepare next base
567- base = dcl
713+ bdcl = dcl
568714 dcl = DeltaChunkList ()
569715 # END for each delta stream
570716
571- # print dcl
717+ return base
572718
573719
574720def apply_delta_data (src_buf , src_buf_size , delta_buf , delta_buf_size , write ):
0 commit comments