1313from itertools import islice , izip
1414
1515from copy import copy
16+ from cStringIO import StringIO
1617
1718# INVARIANTS
1819OFS_DELTA = 6
@@ -57,10 +58,6 @@ def _set_delta_rbound(d, size):
5758 :return: d"""
5859 if d .ts == size :
5960 return
60- if size == 0 :
61- raise ValueError ("size to truncate to must not be 0" )
62- if size > d .ts :
63- raise ValueError ("Cannot extend rbound" )
6461
6562 d .ts = size
6663
@@ -76,8 +73,6 @@ def _move_delta_lbound(d, bytes):
7673 :return: d"""
7774 if bytes == 0 :
7875 return
79- if bytes >= d .ts :
80- raise ValueError ("Cannot move offset that much" )
8176
8277 d .to += bytes
8378 d .so += bytes
@@ -139,7 +134,6 @@ def has_copy_chunklist(self):
139134 def set_copy_chunklist (self , dcl ):
140135 """Set the deltachunk list to be used as basis for copying.
141136 :note: only works if this chunk is a copy delta chunk"""
142- assert self .data is None , "Cannot assign chain to add delta chunk"
143137 self .data = dcl
144138 self .sob = self .so
145139 self .so = 0 # allows lbound moves to be virtual
@@ -150,13 +144,13 @@ def apply(self, bbuf, write):
150144 :param write: write method to call with data to write"""
151145 if self .data is None :
152146 # COPY DATA FROM SOURCE
153- assert len (bbuf ) - self .so - self .ts > - 1
154147 write (buffer (bbuf , self .so , self .ts ))
155148 elif isinstance (self .data , DeltaChunkList ):
156149 self .data .apply (bbuf , write , self .so , self .ts )
157150 else :
158151 # APPEND DATA
159152			# what's faster: if + 4 function calls or just a write with a slice ?
153+ # Considering data can be larger than 127 bytes now, it should be worth it
160154 if self .ts < len (self .data ):
161155 write (self .data [:self .ts ])
162156 else :
@@ -209,11 +203,54 @@ def connect_with(self, bdcl):
209203 :param bdcl: DeltaChunkList to serve as base"""
210204 for dc in self :
211205 if not dc .has_data ():
212- # dc.set_copy_chunklist(bdcl[dc.copy_offset():dc.ts])
213206 dc .set_copy_chunklist (bdcl [dc .so :dc .ts ])
214207 # END handle overlap
215208 # END for each dc
216209
210+ def compress (self ):
211+ """Alter the list to reduce the amount of nodes. Currently we concatenate
212+ add-chunks
213+ :return: self"""
214+ slen = len (self )
215+ if slen < 2 :
216+ return self
217+ i = 0
218+ slen_orig = slen
219+
220+ first_data_index = None
221+ while i < slen :
222+ dc = self [i ]
223+ i += 1
224+ if not dc .has_data ():
225+ if first_data_index is not None and i - 2 - first_data_index > 1 :
226+ #if first_data_index is not None:
227+ nd = StringIO () # new data
228+ so = self [first_data_index ].to # start offset in target buffer
229+ for x in xrange (first_data_index , i - 1 ):
230+ xdc = self [x ]
231+ nd .write (xdc .data [:xdc .ts ])
232+ # END collect data
233+
234+ del (self [first_data_index :i - 1 ])
235+ buf = nd .getvalue ()
236+ self .insert (first_data_index , DeltaChunk (so , len (buf ), 0 , buf ))
237+
238+ slen = len (self )
239+ i = first_data_index + 1
240+
241+ # END concatenate data
242+ first_data_index = None
243+ continue
244+ # END skip non-data chunks
245+
246+ if first_data_index is None :
247+ first_data_index = i - 1
248+ # END iterate list
249+
250+ #if slen_orig != len(self):
251+ # print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100)
252+ return self
253+
217254 def apply (self , bbuf , write , lbound_offset = 0 , size = 0 ):
218255 """Apply the chain's changes and write the final result using the passed
219256 write function.
@@ -232,12 +269,6 @@ def apply(self, bbuf, write, lbound_offset=0, size=0):
232269 if size == 0 :
233270 size = self .rbound () - absofs
234271 # END initialize size
235- if absofs + size > self .rbound ():
236- raise ValueError ("Cannot apply more bytes than there are in this chain" )
237- # END sanity check
238-
239- if size > self .rbound () - absofs :
240- raise ValueError ("Trying to apply more than there is available" )
241272
242273 dapply = DeltaChunk .apply
243274 if lbound_offset or absofs + size != self .rbound ():
@@ -347,7 +378,7 @@ def __getslice__(self, absofs, size):
347378 # END for each chunk
348379 assert size == 0 , "size was %i" % size
349380
350- ndcl .check_integrity ()
381+ # ndcl.check_integrity()
351382 return ndcl
352383
353384
@@ -540,7 +571,8 @@ def connect_deltas(dstreams):
540571 dcl .append (DeltaChunk (tbw , cp_size , cp_off , None ))
541572 tbw += cp_size
542573 elif c :
543- # TODO: Concatenate multiple deltachunks
574+ # NOTE: in C, the data chunks should probably be concatenated here.
575+ # In python, we do it as a post-process
544576 dcl .append (DeltaChunk (tbw , c , 0 , db [i :i + c ]))
545577 i += c
546578 tbw += c
@@ -549,12 +581,14 @@ def connect_deltas(dstreams):
549581 # END handle command byte
550582 # END while processing delta data
551583
584+ dcl .compress ()
585+
552586 # merge the lists !
553587 if bdcl is not None :
554588 dcl .connect_with (bdcl )
555589 # END handle merge
556590
557- dcl .check_integrity ()
591+ # dcl.check_integrity()
558592
559593 # prepare next base
560594 bdcl = dcl
0 commit comments