Skip to content

Commit 5d18685

Browse files
committed
Disabled delta-aggregation, as it reduces the throughput to 540KiB/s, compared to 9.4MiB/s with the previous brute-force algorithm. Compression helps, but it would probably be more efficient if done right away, not as a post-process.
It might help to implement the reversed version of this algorithm, as initially intended, but currently the overhead lies in the actual application of the deltas.
1 parent bda5ef5 commit 5d18685

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

fun.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,6 @@ def _move_delta_lbound(d, bytes):
7676

7777
d.to += bytes
7878
d.so += bytes
79-
d.sob += bytes
8079
d.ts -= bytes
8180
if d.has_data():
8281
d.data = d.data[bytes:]
@@ -93,14 +92,12 @@ class DeltaChunk(object):
9392
'so', # start offset in the source buffer in bytes or None
9493
'data', # chunk of bytes to be added to the target buffer,
9594
# DeltaChunkList to use as base, or None
96-
'sob' # DEBUG: Backup
9795
)
9896

9997
def __init__(self, to, ts, so, data):
10098
self.to = to
10199
self.ts = ts
102100
self.so = so
103-
self.sob = so
104101
self.data = data
105102

106103
def __repr__(self):
@@ -135,7 +132,6 @@ def set_copy_chunklist(self, dcl):
135132
"""Set the deltachunk list to be used as basis for copying.
136133
:note: only works if this chunk is a copy delta chunk"""
137134
self.data = dcl
138-
self.sob = self.so
139135
self.so = 0 # allows lbound moves to be virtual
140136

141137
def apply(self, bbuf, write):

stream.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,10 @@ class DeltaApplyReader(LazyMixin):
311311
"_br" # number of bytes read
312312
)
313313

314+
#{ Configuration
315+
k_max_memory_move = 250*1000*1000
316+
#} END configuration
317+
314318
def __init__(self, stream_list):
315319
"""Initialize this instance with a list of streams, the first stream being
316320
the delta to apply on top of all following deltas, the last stream being the
@@ -325,8 +329,9 @@ def _set_cache_(self, attr):
325329
# the direct algorithm is fastest and most direct if there is only one
326330
# delta. Also, the extra overhead might not be worth it for items smaller
327331
# than X - definitely the case in python
328-
#print "num streams", len(self._dstreams)
329-
#if len(self._dstreams) == 1 or (len(self._dstreams) * self._dstreams.size) > 25*1000*1000:
332+
# hence we apply a worst-case scenario here
333+
# TODO: read the final size from the deltastream - have to partly unpack
334+
# if len(self._dstreams) * self._size < self.k_max_memory_move:
330335
if len(self._dstreams) == 1:
331336
return self._set_cache_brute_(attr)
332337

@@ -353,7 +358,7 @@ def _set_cache_(self, attr):
353358

354359
self._mm_target.seek(0)
355360

356-
def _set_cache_brute_(self, attr):
361+
def _set_cache_(self, attr):
357362
"""If we are here, we apply the actual deltas"""
358363

359364
buffer_info_list = list()

0 commit comments

Comments
 (0)