Skip to content

Commit 4d41a87

Browse files
committed
Initial version of the merge algorithm - the current implementation will lead to quite some fragmentation, but that can be improved on once it works
1 parent afefae6 commit 4d41a87

File tree

2 files changed

+103
-32
lines changed

2 files changed

+103
-32
lines changed

fun.py

Lines changed: 101 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@
4747

4848
#{ Structures
4949

50-
def _trunc_delta(d, size):
50+
def _set_delta_rbound(d, size):
5151
"""Truncate the given delta to the given size
5252
:param size: size relative to our target offset, may not be 0, must be smaller or equal
5353
to our size"""
@@ -63,14 +63,15 @@ def _trunc_delta(d, size):
6363
# NOTE: data is truncated automatically when applying the delta
6464
# MUST NOT DO THIS HERE, see _split_delta
6565

66-
def _move_delta_offset(d, bytes):
66+
def _move_delta_lbound(d, bytes):
6767
"""Move the delta by the given amount of bytes, reducing its size so that its
6868
right bound stays static
6969
:param bytes: amount of bytes to move, must be smaller than delta size"""
7070
if bytes >= d.ts:
7171
raise ValueError("Cannot move offset that much")
7272

7373
d.to += bytes
74+
d.so += bytes
7475
d.ts -= bytes
7576
if d.data:
7677
d.data = d.data[bytes:]
@@ -95,7 +96,7 @@ def __init__(self, to, ts, so, data):
9596

9697
#{ Interface
9798

98-
def abssize(self):
99+
def rbound(self):
99100
return self.to + self.ts
100101

101102
def apply(self, source, write):
@@ -129,39 +130,35 @@ def _closest_index(dcl, absofs):
129130
# END for each delta absofs
130131
raise AssertionError("Should never be here")
131132

132-
def _split_delta(dcl, d, di, relofs, insert_offset=0):
    """Split the delta at di into two deltas, adjusting their sizes, offsets and data
    accordingly and adding the new part to the dcl

    The chunk ``d`` keeps the left part (its first ``relofs`` bytes); a newly
    created chunk receives the remaining ``d.ts - relofs`` bytes and is inserted
    into ``dcl`` behind ``d``.

    :param dcl: list of delta chunks that contains d
    :param relofs: relative offset at which to split the delta
    :param d: delta chunk to split
    :param di: index of d in dcl
    :param insert_offset: offset for the new split id, added to ``di + 1`` to
        obtain the insertion index of the new chunk
    :return: newly created DeltaChunk
    :raise ValueError: if relofs is larger than the size of d
    :note: belongs to DeltaChunkList"""
    if relofs > d.ts:
        raise ValueError("Cannot split behinds a chunks rbound")

    # size of the right-hand part, must be computed before d is truncated
    osize = d.ts - relofs
    _set_delta_rbound(d, relofs)

    # the new chunk starts where the truncated one ends
    drb = d.rbound()

    # The right-hand part begins relofs bytes into the original chunk, so both
    # the source offset and the data advance by relofs - the same amount by which
    # the target offset advanced. The source offset stays numeric (even if it is
    # 0) as _move_delta_lbound adds to it in place.
    # NOTE: d.data is not truncated by _set_delta_rbound, hence it still holds
    # the bytes of the right-hand part.
    nso = (d.so or 0) + relofs
    ndata = None
    if d.data:
        # an empty remainder is stored as None, like a chunk without data
        ndata = d.data[relofs:] or None
    # END handle data

    nd = DeltaChunk(drb, osize, nso, ndata)

    # this is a free function - the list is passed in as dcl, there is no self
    dcl.insert(di + 1 + insert_offset, nd)
    return nd
164158

159+
def _handle_merge(ld, rd):
160+
"""Optimize the layout of the lhs delta and the rhs delta
161+
TODO: Once the default implementation is working"""
165162
if d.data is None:
166163
if od.data:
167164
# OVERWRITE DATA
@@ -173,13 +170,87 @@ def _merge_delta(dcl, d):
173170
else:
174171
if od.data:
175172
# MERGE DATA WITH DATA
173+
# overwrite the data at the respective spot
176174
pass
177175
else:
178176
# INSERT DATA INTO COPY AREA
179177
pass
180178
# END combine or insert data
181179
# END handle chunk mode
182180

181+
def _merge_delta(dcl, d):
    """Merge the given DeltaChunk instance into the dcl

    The chunk is inserted next to the chunk closest to its target offset.
    Existing chunks which are overlapped by it are removed, truncated, moved
    or split so that they do not cover the region of d anymore.

    :param dcl: list of DeltaChunks to merge d into, it is altered in place
        (presumably ordered by target offset - verify against _closest_index)
    :param d: the DeltaChunk to merge"""
    cdi = _closest_index(dcl, d.to)     # current delta index
    cd = dcl[cdi]                       # current delta

    # either we go at his spot, or after
    # cdi either moves one up, or stays
    # NOTE: the index to insert at is derived from cdi, the index of cd
    dcl.insert(cdi + (d.to > cd.to), d)
    cdi += d.to == cd.to

    # NOTE(review): if d.to > cd.to, d was inserted at cdi + 1, hence advancing
    # cdi at the end of the loop body makes d itself the current delta. If
    # d.to < cd.to, cdi refers to d right away. Confirm that d cannot be
    # popped from the list by the overlap handling below.
    while True:
        # are we larger than the current block
        if d.to < cd.to:
            if d.rbound() >= cd.rbound():
                # xxx|xxx|x
                # remove the current item completely
                dcl.pop(cdi)
                cdi -= 1
            elif d.rbound() > cd.to:
                # MOVE ITS LBOUND
                # xxx|x--|
                _move_delta_lbound(cd, d.rbound() - cd.to)
                break
            else:
                # WE DON'T OVERLAP IT
                # this can possibly happen
                assert False, "Wow, this can really happen"
                break
            # END rbound overlap handling
        # END lbound overlap handling
        else:
            if d.to >= cd.rbound():
                #|---|...xx
                break
            # END

            if d.rbound() >= cd.rbound():
                if d.to == cd.to:
                    #|xxx|x
                    # REMOVE CD
                    dcl.pop(cdi)
                    cdi -= 1
                else:
                    # TRUNCATE CD
                    #|-xx|
                    _set_delta_rbound(cd, d.to - cd.to)
                # END handle offset special case
            elif d.to == cd.to:
                #|x--|
                # we shift it by our size
                _move_delta_lbound(cd, d.ts)
            else:
                #|-x-|
                # SPLIT CD AND LBOUND MOVE ITS SECOND PART
                # cd is split where we start, relative to its own offset.
                # insert offset is required to insert it after us
                nd = _split_delta(dcl, cd, cdi, d.to - cd.to, 1)
                _move_delta_lbound(nd, d.ts)
                break
            # END handle rbound overlap
        # END handle overlap

        cdi += 1
        if cdi < len(dcl):
            cd = dcl[cdi]
        else:
            break
        # END check for end of list
    # while our chunk is not completely done
251+
252+
253+
183254

184255
class DeltaChunkList(list):
185256
"""List with special functionality to deal with DeltaChunks"""
@@ -208,7 +279,7 @@ def terminate_at(self, size):
208279
d = self[di]
209280
rsize = size - d.to
210281
if rsize:
211-
_trunc_delta(d, rsize)
282+
_set_delta_rbound(d, rsize)
212283
# END truncate last node if possible
213284
del(self[di+(rsize!=0):])
214285

stream.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -326,15 +326,15 @@ def _set_cache_(self, attr):
326326
# the last delta, and reverse-merge its ancestor delta, until we receive
327327
# the final delta data stream.
328328
dcl = DeltaChunkList()
329-
merge_deltas(dcl, self._dstreams)
329+
merge_deltas(dcl, reversed(self._dstreams))
330330

331331
if len(dcl) == 0:
332332
self._size = 0
333333
self._mm_target = allocate_memory(0)
334334
return
335335
# END handle empty list
336336

337-
self._size = dcl[-1].abssize()
337+
self._size = dcl[-1].rbound()
338338
self._mm_target = allocate_memory(self._size)
339339

340340
bbuf = allocate_memory(self._bstream.size)

0 commit comments

Comments
 (0)