Skip to content

Commit afefae6

Browse files
committed
Implemented everything around the actual merge-algorithm, which appears to be the heart of the whole thing
1 parent 274c5c8 commit afefae6

File tree

2 files changed

+214
-36
lines changed

2 files changed

+214
-36
lines changed

fun.py

Lines changed: 205 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,42 @@
4141

4242
__all__ = ('is_loose_object', 'loose_object_header_info', 'msb_size', 'pack_object_header_info',
4343
'write_object', 'loose_object_header', 'stream_copy', 'apply_delta_data',
44-
'is_equal_canonical_sha', 'apply_delta_chunks', 'reverse_merge_deltas',
45-
'merge_deltas')
44+
'is_equal_canonical_sha', 'reverse_merge_deltas',
45+
'merge_deltas', 'DeltaChunkList')
4646

4747

4848
#{ Structures
4949

50+
def _trunc_delta(d, size):
51+
"""Truncate the given delta to the given size
52+
:param size: size relative to our target offset, may not be 0, must be smaller or equal
53+
to our size"""
54+
if size == 0:
55+
raise ValueError("size to truncate to must not be 0")
56+
if d.ts == size:
57+
return
58+
if size > d.ts:
59+
raise ValueError("Cannot truncate delta 'larger'")
60+
61+
d.ts = size
62+
63+
# NOTE: data is truncated automatically when applying the delta
64+
# MUST NOT DO THIS HERE, see _split_delta
65+
66+
def _move_delta_offset(d, bytes):
67+
"""Move the delta by the given amount of bytes, reducing its size so that its
68+
right bound stays static
69+
:param bytes: amount of bytes to move, must be smaller than delta size"""
70+
if bytes >= d.ts:
71+
raise ValueError("Cannot move offset that much")
72+
73+
d.to += bytes
74+
d.ts -= bytes
75+
if d.data:
76+
d.data = d.data[bytes:]
77+
# END handle data
78+
79+
5080
class DeltaChunk(object):
5181
"""Represents a piece of a delta, it can either add new data, or copy existing
5282
one from a source buffer"""
@@ -68,20 +98,120 @@ def __init__(self, to, ts, so, data):
6898
def abssize(self):
6999
return self.to + self.ts
70100

71-
def apply(self, source, target):
101+
def apply(self, source, write):
72102
"""Apply own data to the target buffer
73103
:param source: buffer providing source bytes for copy operations
74-
:param target: target buffer large enough to contain all the changes to be applied"""
75-
if self.data is not None:
76-
# APPEND DATA
77-
pass
78-
else:
104+
:param write: write method to call with data to write"""
105+
if self.data is None:
79106
# COPY DATA FROM SOURCE
80-
pass
107+
write(buffer(source, self.so, self.ts))
108+
else:
109+
# APPEND DATA
110+
# whats faster: if + 4 function calls or just a write with a slice ?
111+
if self.ts < len(self.data):
112+
write(self.data[:self.ts])
113+
else:
114+
write(self.data)
115+
# END handle truncation
81116
# END handle chunk mode
82117

83118
#} END interface
84119

120+
def _closest_index(dcl, absofs):
121+
""":return: index at which the given absofs should be inserted. The index points
122+
to the DeltaChunk with a target buffer absofs that equals or is greater than
123+
absofs
124+
:note: global method for performance only, it belongs to DeltaChunkList"""
125+
# TODO: binary search !!
126+
for i,d in enumerate(dcl):
127+
if absofs >= d.to:
128+
return i
129+
# END for each delta absofs
130+
raise AssertionError("Should never be here")
131+
132+
def _split_delta(dcl, absofs, di=None):
    """Split the delta at absofs into two deltas, adjusting their sizes, offsets and
    data accordingly and adding them to the dcl.

    :param dcl: DeltaChunkList to operate on
    :param absofs: absolute target offset at which to split the delta
    :param di: a pre-determined delta-index, or None if it should be retrieved
    :note: it will not split if absofs already falls on a chunk boundary
    :return: the closest index which has been split ( usually di if given )
    :note: belongs to DeltaChunkList

    BUGFIXES: the insert previously referenced undefined ``self`` (NameError in
    a module-level function); the right-hand chunk's size was computed from the
    already-truncated left chunk (always 0); the data slice used the wrong
    offset; and a legitimate source offset of 0 was mapped to None by the
    truthiness test ``(d.so and ...)``."""
    if di is None:
        di = _closest_index(dcl, absofs)
    # END retrieve index

    d = dcl[di]
    if d.to == absofs or d.abssize() == absofs:
        # already a boundary - nothing to split
        return di

    # size of the left part, relative to the chunk's target offset, and size
    # of the right part which becomes a new chunk - compute BEFORE truncating
    lsize = absofs - d.to
    rsize = d.abssize() - absofs

    _trunc_delta(d, lsize)

    # insert the right-hand chunk; copy chunks (data is None) continue at the
    # shifted source offset, data chunks carry the remainder of their buffer
    if d.data is None:
        so = d.so + lsize
        data = None
    else:
        so = None
        data = d.data[lsize:]
    # END handle chunk mode
    dcl.insert(di + 1, DeltaChunk(absofs, rsize, so, data))
    return di
159+
160+
def _merge_delta(dcl, d):
    """Merge the given DeltaChunk instance into the dcl"""
    # locate the existing chunk overlapping the incoming chunk's target offset
    index = _closest_index(dcl, d.to)
    od = dcl[index]

    # NOTE(review): all four merge strategies below are unimplemented
    # placeholders - the function currently only performs the lookup
    if d.data is None:
        if od.data:
            pass        # OVERWRITE DATA
        else:
            pass        # MERGE SOURCE AREA
        # END overwrite data
    else:
        if od.data:
            pass        # MERGE DATA WITH DATA
        else:
            pass        # INSERT DATA INTO COPY AREA
        # END combine or insert data
    # END handle chunk mode
182+
183+
184+
class DeltaChunkList(list):
    """List with special functionality to deal with DeltaChunks"""

    def init(self, size):
        """Initialize this instance with chunks defining to fill up size from a base
        buffer of equal size. Does nothing if the list is already populated.

        :param size: total size in bytes of the base buffer to mirror

        BUGFIXES: the previous loop appended a full 2**32-sized chunk even when
        size was smaller; ``offset = x*maxint32`` double-scaled the offset
        (x already stepped by maxint32); the remainder chunk then overlapped the
        last full chunk; and size == 0 raised NameError on the unbound loop
        variable."""
        if len(self) != 0:
            return
        # pretend we have one huge delta chunk, which just copies everything
        # from source to destination, split into pieces small enough for a
        # 32 bit size field
        maxint32 = 2**32
        nchunks, remainder = divmod(size, maxint32)
        for x in range(nchunks):
            offset = x * maxint32
            self.append(DeltaChunk(offset, maxint32, offset, None))
        # END create full-sized copy chunks
        if remainder:
            offset = nchunks * maxint32
            self.append(DeltaChunk(offset, remainder, offset, None))
        # END handle trailing partial chunk

    def terminate_at(self, size):
        """Chops the list at the given size, splitting and removing DeltaNodes
        as required

        :param size: absolute target size in bytes at which to cut the list"""
        di = _closest_index(self, size)
        d = self[di]
        rsize = size - d.to
        if rsize:
            _trunc_delta(d, rsize)
        # END truncate last node if possible
        # keep chunk di only if part of it lies before the cut (rsize != 0)
        del(self[di + (rsize != 0):])
214+
85215
#} END structures
86216

87217
#{ Routines
@@ -204,12 +334,10 @@ def stream_copy(read, write, size, chunk_size):
204334
# END duplicate data
205335
return dbw
206336

207-
208337
def reverse_merge_deltas(dcl, dstreams):
209338
"""Read the condensed delta chunk information from dstream and merge its information
210339
into a list of existing delta chunks
211-
:param dcl: list of DeltaChunk objects, may be empty initially, and will be changed
212-
during the merge process
340+
:param dcl: see merge_deltas
213341
:param dstreams: iterable of delta stream objects. They must be ordered latest first,
214342
hence the delta to be applied last comes first, then its ancestors
215343
:return: None"""
@@ -218,42 +346,88 @@ def reverse_merge_deltas(dcl, dstreams):
218346
def merge_deltas(dcl, dstreams):
    """Read the condensed delta chunk information from dstream and merge its information
    into a list of existing delta chunks

    :param dcl: DeltaChunkList, may be empty initially, and will be changed
        during the merge process
    :param dstreams: iterable of delta stream objects. They must be ordered latest last,
        hence the delta to be applied last comes last, its oldest ancestor first
    :return: None"""
    for ds in dstreams:
        db = ds.read()
        delta_buf_size = ds.size

        # read header - the source and target sizes precede the opcode stream
        pos, src_size = msb_size(db)
        pos, target_size = msb_size(db, pos)

        if len(dcl) == 0:
            dcl.init(target_size)
        # END handle empty list

        # interpret opcodes
        written = 0     # amount of target bytes written so far
        while pos < delta_buf_size:
            opcode = ord(db[pos])
            pos += 1
            if opcode & 0x80:
                # COPY operation: each set flag bit contributes one byte of
                # the source offset respectively the copy size
                cp_off = 0
                for flag, shift in ((0x01, 0), (0x02, 8), (0x04, 16), (0x08, 24)):
                    if opcode & flag:
                        cp_off |= (ord(db[pos]) << shift)
                        pos += 1
                # END assemble offset
                cp_size = 0
                for flag, shift in ((0x10, 0), (0x20, 8), (0x40, 16)):
                    if opcode & flag:
                        cp_size |= (ord(db[pos]) << shift)
                        pos += 1
                # END assemble size

                # a size of 0 encodes 64kb by convention
                if not cp_size:
                    cp_size = 0x10000

                # guard against overflow and reads past the source buffer
                rbound = cp_off + cp_size
                if (rbound < cp_size or
                    rbound > src_size):
                    break

                _merge_delta(dcl, DeltaChunk(written, cp_size, cp_off, None))
                written += cp_size
            elif opcode:
                # INSERT operation: opcode itself is the count of literal bytes
                # TODO: Concatenate multiple deltachunks
                _merge_delta(dcl, DeltaChunk(written, opcode, None, db[pos:pos + opcode]))
                pos += opcode
                written += opcode
            else:
                raise ValueError("unexpected delta opcode 0")
            # END handle command byte
        # END while processing delta data

        dcl.terminate_at(target_size)
    # END for each delta stream
234418

235-
def apply_delta_chunks(src_buf, src_buf_size, dcl, target):
236-
"""
237-
Apply data from a delta chunk list and a source buffer to the target stream
238-
239-
:param src_buf: random access data from which the delta was created
240-
:param src_buf_size: size of the source buffer in bytes
241-
:param delta_buf_size: size fo the delta buffer in bytes
242-
:param target: ostream with a write method"""
243-
244419

245-
def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, target_file):
420+
def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
246421
"""
247422
Apply data from a delta buffer using a source buffer to the target file
248423
249424
:param src_buf: random access data from which the delta was created
250425
:param src_buf_size: size of the source buffer in bytes
251426
:param delta_buf_size: size fo the delta buffer in bytes
252427
:param delta_buf: random access delta data
253-
:param target_file: file like object to write the result to
428+
:param write: write method taking a chunk of bytes
254429
:note: transcribed to python from the similar routine in patch-delta.c"""
255430
i = 0
256-
twrite = target_file.write
257431
db = delta_buf
258432
while i < delta_buf_size:
259433
c = ord(db[i])
@@ -289,9 +463,9 @@ def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, target_fi
289463
if (rbound < cp_size or
290464
rbound > src_buf_size):
291465
break
292-
twrite(buffer(src_buf, cp_off, cp_size))
466+
write(buffer(src_buf, cp_off, cp_size))
293467
elif c:
294-
twrite(db[i:i+c])
468+
write(db[i:i+c])
295469
i += c
296470
else:
297471
raise ValueError("unexpected delta opcode 0")

stream.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
msb_size,
99
stream_copy,
1010
apply_delta_data,
11-
apply_delta_chunks,
1211
merge_deltas,
12+
DeltaChunkList,
1313
delta_types
1414
)
1515

@@ -325,8 +325,8 @@ def _set_cache_(self, attr):
325325
# Aggregate all deltas into one delta in reverse order. Hence we take
326326
# the last delta, and reverse-merge its ancestor delta, until we receive
327327
# the final delta data stream.
328-
dcl = list()
329-
reverse_merge_deltas(dcl, self._dstreams)
328+
dcl = DeltaChunkList()
329+
merge_deltas(dcl, self._dstreams)
330330

331331
if len(dcl) == 0:
332332
self._size = 0
@@ -338,9 +338,13 @@ def _set_cache_(self, attr):
338338
self._mm_target = allocate_memory(self._size)
339339

340340
bbuf = allocate_memory(self._bstream.size)
341-
stream_copy(self._bstream.read, bbuf.write, base_size, 256 * mmap.PAGESIZE)
341+
stream_copy(self._bstream.read, bbuf.write, self._bstream.size, 256 * mmap.PAGESIZE)
342342

343-
apply_delta_chunks(bbuf, self._bstream.size, dcl, self._mm_target)
343+
# APPLY CHUNKS
344+
write = self._mm_target.write
345+
for dc in dcl:
346+
dc.apply(bbuf, write)
347+
# END for each deltachunk to apply
344348

345349
def _set_cache_old(self, attr):
346350
"""If we are here, we apply the actual deltas"""

0 commit comments

Comments
 (0)