Skip to content

Commit 48fdcf4

Browse files
committed
Added a new pack to the test database to get some ASCII deltas, which may help visual debugging (but probably not)
Added plenty of debug code, which revealed that the copy operations are not yet correct
1 parent 4d41a87 commit 48fdcf4

File tree

5 files changed

+93
-49
lines changed

5 files changed

+93
-49
lines changed

fun.py

Lines changed: 74 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
decompressobj = zlib.decompressobj
1111

1212
import mmap
13+
from itertools import islice, izip
1314

1415
# INVARIANTS
1516
OFS_DELTA = 6
@@ -93,7 +94,10 @@ def __init__(self, to, ts, so, data):
9394
self.ts = ts
9495
self.so = so
9596
self.data = data
96-
97+
98+
def __repr__(self):
99+
return "DeltaChunk(%i, %i, %s, %s)" % (self.to, self.ts, self.so, self.data or "")
100+
97101
#{ Interface
98102

99103
def rbound(self):
@@ -105,6 +109,7 @@ def apply(self, source, write):
105109
:param write: write method to call with data to write"""
106110
if self.data is None:
107111
# COPY DATA FROM SOURCE
112+
assert len(source) - self.so - self.ts > 0
108113
write(buffer(source, self.so, self.ts))
109114
else:
110115
# APPEND DATA
@@ -121,14 +126,16 @@ def apply(self, source, write):
121126
def _closest_index(dcl, absofs):
122127
""":return: index at which the given absofs should be inserted. The index points
123128
to the DeltaChunk with a target buffer absofs that equals or is greater than
124-
absofs
129+
absofs.
125130
:note: global method for performance only, it belongs to DeltaChunkList"""
126131
# TODO: binary search !!
127132
for i,d in enumerate(dcl):
128-
if absofs >= d.to:
133+
if absofs < d.to:
134+
return i-1
135+
elif absofs == d.to:
129136
return i
130137
# END for each delta absofs
131-
raise AssertionError("Should never be here")
138+
return len(dcl)-1
132139

133140
def _split_delta(dcl, d, di, relofs, insert_offset=0):
134141
"""Split the delta at di into two deltas, adjusting their sizes, offsets and data
@@ -150,7 +157,7 @@ def _split_delta(dcl, d, di, relofs, insert_offset=0):
150157

151158
nd = DeltaChunk( drb,
152159
osize,
153-
(d.so and d.so + osize) or None,
160+
d.so + osize,
154161
(d.data and d.data[osize:]) or None )
155162

156163
self.insert(di+1+insert_offset, nd)
@@ -178,64 +185,70 @@ def _handle_merge(ld, rd):
178185
# END combine or insert data
179186
# END handle chunk mode
180187

181-
def _merge_delta(dcl, d):
188+
def _merge_delta(dcl, dc):
182189
"""Merge the given DeltaChunk instance into the dcl
183190
:param d: the DeltaChunk to merge"""
184-
cdi = _closest_index(dcl, d.to) # current delta index
191+
if len(dcl) == 0:
192+
dcl.append(dc)
193+
return
194+
# END early return on empty list
195+
196+
cdi = _closest_index(dcl, dc.to) # current delta index
185197
cd = dcl[cdi] # current delta
186198

187199
# either we go at his spot, or after
188200
# cdi either moves one up, or stays
189-
dcl.insert(di + (d.to > cd.to), d)
190-
cdi += d.to == cd.to
201+
#print "insert at %i" % (cdi + (dc.to > cd.to))
202+
#print cd, dc
203+
dcl.insert(cdi + (dc.to > cd.to), dc)
204+
cdi += dc.to == cd.to
191205

192206
while True:
193207
# are we larger than the current block
194-
if d.to < cd.to:
195-
if d.rbound() >= cd.rbound():
208+
if dc.to < cd.to:
209+
if dc.rbound() >= cd.rbound():
196210
# xxx|xxx|x
197211
# remove the current item completely
198212
dcl.pop(cdi)
199213
cdi -= 1
200-
elif d.rbound() > cd.to:
214+
elif dc.rbound() > cd.to:
201215
# MOVE ITS LBOUND
202216
# xxx|x--|
203-
_move_delta_lbound(cd, d.rbound() - cd.to)
217+
_move_delta_lbound(cd, dc.rbound() - cd.to)
204218
break
205219
else:
206220
# WE DON'T OVERLAP IT
207-
# this can possibly happen
208-
assert False, "Wow, this can really happen"
221+
# this can actually happen, once multiple streams are merged
209222
break
210223
# END rbound overlap handling
211224
# END lbound overlap handling
212225
else:
213-
if d.to >= cd.rbound():
226+
if dc.to >= cd.rbound():
214227
#|---|...xx
215228
break
216229
# END
217230

218-
if d.rbound() >= cd.rbound():
219-
if d.to == cd.to:
231+
if dc.rbound() >= cd.rbound():
232+
if dc.to == cd.to:
220233
#|xxx|x
221234
# REMOVE CD
222235
dcl.pop(cdi)
223236
cdi -= 1
224237
else:
225238
# TRUNCATE CD
226239
#|-xx|
227-
_set_delta_rbound(cd, d.to - cd.to)
240+
_set_delta_rbound(cd, dc.to - cd.to)
228241
# END handle offset special case
229-
elif d.to == cd.to:
242+
elif dc.to == cd.to:
230243
#|x--|
231244
# we shift it by our size
232-
_move_delta_lbound(cd, d.ts)
245+
_move_delta_lbound(cd, dc.ts)
233246
else:
234247
#|-x-|
235248
# SPLIT CD AND LBOUND MOVE ITS SECOND PART
236249
# insert offset is required to insert it after us
237250
nd = _split_delta(dcl, cd, cdi, 1)
238-
_move_delta_lbound(nd, d.ts)
251+
_move_delta_lbound(nd, dc.ts)
239252
break
240253
# END handle rbound overlap
241254
# END handle overlap
@@ -248,30 +261,14 @@ def _merge_delta(dcl, d):
248261
# END check for end of list
249262
# while our chunk is not completely done
250263

251-
264+
## DEBUG ##
265+
dcl.check_integrity()
252266

253267

254268

255269
class DeltaChunkList(list):
256270
"""List with special functionality to deal with DeltaChunks"""
257271

258-
def init(self, size):
259-
"""Intialize this instance with chunks defining to fill up size from a base
260-
buffer of equal size"""
261-
if len(self) != 0:
262-
return
263-
# pretend we have one huge delta chunk, which just copies everything
264-
# from source to destination
265-
maxint32 = 2**32
266-
for x in range(0, size, maxint32):
267-
self.append(DeltaChunk(x, maxint32, x, None))
268-
# END create copy chunks
269-
offset = x*maxint32
270-
remainder = size-offset
271-
if remainder:
272-
self.append(DeltaChunk(offset, remainder, offset, None))
273-
# END handle all done in loop
274-
275272
def terminate_at(self, size):
276273
"""Chops the list at the given size, splitting and removing DeltaNodes
277274
as required"""
@@ -283,6 +280,38 @@ def terminate_at(self, size):
283280
# END truncate last node if possible
284281
del(self[di+(rsize!=0):])
285282

283+
## DEBUG ##
284+
self.check_integrity(size)
285+
286+
def check_integrity(self, target_size=-1):
287+
"""Verify the list has non-overlapping chunks only, and the total size matches
288+
target_size
289+
:param target_size: if not -1, the total size of the chain must be target_size
290+
:raise AssertionError: if the size doen't match"""
291+
if target_size > -1:
292+
assert self[-1].rbound() == target_size
293+
assert reduce(lambda x,y: x+y, (d.ts for d in self), 0) == target_size
294+
# END target size verification
295+
296+
if len(self) < 2:
297+
return
298+
299+
# check data
300+
for dc in self:
301+
if dc.data:
302+
assert len(dc.data) >= dc.ts
303+
# END for each dc
304+
305+
left = islice(self, 0, len(self)-1)
306+
right = iter(self)
307+
right.next()
308+
# this is very pythonic - we might have just use index based access here,
309+
# but this could actually be faster
310+
for lft,rgt in izip(left, right):
311+
assert lft.rbound() == rgt.to
312+
assert lft.to + lft.ts == rgt.to
313+
# END for each pair
314+
286315
#} END structures
287316

288317
#{ Routines
@@ -422,18 +451,15 @@ def merge_deltas(dcl, dstreams):
422451
:param dstreams: iterable of delta stream objects. They must be ordered latest last,
423452
hence the delta to be applied last comes last, its oldest ancestor first
424453
:return: None"""
425-
for ds in dstreams:
454+
for dsi, ds in enumerate(dstreams):
455+
# print "Stream", dsi
426456
db = ds.read()
427457
delta_buf_size = ds.size
428458

429459
# read header
430460
i, src_size = msb_size(db)
431461
i, target_size = msb_size(db, i)
432462

433-
if len(dcl) == 0:
434-
dcl.init(target_size)
435-
# END handle empty list
436-
437463
# interpret opcodes
438464
tbw = 0 # amount of target bytes written
439465
while i < delta_buf_size:
@@ -475,7 +501,7 @@ def merge_deltas(dcl, dstreams):
475501
tbw += cp_size
476502
elif c:
477503
# TODO: Concatenate multiple deltachunks
478-
_merge_delta(dcl, DeltaChunk(tbw, c, None, db[i:i+c]))
504+
_merge_delta(dcl, DeltaChunk(tbw, c, 0, db[i:i+c]))
479505
i += c
480506
tbw += c
481507
else:
@@ -487,6 +513,8 @@ def merge_deltas(dcl, dstreams):
487513

488514
# END for each delta stream
489515

516+
# print dcl
517+
490518

491519
def apply_delta_data(src_buf, src_buf_size, delta_buf, delta_buf_size, write):
492520
"""

stream.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,19 @@ def _set_cache_(self, attr):
346346
dc.apply(bbuf, write)
347347
# END for each deltachunk to apply
348348

349+
self._mm_target.seek(0)
350+
351+
## DEBUG ##
352+
mt = self._mm_target
353+
for ds in self._dstreams:
354+
ds.stream.seek(0)
355+
self._bstream.stream.seek(0)
356+
self._set_cache_old(attr)
357+
358+
import chardet
359+
if chardet.detect(mt[:])['encoding'] == 'ascii':
360+
assert self._mm_target[:] == mt[:]
361+
349362
def _set_cache_old(self, attr):
350363
"""If we are here, we apply the actual deltas"""
351364

@@ -399,7 +412,7 @@ def _set_cache_old(self, attr):
399412
stream_copy(dstream.read, ddata.write, dstream.size, 256*mmap.PAGESIZE)
400413

401414
#######################################################################
402-
apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf)
415+
apply_delta_data(bbuf, src_size, ddata, len(ddata), tbuf.write)
403416
#######################################################################
404417

405418
# finally, swap out source and target buffers. The target is now the
Binary file not shown.
Binary file not shown.

test/test_pack.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,10 @@ class TestPack(TestBase):
3434

3535
packindexfile_v1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.idx'), 1, 67)
3636
packindexfile_v2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.idx'), 2, 30)
37+
packindexfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.idx'), 2, 42)
3738
packfile_v2_1 = (fixture_path('packs/pack-c0438c19fb16422b6bbcce24387b3264416d485b.pack'), 2, packindexfile_v1[2])
3839
packfile_v2_2 = (fixture_path('packs/pack-11fdfa9e156ab73caae3b6da867192221f2089c2.pack'), 2, packindexfile_v2[2])
40+
packfile_v2_3_ascii = (fixture_path('packs/pack-a2bf8e71d8c18879e499335762dd95119d93d9f1.pack'), 2, packindexfile_v2_3_ascii[2])
3941

4042

4143
def _assert_index_file(self, index, version, size):
@@ -123,14 +125,15 @@ def test_pack_index(self):
123125

124126
def test_pack(self):
125127
# there is this special version 3, but apparently its like 2 ...
126-
for packfile, version, size in (self.packfile_v2_1, self.packfile_v2_2):
128+
for packfile, version, size in (self.packfile_v2_3_ascii, self.packfile_v2_1, self.packfile_v2_2):
127129
pack = PackFile(packfile)
128130
self._assert_pack_file(pack, version, size)
129131
# END for each pack to test
130132

131133
def test_pack_entity(self):
132134
for packinfo, indexinfo in ( (self.packfile_v2_1, self.packindexfile_v1),
133-
(self.packfile_v2_2, self.packindexfile_v2)):
135+
(self.packfile_v2_2, self.packindexfile_v2),
136+
(self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
134137
packfile, version, size = packinfo
135138
indexfile, version, size = indexinfo
136139
entity = PackEntity(packfile)

0 commit comments

Comments
 (0)