@@ -94,6 +94,12 @@ typedef uchar bool;
9494// Constants
9595const ull gDVC_grow_by = 50 ;
9696
97+ #ifdef DEBUG // vector integrity checks are only compiled in when DEBUG is defined
98+ #define DBG_check (vec ) DCV_dbg_check_integrity(vec) // NOTE(review): a function-like macro needs '(' directly after its name - confirm the space here is only a rendering artifact
99+ #else
100+ #define DBG_check (vec ) // expands to nothing, so the checks cost nothing in non-DEBUG builds
101+ #endif
102+
97103// DELTA CHUNK
98104////////////////
99105// Internal Delta Chunk Objects
@@ -154,34 +160,36 @@ void DC_set_data(DeltaChunk* dc, const uchar* data, Py_ssize_t dlen, bool shared
154160}
155161
156162inline
157- ull DC_rbound (DeltaChunk * dc )
163+ ull DC_rbound (const DeltaChunk * dc ) // read-only accessor: dc is not modified
158164{
159165 return dc -> to + dc -> ts ; // right bound = to + ts (presumably target offset plus target size - confirm against the DeltaChunk definition)
160166}
161167
162168// Copy to, ts and so from src to dest, then pass src's data to DC_set_data with its last argument set to 0
163169inline
164- void DC_copy_to (DeltaChunk * src , DeltaChunk * dest )
170+ void DC_copy_to (const DeltaChunk * src , DeltaChunk * dest )
165171{
166172 dest -> to = src -> to ;
167173 dest -> ts = src -> ts ;
168174 dest -> so = src -> so ;
169175 dest -> data_shared = 0 ;
176+ dest -> data = NULL ; // NOTE(review): presumably clears a stale pointer before DC_set_data looks at dest - confirm against DC_set_data
170177
171178 DC_set_data (dest , src -> data , src -> ts , 0 ); // src->ts is used as the data length
172179}
173180
174181// Copy all data with the given offset and size. The source offset, as well
175182// as the data will be truncated accordingly
176183inline
177- void DC_offset_copy_to (DeltaChunk * src , DeltaChunk * dest , ull ofs , ull size )
184+ void DC_offset_copy_to (const DeltaChunk * src , DeltaChunk * dest , ull ofs , ull size )
178185{
179186 assert (size <= src -> ts );
180187 assert (src -> to + ofs + size <= DC_rbound (src ));
181188
182189 dest -> to = src -> to + ofs ;
183190 dest -> ts = size ;
184191 dest -> so = src -> so + ofs ;
192+ dest -> data = NULL ;
185193
186194 if (src -> data ){
187195 DC_set_data (dest , src -> data + ofs , size , 0 );
@@ -260,51 +268,51 @@ int DCV_init(DeltaChunkVector* vec, ull initial_size)
260268}
261269
262270inline
263- ull DCV_len (DeltaChunkVector * vec )
271+ ull DCV_len (const DeltaChunkVector * vec ) // read-only accessor: vec is not modified
264272{
265273 return vec -> size ; // number of items in the vector, as tracked by vec->size
266274}
267275
268276inline
269- ull DCV_lbound (DeltaChunkVector * vec )
277+ ull DCV_lbound (const DeltaChunkVector * vec )
270278{
271279 assert (vec -> size && vec -> mem ); // only meaningful for a non-empty vector with allocated memory
272280 return vec -> mem -> to ; // left bound is the 'to' field of the first item
273281}
274282
275283// Return a pointer to the item at index i; the index is only checked by an assertion
276284inline
277- DeltaChunk * DCV_get (DeltaChunkVector * vec , Py_ssize_t i )
285+ DeltaChunk * DCV_get (const DeltaChunkVector * vec , Py_ssize_t i ) // NOTE(review): hands out a mutable item pointer although vec is const
278286{
279287 assert (i < vec -> size && vec -> mem ); // checks the upper bound only; a negative i is not rejected here
280288 return & vec -> mem [i ];
281289}
282290
283291// Return a pointer to the last item, i.e. the one at index size - 1
284292inline
285- DeltaChunk * DCV_last (DeltaChunkVector * vec )
293+ DeltaChunk * DCV_last (const DeltaChunkVector * vec )
286294{
287295 return DCV_get (vec , vec -> size - 1 ); // no emptiness check of its own; relies on the assertion inside DCV_get
288296}
289297
290298inline
291- ull DCV_rbound (DeltaChunkVector * vec )
299+ ull DCV_rbound (const DeltaChunkVector * vec )
292300{
293301 return DC_rbound (DCV_last (vec )); // right bound of the vector is the right bound of its last item
294302}
295303
296304inline
297- int DCV_empty (DeltaChunkVector * vec )
305+ int DCV_empty (const DeltaChunkVector * vec )
298306{
299307 return vec -> size == 0 ; // 1 if the vector holds no items, 0 otherwise
300308}
301309
302310// Return the end pointer of the vector, i.e. the position one past its last item (mem + size)
303311inline
304- DeltaChunk * DCV_end (DeltaChunkVector * vec )
312+ const DeltaChunk * DCV_end (const DeltaChunkVector * vec ) // result is const: it is meant as an iteration bound, not for writing
305313{
306314 assert (!DCV_empty (vec )); // callers are expected to handle the empty case themselves
307- return & vec -> mem [ vec -> size ] ;
315+ return vec -> mem + vec -> size ; // plain pointer arithmetic instead of taking the address of an indexed element
308316}
309317
310318void DCV_destroy (DeltaChunkVector * vec )
@@ -345,7 +353,7 @@ void DCV_reset(DeltaChunkVector* vec)
345353 return ;
346354
347355 DeltaChunk * dc = vec -> mem ;
348- DeltaChunk * dcend = DCV_end (vec );
356+ const DeltaChunk * dcend = DCV_end (vec );
349357 for (;dc < dcend ; dc ++ ){
350358 DC_destroy (dc );
351359 }
@@ -366,11 +374,9 @@ DeltaChunk* DCV_append_multiple(DeltaChunkVector* vec, uint num_chunks)
366374 Py_ssize_t old_size = vec -> size ;
367375 vec -> size += num_chunks ;
368376
369- #ifdef DEBUG
370377 for (;old_size < vec -> size ; ++ old_size ){
371378 DC_init (DCV_get (vec , old_size ), 0 , 0 , 0 );
372379 }
373- #endif
374380
375381 return & vec -> mem [old_size ];
376382}
@@ -391,7 +397,7 @@ DeltaChunk* DCV_append(DeltaChunkVector* vec)
391397
392398// Return delta chunk being closest to the given absolute offset
393399inline
394- DeltaChunk * DCV_closest_chunk (DeltaChunkVector * vec , ull ofs )
400+ DeltaChunk * DCV_closest_chunk (const DeltaChunkVector * vec , ull ofs )
395401{
396402 assert (vec -> mem );
397403
@@ -416,16 +422,43 @@ DeltaChunk* DCV_closest_chunk(DeltaChunkVector* vec, ull ofs)
416422 return DCV_last (vec );
417423}
418424
425+ // Assert that the vector is non-empty, that its chunk sizes sum up to the span between its bounds, and that neighbouring chunks touch
426+ void DCV_dbg_check_integrity (const DeltaChunkVector * vec )
427+ {
428+ assert (!DCV_empty (vec ));
429+ const DeltaChunk * i = vec -> mem ;
430+ const DeltaChunk * end = DCV_end (vec );
431+ 
432+ ull aparent_size = DCV_rbound (vec ) - DCV_lbound (vec ); // span implied by the first and the last chunk
433+ ull acc_size = 0 ; // sum of the individual chunk sizes
434+ for (; i < end ; i ++ ){
435+ acc_size += i -> ts ;
436+ }
437+ assert (acc_size == aparent_size ); // any gap or overlap between chunks makes these two differ
438+ 
439+ if (vec -> size < 2 ){ // a single chunk has no neighbour to compare against
440+ return ;
441+ }
442+ 
443+ const DeltaChunk * endm1 = DCV_end (vec ) - 1 ; // stop one item early because each step looks at i + 1
444+ for (i = vec -> mem ; i < endm1 ; i ++ ){
445+ const DeltaChunk * n = i + 1 ;
446+ assert (DC_rbound (i ) == n -> to ); // every chunk must end exactly where its successor starts
447+ }
448+ 
449+ }
450+
419451// Write a slice as defined by its absolute offset in bytes and its size into the given
420452// destination. The individual chunks written will be a deep copy of the source
421453// data chunks
422454// TODO: this could trigger copying many smallish add-chunk pieces - maybe some sort
423455// of append-only memory pool would improve performance
424456inline
425- void DCV_copy_slice_to (DeltaChunkVector * src , DeltaChunkVector * dest , ull ofs , ull size )
457+ void DCV_copy_slice_to (const DeltaChunkVector * src , DeltaChunkVector * dest , ull ofs , ull size )
426458{
459+ //fprintf(stderr, "Copy Slice To: src->size = %i, ofs = %i, size=%i\n", (int)src->size, (int)ofs, (int)size);
427460 assert (DCV_lbound (src ) <= ofs );
428- assert (DCV_rbound ( src ) <= ofs + size );
461+ assert (( ofs + size ) <= DCV_rbound ( src ) );
429462
430463 DeltaChunk * cdc = DCV_closest_chunk (src , ofs );
431464
@@ -442,7 +475,7 @@ void DCV_copy_slice_to(DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, u
442475 }
443476 }
444477
445- DeltaChunk * vecend = DCV_end (src );
478+ const DeltaChunk * vecend = DCV_end (src );
446479 for ( ;(cdc < vecend ) && size ; ++ cdc )
447480 {
448481 if (cdc -> ts < size ) {
@@ -464,18 +497,22 @@ void DCV_copy_slice_to(DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, u
464497// 'at' will be replaced by the items to insert ( special purpose )
465498// 'at' will be properly destroyed, but all items will just be copied bytewise
466499// using memcpy. Hence 'from' must simply forget about them afterwards!
500+ // IMPORTANT: 'to' must already have enough reserved memory for the inserted items
467501inline
468- void DCV_replace_one_by_many (DeltaChunkVector * from , DeltaChunkVector * to , DeltaChunk * at )
502+ void DCV_replace_one_by_many (const DeltaChunkVector * from , DeltaChunkVector * to , DeltaChunk * at )
469503{
504+ fprintf (stderr , "Replace one by many: from->size = %i, to->size = %i, to->reserved = %i\n" , (int )from -> size , (int )to -> size , (int )to -> reserved_size );
470505 assert (from -> size > 1 );
506+ assert (to -> size + from -> size - 1 <= to -> reserved_size );
471507
472- DCV_reserve_memory ( to , to -> size + from -> size - 1 ); // -1 because we replace at
508+ // -1 because we replace 'at'
473509 DC_destroy (at );
474- to -> size -= 1 + from -> size ;
510+ to -> size += from -> size - 1 ;
475511
476512 // If we are somewhere in the middle, we have to make some space
477513 if (DCV_last (to ) != at ) {
478- memmove ((void * )at + from -> size , (void * )(at + 1 ), (size_t )(DCV_end (to ) - (at + 1 )));
514+ fprintf (stderr , "moving to %p from %p, num bytes = %i\n" , at + from -> size , at + 1 , (int )((DCV_end (to ) - (at + 1 )) * sizeof (DeltaChunk )));
515+ memmove ((void * )(at + from -> size ), (void * )(at + 1 ), (size_t )(DCV_end (to ) - (at + 1 )) * sizeof (DeltaChunk ));
479516 }
480517
481518 // Finally copy all the items in
@@ -485,22 +522,27 @@ void DCV_replace_one_by_many(DeltaChunkVector* from, DeltaChunkVector* to, Delta
485522// Take slices of bdcv into the corresponding area of the tdcv, which is the topmost
486523// delta to apply. tmpl is used as temporary space and must be initialized and destroyed by the
487524// caller
488- void DCV_connect_with_base (DeltaChunkVector * tdcv , DeltaChunkVector * bdcv , DeltaChunkVector * tmpl )
525+ void DCV_connect_with_base (DeltaChunkVector * tdcv , const DeltaChunkVector * bdcv , DeltaChunkVector * tmpl )
489526{
490- DeltaChunk * dc = tdcv -> mem ;
491- DeltaChunk * end = tdcv -> mem + tdcv -> size ;
492- assert ( dc );
527+ Py_ssize_t dci = 0 ;
528+ Py_ssize_t iend = tdcv -> size ;
529+ DeltaChunk * dc ;
493530
494- for (;dc < end ; dc ++ )
531+ DBG_check (tdcv );
532+ DBG_check (bdcv );
533+
534+ for (;dci < iend ; dci ++ )
495535 {
496536 // Data chunks don't need processing
537+ dc = DCV_get (tdcv , dci );
497538 if (dc -> data ){
498539 continue ;
499540 }
500541
501542 // Copy Chunk Handling
502543 DCV_copy_slice_to (bdcv , tmpl , dc -> so , dc -> ts );
503- // assert(tmpl->size);
544+ DBG_check (tmpl );
545+ assert (tmpl -> size );
504546
505547 // move target bounds
506548 DeltaChunk * tdc = tmpl -> mem ;
@@ -516,8 +558,15 @@ void DCV_connect_with_base(DeltaChunkVector* tdcv, DeltaChunkVector* bdcv, Delta
516558 DC_destroy (dc );
517559 * dc = * DCV_get (tmpl , 0 );
518560 } else {
561+ DCV_reserve_memory (tdcv , tdcv -> size + tmpl -> size - 1 + gDVC_grow_by );
562+ dc = DCV_get (tdcv , dci );
519563 DCV_replace_one_by_many (tmpl , tdcv , dc );
564+ // Compensate for us being replaced
565+ dci += tmpl -> size - 1 ;
566+ iend += tmpl -> size - 1 ;
520567 }
568+
569+ DBG_check (tdcv );
521570
522571 // make sure the members will not be deallocated by the list
523572 DCV_forget_members (tmpl );
@@ -679,8 +728,8 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
679728 DCV_init (& tdcv , 0 );
680729 DCV_init (& tmpl , 200 );
681730
682- unsigned int dsi ;
683- PyObject * ds ;
731+ unsigned int dsi = 0 ;
732+ PyObject * ds = 0 ;
684733 int error = 0 ;
685734 for (ds = PyIter_Next (stream_iter ), dsi = 0 ; ds != NULL ; ++ dsi , ds = PyIter_Next (stream_iter ))
686735 {
@@ -706,7 +755,7 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
706755
707756 // parse command stream
708757 ull tbw = 0 ; // Amount of target bytes written
709- bool shared_data = dsi != 0 ;
758+ bool is_shared_data = dsi != 0 ;
710759 bool is_first_run = dsi == 0 ;
711760
712761 assert (data < dend );
@@ -742,10 +791,10 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
742791 // What's faster
743792 DeltaChunk * dc = DCV_append (& dcv );
744793 DC_init (dc , tbw , cmd , 0 );
745- DC_set_data (dc , data , cmd , shared_data );
794+ DC_set_data (dc , data , cmd , is_shared_data );
746795 tbw += cmd ;
747796 data += cmd ;
748- } else {
797+ } else {
749798 error = 1 ;
750799 PyErr_SetString (PyExc_RuntimeError , "Encountered an unsupported delta cmd: 0" );
751800 goto loop_end ;
0 commit comments