@@ -15,6 +15,7 @@ typedef uchar bool;
1515const ull gDIV_grow_by = 100 ;
1616
1717
18+
1819// DELTA STREAM ACCESS
1920///////////////////////
2021inline
@@ -63,10 +64,14 @@ void TSI_destroy(ToplevelStreamInfo* info)
6364
6465 if (info -> parent_object ){
6566 Py_DECREF (info -> parent_object );
66- info -> parent_object = 0 ;
67+ info -> parent_object = NULL ;
6768 } else if (info -> tds ){
6869 PyMem_Free ((void * )info -> tds );
6970 }
71+ info -> tds = NULL ;
72+ info -> cstart = NULL ;
73+ info -> tdslen = 0 ;
74+ info -> num_chunks = 0 ;
7075}
7176
7277inline
@@ -122,26 +127,21 @@ bool TSI_copy_stream_from_object(ToplevelStreamInfo* info)
122127 return 1 ;
123128}
124129
125- // make sure we have the given amount of memory available. This will change
126- // our official length in bytes right away, its up to the caller
127- // to do something useful with the freed space
128- // Return true on success
129- bool TSI_resize (ToplevelStreamInfo * info , uint num_bytes )
130+ // Transfer ownership of the given stream to this instance, freeing any stream it
131+ // previously owned. num_chunks is left untouched and must be updated by the caller
132+ void TSI_replace_stream (ToplevelStreamInfo * info , const uchar * stream , uint streamlen )
130133{
131- assert (info -> tds );
132- if (num_bytes <= info -> tdslen ){
133- return 1 ;
134- }
134+ assert (info -> parent_object == 0 );
135+ fprintf (stderr , "TSI_replace_stream\n" );
135136
136- #ifdef DEBUG
137- fprintf (stderr , "TSI_resize: to %i bytes\n" , num_bytes );
138- #endif
139137 uint ofs = (uint )(info -> cstart - info -> tds );
140- info -> tds = PyMem_Realloc ((void * )info -> tds , num_bytes );
141- info -> tdslen = num_bytes ;
138+ if (info -> tds ){
139+ PyMem_Free ((void * )info -> tds );
140+ }
141+ info -> tds = stream ;
142142 info -> cstart = info -> tds + ofs ;
143+ info -> tdslen = streamlen ;
143144
144- return info -> tds != NULL ;
145145}
146146
147147// DELTA CHUNK
@@ -156,6 +156,9 @@ typedef struct {
156156 const uchar * data ;
157157} DeltaChunk ;
158158
159+ // forward declarations
160+ const uchar * next_delta_info (const uchar * , DeltaChunk * );
161+
159162inline
160163void DC_init (DeltaChunk * dc , ull to , ull ts , ull so , const uchar * data )
161164{
@@ -208,6 +211,8 @@ inline
208211void DC_encode_to (const DeltaChunk * dc , uchar * * pout , uint ofs , uint size )
209212{
210213 uchar * out = * pout ;
214+ DC_print (dc , "DC_encode_to" );
215+ fprintf (stderr , "DC_encode_to: ofs = %i, size = %i\n" , ofs , size );
211216 if (dc -> data ){
212217 * out ++ = (uchar )size ;
213218 memcpy (out , dc -> data + ofs , size );
@@ -233,6 +238,18 @@ void DC_encode_to(const DeltaChunk* dc, uchar** pout, uint ofs, uint size)
233238
234239 * op = i ;
235240 }
241+
242+ #ifdef DEBUG
243+ DeltaChunk mdc ;
244+ DC_init (& mdc , 0 , 0 , 0 , NULL );
245+ next_delta_info (* pout , & mdc );
246+ assert (mdc .ts == size );
247+ if (mdc .data )
248+ assert (mdc .data );
249+ else
250+ assert (mdc .so == dc -> so + ofs );
251+ #endif
252+
236253 * pout = out ;
237254}
238255
@@ -497,8 +514,6 @@ DeltaInfo* DIV_closest_chunk(const DeltaInfoVector* vec, ull ofs)
497514 return DIV_last (vec );
498515}
499516
500- // forward declaration
501- const uchar * next_delta_info (const uchar * , DeltaChunk * );
502517
503518// Return the amount of chunks a slice at the given spot would have, as well as
504519// its size in bytes it would have if the possibly partial chunks would be encoded
@@ -558,10 +573,11 @@ uint DIV_count_slice_bytes(const DeltaInfoVector* src, uint ofs, uint size)
558573// data chunk stream
559574// Return: number of chunks in the slice
560575inline
561- uint DIV_copy_slice_to (const DeltaInfoVector * src , uchar * dest , ull tofs , uint size )
576+ uint DIV_copy_slice_to (const DeltaInfoVector * src , uchar * * dest , ull tofs , uint size )
562577{
563578 assert (DIV_lbound (src ) <= tofs );
564579 assert ((tofs + size ) <= DIV_info_rbound (src , DIV_last (src )));
580+ fprintf (stderr , "copy_slice: ofs = %i, size = %i\n" , (int )tofs , size );
565581
566582 DeltaChunk dc ;
567583 DC_init (& dc , 0 , 0 , 0 , NULL );
@@ -573,14 +589,12 @@ uint DIV_copy_slice_to(const DeltaInfoVector* src, uchar* dest, ull tofs, uint s
573589 if (cdi -> to != tofs ) {
574590 const uint relofs = tofs - cdi -> to ;
575591 next_delta_info (src -> dstream + cdi -> dso , & dc );
576- const uint cdisize = dc .ts ;
577- const uint max_size = cdisize - relofs < size ? cdisize - relofs : size ;
592+ const uint max_size = dc .ts - relofs < size ? dc .ts - relofs : size ;
578593
579594 size -= max_size ;
580595
581596 // adjust dc proportions
582-
583- DC_encode_to (& dc , & dest , relofs , max_size );
597+ DC_encode_to (& dc , dest , relofs , max_size );
584598
585599 num_chunks += 1 ;
586600 cdi += 1 ;
@@ -599,10 +613,10 @@ uint DIV_copy_slice_to(const DeltaInfoVector* src, uchar* dest, ull tofs, uint s
599613 // Full copy would be possible, but the final length of the dstream
600614 // needs to be used as well to know how many bytes to copy
601615 // TODO: make a DIV_ function for this
602- DC_encode_to (& dc , & dest , 0 , dc .ts );
616+ DC_encode_to (& dc , dest , 0 , dc .ts );
603617 size -= dc .ts ;
604618 } else {
605- DC_encode_to (& dc , & dest , 0 , size );
619+ DC_encode_to (& dc , dest , 0 , size );
606620 size = 0 ;
607621 break ;
608622 }
@@ -619,98 +633,90 @@ bool DIV_connect_with_base(ToplevelStreamInfo* tsi, DeltaInfoVector* div)
619633{
620634 assert (tsi -> num_chunks );
621635
622- typedef struct {
623- uint bofs ; // byte-offset of delta stream
624- uint dofs ; // delta stream offset relative to tsi->cstart
625- } OffsetInfo ;
626-
627-
628- OffsetInfo * const offset_array = PyMem_Malloc (tsi -> num_chunks * sizeof (OffsetInfo ));
629- if (!offset_array ){
630- return 0 ;
631- }
632-
633- OffsetInfo * pofs = offset_array ;
634- uint num_addbytes = 0 ;
635- int bytes = 0 ;
636- uint dofs = 0 ;
637636
637+ uint num_bytes = 0 ;
638638 const uchar * data = TSI_first (tsi );
639- const uchar * prev_data = data ;
640- const uchar const * dend = TSI_end (tsi );
639+ const uchar * dend = TSI_end (tsi );
641640
642641 DeltaChunk dc ;
643642 DC_init (& dc , 0 , 0 , 0 , NULL );
644643
645644
646- // OFFSET RUN
647- for (;data < dend ; pofs ++ , prev_data = data )
645+ // COMPUTE SIZE OF TARGET STREAM
646+ /////////////////////////////////
647+ for (;data < dend ;)
648648 {
649- pofs -> bofs = num_addbytes ;
650649 data = next_delta_info (data , & dc );
651- assert (data );
652- pofs -> dofs = dofs ;
653- dofs += (uint )(data - prev_data );
650+ DC_print (& dc , "count" );
654651
655652 // Data chunks don't need processing
656653 if (dc .data ){
654+ num_bytes += 1 + dc .ts ;
657655 continue ;
658656 }
659657
660- // offset the next chunk by the amount of chunks in the slice
661- // - N, because we replace our own chunk's bytes
662- bytes = DIV_count_slice_bytes (div , dc .so , dc .ts ) - (data - prev_data );
663- // if we shrink in size, compensate this by moving the start virtually
664- //
665- if (bytes < 0 ){
666- fprintf (stderr , "hit negative bytes: %i\n" , bytes );
667- tsi -> cstart += abs (bytes );
668- }
669- num_addbytes += abs (bytes );
658+ num_bytes += DIV_count_slice_bytes (div , dc .so , dc .ts );
670659 }
671-
672660 assert (DC_rbound (& dc ) == tsi -> target_size );
673661
674662
675- // reserve enough memory to hold all the new chunks
676- TSI_resize (tsi , tsi -> tdslen + num_addbytes );
677- const OffsetInfo const * pofs_start = offset_array - 1 ;
678- const OffsetInfo * cpofs ;
679- uchar * ds ; // pointer into the delta stream
680- const uchar * nds ; // next pointer, used for size retrieving the size
681- uint num_addchunks = 0 ; // total amount of chunks added
663+ // GET NEW DELTA BUFFER
664+ ////////////////////////
665+ uchar * const dstream = PyMem_Malloc (num_bytes );
666+ if (!dstream ){
667+ return 0 ;
668+ }
669+
670+
671+ data = TSI_first (tsi );
672+ const uchar * ndata = data ;
673+ dend = TSI_end (tsi );
674+
675+ uint num_chunks = 0 ;
676+ uchar * ds = dstream ;
682677 DC_init (& dc , 0 , 0 , 0 , NULL );
683678
684- // Insert slices, from the end to the beginning, which allows memcpy
685- // to be used, with a little help of the offset array
686- for (cpofs = pofs - 1 ; cpofs > pofs_start ; cpofs -- )
679+ // pick slices from the delta and put them into the new stream
680+ for (; data < dend ; data = ndata )
687681 {
688- ds = (uchar * )(tsi -> cstart + cpofs -> dofs );
689- nds = next_delta_info (ds , & dc );
682+ ndata = next_delta_info (data , & dc );
683+
684+ DC_print (& dc , "slice" );
690685
691686 // Data chunks don't need processing
692687 if (dc .data ){
693- // NOTE: could peek the preceeding chunks to figure out whether they are
694- // all just moved by ofs. In that case, they can move as a whole!
695- // tests showed that this is very rare though, even in huge deltas, so its
696- // not worth the extra effort
697- if (cpofs -> bofs ){
698- memcpy ((void * )(ds + cpofs -> bofs ), (void * )ds , nds - ds );
699- // memmove((void*)(ds + cpofs->bofs), (void*)ds, nds - ds);
700- }
688+ // just copy it over
689+ memcpy ((void * )ds , (void * )data , ndata - data );
690+ ds += ndata - data ;
691+ num_chunks += 1 ;
701692 continue ;
702693 }
703694
704- // Copy Chunks - target offset is determined by their location and size
705- // hence it doesn't need specific adjustment
706- num_addchunks += DIV_copy_slice_to (div , ds + cpofs -> bofs , dc .so , dc .ts );
707- // -1 chunks because we overwrite our own chunk ( by not copying it )
708- num_addchunks -= 1 ;
695+ // Copy Chunks
696+ num_chunks += DIV_copy_slice_to (div , & ds , dc .so , dc .ts );
709697 }
698+ assert (ds - dstream == num_bytes );
699+ assert (num_chunks >= tsi -> num_chunks );
700+ assert (DC_rbound (& dc ) == tsi -> target_size );
701+
702+ // finally, replace the streams
703+ TSI_replace_stream (tsi , dstream , num_bytes );
704+ tsi -> cstart = dstream ; // we have NO header !
705+ assert (tsi -> tds == dstream );
706+ tsi -> num_chunks = num_chunks ;
710707
711- tsi -> num_chunks += num_addchunks ;
708+ #ifdef DEBUG
709+ data = TSI_first (tsi );
710+ dend = TSI_end (tsi );
711+
712+ DC_init (& dc , 0 , 0 , 0 , NULL );
713+
714+ while (data < dend ){
715+ data = next_delta_info (data , & dc );
716+ DC_print (& dc , "debug" );
717+ }
718+ #endif
712719
713- PyMem_Free (offset_array );
714720 return 1 ;
715721
716722}
@@ -754,6 +760,7 @@ PyObject* DCL_py_rbound(DeltaChunkList* self)
754760static
755761PyObject * DCL_apply (DeltaChunkList * self , PyObject * args )
756762{
763+ fprintf (stderr , "DCL_apply\n" );
757764
758765 PyObject * pybuf = 0 ;
759766 PyObject * writeproc = 0 ;
@@ -890,6 +897,7 @@ const uchar* next_delta_info(const uchar* data, DeltaChunk* dc)
890897 data += cmd ;
891898 } else {
892899 PyErr_SetString (PyExc_RuntimeError , "Encountered an unsupported delta cmd: 0" );
900+ assert (0 );
893901 return NULL ;
894902 }
895903
@@ -1048,7 +1056,7 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
10481056
10491057 #ifdef DEBUG
10501058 fprintf (stderr , "------------ Stream %i --------\n " , (int )dsi );
1051- fprintf (stderr , "Before Connect: tdsinfo-> num_chunks = %i, tdsinfo-> bytelen = %i\n" , (int )tdsinfo .num_chunks , (int )tdsinfo .tdslen );
1059+ fprintf (stderr , "Before Connect: tdsinfo: num_chunks = %i, bytelen = %i, target_size = %i \n" , (int )tdsinfo .num_chunks , (int )tdsinfo .tdslen , ( int ) tdsinfo . target_size );
10521060 fprintf (stderr , "div->num_chunks = %i, div->reserved_size = %i, div->bytelen=%i\n" , (int )div .size , (int )div .reserved_size , (int )dlen );
10531061 #endif
10541062
0 commit comments