@@ -159,6 +159,16 @@ void DC_set_data(DeltaChunk* dc, const uchar* data, Py_ssize_t dlen, bool shared
159159
160160}
161161
162+ // Make the given data our own. It is assumed to have the size stored in our instance
163+ // and will be managed by us.
164+ inline
165+ void DC_set_data_with_ownership (DeltaChunk * dc , const uchar * data )
166+ {
167+ assert (data );
168+ DC_deallocate_data (dc );
169+ dc -> data = data ;
170+ }
171+
162172inline
163173ull DC_rbound (const DeltaChunk * dc )
164174{
@@ -214,7 +224,6 @@ void DC_offset_copy_to(const DeltaChunk* src, DeltaChunk* dest, ull ofs, ull siz
214224 if (src -> data ){
215225 DC_set_data (dest , src -> data + ofs , size , 0 );
216226 } else {
217- dest -> data = NULL ;
218227 dest -> data_shared = 0 ;
219228 }
220229}
@@ -825,6 +834,8 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
825834 const unsigned long rbound = cp_off + cp_size ;
826835 if (rbound < cp_size ||
827836 rbound > base_size ){
837+ // this really shouldn't happen
838+ error = 1 ;
828839 assert (0 );
829840 break ;
830841 }
@@ -834,16 +845,53 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
834845
835846 } else if (cmd ) {
836847 // TODO: Compress nodes by parsing them in advance
837- // NOTE: Compression only necessary for all other deltas, not
838- // for the first one, as we will share the data. It really depends
839- // What's faster
840848 // Compression reduces fragmentation though, which is why we do it
841849 // in all cases.
842- DeltaChunk * dc = DCV_append (& dcv );
843- DC_init (dc , tbw , cmd , 0 );
844- DC_set_data (dc , data , cmd , is_shared_data );
845- tbw += cmd ;
850+ const uchar * add_start = data - 1 ;
851+ const uchar * add_end = dend ;
852+ ull num_bytes = cmd ;
846853 data += cmd ;
854+ ull num_chunks = 1 ;
855+ while (data < dend ){
856+ fprintf (stderr , "looping\n" );
857+ const char c = * data ;
858+ if (c & 0x80 ){
859+ add_end = data ;
860+ break ;
861+ } else {
862+ num_chunks += 1 ;
863+ data += c + 1 ; // advance by 1 to skip add cmd
864+ num_bytes += c ;
865+ }
866+ }
867+
868+ fprintf (stderr , "add bytes = %i\n" , (int )num_bytes );
869+ #ifdef DEBUG
870+ assert (add_end - add_start > 0 );
871+ if (num_chunks > 1 ){
872+ fprintf (stderr , "Compression worked, got %i bytes of %i chunks\n" , (int )num_bytes , (int )num_chunks );
873+ }
874+ #endif
875+
876+ DeltaChunk * dc = DCV_append (& dcv );
877+ DC_init (dc , tbw , num_bytes , 0 );
878+
879+ // gather the data, or (possibly) share single blocks
880+ if (num_chunks > 1 ){
881+ uchar * dcdata = PyMem_Malloc (num_bytes );
882+ while (add_start < add_end ){
883+ const char bytes = * add_start ++ ;
884+ fprintf (stderr , "Copying %i bytes\n" , bytes );
885+ memcpy ((void * )dcdata , (void * )add_start , bytes );
886+ dcdata += bytes ;
887+ add_start += bytes ;
888+ }
889+ DC_set_data_with_ownership (dc , dcdata );
890+ } else {
891+ DC_set_data (dc , data - cmd , cmd , is_shared_data );
892+ }
893+
894+ tbw += num_bytes ;
847895 } else {
848896 error = 1 ;
849897 PyErr_SetString (PyExc_RuntimeError , "Encountered an unsupported delta cmd: 0" );
0 commit comments