@@ -89,6 +89,7 @@ static PyObject *PackIndexFile_sha_to_index(PyObject *self, PyObject *args)
// Short aliases for the integer types used throughout the delta code
typedef unsigned long long ull;
typedef unsigned int uint;
typedef unsigned char uchar;
// Only provide our own bool when the compiler does not already have one:
// <stdbool.h> makes `bool` a macro and C23 makes it a keyword, and redefining
// it would fail to compile in either case.
#if !defined(bool) && !defined(__bool_true_false_are_defined) && \
    !(defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L)
typedef uchar bool;
#endif
9293
9394// DELTA CHUNK
9495////////////////
@@ -97,45 +98,96 @@ typedef struct {
9798 ull to ;
9899 ull ts ;
99100 ull so ;
100- uchar * data ;
101+ const uchar * data ;
102+ bool data_shared ;
101103} DeltaChunk ;
102104
105+ inline
103106void DC_init (DeltaChunk * dc , ull to , ull ts , ull so )
104107{
105108 dc -> to = to ;
106109 dc -> ts = ts ;
107110 dc -> so = so ;
108111 dc -> data = NULL ;
112+ dc -> data_shared = 0 ;
109113}
110114
111- void DC_destroy (DeltaChunk * dc )
115+ inline
116+ void DC_deallocate_data (DeltaChunk * dc )
112117{
113- if (dc -> data ){
118+ if (! dc -> data_shared && dc -> data ){
114119 PyMem_Free ((void * )dc -> data );
115120 }
121+ dc -> data = NULL ;
116122}
117123
118- // Store a copy of data in our instance
119- void DC_set_data (DeltaChunk * dc , const uchar * data , Py_ssize_t dlen )
124+ inline
125+ void DC_destroy (DeltaChunk * dc )
120126{
121- if (dc -> data ){
122- PyMem_Free ((void * )dc -> data );
123- }
127+ DC_deallocate_data (dc );
128+ }
129+
130+ // Store a copy of data in our instance. If shared is 1, the data will be shared,
131+ // hence it will only be stored, but the memory will not be touched, or copied.
132+ inline
133+ void DC_set_data (DeltaChunk * dc , const uchar * data , Py_ssize_t dlen , bool shared )
134+ {
135+ DC_deallocate_data (dc );
124136
125137 if (data == 0 ){
126138 dc -> data = NULL ;
139+ dc -> data_shared = 0 ;
127140 return ;
128141 }
129142
130- dc -> data = (uchar * )PyMem_Malloc (dlen );
131- memcpy (dc -> data , data , dlen );
143+ dc -> data_shared = shared ;
144+ if (shared ){
145+ dc -> data = data ;
146+ } else {
147+ dc -> data = (uchar * )PyMem_Malloc (dlen );
148+ memcpy ((void * )dc -> data , (void * )data , dlen );
149+ }
150+
132151}
133152
153+ inline
134154ull DC_rbound (DeltaChunk * dc )
135155{
136156 return dc -> to + dc -> ts ;
137157}
138158
159+ // Copy all data from src to dest, the data pointer will be copied too
160+ inline
161+ void DC_copy_to (DeltaChunk * src , DeltaChunk * dest )
162+ {
163+ dest -> to = src -> to ;
164+ dest -> ts = src -> ts ;
165+ dest -> so = src -> so ;
166+ dest -> data_shared = 0 ;
167+
168+ DC_set_data (dest , src -> data , src -> ts , 0 );
169+ }
170+
171+ // Copy all data with the given offset and size. The source offset, as well
172+ // as the data will be truncated accordingly
173+ inline
174+ void DC_offset_copy_to (DeltaChunk * src , DeltaChunk * dest , ull ofs , ull size )
175+ {
176+ assert (size <= src -> ts );
177+ assert (src -> to + ofs + size <= DC_rbound (src ));
178+
179+ dest -> to = src -> to + ofs ;
180+ dest -> ts = size ;
181+ dest -> so = src -> so + ofs ;
182+
183+ if (src -> data ){
184+ DC_set_data (dest , src -> data + ofs , size , 0 );
185+ } else {
186+ dest -> data = NULL ;
187+ dest -> data_shared = 0 ;
188+ }
189+ }
190+
139191
140192// DELTA CHUNK VECTOR
141193/////////////////////
@@ -152,7 +204,7 @@ This may trigger a realloc, but will do nothing if the reserved size is already
152204large enough.
153205Return 1 on success, 0 on failure
154206*/
155- static
207+ inline
156208int DCV_grow (DeltaChunkVector * vec , uint num_dc )
157209{
158210 const uint grow_by_chunks = (vec -> size + num_dc ) - vec -> reserved_size ;
@@ -188,35 +240,48 @@ int DCV_init(DeltaChunkVector* vec, ull initial_size)
188240 return DCV_grow (vec , initial_size );
189241}
190242
191- static inline
243+ inline
192244ull DCV_len (DeltaChunkVector * vec )
193245{
194246 return vec -> size ;
195247}
196248
249+ inline
250+ ull DCV_lbound (DeltaChunkVector * vec )
251+ {
252+ assert (vec -> size && vec -> mem );
253+ return vec -> mem -> to ;
254+ }
255+
197256// Return item at index
198- static inline
257+ inline
199258DeltaChunk * DCV_get (DeltaChunkVector * vec , Py_ssize_t i )
200259{
201260 assert (i < vec -> size && vec -> mem );
202261 return & vec -> mem [i ];
203262}
204263
205- static inline
264+ inline
265+ ull DCV_rbound (DeltaChunkVector * vec )
266+ {
267+ return DC_rbound (DCV_get (vec , vec -> size - 1 ));
268+ }
269+
270+ inline
206271int DCV_empty (DeltaChunkVector * vec )
207272{
208273 return vec -> size == 0 ;
209274}
210275
211276// Return end pointer of the vector
212- static inline
277+ inline
213278DeltaChunk * DCV_end (DeltaChunkVector * vec )
214279{
215280 assert (!DCV_empty (vec ));
216281 return & vec -> mem [vec -> size ];
217282}
218283
219- void DCV_dealloc (DeltaChunkVector * vec )
284+ void DCV_destroy (DeltaChunkVector * vec )
220285{
221286 if (vec -> mem ){
222287#ifdef DEBUG
@@ -236,6 +301,14 @@ void DCV_dealloc(DeltaChunkVector* vec)
236301 }
237302}
238303
304+ // Reset this vector so that its existing memory can be filled again.
305+ // Memory will be kept, but not cleaned up
306+ inline
307+ void DCV_forget_members (DeltaChunkVector * vec )
308+ {
309+ vec -> size = 0 ;
310+ }
311+
239312// Append num-chunks to the end of the list, possibly reallocating existing ones
240313// Return a pointer to the first of the added items. They are already null initialized
241314// If num-chunks == 0, it returns the end pointer of the allocated memory
@@ -249,15 +322,17 @@ DeltaChunk* DCV_append_multiple(DeltaChunkVector* vec, uint num_chunks)
249322 Py_ssize_t old_size = vec -> size ;
250323 vec -> size += num_chunks ;
251324
325+ #ifdef DEBUG
252326 for (;old_size < vec -> size ; ++ old_size ){
253327 DC_init (DCV_get (vec , old_size ), 0 , 0 , 0 );
254328 }
329+ #endif
255330
256331 return & vec -> mem [old_size ];
257332}
258333
259334// Append one chunk to the end of the list, and return a pointer to it
260- // It will have been initialized.
335+ // It will not have been initialized !
261336static inline
262337DeltaChunk * DCV_append (DeltaChunkVector * vec )
263338{
@@ -270,6 +345,59 @@ DeltaChunk* DCV_append(DeltaChunkVector* vec)
270345 return next ;
271346}
272347
348+ // Write a slice as defined by its absolute offset in bytes and its size into the given
349+ // destination. The individual chunks written will be a deep copy of the source
350+ // data chunks
351+ // TODO: this could trigger copying many smallish add-chunk pieces - maybe some sort
352+ // of append-only memory pool would improve performance
353+ inline
354+ void DCV_copy_slice_to (DeltaChunkVector * src , DeltaChunkVector * dest , ull ofs , ull size )
355+ {
356+
357+ }
358+
359+
360+ // Take slices of bdcv into the corresponding area of the tdcv, which is the topmost
361+ // delta to apply. tmpl is used as temporary space and must be initialzed and destroyed by the
362+ // caller
363+ static
364+ void DCV_connect_with_base (DeltaChunkVector * tdcv , DeltaChunkVector * bdcv , DeltaChunkVector * tmpl )
365+ {
366+ DeltaChunk * dc = tdcv -> mem ;
367+ DeltaChunk * end = tdcv -> mem + tdcv -> size ;
368+ assert (dc );
369+
370+ for (;dc < end ; dc ++ )
371+ {
372+ // Data chunks don't need processing
373+ if (dc -> data ){
374+ continue ;
375+ }
376+
377+ // Copy Chunk Handling
378+ DCV_copy_slice_to (bdcv , tmpl , dc -> so , dc -> ts );
379+ // assert(tmpl->size);
380+
381+ // move target bounds
382+ DeltaChunk * cdc = tmpl -> mem ;
383+ DeltaChunk * cdcend = tmpl -> mem + tmpl -> size ;
384+ const ull ofs = dc -> to - dc -> so ;
385+ for (;cdc < cdcend ; cdc ++ ){
386+ cdc -> to += ofs ;
387+ }
388+
389+ // insert slice into our list, replacing our current chunk
390+ if (tmpl -> size == 1 ){
391+ * dc = * DCV_get (tmpl , 0 );
392+ } else {
393+
394+ }
395+
396+ // make sure the members will not be deallocated by the list
397+ DCV_forget_members (tmpl );
398+ }
399+ }
400+
273401// DELTA CHUNK LIST (PYTHON)
274402/////////////////////////////
275403
@@ -296,7 +424,7 @@ int DCL_init(DeltaChunkList*self, PyObject *args, PyObject *kwds)
296424static
297425void DCL_dealloc (DeltaChunkList * self )
298426{
299- DCV_dealloc (& (self -> vec ));
427+ DCV_destroy (& (self -> vec ));
300428}
301429
302430static
@@ -310,7 +438,7 @@ ull DCL_rbound(DeltaChunkList* self)
310438{
311439 if (DCV_empty (& self -> vec ))
312440 return 0 ;
313- return DC_rbound ( DCV_get ( & self -> vec , self -> vec . size - 1 ) );
441+ return DCV_rbound ( & self -> vec );
314442}
315443
316444static
@@ -421,10 +549,11 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
421549 DeltaChunkVector bdcv ;
422550 DeltaChunkVector tdcv ;
423551 DeltaChunkVector dcv ;
552+ DeltaChunkVector tmpl ;
424553 DCV_init (& bdcv , 0 );
425554 DCV_init (& dcv , 0 );
426555 DCV_init (& tdcv , 0 );
427-
556+ DCV_init ( & tmpl , 200 );
428557
429558 unsigned int dsi ;
430559 PyObject * ds ;
@@ -453,6 +582,9 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
453582
454583 // parse command stream
455584 ull tbw = 0 ; // Amount of target bytes written
585+ bool shared_data = dsi != 0 ;
586+ bool is_first_run = dsi == 0 ;
587+
456588 assert (data < dend );
457589 while (data < dend )
458590 {
@@ -481,9 +613,12 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
481613
482614 } else if (cmd ) {
483615 // TODO: Compress nodes by parsing them in advance
616+ // NOTE: Compression only necessary for all other deltas, not
617+ // for the first one, as we will share the data. It really depends
618+ // What's faster
484619 DeltaChunk * dc = DCV_append (& dcv );
485620 DC_init (dc , tbw , cmd , 0 );
486- DC_set_data (dc , data , cmd );
621+ DC_set_data (dc , data , cmd , shared_data );
487622 tbw += cmd ;
488623 data += cmd ;
489624 } else {
@@ -493,18 +628,20 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
493628 }
494629 }// END handle command opcodes
495630 assert (tbw == target_size );
496-
631+
632+ if (!is_first_run ){
633+ DCV_connect_with_base (& tdcv , & dcv , & tmpl );
634+ }
497635 // swap the vector
498636 // Skip the first vector, as it is also used as top chunk vector
499637 if (bdcv .mem != tdcv .mem ){
500- DCV_dealloc (& bdcv );
638+ DCV_destroy (& bdcv );
501639 }
502640 bdcv = dcv ;
503- if (dsi == 0 ){
641+ if (is_first_run ){
504642 tdcv = dcv ;
505643 }
506644 DCV_init (& dcv , 0 );
507-
508645
509646loop_end :
510647 // perform cleanup
@@ -524,18 +661,19 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
524661 Py_DECREF (stream_iter );
525662 }
526663
527- DCV_dealloc (& bdcv );
664+ DCV_destroy (& tmpl );
665+ DCV_destroy (& bdcv );
528666 if (dsi > 1 ){
529667 // otherwise dcv equals tcl
530- DCV_dealloc (& dcv );
668+ DCV_destroy (& dcv );
531669 }
532670
533671 // Return the actual Python object - it's just a container
534672 DeltaChunkList * dcl = DCL_new_instance ();
535673 if (!dcl ){
536674 PyErr_SetString (PyExc_RuntimeError , "Couldn't allocate list" );
537675 // Otherwise tdcv would be deallocated by the chunk list
538- DCV_dealloc (& tdcv );
676+ DCV_destroy (& tdcv );
539677 error = 1 ;
540678 } else {
541679 // Plain copy, don't deallocate
0 commit comments