Skip to content

Commit a93363c

Browse files
committed
Prepared the slicing, as well as a few accompanying methods. There is still quite a lot of functionality missing.
1 parent 489f763 commit a93363c

File tree

1 file changed

+166
-28
lines changed

1 file changed

+166
-28
lines changed

_fun.c

Lines changed: 166 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ static PyObject *PackIndexFile_sha_to_index(PyObject *self, PyObject *args)
8989
// Short aliases for the unsigned integer types used throughout this file.
// ull carries the chunk offsets and sizes (to, ts, so) of a DeltaChunk.
typedef unsigned long long ull;
9090
typedef unsigned int uint;
9191
// uchar is the element type of the raw delta data buffers.
typedef unsigned char uchar;
92+
// One-byte flag type, used e.g. for DeltaChunk.data_shared.
// NOTE(review): the name `bool` collides with <stdbool.h> (and is a keyword in C23);
// confirm that no header pulled into this file defines it.
typedef uchar bool;
9293

9394
// DELTA CHUNK
9495
////////////////
@@ -97,45 +98,96 @@ typedef struct {
9798
ull to;
9899
ull ts;
99100
ull so;
100-
uchar* data;
101+
const uchar* data;
102+
bool data_shared;
101103
} DeltaChunk;
102104

105+
inline
103106
void DC_init(DeltaChunk* dc, ull to, ull ts, ull so)
104107
{
105108
dc->to = to;
106109
dc->ts = ts;
107110
dc->so = so;
108111
dc->data = NULL;
112+
dc->data_shared = 0;
109113
}
110114

111-
void DC_destroy(DeltaChunk* dc)
115+
inline
116+
void DC_deallocate_data(DeltaChunk* dc)
112117
{
113-
if (dc->data){
118+
if (!dc->data_shared && dc->data){
114119
PyMem_Free((void*)dc->data);
115120
}
121+
dc->data = NULL;
116122
}
117123

118-
// Store a copy of data in our instance
119-
void DC_set_data(DeltaChunk* dc, const uchar* data, Py_ssize_t dlen)
124+
inline
125+
void DC_destroy(DeltaChunk* dc)
120126
{
121-
if (dc->data){
122-
PyMem_Free((void*)dc->data);
123-
}
127+
DC_deallocate_data(dc);
128+
}
129+
130+
// Store a copy of data in our instance. If shared is 1, the data will be shared,
131+
// hence it will only be stored, but the memory will not be touched, or copied.
132+
inline
133+
void DC_set_data(DeltaChunk* dc, const uchar* data, Py_ssize_t dlen, bool shared)
134+
{
135+
DC_deallocate_data(dc);
124136

125137
if (data == 0){
126138
dc->data = NULL;
139+
dc->data_shared = 0;
127140
return;
128141
}
129142

130-
dc->data = (uchar*)PyMem_Malloc(dlen);
131-
memcpy(dc->data, data, dlen);
143+
dc->data_shared = shared;
144+
if (shared){
145+
dc->data = data;
146+
} else {
147+
dc->data = (uchar*)PyMem_Malloc(dlen);
148+
memcpy((void*)dc->data, (void*)data, dlen);
149+
}
150+
132151
}
133152

153+
inline
134154
ull DC_rbound(DeltaChunk* dc)
135155
{
136156
return dc->to + dc->ts;
137157
}
138158

159+
// Copy all data from src to dest, the data pointer will be copied too
160+
inline
161+
void DC_copy_to(DeltaChunk* src, DeltaChunk* dest)
162+
{
163+
dest->to = src->to;
164+
dest->ts = src->ts;
165+
dest->so = src->so;
166+
dest->data_shared = 0;
167+
168+
DC_set_data(dest, src->data, src->ts, 0);
169+
}
170+
171+
// Copy all data with the given offset and size. The source offset, as well
172+
// as the data will be truncated accordingly
173+
inline
174+
void DC_offset_copy_to(DeltaChunk* src, DeltaChunk* dest, ull ofs, ull size)
175+
{
176+
assert(size <= src->ts);
177+
assert(src->to + ofs + size <= DC_rbound(src));
178+
179+
dest->to = src->to + ofs;
180+
dest->ts = size;
181+
dest->so = src->so + ofs;
182+
183+
if (src->data){
184+
DC_set_data(dest, src->data + ofs, size, 0);
185+
} else {
186+
dest->data = NULL;
187+
dest->data_shared = 0;
188+
}
189+
}
190+
139191

140192
// DELTA CHUNK VECTOR
141193
/////////////////////
@@ -152,7 +204,7 @@ This may trigger a realloc, but will do nothing if the reserved size is already
152204
large enough.
153205
Return 1 on success, 0 on failure
154206
*/
155-
static
207+
inline
156208
int DCV_grow(DeltaChunkVector* vec, uint num_dc)
157209
{
158210
const uint grow_by_chunks = (vec->size + num_dc) - vec->reserved_size;
@@ -188,35 +240,48 @@ int DCV_init(DeltaChunkVector* vec, ull initial_size)
188240
return DCV_grow(vec, initial_size);
189241
}
190242

191-
static inline
243+
inline
192244
ull DCV_len(DeltaChunkVector* vec)
193245
{
194246
return vec->size;
195247
}
196248

249+
inline
250+
ull DCV_lbound(DeltaChunkVector* vec)
251+
{
252+
assert(vec->size && vec->mem);
253+
return vec->mem->to;
254+
}
255+
197256
// Return item at index
198-
static inline
257+
inline
199258
DeltaChunk* DCV_get(DeltaChunkVector* vec, Py_ssize_t i)
200259
{
201260
assert(i < vec->size && vec->mem);
202261
return &vec->mem[i];
203262
}
204263

205-
static inline
264+
inline
265+
ull DCV_rbound(DeltaChunkVector* vec)
266+
{
267+
return DC_rbound(DCV_get(vec, vec->size-1));
268+
}
269+
270+
inline
206271
int DCV_empty(DeltaChunkVector* vec)
207272
{
208273
return vec->size == 0;
209274
}
210275

211276
// Return end pointer of the vector
212-
static inline
277+
inline
213278
DeltaChunk* DCV_end(DeltaChunkVector* vec)
214279
{
215280
assert(!DCV_empty(vec));
216281
return &vec->mem[vec->size];
217282
}
218283

219-
void DCV_dealloc(DeltaChunkVector* vec)
284+
void DCV_destroy(DeltaChunkVector* vec)
220285
{
221286
if (vec->mem){
222287
#ifdef DEBUG
@@ -236,6 +301,14 @@ void DCV_dealloc(DeltaChunkVector* vec)
236301
}
237302
}
238303

304+
// Reset this vector so that its existing memory can be filled again.
305+
// Memory will be kept, but not cleaned up
306+
inline
307+
void DCV_forget_members(DeltaChunkVector* vec)
308+
{
309+
vec->size = 0;
310+
}
311+
239312
// Append num-chunks to the end of the list, possibly reallocating existing ones
240313
// Return a pointer to the first of the added items. They are already null initialized
241314
// If num-chunks == 0, it returns the end pointer of the allocated memory
@@ -249,15 +322,17 @@ DeltaChunk* DCV_append_multiple(DeltaChunkVector* vec, uint num_chunks)
249322
Py_ssize_t old_size = vec->size;
250323
vec->size += num_chunks;
251324

325+
#ifdef DEBUG
252326
for(;old_size < vec->size; ++old_size){
253327
DC_init(DCV_get(vec, old_size), 0, 0, 0);
254328
}
329+
#endif
255330

256331
return &vec->mem[old_size];
257332
}
258333

259334
// Append one chunk to the end of the list, and return a pointer to it
260-
// It will have been initialized.
335+
// It will not have been initialized !
261336
static inline
262337
DeltaChunk* DCV_append(DeltaChunkVector* vec)
263338
{
@@ -270,6 +345,59 @@ DeltaChunk* DCV_append(DeltaChunkVector* vec)
270345
return next;
271346
}
272347

348+
// Write a slice as defined by its absolute offset in bytes and its size into the given
349+
// destination. The individual chunks written will be a deep copy of the source
350+
// data chunks
351+
// TODO: this could trigger copying many smallish add-chunk pieces - maybe some sort
352+
// of append-only memory pool would improve performance
353+
inline
354+
void DCV_copy_slice_to(DeltaChunkVector* src, DeltaChunkVector* dest, ull ofs, ull size)
355+
{
356+
357+
}
358+
359+
360+
// Take slices of bdcv into the corresponding area of the tdcv, which is the topmost
361+
// delta to apply. tmpl is used as temporary space and must be initialzed and destroyed by the
362+
// caller
363+
static
364+
void DCV_connect_with_base(DeltaChunkVector* tdcv, DeltaChunkVector* bdcv, DeltaChunkVector* tmpl)
365+
{
366+
DeltaChunk* dc = tdcv->mem;
367+
DeltaChunk* end = tdcv->mem + tdcv->size;
368+
assert(dc);
369+
370+
for (;dc < end; dc++)
371+
{
372+
// Data chunks don't need processing
373+
if (dc->data){
374+
continue;
375+
}
376+
377+
// Copy Chunk Handling
378+
DCV_copy_slice_to(bdcv, tmpl, dc->so, dc->ts);
379+
// assert(tmpl->size);
380+
381+
// move target bounds
382+
DeltaChunk* cdc = tmpl->mem;
383+
DeltaChunk* cdcend = tmpl->mem + tmpl->size;
384+
const ull ofs = dc->to - dc->so;
385+
for(;cdc < cdcend; cdc++){
386+
cdc->to += ofs;
387+
}
388+
389+
// insert slice into our list, replacing our current chunk
390+
if (tmpl->size == 1){
391+
*dc = *DCV_get(tmpl, 0);
392+
} else {
393+
394+
}
395+
396+
// make sure the members will not be deallocated by the list
397+
DCV_forget_members(tmpl);
398+
}
399+
}
400+
273401
// DELTA CHUNK LIST (PYTHON)
274402
/////////////////////////////
275403

@@ -296,7 +424,7 @@ int DCL_init(DeltaChunkList*self, PyObject *args, PyObject *kwds)
296424
static
297425
void DCL_dealloc(DeltaChunkList* self)
298426
{
299-
DCV_dealloc(&(self->vec));
427+
DCV_destroy(&(self->vec));
300428
}
301429

302430
static
@@ -310,7 +438,7 @@ ull DCL_rbound(DeltaChunkList* self)
310438
{
311439
if (DCV_empty(&self->vec))
312440
return 0;
313-
return DC_rbound(DCV_get(&self->vec, self->vec.size - 1));
441+
return DCV_rbound(&self->vec);
314442
}
315443

316444
static
@@ -421,10 +549,11 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
421549
DeltaChunkVector bdcv;
422550
DeltaChunkVector tdcv;
423551
DeltaChunkVector dcv;
552+
DeltaChunkVector tmpl;
424553
DCV_init(&bdcv, 0);
425554
DCV_init(&dcv, 0);
426555
DCV_init(&tdcv, 0);
427-
556+
DCV_init(&tmpl, 200);
428557

429558
unsigned int dsi;
430559
PyObject* ds;
@@ -453,6 +582,9 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
453582

454583
// parse command stream
455584
ull tbw = 0; // Amount of target bytes written
585+
bool shared_data = dsi != 0;
586+
bool is_first_run = dsi == 0;
587+
456588
assert(data < dend);
457589
while (data < dend)
458590
{
@@ -481,9 +613,12 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
481613

482614
} else if (cmd) {
483615
// TODO: Compress nodes by parsing them in advance
616+
// NOTE: Compression only necessary for all other deltas, not
617+
// for the first one, as we will share the data. It really depends
618+
// What's faster
484619
DeltaChunk* dc = DCV_append(&dcv);
485620
DC_init(dc, tbw, cmd, 0);
486-
DC_set_data(dc, data, cmd);
621+
DC_set_data(dc, data, cmd, shared_data);
487622
tbw += cmd;
488623
data += cmd;
489624
} else {
@@ -493,18 +628,20 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
493628
}
494629
}// END handle command opcodes
495630
assert(tbw == target_size);
496-
631+
632+
if (!is_first_run){
633+
DCV_connect_with_base(&tdcv, &dcv, &tmpl);
634+
}
497635
// swap the vector
498636
// Skip the first vector, as it is also used as top chunk vector
499637
if (bdcv.mem != tdcv.mem){
500-
DCV_dealloc(&bdcv);
638+
DCV_destroy(&bdcv);
501639
}
502640
bdcv = dcv;
503-
if (dsi == 0){
641+
if (is_first_run){
504642
tdcv = dcv;
505643
}
506644
DCV_init(&dcv, 0);
507-
508645

509646
loop_end:
510647
// perform cleanup
@@ -524,18 +661,19 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
524661
Py_DECREF(stream_iter);
525662
}
526663

527-
DCV_dealloc(&bdcv);
664+
DCV_destroy(&tmpl);
665+
DCV_destroy(&bdcv);
528666
if (dsi > 1){
529667
// otherwise dcv equals tcl
530-
DCV_dealloc(&dcv);
668+
DCV_destroy(&dcv);
531669
}
532670

533671
// Return the actual python object - its just a container
534672
DeltaChunkList* dcl = DCL_new_instance();
535673
if (!dcl){
536674
PyErr_SetString(PyExc_RuntimeError, "Couldn't allocate list");
537675
// Otherwise tdcv would be deallocated by the chunk list
538-
DCV_dealloc(&tdcv);
676+
DCV_destroy(&tdcv);
539677
error = 1;
540678
} else {
541679
// Plain copy, don't deallocate

0 commit comments

Comments
 (0)