Skip to content

Commit 489f763

Browse files
committed
Now adding chunks to the vectors, next up is to implement the actual chunk merging
1 parent 9ba93c0 commit 489f763

File tree

2 files changed

+71
-34
lines changed

2 files changed

+71
-34
lines changed

_fun.c

Lines changed: 70 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include <assert.h>
44
#include <stdio.h>
55
#include <math.h>
6+
#include <string.h>
67

78
static PyObject *PackIndexFile_sha_to_index(PyObject *self, PyObject *args)
89
{
@@ -87,6 +88,7 @@ static PyObject *PackIndexFile_sha_to_index(PyObject *self, PyObject *args)
8788

8889
typedef unsigned long long ull;
8990
typedef unsigned int uint;
91+
typedef unsigned char uchar;
9092

9193
// DELTA CHUNK
9294
////////////////
@@ -95,21 +97,38 @@ typedef struct {
9597
ull to;
9698
ull ts;
9799
ull so;
98-
PyObject* data;
100+
uchar* data;
99101
} DeltaChunk;
100102

101-
// Initialize a chunk in place.
//   dc - chunk to initialize
//   to - stored in dc->to
//   ts - stored in dc->ts
//   so - stored in dc->so
// The data pointer is reset to NULL; whatever value it held before is
// overwritten without being freed. Use DC_set_data() to attach a buffer.
void DC_init(DeltaChunk* dc, ull to, ull ts, ull so)
{
	// A chunk owns no data buffer right after initialization
	dc->data = NULL;

	dc->so = so;
	dc->ts = ts;
	dc->to = to;
}
109110

110111
// Release the data buffer held by the chunk, if there is one.
// The chunk structure itself is not freed. After the call dc->data is NULL,
// so calling DC_destroy() again or calling DC_set_data() afterwards is safe.
void DC_destroy(DeltaChunk* dc)
{
	// PyMem_Free(NULL) performs no operation, so no guard is required
	PyMem_Free((void*)dc->data);

	// Clear the pointer so the same buffer can never be freed twice
	dc->data = NULL;
}
117+
118+
// Store a copy of data in our instance
119+
void DC_set_data(DeltaChunk* dc, const uchar* data, Py_ssize_t dlen)
120+
{
121+
if (dc->data){
122+
PyMem_Free((void*)dc->data);
123+
}
124+
125+
if (data == 0){
126+
dc->data = NULL;
127+
return;
128+
}
129+
130+
dc->data = (uchar*)PyMem_Malloc(dlen);
131+
memcpy(dc->data, data, dlen);
113132
}
114133

115134
ull DC_rbound(DeltaChunk* dc)
@@ -146,7 +165,11 @@ int DCV_grow(DeltaChunkVector* vec, uint num_dc)
146165
} else {
147166
vec->mem = PyMem_Realloc(vec->mem, (vec->reserved_size + grow_by_chunks) * sizeof(DeltaChunk));
148167
}
149-
assert(vec->mem != NULL);
168+
169+
if (vec->mem == NULL){
170+
Py_FatalError("Could not allocate memory for append operation");
171+
}
172+
150173
vec->reserved_size = vec->reserved_size + grow_by_chunks;
151174

152175
#ifdef DEBUG
@@ -220,21 +243,33 @@ static inline
220243
DeltaChunk* DCV_append_multiple(DeltaChunkVector* vec, uint num_chunks)
{
	// Appends num_chunks chunks to the end of vec, initializes each of them
	// with DC_init(chunk, 0, 0, 0) and returns a pointer to the first one.
	// With num_chunks == 0 the returned pointer is the end of the used range.

	// Make room if required. DCV_grow() aborts through Py_FatalError() when
	// the allocation fails, so there is nothing to check here and no
	// unconditional abort must follow.
	if (vec->size + num_chunks > vec->reserved_size){
		DCV_grow(vec, (vec->size + num_chunks) - vec->reserved_size);
	}

	// Remember where the new chunks start; this index must not be advanced
	// by the loop below because it is what we hand back to the caller.
	const Py_ssize_t first = vec->size;
	Py_ssize_t i;

	vec->size += num_chunks;

	for (i = first; i < vec->size; ++i){
		DC_init(DCV_get(vec, i), 0, 0, 0);
	}

	return &vec->mem[first];
}
237258

259+
// Append one chunk to the end of the list, and return a pointer to it
260+
// It will have been initialized.
261+
static inline
262+
DeltaChunk* DCV_append(DeltaChunkVector* vec)
263+
{
264+
if (vec->size + 1 > vec->reserved_size){
265+
DCV_grow(vec, 1);
266+
}
267+
268+
DeltaChunk* next = vec->mem + vec->size;
269+
vec->size += 1;
270+
return next;
271+
}
272+
238273
// DELTA CHUNK LIST (PYTHON)
239274
/////////////////////////////
240275

@@ -354,21 +389,18 @@ DeltaChunkList* DCL_new_instance(void)
354389
return dcl;
355390
}
356391

357-
static inline
358-
ull msb_size(const char* data, Py_ssize_t dlen, Py_ssize_t offset, Py_ssize_t* out_bytes_read){
359-
ull size = 0;
360-
Py_ssize_t i = 0;
361-
const char* dend = data + dlen;
362-
for (data = data + offset; data < dend; data+=1, i+=1){
363-
char c = *data;
364-
size |= (c & 0x7f) << i*7;
365-
if (!(c & 0x80)){
366-
break;
367-
}
368-
}// END while in range
369-
370-
*out_bytes_read = i+offset;
371-
assert((*out_bytes_read * 8) - (*out_bytes_read - 1) <= sizeof(ull) * 8);
392+
inline
393+
ull msb_size(const uchar** datap, const uchar* top)
394+
{
395+
const uchar *data = *datap;
396+
ull cmd, size = 0;
397+
uint i = 0;
398+
do {
399+
cmd = *data++;
400+
size |= (cmd & 0x7f) << i;
401+
i += 7;
402+
} while (cmd & 0x80 && data < top);
403+
*datap = data;
372404
return size;
373405
}
374406

@@ -406,25 +438,25 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
406438
goto loop_end;
407439
}
408440

409-
const char* data;
441+
const uchar* data;
410442
Py_ssize_t dlen;
411443
PyObject_AsReadBuffer(db, (const void**)&data, &dlen);
444+
const uchar* dend = data + dlen;
412445

413446
// read header
414-
Py_ssize_t ofs = 0;
415-
const ull base_size = msb_size(data, dlen, 0, &ofs);
416-
const ull target_size = msb_size(data, dlen, ofs, &ofs);
447+
const ull base_size = msb_size(&data, dend);
448+
const ull target_size = msb_size(&data, dend);
417449

418450
// estimate number of ops - assume one third adds, half two byte (size+offset) copies
419451
const uint approx_num_cmds = (dlen / 3) + (((dlen / 3) * 2) / (2+2+1));
420452
DCV_grow(&dcv, approx_num_cmds);
421453

422454
// parse command stream
423-
const char* dend = data + dlen;
424455
ull tbw = 0; // Amount of target bytes written
425-
for (data = data + ofs; data < dend; ++data)
456+
assert(data < dend);
457+
while (data < dend)
426458
{
427-
const char cmd = *data;
459+
const char cmd = *data++;
428460

429461
if (cmd & 0x80)
430462
{
@@ -444,12 +476,16 @@ static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
444476
break;
445477
}
446478

447-
// TODO: Add node
479+
DC_init(DCV_append(&dcv), tbw, cp_size, cp_off);
448480
tbw += cp_size;
449481

450482
} else if (cmd) {
451-
// TODO: Add node
483+
// TODO: Compress nodes by parsing them in advance
484+
DeltaChunk* dc = DCV_append(&dcv);
485+
DC_init(dc, tbw, cmd, 0);
486+
DC_set_data(dc, data, cmd);
452487
tbw += cmd;
488+
data += cmd;
453489
} else {
454490
error = 1;
455491
PyErr_SetString(PyExc_RuntimeError, "Encountered an unsupported delta cmd: 0");

fun.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,7 @@ def is_equal_canonical_sha(canonical_length, match, sha1):
703703

704704

705705
try:
706+
# raise ImportError; # DEBUG
706707
from _fun import connect_deltas
707708
except ImportError:
708709
pass

0 commit comments

Comments
 (0)