Skip to content

Commit 89408f8

Browse files
committed
Initial frame of the connect_deltas function, which seems to do something. Debugging is hellish: you really have to raise a Python exception to get any information out of there, because printf doesn't print anything for some reason.
1 parent c0a1dc6 commit 89408f8

File tree

4 files changed

+310
-3
lines changed

4 files changed

+310
-3
lines changed

Makefile

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
# Build helpers for the package and its C extension.
PYTHON = python
SETUP = $(PYTHON) setup.py
TESTRUNNER = $(shell which nosetests)
TESTFLAGS =

all: build

# Use $(MAKE) so flags and the jobserver propagate to the sub-make.
doc::
	$(MAKE) -C doc/ html

build::
	$(SETUP) build
	$(SETUP) build_ext -i

install::
	$(SETUP) install

clean::
	$(SETUP) clean --all
	rm -f *.so

# Coverage is collected for the package the tests are run against (gitdb).
coverage:: build
	PYTHONPATH=. $(PYTHON) $(TESTRUNNER) --cover-package=gitdb --with-coverage --cover-erase --cover-inclusive gitdb
24+

_fun.c

Lines changed: 277 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
#include <Python.h>
22
#include <stdint.h>
3+
#include <assert.h>
4+
#include <stdio.h>
35

46
static PyObject *PackIndexFile_sha_to_index(PyObject *self, PyObject *args)
57
{
@@ -82,16 +84,289 @@ static PyObject *PackIndexFile_sha_to_index(PyObject *self, PyObject *args)
8284
}
8385

8486

87+
typedef unsigned long long ull;
88+
89+
// Internal Delta Chunk Objects
90+
typedef struct {
91+
ull to;
92+
ull ts;
93+
ull so;
94+
PyObject* data;
95+
96+
void* next;
97+
} DeltaChunk;
98+
99+
100+
/*
 * Fill in every field of `dc`.
 *
 * A new reference to `data` is taken when it is not NULL; `next` is stored
 * as-is. Previous contents of `dc` are overwritten without being released.
 */
void DC_init(DeltaChunk* dc, ull to, ull ts, ull so, PyObject* data, DeltaChunk* next)
{
    /* Link and payload first, the plain numeric fields afterwards. */
    dc->next = next;

    if (data != NULL) {
        Py_INCREF(data);
    }
    dc->data = data;

    dc->so = so;
    dc->ts = ts;
    dc->to = to;
}
110+
111+
/*
 * Release the payload reference held by `dc`.
 *
 * Py_CLEAR() drops the reference and resets `dc->data` to NULL, so the chunk
 * never keeps a dangling pointer and destroying it twice is harmless.
 * The chunk memory itself is not freed here.
 */
void DC_destroy(DeltaChunk* dc)
{
    Py_CLEAR(dc->data);
}
115+
116+
typedef struct {
117+
PyObject_HEAD
118+
// -----------
119+
DeltaChunk* head;
120+
DeltaChunk* tail;
121+
ull size;
122+
123+
} DeltaChunkList;
124+
125+
/* Right bound of a chunk: its start offset plus its length. */
ull DC_rbound(DeltaChunk* dc)
{
    const ull start = dc->to;
    const ull length = dc->ts;

    return start + length;
}
129+
130+
131+
static
132+
int DCL_init(DeltaChunkList *self, PyObject *args, PyObject *kwds)
133+
{
134+
((DeltaChunkList*)self)->head = NULL;
135+
return 1;
136+
}
137+
138+
static
139+
void DCL_dealloc(DeltaChunkList* self)
140+
{
141+
// TODO: deallocate linked list
142+
if (self->head){
143+
self->head = NULL;
144+
self->tail = NULL;
145+
self->size = 0;
146+
}
147+
}
148+
149+
static
150+
PyObject* DCL_len(PyObject* self)
151+
{
152+
return PyLong_FromUnsignedLongLong(0);
153+
}
154+
155+
static
156+
PyObject* DCL_rbound(DeltaChunkList* self)
157+
{
158+
if (!self->head)
159+
return PyLong_FromUnsignedLongLong(0);
160+
return PyLong_FromUnsignedLongLong(DC_rbound(self->tail));
161+
}
162+
163+
static
164+
PyObject* DCL_apply(PyObject* self, PyObject* args)
165+
{
166+
167+
Py_RETURN_NONE;
168+
}
169+
170+
171+
172+
static PyMethodDef DCL_methods[] = {
173+
{"apply", (PyCFunction)DCL_apply, METH_VARARGS, "Apply the given iterable of delta streams" },
174+
{"__len__", (PyCFunction)DCL_len, METH_NOARGS, NULL},
175+
{"rbound", (PyCFunction)DCL_rbound, METH_NOARGS, NULL},
176+
{NULL} /* Sentinel */
177+
};
178+
179+
/*
 * Type object for DeltaChunkList.
 *
 * Only the slots this type actually provides are named; every slot that is
 * not listed is zero-initialized. tp_new is left empty here and assigned at
 * module initialization time.
 */
static PyTypeObject DeltaChunkListType = {
    PyObject_HEAD_INIT(NULL)
    0,                                          /* ob_size */
    .tp_name      = "DeltaChunkList",
    .tp_basicsize = sizeof(DeltaChunkList),
    .tp_itemsize  = 0,
    .tp_dealloc   = (destructor)DCL_dealloc,
    .tp_flags     = Py_TPFLAGS_DEFAULT,
    .tp_doc       = "Minimal Delta Chunk List",
    .tp_methods   = DCL_methods,
    .tp_init      = (initproc)DCL_init,
};
220+
221+
222+
static inline
223+
ull msb_size(const char* data, Py_ssize_t dlen, Py_ssize_t offset, Py_ssize_t* out_bytes_read){
224+
ull size = 0;
225+
Py_ssize_t i = 0;
226+
const char* dend = data + dlen;
227+
for (data = data + offset; data < dend; data+=1, i+=1){
228+
char c = *data;
229+
size |= (c & 0x7f) << i*7;
230+
if (!(c & 0x80)){
231+
break;
232+
}
233+
}// END while in range
234+
235+
*out_bytes_read = i+offset;
236+
return size;
237+
}
238+
239+
static PyObject* connect_deltas(PyObject *self, PyObject *dstreams)
240+
{
241+
// obtain iterator
242+
PyObject* stream_iter = 0;
243+
if (!PyIter_Check(dstreams)){
244+
stream_iter = PyObject_GetIter(dstreams);
245+
if (!stream_iter){
246+
PyErr_SetString(PyExc_RuntimeError, "Couldn't obtain iterator for streams");
247+
return NULL;
248+
}
249+
} else {
250+
stream_iter = dstreams;
251+
}
252+
253+
DeltaChunkList* bdcl = 0;
254+
DeltaChunkList* tdcl = 0;
255+
DeltaChunkList* dcl = 0;
256+
257+
dcl = tdcl = PyObject_New(DeltaChunkList, &DeltaChunkListType);
258+
if (!dcl){
259+
PyErr_SetString(PyExc_RuntimeError, "Couldn't allocate list");
260+
return NULL;
261+
}
262+
263+
unsigned int dsi;
264+
PyObject* ds;
265+
int error = 0;
266+
for (ds = PyIter_Next(stream_iter), dsi = 0; ds != NULL; ++dsi, ds = PyIter_Next(stream_iter))
267+
{
268+
PyObject* db = PyObject_CallMethod(ds, "read", 0);
269+
if (!PyObject_CheckReadBuffer(db)){
270+
error = 1;
271+
PyErr_SetString(PyExc_RuntimeError, "Returned buffer didn't support the buffer protocol");
272+
goto loop_end;
273+
}
274+
275+
const char* data;
276+
Py_ssize_t dlen;
277+
PyObject_AsReadBuffer(db, (const void**)&data, &dlen);
278+
279+
// read header
280+
Py_ssize_t ofs = 0;
281+
const ull base_size = msb_size(data, dlen, 0, &ofs);
282+
const ull target_size = msb_size(data, dlen, ofs, &ofs);
283+
284+
// parse command stream
285+
const char* dend = data + dlen;
286+
ull tbw = 0; // Amount of target bytes written
287+
for (data = data + ofs; data < dend; ++data)
288+
{
289+
const char cmd = *data;
290+
291+
if (cmd & 0x80)
292+
{
293+
unsigned long cp_off = 0, cp_size = 0;
294+
if (cmd & 0x01) cp_off = *data++;
295+
if (cmd & 0x02) cp_off |= (*data++ << 8);
296+
if (cmd & 0x04) cp_off |= (*data++ << 16);
297+
if (cmd & 0x08) cp_off |= ((unsigned) *data++ << 24);
298+
if (cmd & 0x10) cp_size = *data++;
299+
if (cmd & 0x20) cp_size |= (*data++ << 8);
300+
if (cmd & 0x40) cp_size |= (*data++ << 16);
301+
if (cp_size == 0) cp_size = 0x10000;
302+
303+
const unsigned long rbound = cp_off + cp_size;
304+
if (rbound < cp_size ||
305+
rbound > base_size){
306+
goto loop_end;
307+
}
308+
309+
// TODO: Add node
310+
tbw += cp_size;
311+
312+
} else if (cmd) {
313+
// TODO: Add node
314+
tbw += cmd;
315+
} else {
316+
error = 1;
317+
PyErr_SetString(PyExc_RuntimeError, "Encountered an unsupported delta cmd: 0");
318+
goto loop_end;
319+
}
320+
}// END handle command opcodes
321+
322+
assert(tbw == target_size);
323+
324+
loop_end:
325+
// perform cleanup
326+
Py_DECREF(ds);
327+
Py_DECREF(db);
328+
329+
if (error){
330+
break;
331+
}
332+
}// END for each stream object
333+
334+
if (dsi == 0 && ! error){
335+
PyErr_SetString(PyExc_ValueError, "No streams provided");
336+
}
337+
338+
if (stream_iter != dstreams){
339+
Py_DECREF(stream_iter);
340+
}
341+
342+
if (error){
343+
return NULL;
344+
}
345+
346+
return (PyObject*)tdcl;
347+
}
348+
85349
static PyMethodDef py_fun[] = {
86-
{ "PackIndexFile_sha_to_index", (PyCFunction)PackIndexFile_sha_to_index, METH_VARARGS, NULL },
350+
{ "PackIndexFile_sha_to_index", (PyCFunction)PackIndexFile_sha_to_index, METH_VARARGS, "TODO" },
351+
{ "connect_deltas", (PyCFunction)connect_deltas, METH_O, "TODO" },
87352
{ NULL, NULL, 0, NULL }
88353
};
89354

90-
void init_fun(void)
355+
#ifndef PyMODINIT_FUNC /* declarations for DLL import/export */
356+
#define PyMODINIT_FUNC void
357+
#endif
358+
PyMODINIT_FUNC init_fun(void)
91359
{
92360
PyObject *m;
93361

362+
DeltaChunkListType.tp_new = PyType_GenericNew;
363+
if (PyType_Ready(&DeltaChunkListType) < 0)
364+
return;
365+
94366
m = Py_InitModule3("_fun", py_fun, NULL);
95367
if (m == NULL)
96368
return;
369+
370+
Py_INCREF(&DeltaChunkListType);
371+
PyModule_AddObject(m, "Noddy", (PyObject *)&DeltaChunkListType);
97372
}

fun.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,3 +701,8 @@ def is_equal_canonical_sha(canonical_length, match, sha1):
701701

702702
#} END routines
703703

704+
705+
try:
706+
from _fun import connect_deltas
707+
except ImportError:
708+
pass

stream.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -338,8 +338,11 @@ def _set_cache_(self, attr):
338338
# the last delta, and reverse-merge its ancestor delta, until we receive
339339
# the final delta data stream.
340340
dcl = connect_deltas(self._dstreams)
341+
assert dcl is not None
341342

342-
if len(dcl) == 0:
343+
# call len directly, as the (optional) c version doesn't implement the sequence
344+
# protocol
345+
if dcl.__len__() == 0:
343346
self._size = 0
344347
self._mm_target = allocate_memory(0)
345348
return

0 commit comments

Comments
 (0)