Skip to content

Commit fee318e

Browse files
Added support for columns of type VECTOR (currently requires access to a
limited availability release of the database).
1 parent 16f92d6 commit fee318e

26 files changed

+477
-28
lines changed

doc/src/release_notes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ Common Changes
6262
columns which have the check constraint ``IS JSON FORMAT OSON`` enabled.
6363
#) Added boolean property :data:`FetchInfo.is_oson` which is set when a column
6464
has the check constraint "IS JSON FORMAT OSON" enabled.
65+
#) Added support for columns of type vector (currently requires access to a
66+
limited availability release of the database).
6567
#) Errors raised when calling :meth:`Cursor.executemany()` with PL/SQL now
6668
have the :data:`oracledb._Error.offset` attribute populated with the last
6769
iteration that succeeded

src/oracledb/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,10 @@
147147
# flags for tpc_end()
148148
TPC_END_NORMAL as TPC_END_NORMAL,
149149
TPC_END_SUSPEND as TPC_END_SUSPEND,
150+
# vector types
151+
VECTOR_FORMAT_FLOAT32 as VECTOR_FORMAT_FLOAT32,
152+
VECTOR_FORMAT_FLOAT64 as VECTOR_FORMAT_FLOAT64,
153+
VECTOR_FORMAT_INT8 as VECTOR_FORMAT_INT8,
150154
)
151155

152156
from .exceptions import (
@@ -237,6 +241,7 @@
237241
DB_TYPE_UNKNOWN as DB_TYPE_UNKNOWN,
238242
DB_TYPE_UROWID as DB_TYPE_UROWID,
239243
DB_TYPE_VARCHAR as DB_TYPE_VARCHAR,
244+
DB_TYPE_VECTOR as DB_TYPE_VECTOR,
240245
DB_TYPE_XMLTYPE as DB_TYPE_XMLTYPE,
241246
# API types
242247
BINARY as BINARY,

src/oracledb/base_impl.pxd

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333

3434
from libc.stdint cimport int8_t, int16_t, int32_t, int64_t
3535
from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
36+
from cpython cimport array
3637

3738
ctypedef unsigned char char_type
3839

@@ -44,7 +45,7 @@ cdef enum:
4445

4546
cdef enum:
4647
DB_TYPE_NUM_MIN = 2000
47-
DB_TYPE_NUM_MAX = 2032
48+
DB_TYPE_NUM_MAX = 2033
4849

4950
DB_TYPE_NUM_BFILE = 2020
5051
DB_TYPE_NUM_BINARY_DOUBLE = 2008
@@ -75,6 +76,7 @@ cdef enum:
7576
DB_TYPE_NUM_UNKNOWN = 0
7677
DB_TYPE_NUM_UROWID = 2030
7778
DB_TYPE_NUM_VARCHAR = 2001
79+
DB_TYPE_NUM_VECTOR = 2033
7880
DB_TYPE_NUM_XMLTYPE = 2032
7981

8082
cdef enum:
@@ -91,6 +93,7 @@ cdef enum:
9193
NATIVE_TYPE_NUM_ROWID = 3012
9294
NATIVE_TYPE_NUM_STMT = 3010
9395
NATIVE_TYPE_NUM_TIMESTAMP = 3005
96+
NATIVE_TYPE_NUM_VECTOR = 3017
9497

9598
cdef enum:
9699
CS_FORM_IMPLICIT = 1
@@ -169,8 +172,10 @@ cdef class Buffer:
169172
cdef int _write_raw_bytes_and_length(self, const char_type *ptr,
170173
ssize_t num_bytes) except -1
171174
cdef inline ssize_t bytes_left(self)
172-
cdef object parse_binary_double(self, const uint8_t* ptr)
173-
cdef object parse_binary_float(self, const uint8_t* ptr)
175+
cdef int parse_binary_double(self, const uint8_t* ptr,
176+
double *double_ptr) except -1
177+
cdef int parse_binary_float(self, const uint8_t* ptr,
178+
float *float_ptr) except -1
174179
cdef object parse_date(self, const uint8_t* ptr, ssize_t num_bytes)
175180
cdef object parse_interval_ds(self, const uint8_t* ptr)
176181
cdef object parse_oracle_number(self, const uint8_t* ptr,
@@ -207,8 +212,10 @@ cdef class Buffer:
207212
cdef inline int skip_ub2(self) except -1
208213
cdef inline int skip_ub4(self) except -1
209214
cdef inline int skip_ub8(self) except -1
210-
cdef int write_binary_double(self, double value) except -1
211-
cdef int write_binary_float(self, float value) except -1
215+
cdef int write_binary_double(self, double value,
216+
bint write_length=*) except -1
217+
cdef int write_binary_float(self, float value,
218+
bint write_length=*) except -1
212219
cdef int write_bool(self, bint value) except -1
213220
cdef int write_bytes(self, bytes value) except -1
214221
cdef int write_bytes_with_length(self, bytes value) except -1
@@ -304,6 +311,16 @@ cdef class OsonEncoder(GrowableBuffer):
304311
cdef int encode(self, object value, ssize_t max_fname_size) except -1
305312

306313

314+
cdef class VectorDecoder(Buffer):
315+
316+
cdef object decode(self, bytes data)
317+
318+
319+
cdef class VectorEncoder(GrowableBuffer):
320+
321+
cdef int encode(self, array.array value) except -1
322+
323+
307324
cdef class ConnectParamsNode:
308325
cdef:
309326
public bint source_route
@@ -549,6 +566,9 @@ cdef class FetchInfoImpl:
549566
readonly str domain_schema
550567
readonly str domain_name
551568
readonly dict annotations
569+
readonly uint32_t vector_dimensions
570+
readonly uint8_t vector_format
571+
readonly uint8_t vector_flags
552572

553573

554574
cdef class BaseVarImpl:

src/oracledb/base_impl.pyx

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ from libc.stdint cimport int8_t, int16_t, int32_t, int64_t
3939
from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
4040
from libc.stdint cimport UINT8_MAX, UINT16_MAX, UINT32_MAX, UINT64_MAX
4141
from libc.string cimport memcpy
42+
from cpython cimport array
43+
44+
import array
4245

4346
import base64
4447
import datetime
@@ -71,10 +74,16 @@ cdef type PY_TYPE_TIMEDELTA = datetime.timedelta
7174
cdef type PY_TYPE_VAR
7275
cdef type PY_TYPE_FETCHINFO
7376

77+
# purity values
7478
cdef uint32_t PURITY_NEW = constants.PURITY_NEW
7579
cdef uint32_t PURITY_SELF = constants.PURITY_SELF
7680
cdef uint32_t PURITY_DEFAULT = constants.PURITY_DEFAULT
7781

82+
# vector types
83+
cdef uint8_t VECTOR_FORMAT_FLOAT32 = constants.VECTOR_FORMAT_FLOAT32
84+
cdef uint8_t VECTOR_FORMAT_FLOAT64 = constants.VECTOR_FORMAT_FLOAT64
85+
cdef uint8_t VECTOR_FORMAT_INT8 = constants.VECTOR_FORMAT_INT8
86+
7887
cdef const char* ENCODING_UTF8 = "UTF-8"
7988
cdef const char* ENCODING_UTF16 = "UTF-16BE"
8089

@@ -89,6 +98,7 @@ include "impl/base/defaults.pyx"
8998
include "impl/base/utils.pyx"
9099
include "impl/base/buffer.pyx"
91100
include "impl/base/oson.pyx"
101+
include "impl/base/vector.pyx"
92102
include "impl/base/connect_params.pyx"
93103
include "impl/base/pool_params.pyx"
94104
include "impl/base/connection.pyx"

src/oracledb/constants.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,14 @@
155155
TPC_END_NORMAL = 0
156156
TPC_END_SUSPEND = 0x00100000
157157

158+
# vector types
159+
VECTOR_FORMAT_FLOAT32 = 2
160+
VECTOR_FORMAT_FLOAT64 = 3
161+
VECTOR_FORMAT_INT8 = 4
162+
163+
# vector metadata flags
164+
VECTOR_META_FLAG_FLEXIBLE_DIM = 0x01
165+
158166
# basic configuration constants
159167
DRIVER_NAME = "python-oracledb"
160168
INSTALLATION_URL = (

src/oracledb/errors.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,8 @@ def _raise_err(
264264
ERR_OSON_FIELD_NAME_LIMITATION = 3020
265265
ERR_OSON_VERSION_NOT_SUPPORTED = 3021
266266
ERR_NAMED_TIMEZONE_NOT_SUPPORTED = 3022
267+
ERR_VECTOR_VERSION_NOT_SUPPORTED = 3023
268+
ERR_VECTOR_FORMAT_NOT_SUPPORTED = 3024
267269

268270
# error numbers that result in DatabaseError
269271
ERR_TNS_ENTRY_NOT_FOUND = 4000
@@ -676,6 +678,12 @@ def _raise_err(
676678
"password verifier type 0x{verifier_type:x} is not supported by "
677679
"python-oracledb in thin mode"
678680
),
681+
ERR_VECTOR_FORMAT_NOT_SUPPORTED: (
682+
"VECTOR type {vector_format} is not supported"
683+
),
684+
ERR_VECTOR_VERSION_NOT_SUPPORTED: (
685+
"VECTOR version {version} is not supported"
686+
),
679687
ERR_WALLET_FILE_MISSING: "wallet file {name} was not found",
680688
ERR_WRONG_ARRAY_DEFINITION: (
681689
"expecting a list of two elements [type, numelems]"

src/oracledb/fetch_info.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from typing import Union
3333

3434
from . import __name__ as MODULE_NAME
35+
from . import constants
3536
from .dbobject import DbObjectType
3637
from .base_impl import (
3738
DbType,
@@ -43,6 +44,7 @@
4344
DB_TYPE_BINARY_DOUBLE,
4445
DB_TYPE_BINARY_INTEGER,
4546
DB_TYPE_NUMBER,
47+
DB_TYPE_VECTOR,
4648
)
4749

4850

@@ -220,3 +222,28 @@ def type_code(self) -> DbType:
220222
Returns the type of the column.
221223
"""
222224
return self._impl.dbtype
225+
226+
@property
def vector_dimensions(self) -> Union[int, None]:
    """
    Returns the number of dimensions required by vector columns. If the
    column is not a vector column or allows for any number of dimensions,
    the value returned is None.
    """
    if self._impl.dbtype is not DB_TYPE_VECTOR:
        return None
    # columns flagged as having flexible dimensions accept any number of
    # dimensions, so no fixed count is reported for them
    if self._impl.vector_flags & constants.VECTOR_META_FLAG_FLEXIBLE_DIM:
        return None
    return self._impl.vector_dimensions
237+
238+
@property
def vector_format(self) -> Union[int, None]:
    """
    Returns the storage type required by vector columns. If the column is
    not a vector column or allows for any type of storage, the value
    returned is None.
    """
    if self._impl.dbtype is not DB_TYPE_VECTOR:
        return None
    # a stored format of zero is reported to the caller as None
    storage_format = self._impl.vector_format
    if storage_format != 0:
        return storage_format
    return None

src/oracledb/impl/base/buffer.pyx

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -276,15 +276,15 @@ cdef class Buffer:
276276
"""
277277
return self._size - self._pos
278278

279-
cdef object parse_binary_double(self, const uint8_t* ptr):
279+
cdef int parse_binary_double(self, const uint8_t* ptr,
280+
double *double_ptr) except -1:
280281
"""
281282
Read a binary double value from the buffer and return the corresponding
282283
Python object representing that value.
283284
"""
284285
cdef:
285286
uint8_t b0, b1, b2, b3, b4, b5, b6, b7
286287
uint64_t high_bits, low_bits, all_bits
287-
double *double_ptr
288288
b0 = ptr[0]
289289
b1 = ptr[1]
290290
b2 = ptr[2]
@@ -307,18 +307,17 @@ cdef class Buffer:
307307
high_bits = b0 << 24 | b1 << 16 | b2 << 8 | b3
308308
low_bits = b4 << 24 | b5 << 16 | b6 << 8 | b7
309309
all_bits = high_bits << 32 | (low_bits & <uint64_t> 0xffffffff)
310-
double_ptr = <double*> &all_bits
311-
return double_ptr[0]
310+
memcpy(double_ptr, &all_bits, 8)
312311

313-
cdef object parse_binary_float(self, const uint8_t* ptr):
312+
cdef int parse_binary_float(self, const uint8_t* ptr,
313+
float *float_ptr) except -1:
314314
"""
315315
Parse the binary float found at ptr and store the resulting C float
315315
value in the location referenced by float_ptr.
317317
"""
318318
cdef:
319319
uint8_t b0, b1, b2, b3
320320
uint64_t all_bits
321-
float *float_ptr
322321
b0 = ptr[0]
323322
b1 = ptr[1]
324323
b2 = ptr[2]
@@ -331,8 +330,7 @@ cdef class Buffer:
331330
b2 = ~b2
332331
b3 = ~b3
333332
all_bits = (b0 << 24) | (b1 << 16) | (b2 << 8) | b3
334-
float_ptr = <float*> &all_bits
335-
return float_ptr[0]
333+
memcpy(float_ptr, &all_bits, 4)
336334

337335
cdef object parse_date(self, const uint8_t* ptr, ssize_t num_bytes):
338336
"""
@@ -512,9 +510,11 @@ cdef class Buffer:
512510
cdef:
513511
const uint8_t *ptr
514512
ssize_t num_bytes
513+
double value
515514
self.read_raw_bytes_and_length(&ptr, &num_bytes)
516515
if ptr != NULL:
517-
return self.parse_binary_double(ptr)
516+
self.parse_binary_double(ptr, &value)
517+
return value
518518

519519
cdef object read_binary_float(self):
520520
"""
@@ -524,9 +524,11 @@ cdef class Buffer:
524524
cdef:
525525
const uint8_t *ptr
526526
ssize_t num_bytes
527+
float value
527528
self.read_raw_bytes_and_length(&ptr, &num_bytes)
528529
if ptr != NULL:
529-
return self.parse_binary_float(ptr)
530+
self.parse_binary_float(ptr, &value)
531+
return value
530532

531533
cdef object read_binary_integer(self):
532534
"""
@@ -835,7 +837,8 @@ cdef class Buffer:
835837
"""
836838
return self._skip_int(8, NULL)
837839

838-
cdef int write_binary_double(self, double value) except -1:
840+
cdef int write_binary_double(self, double value,
841+
bint write_length=True) except -1:
839842
"""
840843
Writes a double value to the buffer in Oracle canonical double floating
841844
point format.
@@ -874,10 +877,12 @@ cdef class Buffer:
874877
buf[5] = b5
875878
buf[6] = b6
876879
buf[7] = b7
877-
self.write_uint8(8)
880+
if write_length:
881+
self.write_uint8(8)
878882
self.write_raw(buf, 8)
879883

880-
cdef int write_binary_float(self, float value) except -1:
884+
cdef int write_binary_float(self, float value,
885+
bint write_length=True) except -1:
881886
"""
882887
Writes a float value to the buffer in Oracle canonical floating point
883888
format.
@@ -904,7 +909,8 @@ cdef class Buffer:
904909
buf[1] = b1
905910
buf[2] = b2
906911
buf[3] = b3
907-
self.write_uint8(4)
912+
if write_length:
913+
self.write_uint8(4)
908914
self.write_raw(buf, 4)
909915

910916
cdef int write_bool(self, bint value) except -1:

src/oracledb/impl/base/connection.pyx

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,12 @@ cdef class BaseConnImpl:
136136
return bool(value)
137137
elif db_type_num == DB_TYPE_NUM_JSON:
138138
return value
139+
elif db_type_num == DB_TYPE_NUM_VECTOR:
140+
if isinstance(value, list):
141+
return array.array('d', value)
142+
elif isinstance(value, array.array) \
143+
and value.typecode in ('f', 'd', 'b'):
144+
return value
139145
else:
140146
if is_ok != NULL:
141147
is_ok[0] = False

src/oracledb/impl/base/constants.pxi

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,20 @@ cdef enum:
7070
TNS_JSON_TYPE_BINARY_FLOAT = 0x7f
7171
TNS_JSON_TYPE_OBJECT = 0x84
7272
TNS_JSON_TYPE_ARRAY = 0xc0
73+
TNS_JSON_TYPE_EXTENDED = 0x7b
74+
TNS_JSON_TYPE_VECTOR = 0x01
75+
76+
# VECTOR constants
77+
cdef enum:
78+
TNS_VECTOR_MAGIC_BYTE = 0xDB
79+
TNS_VECTOR_VERSION = 0
80+
81+
# VECTOR flags
82+
cdef enum:
83+
TNS_VECTOR_FLAG_DIM_UINT8 = 0x0001
84+
TNS_VECTOR_FLAG_DIM_UINT32 = 0x0002
85+
TNS_VECTOR_FLAG_NORM = 0x0008
86+
TNS_VECTOR_FLAG_NORM_RESERVED = 0x0040
7387

7488
# general constants
7589
cdef enum:

0 commit comments

Comments
 (0)