Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 6 additions & 14 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ set(_tvm_ffi_objs_sources
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/dtype.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/container.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/init_once.cc"
"${CMAKE_CURRENT_SOURCE_DIR}/src/ffi/custom_allocator.cc"
)

set(_tvm_ffi_extra_objs_sources
Expand Down Expand Up @@ -272,20 +273,11 @@ if (TVM_FFI_BUILD_PYTHON_MODULE)
VERBATIM
)

# Choose how the Cython extension module ``tvm_ffi_cython`` is built, based on
# the Python version and on whether the interpreter is free-threaded.
if (Python_VERSION VERSION_GREATER_EQUAL "3.12" AND NOT PYTHON_IS_FREE_THREADED)
# Python >= 3.12 and not free-threaded: build with the USE_SABI 3.12 variant.
python_add_library(tvm_ffi_cython MODULE "${_core_cpp}" USE_SABI 3.12)
target_link_libraries(tvm_ffi_cython PRIVATE Python::SABIModule)
set_target_properties(tvm_ffi_cython PROPERTIES OUTPUT_NAME "core")
if (NOT WIN32)
# Non-Windows only: additionally link Python::Module and force the
# ".abi3.so" file suffix on the produced module.
target_link_libraries(tvm_ffi_cython PRIVATE Python::Module)
set_target_properties(tvm_ffi_cython PROPERTIES SUFFIX ".abi3.so")
endif ()
else ()
# Python < 3.12, or a free-threaded interpreter: use the WITH_SOABI variant.
python_add_library(tvm_ffi_cython MODULE "${_core_cpp}" WITH_SOABI)
set_target_properties(tvm_ffi_cython PROPERTIES OUTPUT_NAME "core")
endif ()
# The PyObject-tying implementation in tvm_ffi_python_helpers.h uses the full
# Python C API (Py_IncRef, PyObject_GC_Del, atomic header reads), so we build
# against the per-version ABI rather than the limited (abi3) ABI.
python_add_library(tvm_ffi_cython MODULE "${_core_cpp}" WITH_SOABI)
set_target_properties(tvm_ffi_cython PROPERTIES OUTPUT_NAME "core")
target_include_directories(
tvm_ffi_cython PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/python/tvm_ffi/cython
)
Expand Down
60 changes: 60 additions & 0 deletions include/tvm/ffi/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -580,6 +580,66 @@ TVM_FFI_DLL int TVMFFIObjectDecRef(TVMFFIObjectHandle obj);
TVM_FFI_DLL int TVMFFIObjectCreateOpaque(void* handle, int32_t type_index,
void (*deleter)(void* handle), TVMFFIObjectHandle* out);

//-----------------------------------------------------------------------
// Section: ObjectAllocHeader and CustomAllocator
//-----------------------------------------------------------------------
/*!
* \brief Mandatory header placed immediately before each TVMFFIObject body.
*
* This header may be used by TVMFFIObject::deleter to reclaim space when a
* custom allocator is present. It can also be set to NULL if
* TVMFFIObject::deleter directly calls system free. This header must be
* present for every Object so that a frontend can rely on this field to
* confirm whether the object came from a certain allocator.
*/
typedef struct {
/*!
* \brief Free the allocation.
* \param ptr The pointer to the space of the object.
* \note ``ptr`` points to the space of the TVMFFIObject itself; it does not
* point to (or include) the preceding TVMFFIObjectAllocHeader.
*/
void (*delete_space)(void* ptr);
} TVMFFIObjectAllocHeader;

/*!
* \brief Custom allocator entry registered with TVMFFISetCustomAllocator.
*
* Bundles an allocation callback with an opaque ``context`` pointer that is
* handed back to the callback on every call.
*/
typedef struct {
/*!
* \brief Allocate the space for an Object body.
* \param size The size requested for the object body.
* \param alignment The alignment requirement for the object body.
* \param type_index Type index of the object.
* \param context The ``context`` field of the registered allocator.
* \return Pointer to the space of the object, or NULL on failure (with
* the error reported via ``TVMFFIErrorSetRaised``).
* \note The returned pointer addresses the object body and must be
* preceded by a ``TVMFFIObjectAllocHeader`` whose ``delete_space``
* releases the full underlying allocation when invoked.
*/
void* (*allocate)(size_t size, size_t alignment, int32_t type_index, void* context);
/*! \brief Allocator context passed unmodified to ``allocate``. */
void* context;
} TVMFFICustomAllocator;

/*!
* \brief Get the process-wide custom allocator.
* \return The currently registered allocator (never NULL).
* \note ``TVMFFIGetCustomAllocator`` always returns a valid allocator; the
* allocator it returns can be replaced via ``TVMFFISetCustomAllocator``.
*/
TVM_FFI_DLL TVMFFICustomAllocator* TVMFFIGetCustomAllocator(void);

/*!
* \brief Register the process-wide custom allocator.
* \param allocator Pointer to a TVMFFICustomAllocator, or NULL to restore
* the builtin default.
* \return 0 on success, nonzero on failure.
* \note ``allocator`` must remain alive for the entire lifetime of the
* process.
*/
TVM_FFI_DLL int TVMFFISetCustomAllocator(TVMFFICustomAllocator* allocator);

/*!
* \brief Convert type key to type index.
* \param type_key The key of the type.
Expand Down
53 changes: 49 additions & 4 deletions include/tvm/ffi/memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include <cstddef>
#include <cstdlib>
#include <new>
#include <type_traits>
#include <utility>

Expand Down Expand Up @@ -79,6 +80,39 @@ TVM_FFI_INLINE void* AlignedAlloc(size_t size) {
#endif
}

/*!
 * \brief Aligned allocation for an alignment that is only known at runtime.
 *
 * Runtime counterpart of the templated ``AlignedAlloc<align>``, intended for
 * callers that receive the alignment as an argument (e.g. custom-allocator
 * implementations that dispatch on the runtime ``alignment`` argument of
 * ``TVMFFICustomAllocator::allocate``).
 *
 * \param size The number of bytes to allocate.
 * \param align The requested alignment, must be a power of 2.
 * \return The pointer to the allocated memory.
 * \throws std::bad_alloc if the underlying allocation call reports failure.
 */
TVM_FFI_INLINE void* AlignedAllocRuntime(size_t size, size_t align) {
#ifdef _MSC_VER
  void* block = _aligned_malloc(size, align);
  if (block == nullptr) {
    throw std::bad_alloc();
  }
  return block;
#else
  // Over-aligned requests need posix_memalign; everything else is already
  // satisfied by plain malloc.
  if (align > alignof(std::max_align_t)) {
    void* block;
    const int status = posix_memalign(&block, align, size);
    if (status != 0) {
      throw std::bad_alloc();
    }
    return block;
  }
  void* block = std::malloc(size);
  if (block == nullptr) {
    throw std::bad_alloc();
  }
  return block;
#endif
}

/*!
* \brief Free aligned memory.
* \param data The pointer to the memory to free.
Expand Down Expand Up @@ -168,7 +202,11 @@ class SimpleObjAllocator : public ObjAllocatorBase<SimpleObjAllocator> {
// class with non-virtual destructor.
// We are fine here as we captured the right deleter during construction.
// This is also the right way to get storage type for an object pool.
void* data = AlignedAlloc<alignof(T)>(sizeof(T));
static_assert(alignof(T) <= alignof(::std::max_align_t),
"Object types with alignment > max_align_t are not supported "
"by the custom allocator hook");
TVMFFICustomAllocator* alloc = TVMFFIGetCustomAllocator();
void* data = alloc->allocate(sizeof(T), alignof(T), T::RuntimeTypeIndex(), alloc->context);
new (data) T(std::forward<Args>(args)...);
return reinterpret_cast<T*>(data);
}
Expand All @@ -187,7 +225,8 @@ class SimpleObjAllocator : public ObjAllocatorBase<SimpleObjAllocator> {
tptr->T::~T();
}
if (flags & kTVMFFIObjectDeleterFlagBitMaskWeak) {
AlignedFree(static_cast<void*>(tptr));
ObjectUnsafe::GetObjectAllocHeaderFromPtr(static_cast<void*>(tptr))
->delete_space(static_cast<void*>(tptr));
}
}
};
Expand Down Expand Up @@ -215,12 +254,17 @@ class SimpleObjAllocator : public ObjAllocatorBase<SimpleObjAllocator> {
static_assert(
alignof(ArrayType) % alignof(ElemType) == 0 && sizeof(ArrayType) % alignof(ElemType) == 0,
"element alignment constraint");
static_assert(alignof(ArrayType) <= alignof(::std::max_align_t),
"Object types with alignment > max_align_t are not supported "
"by the custom allocator hook");
size_t size = sizeof(ArrayType) + sizeof(ElemType) * num_elems;
// round up to the nearest multiple of align
constexpr size_t align = alignof(ArrayType);
// C++ standard always guarantees that alignof operator returns a power of 2
size_t aligned_size = (size + (align - 1)) & ~(align - 1);
void* data = AlignedAlloc<align>(aligned_size);
TVMFFICustomAllocator* alloc = TVMFFIGetCustomAllocator();
void* data =
alloc->allocate(aligned_size, align, ArrayType::RuntimeTypeIndex(), alloc->context);
new (data) ArrayType(std::forward<Args>(args)...);
return reinterpret_cast<ArrayType*>(data);
}
Expand All @@ -239,7 +283,8 @@ class SimpleObjAllocator : public ObjAllocatorBase<SimpleObjAllocator> {
tptr->ArrayType::~ArrayType();
}
if (flags & kTVMFFIObjectDeleterFlagBitMaskWeak) {
AlignedFree(static_cast<void*>(tptr));
ObjectUnsafe::GetObjectAllocHeaderFromPtr(static_cast<void*>(tptr))
->delete_space(static_cast<void*>(tptr));
}
}
};
Expand Down
10 changes: 10 additions & 0 deletions include/tvm/ffi/object.h
Original file line number Diff line number Diff line change
Expand Up @@ -1096,6 +1096,16 @@ struct ObjectUnsafe {
return const_cast<TVMFFIObject*>(&(src->header_));
}

/*!
 * \brief Locate the TVMFFIObjectAllocHeader that precedes an object body.
 * \param ptr The pointer to the space of the object.
 * \return The header set by the allocator that produced ``ptr``, i.e. the
 *         address ``sizeof(TVMFFIObjectAllocHeader)`` bytes before ``ptr``.
 */
TVM_FFI_INLINE static TVMFFIObjectAllocHeader* GetObjectAllocHeaderFromPtr(void* ptr) {
  // Step back in bytes so the offset is exactly one header, independent of
  // the pointee type of ``ptr``.
  char* body_begin = static_cast<char*>(ptr);
  char* header_begin = body_begin - sizeof(TVMFFIObjectAllocHeader);
  return reinterpret_cast<TVMFFIObjectAllocHeader*>(header_begin);
}

// Suppress -Winvalid-offsetof: we intentionally use offsetof on non-standard-layout types
// to avoid undefined behavior from null pointer arithmetic that sanitizers flag.
#if defined(__clang__) || defined(__GNUC__)
Expand Down
7 changes: 2 additions & 5 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -232,14 +232,11 @@ docstring-code-line-length = 80
[tool.cibuildwheel]
build-verbosity = 1

# only build up to cp312, cp312
# will be abi3 and can be used in future versions
# ship 314t threaded nogil version
build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp314t-*"]
# Per-Python-version wheels (no abi3 / limited API).
build = ["cp38-*", "cp39-*", "cp310-*", "cp311-*", "cp312-*", "cp313-*", "cp314t-*"]
skip = ["*musllinux*"]
# we only need to test on cp312
test-skip = ["cp38-*", "cp39-*", "cp310-*", "cp311-*"]
# focus on testing abi3 wheel
build-frontend = "build[uv]"
test-command = "pytest {package}/tests/python -vvs"
test-groups = ["test"]
Expand Down
17 changes: 17 additions & 0 deletions python/tvm_ffi/cython/base.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,17 @@ def _env_get_current_stream(int device_type, int device_id):


cdef extern from "tvm_ffi_python_helpers.h":
int TVMFFIPyRegisterDefaultAllocator() noexcept
void TVMFFIPyMarkPythonFinalizing() noexcept

bint TVMFFIPyTryGetAttachedPyObject(void* chandle, PyObject** out) noexcept
bint TVMFFIPyIsDetached(void* chandle) noexcept
void TVMFFIPyAttachPyObject(void* chandle, PyObject* obj) noexcept
void TVMFFIPyDetachPyObject(void* chandle, PyObject* obj) noexcept
void TVMFFIPyTpDealloc(void** ptr_to_chandle, PyObject* wrapper) noexcept
void TVMFFIPySetReviveBlock(PyObject* cached_alloc) noexcept
void TVMFFIPyInstallTypeSlots(PyObject* type_obj) noexcept

# no need to expose fields of the call context setter data structure
ctypedef struct TVMFFIPyCallContext:
int device_type
Expand Down Expand Up @@ -542,5 +553,11 @@ cdef _init_env_api():

_init_env_api()


CHECK_CALL(TVMFFIPyRegisterDefaultAllocator())

import atexit as _tvm_ffi_atexit
_tvm_ffi_atexit.register(TVMFFIPyMarkPythonFinalizing)

# ensure testing is linked and we can run testcases
TVMFFITestingDummyTarget()
15 changes: 13 additions & 2 deletions python/tvm_ffi/cython/function.pxi
Original file line number Diff line number Diff line change
Expand Up @@ -545,9 +545,19 @@ cdef int TVMFFIPyArgSetterObjectRValueRef_(
PyObject* py_arg, TVMFFIAny* out
) except -1:
"""Setter for ObjectRValueRef"""
cdef object arg = <object>py_arg
cdef CObject src = (<object>py_arg).obj
# need to detach from chandle
# there are two possible outcomes after the call:
# chandle gets moved, so it is set to NULL
# callee did not move chandle, in such case src.chandle is valid
# but chandle is no longer attached to PyObject
# we need to carefully handle chandle and PyObject recycling in both cases.
# These logics are implemented in TVMFFIPyTpDealloc (CObject.__dealloc__).
# NOTE: TVMFFIPyDetachPyObject is robust to cases where the Object is not
# allocated by the custom Python allocator.
TVMFFIPyDetachPyObject(src.chandle, <PyObject*>src)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment here:

# need to detach from chandle
# there are two possible outcomes after the call:
# chandle gets moved, so it is set to NULL
# callee did not move chandle; in that case src.chandle is valid, but chandle is no longer attached to the PyObject
# we need to carefully handle chandle and PyObject recycling in both cases
# This logic is implemented in TVMFFIPyTPFinalize

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

note: TVMFFIPyDetachPyObject is robust to cases where Object is not allocated by custom PyAttachField

out.type_index = kTVMFFIObjectRValueRef
out.v_ptr = &((<CObject>(arg.obj)).chandle)
out.v_ptr = &(src.chandle)
return 0


Expand Down Expand Up @@ -1089,6 +1099,7 @@ def _register_global_func(name: str, pyfunc: Callable[..., Any] | Function, over


def _get_global_func(name: str, allow_missing: bool):
# PyObject tying is not applicable here.
cdef TVMFFIObjectHandle chandle
cdef ByteArrayArg name_arg = ByteArrayArg(c_str(name))

Expand Down
Loading
Loading