Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 22 additions & 3 deletions .github/workflows/embedding_build_template.yml
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,32 @@ jobs:
run: |
# Set Docker image based on architecture
# Download glibc2_17-compatible ORT static lib (avoids __isoc23_strtoll from pyke.io builds)
ORT_VERSION="1.24.2"
# Print one value from the [package.metadata.manticore.ort] table of
# embeddings/Cargo.toml (path is relative to the current working directory).
#
#   $1 - key to look up, e.g. "version" or "linux-glibc"
#
# Writes the value, without its surrounding double quotes, to stdout. Prints
# nothing when the table or the key is absent, so callers must check for an
# empty result. Whitespace around "=" is optional, matching what the CMake
# parser for the same table accepts.
read_ort_metadata() {
  local key="$1"
  awk -v key="$key" '
    /^\[package\.metadata\.manticore\.ort\]/ { in_section = 1; next }
    # Any other table header ends the section we care about.
    /^\[/ { in_section = 0 }
    in_section {
      eq = index($0, "=")
      if (eq == 0) next
      name = substr($0, 1, eq - 1)
      value = substr($0, eq + 1)
      gsub(/^[ \t]+|[ \t]+$/, "", name)
      if (name != key) next
      if (match(value, /"[^"]*"/)) {
        # Quoted value: keep only the text between the first pair of quotes.
        value = substr(value, RSTART + 1, RLENGTH - 2)
      } else {
        # Bare value: drop a trailing comment and surrounding blanks.
        sub(/#.*$/, "", value)
        gsub(/^[ \t]+|[ \t]+$/, "", value)
      }
      print value
      exit
    }
  ' embeddings/Cargo.toml
}

ORT_VERSION="$(read_ort_metadata version)"
ORT_GLIBC="$(read_ort_metadata linux-glibc)"
if [[ -z "${ORT_VERSION}" || -z "${ORT_GLIBC}" ]]; then
echo "Failed to read ORT metadata from embeddings/Cargo.toml" >&2
exit 1
fi

if [[ "${{ inputs.arch }}" == "aarch64" ]]; then
docker_image="ghcr.io/manticoresoftware/rust-min-libc:aarch64-rust1.94.1-glibc2.27-openssl1.1.1k"
ort_asset="onnxruntime-linux-aarch64-static_lib-${ORT_VERSION}-glibc2_17"
ort_asset="onnxruntime-linux-aarch64-static_lib-${ORT_VERSION}-glibc${ORT_GLIBC}"
else
docker_image="ghcr.io/manticoresoftware/rust-min-libc:amd64-rust1.94.1-glibc2.27-openssl1.1.1k"
ort_asset="onnxruntime-linux-x64-static_lib-${ORT_VERSION}-glibc2_17"
ort_asset="onnxruntime-linux-x64-static_lib-${ORT_VERSION}-glibc${ORT_GLIBC}"
fi

curl -sL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ORT_VERSION}/${ort_asset}.zip" -o /tmp/ort.zip
Expand Down
111 changes: 100 additions & 11 deletions cmake/build_embeddings.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,80 @@ if (__build_embeddings_included)
endif ()
set ( __build_embeddings_included YES )

# Optional cache overrides for the ONNX Runtime release used by local Linux
# embeddings builds. Both default to an empty string; leave them empty to use
# the values read from embeddings/Cargo.toml by read_embeddings_ort_metadata().
set ( EMBEDDINGS_ORT_VERSION "" CACHE STRING "ONNX Runtime version used for local Linux embeddings builds; defaults to embeddings/Cargo.toml metadata" )
set ( EMBEDDINGS_ORT_GLIBC "" CACHE STRING "ONNX Runtime glibc baseline used for local Linux embeddings builds; defaults to embeddings/Cargo.toml metadata" )

# Resolve the ONNX Runtime version and glibc baseline for the embeddings build.
#
# Both values are parsed from the [package.metadata.manticore.ort] table of
# ${CMAKE_SOURCE_DIR}/embeddings/Cargo.toml. A non-empty EMBEDDINGS_ORT_VERSION
# or EMBEDDINGS_ORT_GLIBC variable replaces the corresponding parsed value.
#
# Arguments:
#   OUT_ORT_VERSION - name of the caller's variable that receives the version
#   OUT_ORT_GLIBC   - name of the caller's variable that receives the glibc baseline
#
# Stops with FATAL_ERROR when Cargo.toml, the table, or either key is missing.
function(read_embeddings_ort_metadata OUT_ORT_VERSION OUT_ORT_GLIBC)
    set ( CARGO_TOML "${CMAKE_SOURCE_DIR}/embeddings/Cargo.toml" )
    if (NOT EXISTS "${CARGO_TOML}")
        message ( FATAL_ERROR "embeddings Cargo.toml was not found: ${CARGO_TOML}" )
    endif()

    # Capture the table header plus everything up to the next '[' (next table).
    file ( READ "${CARGO_TOML}" CARGO_TOML_CONTENT )
    string ( REGEX MATCH "\\[package\\.metadata\\.manticore\\.ort\\][^\[]*" ORT_METADATA "${CARGO_TOML_CONTENT}" )
    if (NOT ORT_METADATA)
        message ( FATAL_ERROR "Missing [package.metadata.manticore.ort] version/linux-glibc in ${CARGO_TOML}" )
    endif()

    # Keys are matched only at the start of a line (leading blanks allowed), so a
    # commented-out entry or a longer key that merely ends in the same word is
    # not mistaken for the real one. The captured section always begins with the
    # header line, hence every key is preceded by a newline.
    if (NOT ORT_METADATA MATCHES "\n[ \t]*version[ \t]*=[ \t]*\"([^\"]+)\"")
        message ( FATAL_ERROR "Missing [package.metadata.manticore.ort] version in ${CARGO_TOML}" )
    endif()
    set ( ORT_VERSION "${CMAKE_MATCH_1}" )

    if (NOT ORT_METADATA MATCHES "\n[ \t]*linux-glibc[ \t]*=[ \t]*\"([^\"]+)\"")
        message ( FATAL_ERROR "Missing [package.metadata.manticore.ort] linux-glibc in ${CARGO_TOML}" )
    endif()
    set ( ORT_GLIBC "${CMAKE_MATCH_1}" )

    # Explicit overrides win over the Cargo.toml metadata. Compare against the
    # empty string instead of relying on if(<variable>) truthiness.
    if (NOT "${EMBEDDINGS_ORT_VERSION}" STREQUAL "")
        set ( ORT_VERSION "${EMBEDDINGS_ORT_VERSION}" )
    endif()
    if (NOT "${EMBEDDINGS_ORT_GLIBC}" STREQUAL "")
        set ( ORT_GLIBC "${EMBEDDINGS_ORT_GLIBC}" )
    endif()

    set ( ${OUT_ORT_VERSION} "${ORT_VERSION}" PARENT_SCOPE )
    set ( ${OUT_ORT_GLIBC} "${ORT_GLIBC}" PARENT_SCOPE )
endfunction()

# Make a static ONNX Runtime build available to the embeddings cargo build.
#
# Does nothing unless the platform is UNIX and not APPLE. Otherwise it resolves
# the version and glibc baseline through read_embeddings_ort_metadata(), picks
# the "aarch64" asset when CMAKE_SYSTEM_PROCESSOR is aarch64/arm64 and the
# "x64" asset for everything else, downloads and unpacks the archive into
# ${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort unless the asset's lib directory is
# already there, and finally sets the ORT_LIB_PATH environment variable for
# this CMake run to that lib directory.
#
# Stops with FATAL_ERROR when the download fails or the lib directory is still
# missing after extraction.
function(prepare_embeddings_ort)
    if (NOT UNIX OR APPLE)
        return()
    endif()

    read_embeddings_ort_metadata ( ORT_VERSION ORT_GLIBC )

    if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)$")
        set ( ORT_ARCH "aarch64" )
    else()
        set ( ORT_ARCH "x64" )
    endif()

    set ( ORT_ASSET "onnxruntime-linux-${ORT_ARCH}-static_lib-${ORT_VERSION}-glibc${ORT_GLIBC}" )
    set ( ORT_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v${ORT_VERSION}/${ORT_ASSET}.zip" )
    set ( ORT_DIR "${CMAKE_CURRENT_BINARY_DIR}/embeddings/ort" )
    set ( ORT_ROOT "${ORT_DIR}/${ORT_ASSET}" )
    set ( ORT_ZIP "${ORT_DIR}/${ORT_ASSET}.zip" )

    if (NOT EXISTS "${ORT_ROOT}/lib")
        message ( STATUS "Downloading ONNX Runtime static library: ${ORT_ASSET}" )
        file ( MAKE_DIRECTORY "${ORT_DIR}" )
        # INACTIVITY_TIMEOUT keeps a stalled connection from hanging configure forever.
        file ( DOWNLOAD "${ORT_URL}" "${ORT_ZIP}" STATUS ORT_DOWNLOAD_STATUS INACTIVITY_TIMEOUT 60 SHOW_PROGRESS )
        list ( GET ORT_DOWNLOAD_STATUS 0 ORT_DOWNLOAD_CODE )
        if (NOT ORT_DOWNLOAD_CODE EQUAL 0)
            list ( GET ORT_DOWNLOAD_STATUS 1 ORT_DOWNLOAD_ERROR )
            # Do not leave a truncated archive behind in the build tree.
            file ( REMOVE "${ORT_ZIP}" )
            message ( FATAL_ERROR "Failed to download ${ORT_URL}: ${ORT_DOWNLOAD_ERROR}" )
        endif()
        file ( ARCHIVE_EXTRACT INPUT "${ORT_ZIP}" DESTINATION "${ORT_DIR}" )
    endif()

    # The archive is expected to unpack into a directory named after the asset.
    if (NOT EXISTS "${ORT_ROOT}/lib")
        message ( FATAL_ERROR "ONNX Runtime lib directory was not found: ${ORT_ROOT}/lib" )
    endif()

    set ( ENV{ORT_LIB_PATH} "${ORT_ROOT}/lib" )
    message ( STATUS "Using ONNX Runtime from ORT_LIB_PATH=$ENV{ORT_LIB_PATH}" )
endfunction()

function(build_embeddings_lib)
message ( STATUS "building embeddings locally..." )

Expand Down Expand Up @@ -49,21 +123,37 @@ function(build_embeddings_lib)
# This matches the format used by other Manticore libraries for consistent version display
set(ENV{GIT_COMMIT_ID} "${GIT_COMMIT_ID}")
set(ENV{GIT_TIMESTAMP_ID} "${GIT_TIMESTAMP_ID}")
prepare_embeddings_ort()

# Enable platform-specific BLAS acceleration for candle when available
set(EMBEDDINGS_CARGO_FEATURES "")
if(APPLE)
set(EMBEDDINGS_CARGO_FEATURES "--features" "accelerate")
elseif(UNIX)
# MKL provides multi-threaded BLAS on Linux; skip if not available
execute_process(COMMAND pkg-config --exists mkl-dynamic-lp64-seq RESULT_VARIABLE MKL_FOUND OUTPUT_QUIET ERROR_QUIET)
if(MKL_FOUND EQUAL 0)
set(EMBEDDINGS_CARGO_FEATURES "--features" "mkl")
# Enable platform-specific BLAS acceleration for candle when available.
if (DEFINED EMBEDDINGS_CARGO_FEATURES)
set(EMBEDDINGS_FEATURES_CSV "${EMBEDDINGS_CARGO_FEATURES}")
else()
set(EMBEDDINGS_FEATURE_LIST)
if(APPLE)
list(APPEND EMBEDDINGS_FEATURE_LIST accelerate)
elseif(UNIX)
# MKL provides multi-threaded BLAS on Linux; skip if not available
execute_process(COMMAND pkg-config --exists mkl-dynamic-lp64-seq RESULT_VARIABLE MKL_FOUND OUTPUT_QUIET ERROR_QUIET)
if(MKL_FOUND EQUAL 0)
list(APPEND EMBEDDINGS_FEATURE_LIST mkl)
endif()
endif()
list(JOIN EMBEDDINGS_FEATURE_LIST "," EMBEDDINGS_FEATURES_CSV)
endif()

if (UNIX AND NOT APPLE AND DEFINED ENV{ORT_LIB_PATH} AND NOT "$ENV{ORT_LIB_PATH}" STREQUAL "" AND EMBEDDINGS_FEATURES_CSV)
string(REPLACE "," ";" EMBEDDINGS_FEATURE_LIST "${EMBEDDINGS_FEATURES_CSV}")
list(REMOVE_ITEM EMBEDDINGS_FEATURE_LIST download-ort)
list(JOIN EMBEDDINGS_FEATURE_LIST "," EMBEDDINGS_FEATURES_CSV)
endif()

if (EMBEDDINGS_FEATURES_CSV)
set(EMBEDDINGS_CARGO_FEATURE_ARGS "--features" "${EMBEDDINGS_FEATURES_CSV}")
endif()

execute_process (
COMMAND cargo build --manifest-path ${CMAKE_SOURCE_DIR}/embeddings/Cargo.toml --lib --release ${EMBEDDINGS_CARGO_FEATURES} --target-dir ${CMAKE_CURRENT_BINARY_DIR}/embeddings
COMMAND cargo build --manifest-path ${CMAKE_SOURCE_DIR}/embeddings/Cargo.toml --lib --release ${EMBEDDINGS_CARGO_FEATURE_ARGS} --target-dir ${CMAKE_CURRENT_BINARY_DIR}/embeddings
RESULT_VARIABLE CMD_RESULT
)

Expand All @@ -86,4 +176,3 @@ function(build_embeddings_lib)
file(RENAME "${CMAKE_CURRENT_BINARY_DIR}/embeddings/release/${EMBEDDINGS_LIB_NAME}.pdb" "${CMAKE_CURRENT_BINARY_DIR}/embeddings/release/lib_${EMBEDDINGS_LIB_NAME}.pdb")
endif()
endfunction ()

4 changes: 4 additions & 0 deletions embeddings/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ name = "manticore-knn-embeddings"
version = "1.1.1"
edition = "2021"

[package.metadata.manticore.ort]
version = "1.24.2"
linux-glibc = "2_17"

# Candle: git dep so CI works without a local candle clone.
# For local dev with ../../candle, add a [patch] section to use path deps.
[dependencies]
Expand Down
15 changes: 15 additions & 0 deletions embeddings/src/model/ffi_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,16 @@ mod tests {
TextModelWrapper::free_model_result(result);
}

/// Runs `run_concurrent_ffi_embeddings` for `model_id` on a dedicated thread
/// created with a 256 KiB stack, then waits for that thread to finish.
///
/// Panics when the thread cannot be spawned or when the thread itself panics.
fn run_concurrent_ffi_embeddings_from_small_stack(model_id: &'static str) {
    const SMALL_STACK_BYTES: usize = 256 * 1024;

    let builder = std::thread::Builder::new()
        .name(String::from("manticore-embeddings-small-stack-test"))
        .stack_size(SMALL_STACK_BYTES);

    let worker = builder
        .spawn(move || run_concurrent_ffi_embeddings(model_id))
        .expect("failed to spawn small-stack test thread");

    worker.join().expect("small-stack test thread panicked");
}

#[test]
fn test_text_model_result_structure() {
// Test that TextModelResult has the expected structure
Expand Down Expand Up @@ -501,4 +511,9 @@ mod tests {
fn test_concurrent_qwen_embeddings_via_ffi() {
run_concurrent_ffi_embeddings("Qwen/Qwen3-Embedding-0.6B");
}

// Drives the concurrent FFI embeddings scenario for the Qwen3 embedding model
// through run_concurrent_ffi_embeddings_from_small_stack.
#[test]
fn test_concurrent_qwen_embeddings_via_ffi_from_small_stack() {
    run_concurrent_ffi_embeddings_from_small_stack("Qwen/Qwen3-Embedding-0.6B");
}
}
84 changes: 69 additions & 15 deletions embeddings/src/model/text_model_wrapper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ const MODEL_MAGIC: u64 = 0xC0FF_EE5E_E7BE_EFDE;
/// MAGIC (and proceeds safely) or DEAD (and gets a clean error).
const MODEL_DEAD: u64 = 0xDEAD_DEAD_DEAD_DEAD;

/// Stack size (32 MiB) that `embedding_stack_size` falls back to when
/// `MANTICORE_EMBEDDINGS_STACK_SIZE` is unset, unparsable, or below the minimum.
const DEFAULT_EMBEDDINGS_STACK_SIZE: usize = 32 * 1024 * 1024;
/// Smallest `MANTICORE_EMBEDDINGS_STACK_SIZE` value (1 MiB) that
/// `embedding_stack_size` accepts; smaller requests are ignored.
const MIN_EMBEDDINGS_STACK_SIZE: usize = 1024 * 1024;

/// Heap-allocated wrapper that the FFI hands to C++ as `*mut c_void`. The C++
/// side stores the raw pointer and passes it back into every call; we use the
/// `magic` field to validate that the pointer still references a live handle.
Expand Down Expand Up @@ -74,6 +77,43 @@ pub struct StringItem {
pub len: usize,
}

/// Returns the stack size to use for the embeddings worker thread.
///
/// The `MANTICORE_EMBEDDINGS_STACK_SIZE` environment variable is honoured when
/// it parses as a `usize` that is at least `MIN_EMBEDDINGS_STACK_SIZE`; in
/// every other case `DEFAULT_EMBEDDINGS_STACK_SIZE` is returned.
fn embedding_stack_size() -> usize {
    let requested = match std::env::var("MANTICORE_EMBEDDINGS_STACK_SIZE") {
        Ok(raw) => raw.parse::<usize>().ok(),
        Err(_) => None,
    };

    match requested {
        Some(bytes) if bytes >= MIN_EMBEDDINGS_STACK_SIZE => bytes,
        _ => DEFAULT_EMBEDDINGS_STACK_SIZE,
    }
}

/// Decides whether work should be moved onto a dedicated embeddings thread.
///
/// Always true for builds with debug assertions and for Windows targets;
/// otherwise true only when `MANTICORE_EMBEDDINGS_STACK_SIZE` is present in
/// the environment (its value is not inspected here).
fn should_run_on_embedding_stack() -> bool {
    if cfg!(debug_assertions) || cfg!(windows) {
        return true;
    }

    std::env::var_os("MANTICORE_EMBEDDINGS_STACK_SIZE").is_some()
}

/// Runs `f`, optionally on a scoped thread named `name` whose stack size comes
/// from `embedding_stack_size`.
///
/// When `should_run_on_embedding_stack` is false, `f` is called directly on
/// the current thread and its result is wrapped in `Ok`. Otherwise the thread
/// is spawned and joined before returning; `Err` carries a message when the
/// thread could not be spawned or when it panicked.
fn run_on_embedding_stack<T, F>(name: &str, f: F) -> Result<T, String>
where
    T: Send,
    F: FnOnce() -> T + Send,
{
    if should_run_on_embedding_stack() {
        std::thread::scope(|scope| {
            let builder = std::thread::Builder::new()
                .name(name.to_string())
                .stack_size(embedding_stack_size());

            builder
                .spawn_scoped(scope, f)
                .map_err(|e| format!("failed to spawn {name} thread: {e}"))
                .and_then(|worker| {
                    worker
                        .join()
                        .map_err(|_| format!("{name} thread panicked"))
                })
        })
    } else {
        Ok(f())
    }
}

impl TextModelWrapper {
pub extern "C" fn load_model(
name_ptr: *const c_char,
Expand Down Expand Up @@ -132,12 +172,16 @@ impl TextModelWrapper {
use_gpu: Some(use_gpu),
};

match create_model(options) {
Ok(model) => TextModelResult {
let model_result = run_on_embedding_stack("manticore-embeddings-load", move || {
create_model(options).map_err(|e| e.to_string())
});

match model_result {
Ok(Ok(model)) => TextModelResult {
model: Box::into_raw(Box::new(ModelHandle::new(model))) as *mut c_void,
error: ptr::null_mut(),
},
Err(e) => {
Ok(Err(e)) | Err(e) => {
let c_error = std::ffi::CString::new(e.to_string()).unwrap();
TextModelResult {
model: ptr::null_mut(),
Expand All @@ -148,17 +192,22 @@ impl TextModelWrapper {
}

pub extern "C" fn free_model_result(res: TextModelResult) {
unsafe {
if !res.model.is_null() {
let model = res.model as usize;
let error = res.error as usize;
let _ = run_on_embedding_stack("manticore-embeddings-free-model", move || unsafe {
let model = model as *mut c_void;
let error = error as *mut c_char;

if !model.is_null() {
// Drop runs ModelHandle::drop first (tombstones magic to
// MODEL_DEAD), then destroys the inner Model.
drop(Box::from_raw(res.model as *mut ModelHandle));
drop(Box::from_raw(model as *mut ModelHandle));
}

if !res.error.is_null() {
let _ = std::ffi::CString::from_raw(res.error);
if !error.is_null() {
let _ = std::ffi::CString::from_raw(error);
}
}
});
}

/// Validate the handle pointer before dereferencing. Returns a static error
Expand Down Expand Up @@ -212,10 +261,13 @@ impl TextModelWrapper {
})
.collect();

let embeddings_list = run_on_embedding_stack("manticore-embeddings-predict", || {
model.predict(&string_refs).map_err(|e| e.to_string())
});

let mut float_vec_list: Vec<FloatVec> = Vec::new();
let embeddings_list = model.predict(&string_refs);
let c_error = match embeddings_list {
Ok(embeddings_list) => {
Ok(Ok(embeddings_list)) => {
for embeddings in embeddings_list.iter() {
let ptr = embeddings.as_ptr();
let len = embeddings.len();
Expand All @@ -227,7 +279,7 @@ impl TextModelWrapper {
std::mem::forget(embeddings_list);
ptr::null_mut()
}
Err(e) => {
Ok(Err(e)) | Err(e) => {
// Don't push empty vector on error - return error through szError pattern
let c_error = std::ffi::CString::new(e.to_string()).unwrap();
c_error.into_raw()
Expand Down Expand Up @@ -292,9 +344,11 @@ impl TextModelWrapper {
.unwrap_or(ptr::null_mut());
}
};
match model.validate_api_key() {
Ok(()) => ptr::null_mut(),
Err(e) => {
match run_on_embedding_stack("manticore-embeddings-validate-key", || {
model.validate_api_key().map_err(|e| e.to_string())
}) {
Ok(Ok(())) => ptr::null_mut(),
Ok(Err(e)) | Err(e) => {
let error_str = e.to_string();
let c_error = match std::ffi::CString::new(error_str) {
Ok(cstr) => cstr,
Expand Down
Loading