Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,11 @@ bm25 = { path = "./crates/bm25" }
index = { path = "./crates/index" }
simd = { path = "./crates/simd" }

getrandom.workspace = true
pgrx = "=0.17.0"
pgrx-catalog = "0.3.2"
rand.workspace = true
serde.workspace = true
tempfile.workspace = true
toml = "1.1.2"
validator.workspace = true

Expand All @@ -49,9 +50,9 @@ version = "0.0.0"
edition = "2024"

[workspace.dependencies]
getrandom = "0.4.2"
rand = "0.10.1"
serde = { version = "1.0.228", features = ["derive"] }
tempfile = "3.27.0"
validator = { version = "0.20.0", features = ["derive"] }
zerocopy = { version = "0.8.48", features = ["derive", "simd"] }

Expand Down
3 changes: 1 addition & 2 deletions crates/bm25/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,9 @@ score = { path = "../score" }
simd = { path = "../simd" }

blake3 = "1.8.4"
getrandom = "0.4.2"
getrandom.workspace = true
memmap2 = "0.9.10"
serde.workspace = true
tempfile.workspace = true
validator.workspace = true
zerocopy.workspace = true

Expand Down
26 changes: 16 additions & 10 deletions crates/bm25/src/maintain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@ use crate::tuples::*;
use crate::vector::Document;
use crate::{Opaque, WIDTH, compression};
use index::relation::{Page, PageGuard, RelationRead, RelationWrite};
use std::fs::File;
use std::fs::{File, OpenOptions};
use std::io::BufWriter;
use std::path::Path;
use zerocopy::{FromBytes, IntoBytes};

pub fn maintain<R: RelationRead + RelationWrite>(index: &R, _check: impl Fn())
where
pub fn maintain<R: RelationRead + RelationWrite>(
index: &R,
_check: impl Fn(),
dir: &Path,
file: &Path,
) where
R::Page: Page<Opaque = Opaque>,
{
let meta_guard = index.read(0);
Expand All @@ -38,10 +43,11 @@ where

let _lock_guard = index.write(ptr_lock);

let tempdir = handle_io_error(tempfile::tempdir());

let mut relabel = BufWriter::with_capacity(16 * 1024, handle_io_error(tempfile::tempfile()));
let mut records_writer = crate::io::records_writer(tempdir.path(), 0);
let mut relabel = BufWriter::with_capacity(
16 * 1024,
handle_io_error(OpenOptions::new().read(true).write(true).open(file)),
Comment thread
usamoi marked this conversation as resolved.
);
let mut records_writer = crate::io::records_writer(dir, 0);

let jump_guard = index.read(ptr_jump);
let jump_bytes = jump_guard.get(1).expect("data corruption");
Expand Down Expand Up @@ -93,7 +99,7 @@ where
} else {
&mut []
};
let mut mappings_writer = crate::io::mappings_writer(tempdir.path(), 0);
let mut mappings_writer = crate::io::mappings_writer(dir, 0);

{
let mut tape_tokens = TapeReader::new(jump_tuple.ptr_tokens(), |bytes| {
Expand Down Expand Up @@ -254,9 +260,9 @@ where
mappings_writer.flush();
drop(records_writer);
drop(mappings_writer);
crate::io::locally_merge(tempdir.path(), 0);
crate::io::locally_merge(dir, 0);

let segment = crate::io::readers(tempdir.path(), 1);
let segment = crate::io::readers(dir, 1);
let flushed = crate::flush::flush(k1, b, index, segment);

let mut jump_guard = index.write(ptr_jump);
Expand Down
2 changes: 1 addition & 1 deletion crates/xtask/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ serde.workspace = true
serde_json = "1.0.149"
shlex = "1.3.0"
target-triple = "1.0.0"
tempfile.workspace = true
tempfile = "3.27.0"

[lints]
workspace = true
4 changes: 3 additions & 1 deletion src/index/bm25/am/am_build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ use crate::index::bm25::am::Reloption;
use crate::index::bm25::types::*;
use crate::index::fetcher::ctid_to_key;
use crate::index::storage::PostgresRelation;
use crate::index::temp::tempdir;
use crate::index::traverse::{HeapTraverser, Traverser};
use std::ffi::{CStr, OsStr};
use std::marker::PhantomData;
Expand Down Expand Up @@ -140,7 +141,7 @@ pub unsafe extern "C-unwind" fn ambuild(
reporter.tuples_total(unsafe { (*(*index_relation).rd_rel).reltuples as u64 });
reporter.phase(BuildPhase::from_code(BuildPhaseCode::Scanning));
let seed = bm25::seed::random();
let tempdir = tempfile::tempdir().expect("failed to create temporary directory");
let tempdir = tempdir();
let total = if let Some(leader) = unsafe {
Bm25Leader::enter(
c"bm25_parallel_build_main",
Expand Down Expand Up @@ -534,6 +535,7 @@ pub unsafe extern "C-unwind" fn bm25_parallel_build_main(
_seg: *mut pgrx::pg_sys::dsm_segment,
toc: *mut pgrx::pg_sys::shm_toc,
) {
let _ = rand::rng().reseed();
let bm25shared = unsafe {
pgrx::pg_sys::shm_toc_lookup(toc, 0xA000000000000001, false).cast::<Bm25Shared>()
};
Expand Down
5 changes: 4 additions & 1 deletion src/index/bm25/am/am_vacuumcleanup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// Copyright (c) 2025-2026 TensorChord Inc.

use crate::index::storage::PostgresRelation;
use crate::index::temp::{tempdir, tempfile};

#[pgrx::pg_guard]
pub unsafe extern "C-unwind" fn amvacuumcleanup(
Expand All @@ -33,6 +34,8 @@ pub unsafe extern "C-unwind" fn amvacuumcleanup(
#[cfg(feature = "pg18")]
pgrx::pg_sys::vacuum_delay_point(false);
};
bm25::maintain(&index, check);
let tempdir = tempdir();
let tempfile = tempfile();
bm25::maintain(&index, check, tempdir.path(), tempfile.path());
stats
}
1 change: 1 addition & 0 deletions src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ mod hook;
mod operators;
mod scanners;
mod storage;
mod temp;
mod traverse;

pub fn init() {
Expand Down
101 changes: 101 additions & 0 deletions src/index/temp.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// This software is licensed under a dual license model:
//
// GNU Affero General Public License v3 (AGPLv3): You may use, modify, and
// distribute this software under the terms of the AGPLv3.
//
// Elastic License v2 (ELv2): You may also use, modify, and distribute this
// software under the Elastic License v2, which has specific restrictions.
//
// We welcome any commercial collaboration or support. For inquiries
// regarding the licenses, please contact us at:
// vectorchord-inquiry@tensorchord.ai
//
// Copyright (c) 2025-2026 TensorChord Inc.

use std::path::{Path, PathBuf};

pub struct TempFile {
path: PathBuf,
}

impl TempFile {
pub fn path(&self) -> &Path {
self.path.as_path()
}
}

impl Drop for TempFile {
fn drop(&mut self) {
let _ = std::fs::remove_file(&self.path);
}
}

pub fn tempfile() -> TempFile {
let path = temppath();
std::fs::File::create_new(&path).expect("failed to create the temporary file");
TempFile { path }
}

pub struct TempDir {
path: PathBuf,
}

impl TempDir {
pub fn path(&self) -> &Path {
self.path.as_path()
}
}

impl Drop for TempDir {
fn drop(&mut self) {
let _ = std::fs::remove_dir_all(&self.path);
}
}

pub fn tempdir() -> TempDir {
let path = temppath();
std::fs::create_dir(&path).expect("failed to create the temporary directory");
TempDir { path }
}

fn temppath() -> PathBuf {
let tablespace_path = unsafe {
use rand::seq::IndexedRandom;
use std::mem::MaybeUninit;
pgrx::pg_sys::PrepareTempTablespaces();
let mut tablespaces = [pgrx::pg_sys::Oid::INVALID; 8];
let length =
pgrx::pg_sys::GetTempTablespaces(tablespaces.as_mut_ptr(), tablespaces.len() as _);
let tablespace = tablespaces[..length as usize]
.choose(&mut rand::rng())
.copied()
.unwrap_or(pgrx::pg_sys::Oid::INVALID);
let tablespace = if tablespace != pgrx::pg_sys::Oid::INVALID {
tablespace
} else {
pgrx::pg_sys::MyDatabaseTableSpace
};
let mut buf = [MaybeUninit::<std::ffi::c_char>::uninit(); pgrx::pg_sys::MAXPGPATH as usize];
pgrx::pg_sys::TempTablespacePath(buf.as_mut_ptr().cast::<std::ffi::c_char>(), tablespace);
let s = std::ffi::CStr::from_ptr(buf.as_ptr().cast::<std::ffi::c_char>());
// It is reasonable to make this assumption because PostgreSQL
// uses symbolic links internally to access tablespaces.
let s = s.to_str().expect("found non-utf8 characters in the path");
assert!(s.is_ascii(), "found non-ascii characters in the path");
Comment thread
usamoi marked this conversation as resolved.
debug_assert!(s.starts_with("base/") || s.starts_with("pg_tblspc/"));
debug_assert!(s.ends_with("/pgsql_tmp"));
AsRef::<Path>::as_ref(s).to_path_buf()
};
if let Err(e) = std::fs::create_dir(&tablespace_path) {
if e.kind() != std::io::ErrorKind::AlreadyExists {
panic!("failed to create the temporary directory in the tablespace");
Comment thread
usamoi marked this conversation as resolved.
}
}
let path = tablespace_path.join(crate::tempname());
{
// a leftover from a backend crash
let _ = std::fs::remove_file(&path);
let _ = std::fs::remove_dir_all(&path);
}
path
}
14 changes: 14 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,20 @@ fn is_main() -> bool {
IS_MAIN.get()
}

#[must_use]
fn tempname() -> String {
let pid = std::process::id();
let number = {
static mut COUNTER: u32 = 0;
unsafe {
let number = COUNTER;
COUNTER = COUNTER.wrapping_add(1);
number
}
Comment thread
usamoi marked this conversation as resolved.
};
format!("pgsql_tmp{pid}.{number}.vchord_bm25")
}

#[cfg(not(panic = "unwind"))]
compile_error!("This crate must be compiled with `-Cpanic=unwind`.");

Expand Down
Loading