Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ endee = [ "endee==0.1.10" ]
lindorm = [ "opensearch-py" ]
seekdb = [ "mysql-connector-python" ]
pinot = [ "requests" ]
logosdb = [ "logosdb" ]

[project.urls]
Repository = "https://github.com/zilliztech/VectorDBBench"
Expand Down
16 changes: 16 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class DB(Enum):
PolarDB = "PolarDB"
Pinot = "Pinot"
SeekDB = "SeekDB"
LogosDB = "LogosDB"

@property
def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915
Expand Down Expand Up @@ -269,6 +270,11 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915

return SeekDB

if self == DB.LogosDB:
from .logosdb.logosdb import LogosDB

return LogosDB

msg = f"Unknown DB: {self.name}"
raise ValueError(msg)

Expand Down Expand Up @@ -477,6 +483,11 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915

return SeekDBConfig

if self == DB.LogosDB:
from .logosdb.config import LogosDBConfig

return LogosDBConfig

msg = f"Unknown DB: {self.name}"
raise ValueError(msg)

Expand Down Expand Up @@ -667,6 +678,11 @@ def case_config_cls( # noqa: C901, PLR0911, PLR0912, PLR0915

return _seekdb_case_config.get(index_type)

if self == DB.LogosDB:
from .logosdb.config import LogosDBIndexConfig

return LogosDBIndexConfig

# DB.Pinecone, DB.Redis
return EmptyDBCaseConfig

Expand Down
Empty file.
40 changes: 40 additions & 0 deletions vectordb_bench/backend/clients/logosdb/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import Annotated, Unpack

import click

from vectordb_bench.backend.clients import DB
from vectordb_bench.cli.cli import (
CommonTypedDict,
cli,
click_parameter_decorators_from_typed_dict,
run,
)

DBTYPE = DB.LogosDB


class LogosDBTypedDict(CommonTypedDict):
    """Command-line options of the LogosDB command, added to the common ones."""

    # Filesystem location of the database; optional, with a default under /tmp.
    uri: Annotated[
        str,
        click.option(
            "--uri",
            default="/tmp/vectordbbench_logosdb",
            show_default=True,
            required=False,
            type=str,
            help="Path to LogosDB directory (local embedded DB)",
        ),
    ]


# CLI entry point: builds the LogosDB connection and case configs from the
# parsed options and hands everything to ``run``.
# A docstring is deliberately not used here because click would surface it as
# the command's ``--help`` text.
@cli.command()
@click_parameter_decorators_from_typed_dict(LogosDBTypedDict)
def LogosDB(**parameters: Unpack[LogosDBTypedDict]):
    from .config import LogosDBConfig, LogosDBIndexConfig

    config_kwargs = {"uri": parameters["uri"]}

    # Forward the user-supplied label so it is not silently dropped from the
    # stored DB config. NOTE(review): assumes the DBConfig base declares a
    # ``db_label`` field and the common options provide it, as sibling client
    # CLIs rely on — confirm. The key is only passed when a non-empty value
    # is present, so the config default applies otherwise.
    db_label = parameters.get("db_label")
    if db_label:
        config_kwargs["db_label"] = db_label

    run(
        db=DBTYPE,
        db_config=LogosDBConfig(**config_kwargs),
        db_case_config=LogosDBIndexConfig(),
        **parameters,
    )
29 changes: 29 additions & 0 deletions vectordb_bench/backend/clients/logosdb/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from pydantic import BaseModel

from ..api import DBCaseConfig, DBConfig, MetricType


class LogosDBConfig(DBConfig):
    """Connection settings for LogosDB: a single filesystem path."""

    # Path handed to the LogosDB client; defaults to a directory under /tmp.
    uri: str = "/tmp/vectordbbench_logosdb"

    def to_dict(self) -> dict:
        """Return the settings as a plain dict holding only the ``uri`` key."""
        return {"uri": self.uri}


class LogosDBIndexConfig(BaseModel, DBCaseConfig):
metric_type: MetricType | None = None

def parse_metric(self) -> int:
import logosdb

if self.metric_type == MetricType.L2:
return logosdb.DIST_L2
if self.metric_type == MetricType.IP:
return logosdb.DIST_IP
return logosdb.DIST_COSINE

def index_param(self) -> dict:
return {}

def search_param(self) -> dict:
return {}
85 changes: 85 additions & 0 deletions vectordb_bench/backend/clients/logosdb/logosdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import logging
import os
import shutil
from collections.abc import Iterable
from contextlib import contextmanager

import numpy as np

from ..api import VectorDB
from .config import LogosDBIndexConfig

log = logging.getLogger(__name__)


class LogosDB(VectorDB):
    """Benchmark client for a LogosDB database stored at a local path.

    The constructor prepares the on-disk location (optionally wiping it) and
    opens the database once; the handle used for inserts and searches is
    opened by ``init`` and released again when that context exits.
    """

    def __init__(
        self,
        dim: int,
        db_config: dict,
        db_case_config: LogosDBIndexConfig,
        collection_name: str = "LogosDBCollection",
        drop_old: bool = False,
        name: str = "LogosDB",
        **kwargs,
    ):
        """Record the settings, optionally drop old data, and open the DB once.

        Args:
            dim: Vector dimension passed to the database.
            db_config: Connection settings; the ``uri`` key is required.
            db_case_config: Case config supplying the distance metric.
            collection_name: Accepted but not used by this client.
            drop_old: When true, remove an existing path at ``uri`` first.
            name: Label used in log messages.
            **kwargs: Accepted but not used by this client.
        """
        self.name = name
        self.dim = dim
        self.db_config = db_config
        self.case_config = db_case_config
        self.uri = db_config["uri"]
        self.db = None

        if drop_old:
            if os.path.exists(self.uri):
                log.info(f"{self.name} drop_old: removing {self.uri}")
                shutil.rmtree(self.uri)

        # Open the database once and drop the handle right away; the working
        # handle is created later by ``init``.
        handle, distance = self._open_handle()
        log.info(f"{self.name} initialized at {self.uri} dim={dim} distance={distance}")
        del handle

    def _open_handle(self):
        """Open the database at ``self.uri`` and return ``(handle, distance)``."""
        import logosdb as _logosdb

        distance = self.case_config.parse_metric()
        return _logosdb.DB(self.uri, dim=self.dim, distance=distance), distance

    @contextmanager
    def init(self):
        """Context manager that holds an open handle in ``self.db``.

        The handle is released and ``self.db`` reset to ``None`` on exit,
        whether or not the body raised.
        """
        self.db, _ = self._open_handle()
        try:
            yield
        finally:
            del self.db
            self.db = None

    def insert_embeddings(
        self,
        embeddings: Iterable[list[float]],
        metadata: list[int],
        **kwargs,
    ) -> tuple[int, Exception]:
        """Insert one batch of vectors, storing each id as the record text.

        Returns:
            ``(len(metadata), None)`` on success, or ``(0, error)`` when
            anything in the batch insert raised.
        """
        assert self.db is not None
        try:
            vectors = np.array(list(embeddings), dtype=np.float32)
            # Ids are stored as text; search_embedding parses them back.
            labels = list(map(str, metadata))
            self.db.put_batch(vectors, texts=labels)
            return len(metadata), None
        except Exception as e:
            log.warning(f"{self.name} insert_embeddings error: {e}")
            return 0, e

    def search_embedding(
        self,
        query: list[float],
        k: int = 100,
        filters: dict | None = None,
        timeout: int | None = None,
    ) -> list[int]:
        """Return the ids of the top ``k`` hits for ``query``.

        ``filters`` and ``timeout`` are accepted but not used.
        """
        assert self.db is not None
        vector = np.array(query, dtype=np.float32)
        return [int(hit.text) for hit in self.db.search(vector, top_k=k)]

    def optimize(self, data_size: int | None = None):
        """Do nothing besides logging that no explicit optimize step runs."""
        log.info(f"{self.name} optimize: HNSW index is built incrementally, no explicit step needed")
2 changes: 2 additions & 0 deletions vectordb_bench/cli/vectordbbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ..backend.clients.endee.cli import Endee
from ..backend.clients.hologres.cli import HologresHGraph
from ..backend.clients.lancedb.cli import LanceDB
from ..backend.clients.lindorm.cli import LindormHNSW, LindormIVFBQ, LindormIVFPQ
from ..backend.clients.logosdb.cli import LogosDB
from ..backend.clients.mariadb.cli import MariaDBHNSW
from ..backend.clients.memorydb.cli import MemoryDB
Expand Down Expand Up @@ -97,6 +98,7 @@
cli.add_command(PolarDBHNSWPQ)
cli.add_command(PolarDBHNSWSQ)
cli.add_command(SeekDBHNSW)
cli.add_command(LogosDB)


if __name__ == "__main__":
Expand Down