techiejd · techiejd · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026 · Jun 1, 2026
@@ -832,7 +832,7 @@ curl -X POST http://localhost:3000/api/vector-retry-failed-batch \
 
 ### Local API
 
-The plugin provides a `getVectorizedPayload(payload)` function which returns a `vectorizedPayload` object exposing `search`, `queueEmbed`, `bulkEmbed`, and `retryFailedBatch` methods.
+The plugin provides a `getVectorizedPayload(payload)` function which returns a `vectorizedPayload` object exposing `search`, `findEmbeddingsByIds`, `queueEmbed`, `bulkEmbed`, and `retryFailedBatch` methods.
 
 #### Getting the Vectorized Payload Object
 
@@ -883,6 +883,28 @@ const results = await vectorizedPayload.search({
 })
 ```
 
+#### `vectorizedPayload.findEmbeddingsByIds(params)`
+
+Fetch stored embedding records by primary key — **including the raw embedding vector**, which the normal search/query API never returns. The `id` of each record is whatever [`search()`](#vectorizedpayloadsearchparams) returns as `result.id`, so a search result round-trips directly. This is the building block for "more like this" flows.
+
+**Returns:** `Promise<Array<EmbeddingRecord>>` — `EmbeddingRecord` is the search result shape without `score` and with `embedding: number[]`.
+
+**Example:**
+
+```typescript
+const [record] = await vectorizedPayload.findEmbeddingsByIds({
+  knowledgePool: 'mainKnowledgePool',
+  ids: ['<an id from a previous search result>'],
+})
+
+if (record) {
+  // record.embedding is the raw number[] vector — feed it back into search for "more like this"
+  console.log(record.embedding.length, record.chunkText)
+}
+```
+
+Misses are dropped (the result may be shorter than `ids`), order is not guaranteed, and an empty `ids` array returns `[]` without touching the backend.
+
 #### `vectorizedPayload.queueEmbed(params)`
 
 Manually queue a vectorization job for a document.

@@ -110,6 +110,7 @@ import type {
   KnowledgePoolDynamicConfig,
   StoreChunkData,
   VectorSearchResult,
+  EmbeddingRecord,
 } from 'payloadcms-vectorize'
 
 export type DbAdapter = {
@@ -150,6 +151,12 @@ export type DbAdapter = {
     limit?: number,
     where?: Where,
   ) => Promise<Array<VectorSearchResult>>
+
+  findByIds: (
+    payload: BasePayload,
+    poolName: KnowledgePoolName,
+    ids: string[],
+  ) => Promise<Array<EmbeddingRecord>>
 }
 ```
 
@@ -162,6 +169,7 @@ export type DbAdapter = {
 | `deleteChunks` | After a source document is deleted. | Remove every chunk where `sourceCollection === ... && docId === ...`. Must be safe to call when no chunks exist (no-op, no throw). |
 | `hasEmbeddingVersion` | During bulk-embed planning, per candidate document. | Return `true` iff at least one chunk exists with the matching `(sourceCollection, docId, embeddingVersion)` triple. Must filter on **all three** — older `0.7.0` adapters that ignored `embeddingVersion` caused stale embeddings on model bumps. |
 | `search` | Per `/vector-search` request and per `getVectorizedPayload().search()` call. | Translate `where` (Payload-style) into your store's filter language, perform a vector search using `queryEmbedding`, and return up to `limit` results sorted by descending relevance. |
+| `findByIds` | Per `getVectorizedPayload().findEmbeddingsByIds()` call. | Fetch stored embedding records by primary key, **including the raw `embedding` vector** (which `search` never returns). Look up by the same `id` your `search` returns as `result.id`. Unknown ids are dropped (result length may be `< ids.length`); order is not guaranteed; empty `ids` returns `[]` without a backend call. Adapters with a strict id format (pg integer PK, MongoDB `ObjectId`) also drop *malformed* ids as misses without erroring; adapters keyed on an opaque id (CF's composite vector id) forward ids to the backend as-is, so a backend that rejects a malformed id may surface that error. |
 
 ### Error contract
 
@@ -286,6 +294,12 @@ export const createYourDbVectorIntegration = (
       //       Return Array<VectorSearchResult> sorted by descending score.
       return []
     },
+
+    findByIds: async (payload, poolName, ids) => {
+      // TODO: fetch stored records by primary key, including the raw `embedding` vector.
+      //       Return Array<EmbeddingRecord>. Unknown ids are misses (drop them, don't throw).
+      return []
+    },
   }
 
   return { adapter }
@@ -361,6 +375,25 @@ export interface VectorSearchResult {
   /** Any extensionFields persisted via storeChunk must round-trip here. */
   [key: string]: any
 }
+
+export interface EmbeddingRecord {
+  /** Embedding record ID — the same value your adapter returns as VectorSearchResult.id. */
+  id: string
+  /** Source collection slug (echoed from StoreChunkData). */
+  sourceCollection: string
+  /** Source document ID (echoed from StoreChunkData). */
+  docId: string
+  /** Chunk index within the source document. */
+  chunkIndex: number
+  /** The original chunk text. */
+  chunkText: string
+  /** Embedding model/version string. */
+  embeddingVersion: string
+  /** The raw embedding vector — never returned by `search`. */
+  embedding: number[]
+  /** Any extensionFields persisted via storeChunk round-trip here. */
+  [key: string]: any
+}
 ```
 
 | Field | Required | Notes |
@@ -371,6 +404,8 @@ export interface VectorSearchResult {
 | `chunkText`, `embeddingVersion` | yes | Same. |
 | `extensionFields.*` | optional | Whatever the user passed in `extensionFields` must be queryable via `where`. |
 
+> `EmbeddingRecord` (returned by `findByIds`) is `VectorSearchResult` without `score` and with the raw `embedding: number[]`.
+
 ## Testing your adapter
 
 The dev harness in [`dev/`](../dev) runs the integration suite against any adapter you wire up. To test a new adapter:

@@ -61,6 +61,13 @@ function createMockCloudflareBinding() {
       }
     }),
 
+    getByIds: vi.fn(async (ids: string[]) => {
+      return ids
+        .map((id) => storage.get(id))
+        .filter((v): v is { id: string; values: number[]; metadata: any } => v !== undefined)
+        .map((v) => ({ id: v.id, values: v.values, metadata: v.metadata }))
+    }),
+
     list: vi.fn(async (options: any) => {
       const vectors = Array.from(storage.values()).map((item) => ({
         id: item.id,
@@ -419,4 +426,73 @@ describe('createCloudflareVectorizeIntegration', () => {
       })
     })
   })
+
+  describe('findByIds', () => {
+    test('returns full EmbeddingRecord including embedding values', async () => {
+      const mockBinding = createMockCloudflareBinding()
+      const { adapter } = createCloudflareVectorizeIntegration({
+        config: { default: { dims: DIMS } },
+        binding: mockBinding as any,
+      })
+      const mockPayload = createMockPayload(mockBinding)
+      const embedding = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
+
+      await adapter.storeChunk(mockPayload, 'default', {
+        sourceCollection: 'posts',
+        docId: 'doc-1',
+        chunkIndex: 0,
+        chunkText: 'find me',
+        embeddingVersion: 'v1',
+        embedding,
+        extensionFields: { category: 'science' },
+      })
+
+      const id = 'default:posts:doc-1:0'
+      const records = await adapter.findByIds(mockPayload, 'default', [id])
+      expect(records).toHaveLength(1)
+      const [r] = records
+      expect(r.id).toBe(id)
+      expect(r.embedding).toEqual(embedding)
+      expect(r.sourceCollection).toBe('posts')
+      expect(r.docId).toBe('doc-1')
+      expect(r.chunkText).toBe('find me')
+      expect(r.embeddingVersion).toBe('v1')
+      expect((r as any).category).toBe('science')
+    })
+
+    test('drops misses', async () => {
+      const mockBinding = createMockCloudflareBinding()
+      const { adapter } = createCloudflareVectorizeIntegration({
+        config: { default: { dims: DIMS } },
+        binding: mockBinding as any,
+      })
+      const mockPayload = createMockPayload(mockBinding)
+      await adapter.storeChunk(mockPayload, 'default', {
+        sourceCollection: 'posts',
+        docId: 'doc-1',
+        chunkIndex: 0,
+        chunkText: 'x',
+        embeddingVersion: 'v1',
+        embedding: [0, 0, 0, 0, 0, 0, 0, 0],
+        extensionFields: {},
+      })
+      const records = await adapter.findByIds(mockPayload, 'default', [
+        'default:posts:doc-1:0',
+        'default:posts:nope:0',
+      ])
+      expect(records).toHaveLength(1)
+      expect(records[0].id).toBe('default:posts:doc-1:0')
+    })
+
+    test('empty ids returns []', async () => {
+      const mockBinding = createMockCloudflareBinding()
+      const { adapter } = createCloudflareVectorizeIntegration({
+        config: { default: { dims: DIMS } },
+        binding: mockBinding as any,
+      })
+      const mockPayload = createMockPayload(mockBinding)
+      const records = await adapter.findByIds(mockPayload, 'default', [])
+      expect(records).toEqual([])
+    })
+  })
 })
@@ -0,0 +1,44 @@
+import { BasePayload } from 'payload'
+import { KnowledgePoolName, EmbeddingRecord } from 'payloadcms-vectorize'
+import { getVectorizeBinding } from './types.js'
+
+const RESERVED_METADATA = ['sourceCollection', 'docId', 'chunkIndex', 'chunkText', 'embeddingVersion']
+
+export default async (
+  payload: BasePayload,
+  _poolName: KnowledgePoolName,
+  ids: string[],
+): Promise<Array<EmbeddingRecord>> => {
+  if (ids.length === 0) return []
+
+  const binding = getVectorizeBinding(payload)
+
+  try {
+    const vectors = await binding.getByIds(ids)
+    if (!vectors) return []
+
+    return vectors.map((vector) => {
+      const metadata = (vector.metadata || {}) as Record<string, unknown>
+      const extensionFields = Object.fromEntries(
+        Object.entries(metadata).filter(([k]) => !RESERVED_METADATA.includes(k)),
+      )
+      return {
+        id: vector.id,
+        sourceCollection: String(metadata.sourceCollection ?? ''),
+        docId: String(metadata.docId ?? ''),
+        chunkIndex:
+          typeof metadata.chunkIndex === 'number'
+            ? metadata.chunkIndex
+            : parseInt(String(metadata.chunkIndex ?? '0'), 10),
+        chunkText: String(metadata.chunkText ?? ''),
+        embeddingVersion: String(metadata.embeddingVersion ?? ''),
+        embedding: Array.from(vector.values ?? []),
+        ...extensionFields,
+      }
+    })
+  } catch (e) {
+    const errorMessage = e instanceof Error ? e.message : String(e)
+    payload.logger.error(`[@payloadcms-vectorize/cf] findByIds failed: ${errorMessage}`)
+    throw new Error(`[@payloadcms-vectorize/cf] findByIds failed: ${errorMessage}`)
+  }
+}
@@ -5,6 +5,7 @@ import type { CloudflareVectorizeBinding, KnowledgePoolsConfig } from './types.j
 import cfMappingsCollection, { CF_MAPPINGS_SLUG } from './collections/cfMappings.js'
 import embed from './embed.js'
 import search from './search.js'
+import findByIds from './findByIds.js'
 
 /**
  * Configuration for Cloudflare Vectorize integration
@@ -117,6 +118,8 @@ export const createCloudflareVectorizeIntegration = (
       }
     },
 
+    findByIds,
+
     hasEmbeddingVersion: async (payload, poolName, sourceCollection, docId, embeddingVersion) => {
       const result = await payload.find({
         collection: CF_MAPPINGS_SLUG as CollectionSlug,

@@ -0,0 +1,89 @@
+import { afterAll, beforeAll, describe, expect, test } from 'vitest'
+import { MongoClient } from 'mongodb'
+import type { BasePayload } from 'payload'
+import type { DbAdapter } from 'payloadcms-vectorize'
+import { DIMS, MONGO_URI } from './constants.js'
+import { buildMongoTestPayload, teardownDbs } from './utils.js'
+import { testEmbeddingVersion, makeDummyEmbedDocs, makeDummyEmbedQuery } from '@shared-test/helpers/embed'
+
+const DB = `mongo_find_by_ids_${Date.now()}`
+
+describe('mongodb findByIds', () => {
+  let payload: BasePayload
+  let adapter: DbAdapter
+  let embeddingId: string
+
+  beforeAll(async () => {
+    const built = await buildMongoTestPayload({
+      uri: MONGO_URI,
+      dbName: DB,
+      pools: { default: { dimensions: DIMS, filterableFields: ['category'] } },
+      knowledgePools: {
+        default: {
+          collections: {},
+          extensionFields: [{ name: 'category', type: 'text' }],
+          embeddingConfig: {
+            version: testEmbeddingVersion,
+            queryFn: makeDummyEmbedQuery(DIMS),
+            realTimeIngestionFn: makeDummyEmbedDocs(DIMS),
+          },
+        },
+      },
+    })
+    payload = built.payload
+    adapter = built.adapter
+
+    await adapter.storeChunk(payload, 'default', {
+      sourceCollection: 'posts',
+      docId: 'doc-1',
+      chunkIndex: 0,
+      chunkText: 'find me',
+      embeddingVersion: testEmbeddingVersion,
+      embedding: Array(DIMS).fill(0.25),
+      extensionFields: { category: 'science' },
+    })
+
+    const c = new MongoClient(MONGO_URI)
+    await c.connect()
+    const doc = await c.db(`${DB}_vectors`).collection('vectorize_default').findOne({ docId: 'doc-1' })
+    embeddingId = String(doc!._id)
+    await c.close()
+  })
+
+  afterAll(async () => {
+    await teardownDbs(payload, MONGO_URI, DB)
+  })
+
+  test('returns full EmbeddingRecord including numeric embedding array', async () => {
+    const records = await adapter.findByIds(payload, 'default', [embeddingId])
+    expect(records).toHaveLength(1)
+    const [r] = records
+    expect(r.id).toBe(embeddingId)
+    expect(Array.isArray(r.embedding)).toBe(true)
+    expect(r.embedding.length).toBe(DIMS)
+    expect(r.embedding.every((n) => typeof n === 'number')).toBe(true)
+    expect(r.sourceCollection).toBe('posts')
+    expect(r.chunkText).toBe('find me')
+    expect(r.embeddingVersion).toBe(testEmbeddingVersion)
+  })
+
+  test('includes extension fields', async () => {
+    const [r] = await adapter.findByIds(payload, 'default', [embeddingId])
+    expect((r as any).category).toBe('science')
+  })
+
+  test('drops misses and invalid ids without throwing', async () => {
+    const records = await adapter.findByIds(payload, 'default', [
+      embeddingId,
+      '000000000000000000000000',
+      'not-an-object-id',
+    ])
+    expect(records).toHaveLength(1)
+    expect(records[0].id).toBe(embeddingId)
+  })
+
+  test('empty ids returns []', async () => {
+    const records = await adapter.findByIds(payload, 'default', [])
+    expect(records).toEqual([])
+  })
+})