-
Notifications
You must be signed in to change notification settings - Fork 153
Remap heap graph ordinals on serialization #618
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,8 @@ | |
| import io.github.jbellis.jvector.annotations.Experimental; | ||
| import io.github.jbellis.jvector.disk.RandomAccessReader; | ||
| import io.github.jbellis.jvector.graph.ConcurrentNeighborMap.Neighbors; | ||
| import io.github.jbellis.jvector.graph.disk.AbstractGraphIndexWriter; | ||
| import io.github.jbellis.jvector.graph.disk.OrdinalMapper; | ||
| import io.github.jbellis.jvector.graph.diversity.DiversityProvider; | ||
| import io.github.jbellis.jvector.graph.similarity.ScoreFunction; | ||
| import io.github.jbellis.jvector.util.Accountable; | ||
|
|
@@ -49,7 +51,6 @@ | |
| import java.util.concurrent.atomic.AtomicIntegerArray; | ||
| import java.util.concurrent.atomic.AtomicReference; | ||
| import java.util.concurrent.locks.StampedLock; | ||
| import java.util.function.Function; | ||
| import java.util.stream.IntStream; | ||
|
|
||
| /** | ||
|
|
@@ -62,6 +63,7 @@ | |
| public class OnHeapGraphIndex implements MutableGraphIndex { | ||
| // Used for saving and loading OnHeapGraphIndex | ||
| public static final int MAGIC = 0x75EC4012; // JVECTOR, with some imagination | ||
| public static final int SERIALIZE_VERSION = 4; | ||
|
|
||
| // The current entry node for searches | ||
| private final AtomicReference<NodeAtLevel> entryPoint; | ||
|
|
@@ -523,17 +525,43 @@ public String toString() { | |
| } | ||
|
|
||
| /** | ||
| * Saves the graph to the given DataOutput for reloading into memory later | ||
| * <p>Saves the graph to the given DataOutput for reloading into memory later. | ||
| * | ||
| * <p> | ||
| * Does not alter existing ordinals even if the ordinals are not compact, | ||
| * which can happen if some nodes were deleted. | ||
| */ | ||
| @Experimental | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know this is neither the point of this PR, nor was it introduced by this PR but these @experimental and @deprecated annotations have been here since 10/10/25. It's nonsensical to combine the 2 annotations to begin with, and based on the fact that the code in question is still in use I would argue that it is neither experimental nor deprecated at this point. Can we just get rid of them? |
||
| @Deprecated | ||
| public void save(DataOutput out) throws IOException { | ||
| save(out, new OrdinalMapper.IdentityMapper(getIdUpperBound() - 1)); | ||
| } | ||
|
|
||
| /** | ||
| * <p>Saves the graph to the given DataOutput for reloading into memory later. | ||
| * | ||
| * <p> | ||
| * Ensures that ordinals holes (from deletions) are not present in the saved graph | ||
| * By creating a compact mapping which preserves the relative order of the existing nodes. | ||
| */ | ||
| @Experimental | ||
| @Deprecated | ||
| public void saveWithCompactOrdinals(DataOutput out) throws IOException { | ||
| save(out, new OrdinalMapper.MapMapper(AbstractGraphIndexWriter.sequentialRenumbering(this))); | ||
| } | ||
|
|
||
| /** | ||
| * Saves the graph to the given DataOutput for reloading into memory later | ||
| */ | ||
| @Experimental | ||
| @Deprecated | ||
| public void save(DataOutput out, OrdinalMapper mapper) throws IOException { | ||
| if (!allMutationsCompleted()) { | ||
| throw new IllegalStateException("Cannot save a graph with pending mutations. Call cleanup() first"); | ||
| } | ||
|
|
||
| out.writeInt(OnHeapGraphIndex.MAGIC); // the magic number | ||
| out.writeInt(4); // The version | ||
| out.writeInt(SERIALIZE_VERSION); // The version | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Our versioning is frankly a mess, and needs to be refactored completely. That said, we are currently up to version 6 , so this may need to be updated here as well. |
||
|
|
||
| // Write graph-level properties. | ||
| out.writeInt(layers.size()); | ||
|
|
@@ -543,7 +571,7 @@ public void save(DataOutput out) throws IOException { | |
|
|
||
| var entryNode = entryPoint.get(); | ||
| assert entryNode.level == getMaxLevel(); | ||
| out.writeInt(entryNode.node); | ||
| out.writeInt(mapper.oldToNew(entryNode.node)); | ||
|
|
||
| for (int level = 0; level < layers.size(); level++) { | ||
| out.writeInt(size(level)); | ||
|
|
@@ -553,19 +581,19 @@ public void save(DataOutput out) throws IOException { | |
| while (it.hasNext()) { | ||
| int nodeId = it.nextInt(); | ||
| var neighbors = layers.get(level).get(nodeId); | ||
| out.writeInt(nodeId); | ||
| out.writeInt(mapper.oldToNew(nodeId)); | ||
| out.writeInt(neighbors.size()); | ||
|
|
||
| for (int n = 0; n < neighbors.size(); n++) { | ||
| out.writeInt(neighbors.getNode(n)); | ||
| out.writeInt(mapper.oldToNew(neighbors.getNode(n))); | ||
| out.writeFloat(neighbors.getScore(n)); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Saves the graph to the given DataOutput for reloading into memory later | ||
| * Loads the graph from the given RandomAccessReader | ||
| */ | ||
| @Experimental | ||
| @Deprecated | ||
|
|
@@ -576,8 +604,8 @@ public static OnHeapGraphIndex load(RandomAccessReader in, int dimension, double | |
| } | ||
|
|
||
| int version = in.readInt(); // The version | ||
| if (version != 4) { | ||
| throw new IOException("Unsupported version: " + version); | ||
| if (version != SERIALIZE_VERSION) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as above |
||
| throw new IOException("Unsupported version: " + version + ", expected " + SERIALIZE_VERSION); | ||
| } | ||
|
|
||
| // Write graph-level properties. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If this is JVector version, consider writing out the entire version (e.g. 4.0.0-rc4)?