-
Notifications
You must be signed in to change notification settings - Fork 140
Bring back the fused graph index #561
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
9d191a4
2a76e39
1b16d25
8f514d8
f315b61
7246a38
cc9cfc8
b633d5d
0177c43
670a97d
0dded51
4179de7
f0a9e4f
fc0aa88
e0cb196
c0b53a4
e5fbf1d
0300fda
ddbf06d
14a879f
459c63c
af1f142
698ebcd
ca0cd79
fd1bb4a
450b0eb
ae9416b
7e4b118
2c2e9f3
48735b9
4d1fefd
1573c83
cf42983
a3d653c
f03bdb1
b4dbc24
d0b8e08
5a20102
885ea6a
742dba3
d56b65b
dedacaa
748df31
4bca39f
e41f143
06c84cb
a33679d
817342c
1dc5683
64eb613
321eaf0
2a49f7f
4901fb6
6ffea27
8063492
e99b09a
f51007c
5a76fc7
b5387f2
1680c95
563b131
ad2facb
8957a9b
6406bb3
ff0da48
0b4c75b
92c1c57
8da3762
69b4a67
4b6bcb0
f22ef4b
c4a2304
1b87234
b2117b2
d17838d
dfc38bf
600040b
c02d731
75d7822
74d5f3e
c9b902e
3dc9210
7f07216
1212f00
0a86e50
afc269d
7e9d468
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,11 +18,20 @@ | |
|
|
||
| import io.github.jbellis.jvector.disk.IndexWriter; | ||
| import io.github.jbellis.jvector.graph.ImmutableGraphIndex; | ||
| import io.github.jbellis.jvector.graph.disk.feature.*; | ||
| import io.github.jbellis.jvector.graph.disk.feature.Feature; | ||
| import io.github.jbellis.jvector.graph.disk.feature.FeatureId; | ||
| import io.github.jbellis.jvector.graph.disk.feature.FusedFeature; | ||
| import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; | ||
| import io.github.jbellis.jvector.graph.disk.feature.NVQ; | ||
| import io.github.jbellis.jvector.graph.disk.feature.SeparatedFeature; | ||
| import io.github.jbellis.jvector.graph.disk.feature.SeparatedNVQ; | ||
| import io.github.jbellis.jvector.graph.disk.feature.SeparatedVectors; | ||
|
|
||
| import org.agrona.collections.Int2IntHashMap; | ||
|
|
||
| import java.io.IOException; | ||
| import java.util.EnumMap; | ||
| import java.util.LinkedHashMap; | ||
| import java.util.List; | ||
| import java.util.Map; | ||
| import java.util.Set; | ||
|
|
@@ -38,20 +47,18 @@ public abstract class AbstractGraphIndexWriter<T extends IndexWriter> implements | |
| final ImmutableGraphIndex graph; | ||
| final OrdinalMapper ordinalMapper; | ||
| final int dimension; | ||
| // we don't use Map features but EnumMap is the best way to make sure we don't | ||
| // accidentally introduce an ordering bug in the future | ||
| final EnumMap<FeatureId, Feature> featureMap; | ||
| final Map<FeatureId, Feature> featureMap; | ||
| final T out; /* output for graph nodes and inline features */ | ||
| final int headerSize; | ||
| volatile int maxOrdinalWritten = -1; | ||
| final List<Feature> inlineFeatures; | ||
|
|
||
| AbstractGraphIndexWriter(T out, | ||
| int version, | ||
| ImmutableGraphIndex graph, | ||
| OrdinalMapper oldToNewOrdinals, | ||
| int dimension, | ||
| EnumMap<FeatureId, Feature> features) | ||
| int version, | ||
| ImmutableGraphIndex graph, | ||
| OrdinalMapper oldToNewOrdinals, | ||
| int dimension, | ||
| EnumMap<FeatureId, Feature> features) | ||
| { | ||
| if (graph.getMaxLevel() > 0 && version < 4) { | ||
| throw new IllegalArgumentException("Multilayer graphs must be written with version 4 or higher"); | ||
|
|
@@ -60,8 +67,28 @@ public abstract class AbstractGraphIndexWriter<T extends IndexWriter> implements | |
| this.graph = graph; | ||
| this.ordinalMapper = oldToNewOrdinals; | ||
| this.dimension = dimension; | ||
| this.featureMap = features; | ||
| this.inlineFeatures = features.values().stream().filter(f -> !(f instanceof SeparatedFeature)).collect(Collectors.toList()); | ||
|
|
||
| if (version <= 5) { | ||
| // Versions <= 5 use the old feature ordering, simply provided by the FeatureId | ||
| this.featureMap = features; | ||
| this.inlineFeatures = features.values().stream().filter(f -> !(f instanceof SeparatedFeature)).collect(Collectors.toList()); | ||
| } else { | ||
| // Version 6 uses the new feature ordering to place fused features last in the list | ||
| var sortedFeatures = features.values().stream().sorted().collect(Collectors.toList()); | ||
| this.featureMap = new LinkedHashMap<>(); | ||
| for (var feature : sortedFeatures) { | ||
| this.featureMap.put(feature.id(), feature); | ||
| } | ||
| this.inlineFeatures = sortedFeatures.stream().filter(f -> !(f instanceof SeparatedFeature)).sorted().collect(Collectors.toList()); | ||
| } | ||
|
|
||
| long fusedFeaturesCount = this.inlineFeatures.stream().filter(Feature::isFused).count(); | ||
| if (fusedFeaturesCount > 1) { | ||
| throw new IllegalArgumentException("At most one fused feature is allowed"); | ||
| } | ||
| if (fusedFeaturesCount == 1 && version < 6) { | ||
| throw new IllegalArgumentException("Fused features require version 6 or higher"); | ||
| } | ||
| this.out = out; | ||
|
|
||
| // create a mock Header to determine the correct size | ||
|
|
@@ -164,7 +191,7 @@ public synchronized void writeHeader(ImmutableGraphIndex.View view, long startOf | |
| assert out.position() == startOffset + headerSize : String.format("%d != %d", out.position(), startOffset + headerSize); | ||
| } | ||
|
|
||
| void writeSparseLevels(ImmutableGraphIndex.View view) throws IOException { | ||
| void writeSparseLevels(ImmutableGraphIndex.View view, Map<FeatureId, IntFunction<Feature.State>> featureStateSuppliers) throws IOException { | ||
| // write sparse levels | ||
| for (int level = 1; level <= graph.getMaxLevel(); level++) { | ||
| int layerSize = graph.size(level); | ||
|
|
@@ -193,6 +220,50 @@ void writeSparseLevels(ImmutableGraphIndex.View view) throws IOException { | |
| throw new IllegalStateException("Mismatch between layer size and nodes written"); | ||
| } | ||
| } | ||
|
|
||
| // In V6, fused features for the in-memory hierarchy are written in a block after the top layers of the graph. | ||
| // Since everything in level 1 is also contained in the higher levels, we only need to write the fused features for level 1. | ||
| if (version == 6) { | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this strictly version == 6 or should it be version >= 6 like in another places in the code ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is a good observation. The rule I followed is that subsequent versions might change this behavior anyway, so there's no benefit in expressing expectations over future versions in the code. |
||
| // There should be only one fused feature per node. This is checked in the class constructor. | ||
| // This is the only place where we explicitly need the fused feature. If there are more places in the | ||
| // future, it may be worth having fusedFeature as class member. | ||
| FusedFeature fusedFeature = null; | ||
| for (var feature : inlineFeatures) { | ||
| if (feature.isFused()) { | ||
| fusedFeature = (FusedFeature) feature; | ||
| } | ||
| } | ||
| if (fusedFeature != null) { | ||
| var supplier = featureStateSuppliers.get(fusedFeature.id()); | ||
| if (supplier == null) { | ||
| throw new IllegalStateException("Supplier for feature " + fusedFeature.id() + " not found"); | ||
| } | ||
|
|
||
| if (graph.getMaxLevel() >= 1) { | ||
| int level = 1; | ||
| int layerSize = graph.size(level); | ||
| int nodesWritten = 0; | ||
| for (var it = graph.getNodes(level); it.hasNext(); ) { | ||
| int originalOrdinal = it.nextInt(); | ||
|
|
||
| // We write the ordinal (node id) so that we can map it to the corresponding feature | ||
| final int newOrdinal = ordinalMapper.oldToNew(originalOrdinal); | ||
| out.writeInt(newOrdinal); | ||
| fusedFeature.writeSourceFeature(out, supplier.apply(originalOrdinal)); | ||
| nodesWritten++; | ||
| } | ||
| if (nodesWritten != layerSize) { | ||
| throw new IllegalStateException("Mismatch between layer 1 size and features written"); | ||
| } | ||
| } else { | ||
| // Write the source feature of the entry node | ||
| final int originalEntryNode = view.entryNode().node; | ||
| final int entryNode = ordinalMapper.oldToNew(originalEntryNode); | ||
| out.writeInt(entryNode); | ||
| fusedFeature.writeSourceFeature(out, supplier.apply(originalEntryNode)); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| void writeSeparatedFeatures(Map<FeatureId, IntFunction<Feature.State>> featureStateSuppliers) throws IOException { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.