Skip to content

Commit

Permalink
Introduce RemoteIndexBuildStrategy, refactor NativeIndexBuildStrategy…
Browse files Browse the repository at this point in the history
… to accept vector value supplier

Signed-off-by: Jay Deng <jayd0104@gmail.com>
  • Loading branch information
jed326 authored and Jay Deng committed Feb 18, 2025
1 parent 45ecb5b commit e3e057b
Show file tree
Hide file tree
Showing 25 changed files with 703 additions and 297 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ public class KNNFeatureFlags {

// Feature flags
private static final String KNN_FORCE_EVICT_CACHE_ENABLED = "knn.feature.cache.force_evict.enabled";
private static final String KNN_REMOTE_VECTOR_BUILD = "knn.feature.remote_index_build.enabled";

@VisibleForTesting
public static final Setting<Boolean> KNN_FORCE_EVICT_CACHE_ENABLED_SETTING = Setting.boolSetting(
Expand All @@ -35,8 +36,18 @@ public class KNNFeatureFlags {
Dynamic
);

/**
* Feature flag to control remote index build at the cluster level
*/
public static final Setting<Boolean> KNN_REMOTE_VECTOR_BUILD_SETTING = Setting.boolSetting(
KNN_REMOTE_VECTOR_BUILD,
false,
NodeScope,
Dynamic
);

public static List<Setting<?>> getFeatureFlags() {
return ImmutableList.of(KNN_FORCE_EVICT_CACHE_ENABLED_SETTING);
return ImmutableList.of(KNN_FORCE_EVICT_CACHE_ENABLED_SETTING, KNN_REMOTE_VECTOR_BUILD_SETTING);
}

/**
Expand All @@ -46,4 +57,11 @@ public static List<Setting<?>> getFeatureFlags() {
public static boolean isForceEvictCacheEnabled() {
return Booleans.parseBoolean(KNNSettings.state().getSettingValue(KNN_FORCE_EVICT_CACHE_ENABLED).toString(), false);
}

/**
* @return true if remote vector index build feature flag is enabled
*/
public static boolean isKNNRemoteVectorBuildEnabled() {
return Booleans.parseBooleanStrict(KNNSettings.state().getSettingValue(KNN_REMOTE_VECTOR_BUILD).toString(), false);
}
}
31 changes: 29 additions & 2 deletions src/main/java/org/opensearch/knn/index/KNNSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@

import static java.util.stream.Collectors.toUnmodifiableMap;
import static org.opensearch.common.settings.Setting.Property.Dynamic;
import static org.opensearch.common.settings.Setting.Property.Final;
import static org.opensearch.common.settings.Setting.Property.IndexScope;
import static org.opensearch.common.settings.Setting.Property.NodeScope;
import static org.opensearch.common.settings.Setting.Property.Final;
import static org.opensearch.common.settings.Setting.Property.UnmodifiableOnRestore;
import static org.opensearch.common.unit.MemorySizeValue.parseBytesSizeValueOrHeapRatio;
import static org.opensearch.core.common.unit.ByteSizeValue.parseBytesSizeValue;
Expand Down Expand Up @@ -94,6 +94,8 @@ public class KNNSettings {
public static final String KNN_FAISS_AVX512_SPR_DISABLED = "knn.faiss.avx512_spr.disabled";
public static final String KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED = "index.knn.disk.vector.shard_level_rescoring_disabled";
public static final String KNN_DERIVED_SOURCE_ENABLED = "index.knn.derived_source.enabled";
public static final String KNN_INDEX_REMOTE_VECTOR_BUILD = "index.knn.remote_index_build.enabled";
public static final String KNN_REMOTE_VECTOR_REPO = "knn.remote_index_build.vector_repo";

/**
* Default setting values
Expand Down Expand Up @@ -371,6 +373,21 @@ public class KNNSettings {
NodeScope
);

/**
* Index level setting to control whether remote index build is enabled or not.
*/
public static final Setting<Boolean> KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING = Setting.boolSetting(
KNN_INDEX_REMOTE_VECTOR_BUILD,
false,
Dynamic,
IndexScope
);

/**
* Cluster level setting which indicates the repository that the remote index build should write to.
*/
public static final Setting<String> KNN_REMOTE_VECTOR_REPO_SETTING = Setting.simpleString(KNN_REMOTE_VECTOR_REPO, Dynamic, NodeScope);

/**
* Dynamic settings
*/
Expand Down Expand Up @@ -525,6 +542,14 @@ private Setting<?> getSetting(String key) {
return KNN_DERIVED_SOURCE_ENABLED_SETTING;
}

if (KNN_INDEX_REMOTE_VECTOR_BUILD.equals(key)) {
return KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING;
}

if (KNN_REMOTE_VECTOR_REPO.equals(key)) {
return KNN_REMOTE_VECTOR_REPO_SETTING;
}

throw new IllegalArgumentException("Cannot find setting by key [" + key + "]");
}

Expand All @@ -550,7 +575,9 @@ public List<Setting<?>> getSettings() {
QUANTIZATION_STATE_CACHE_SIZE_LIMIT_SETTING,
QUANTIZATION_STATE_CACHE_EXPIRY_TIME_MINUTES_SETTING,
KNN_DISK_VECTOR_SHARD_LEVEL_RESCORING_DISABLED_SETTING,
KNN_DERIVED_SOURCE_ENABLED_SETTING
KNN_DERIVED_SOURCE_ENABLED_SETTING,
KNN_INDEX_REMOTE_VECTOR_BUILD_SETTING,
KNN_REMOTE_VECTOR_REPO_SETTING
);
return Stream.concat(settings.stream(), Stream.concat(getFeatureFlags().stream(), dynamicCacheSettings.values().stream()))
.collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.opensearch.knn.index.engine.KNNMethodContext;
import org.opensearch.knn.index.mapper.KNNMappingConfig;
import org.opensearch.knn.index.mapper.KNNVectorFieldType;
import org.opensearch.repositories.RepositoriesService;

import java.util.Map;
import java.util.Optional;
Expand All @@ -44,6 +45,7 @@ public abstract class BasePerFieldKnnVectorsFormat extends PerFieldKnnVectorsFor
private final Supplier<KnnVectorsFormat> defaultFormatSupplier;
private final Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier;
private Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier;
private final Supplier<RepositoriesService> repositoriesServiceSupplier;
private static final String MAX_CONNECTIONS = "max_connections";
private static final String BEAM_WIDTH = "beam_width";

Expand All @@ -54,11 +56,26 @@ public BasePerFieldKnnVectorsFormat(
Supplier<KnnVectorsFormat> defaultFormatSupplier,
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier
) {
this.mapperService = mapperService;
this.defaultMaxConnections = defaultMaxConnections;
this.defaultBeamWidth = defaultBeamWidth;
this.defaultFormatSupplier = defaultFormatSupplier;
this.vectorsFormatSupplier = vectorsFormatSupplier;
this(mapperService, defaultMaxConnections, defaultBeamWidth, defaultFormatSupplier, vectorsFormatSupplier, null);
}

public BasePerFieldKnnVectorsFormat(
Optional<MapperService> mapperService,
int defaultMaxConnections,
int defaultBeamWidth,
Supplier<KnnVectorsFormat> defaultFormatSupplier,
Function<KNNVectorsFormatParams, KnnVectorsFormat> vectorsFormatSupplier,
Function<KNNScalarQuantizedVectorsFormatParams, KnnVectorsFormat> scalarQuantizedVectorsFormatSupplier
) {
this(
mapperService,
defaultMaxConnections,
defaultBeamWidth,
defaultFormatSupplier,
vectorsFormatSupplier,
scalarQuantizedVectorsFormatSupplier,
null
);
}

@Override
Expand Down Expand Up @@ -141,7 +158,9 @@ private NativeEngines990KnnVectorsFormat nativeEngineVectorsFormat() {
int approximateThreshold = getApproximateThresholdValue();
return new NativeEngines990KnnVectorsFormat(
new Lucene99FlatVectorsFormat(FlatVectorScorerUtil.getLucene99FlatVectorsScorer()),
approximateThreshold
approximateThreshold,
repositoriesServiceSupplier,
mapperService.get().getIndexSettings()
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,6 @@
package org.opensearch.knn.index.codec.KNN80Codec;

import lombok.extern.log4j.Log4j2;
import org.opensearch.common.StopWatch;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.DocValuesConsumer;
Expand All @@ -19,8 +14,13 @@
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentWriteState;
import org.opensearch.common.StopWatch;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.codec.nativeindex.NativeIndexWriter;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.mapper.KNNVectorFieldMapper;
import org.opensearch.knn.index.vectorvalues.KNNVectorValues;
import org.opensearch.knn.index.vectorvalues.KNNVectorValuesFactory;
import org.opensearch.knn.plugin.stats.KNNGraphValue;

import java.io.IOException;
Expand Down Expand Up @@ -72,9 +72,9 @@ public void addKNNBinaryField(FieldInfo field, DocValuesProducer valuesProducer,
// For BDV it is fine to use knnVectorValues.totalLiveDocs() as we already run the full loop to calculate total
// live docs
if (isMerge) {
NativeIndexWriter.getWriter(field, state).mergeIndex(knnVectorValues, (int) knnVectorValues.totalLiveDocs());
NativeIndexWriter.getWriter(field, state).mergeIndex(() -> knnVectorValues, (int) knnVectorValues.totalLiveDocs());
} else {
NativeIndexWriter.getWriter(field, state).flushIndex(knnVectorValues, (int) knnVectorValues.totalLiveDocs());
NativeIndexWriter.getWriter(field, state).flushIndex(() -> knnVectorValues, (int) knnVectorValues.totalLiveDocs());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,12 @@
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.codec.BasePerFieldKnnVectorsFormat;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.repositories.RepositoriesService;

import java.util.Optional;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.function.Supplier;

/**
* Class provides per field format implementation for Lucene Knn vector type
Expand All @@ -25,6 +27,13 @@ public class KNN9120PerFieldKnnVectorsFormat extends BasePerFieldKnnVectorsForma
private static final Tuple<Integer, ExecutorService> DEFAULT_MERGE_THREAD_COUNT_AND_EXECUTOR_SERVICE = Tuple.tuple(1, null);

public KNN9120PerFieldKnnVectorsFormat(final Optional<MapperService> mapperService) {
this(mapperService, null);
}

public KNN9120PerFieldKnnVectorsFormat(
final Optional<MapperService> mapperService,
Supplier<RepositoriesService> repositoriesServiceSupplier
) {
super(
mapperService,
Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
Expand Down Expand Up @@ -67,7 +76,8 @@ public KNN9120PerFieldKnnVectorsFormat(final Optional<MapperService> mapperServi
// Executor service
mergeThreadCountAndExecutorService.v2()
);
}
},
repositoriesServiceSupplier
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@
import org.apache.lucene.codecs.lucene99.Lucene99FlatVectorsFormat;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.opensearch.index.IndexSettings;
import org.opensearch.knn.index.KNNSettings;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.repositories.RepositoriesService;

import java.io.IOException;
import java.util.function.Supplier;

/**
* This is a Vector format that will be used for Native engines like Faiss and Nmslib for reading and writing vector
Expand All @@ -33,6 +36,8 @@ public class NativeEngines990KnnVectorsFormat extends KnnVectorsFormat {
private static FlatVectorsFormat flatVectorsFormat;
private static final String FORMAT_NAME = "NativeEngines990KnnVectorsFormat";
private static int approximateThreshold;
private final Supplier<RepositoriesService> repositoriesServiceSupplier;
private final IndexSettings indexSettings;

public NativeEngines990KnnVectorsFormat() {
this(new Lucene99FlatVectorsFormat(new DefaultFlatVectorScorer()));
Expand All @@ -47,9 +52,20 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsForma
}

public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsFormat, int approximateThreshold) {
this(flatVectorsFormat, approximateThreshold, null, null);
}

public NativeEngines990KnnVectorsFormat(
final FlatVectorsFormat flatVectorsFormat,
int approximateThreshold,
Supplier<RepositoriesService> repositoriesServiceSupplier,
IndexSettings indexSettings
) {
super(FORMAT_NAME);
NativeEngines990KnnVectorsFormat.flatVectorsFormat = flatVectorsFormat;
NativeEngines990KnnVectorsFormat.approximateThreshold = approximateThreshold;
this.repositoriesServiceSupplier = repositoriesServiceSupplier;
this.indexSettings = indexSettings;
}

/**
Expand All @@ -59,7 +75,13 @@ public NativeEngines990KnnVectorsFormat(final FlatVectorsFormat flatVectorsForma
*/
@Override
public KnnVectorsWriter fieldsWriter(final SegmentWriteState state) throws IOException {
return new NativeEngines990KnnVectorsWriter(state, flatVectorsFormat.fieldsWriter(state), approximateThreshold);
return new NativeEngines990KnnVectorsWriter(
state,
flatVectorsFormat.fieldsWriter(state),
approximateThreshold,
repositoriesServiceSupplier,
indexSettings
);
}

/**
Expand Down
Loading

0 comments on commit e3e057b

Please sign in to comment.