From 52bff7fab84210702efba1a7c4c5ec352938755d Mon Sep 17 00:00:00 2001 From: Steve Lorello <42971704+slorello89@users.noreply.github.com> Date: Mon, 28 Oct 2024 07:55:20 -0400 Subject: [PATCH] Bugfix/comprehensive index equality (#496) * fixing issues with index equality * fixing hash vector corner case --- src/Redis.OM/Modeling/RedisIndex.cs | 153 +++++++++++++++++- src/Redis.OM/RedisIndexInfo.cs | 71 ++++++++ .../RediSearchTests/RedisIndexTests.cs | 128 ++++++++++++++- 3 files changed, 345 insertions(+), 7 deletions(-) diff --git a/src/Redis.OM/Modeling/RedisIndex.cs b/src/Redis.OM/Modeling/RedisIndex.cs index 459f36fc..a65f4894 100644 --- a/src/Redis.OM/Modeling/RedisIndex.cs +++ b/src/Redis.OM/Modeling/RedisIndex.cs @@ -20,29 +20,88 @@ public static class RedisIndex public static bool IndexDefinitionEquals(this RedisIndexInfo redisIndexInfo, Type type) { var serialisedDefinition = SerializeIndex(type); - var existingSet = redisIndexInfo.Attributes?.Select(a => (Property: a.Attribute!, a.Type!)).OrderBy(a => a.Property); var isJson = redisIndexInfo.IndexDefinition?.Identifier == "JSON"; + var currentOffset = 0; if (serialisedDefinition.Length < 5) { throw new ArgumentException($"Could not parse the index definition for type: {type.Name}."); } - if (redisIndexInfo.IndexName != serialisedDefinition[0]) + if (redisIndexInfo.IndexDefinition is null) { return false; } - if (redisIndexInfo.IndexDefinition?.Identifier?.Equals(serialisedDefinition[2], StringComparison.OrdinalIgnoreCase) == false) + // these are properties we cannot process because FT.INFO does not respond with them + var unprocessableProperties = new string[] { "EPSILON", "EF_RUNTIME", "PHONETIC", "STOPWORDS" }; + + foreach (var property in unprocessableProperties) + { + if (serialisedDefinition.Any(x => x.Equals(property))) + { + throw new ArgumentException($"Could not validate index definition that contains {property}"); + } + } + + if (redisIndexInfo.IndexName != serialisedDefinition[currentOffset]) 
+ { + return false; + } + + currentOffset += 2; // skip to the index type at index 2 + + if (redisIndexInfo.IndexDefinition?.Identifier?.Equals(serialisedDefinition[currentOffset], StringComparison.OrdinalIgnoreCase) == false) + { + return false; + } + + currentOffset += 2; // skip to prefix count + + if (!int.TryParse(serialisedDefinition[currentOffset], out var numPrefixes)) + { + throw new ArgumentException("Could not parse index with unknown number of prefixes"); + } + + currentOffset += 1; // skip to first prefix + + if (redisIndexInfo.IndexDefinition?.Prefixes is null || redisIndexInfo.IndexDefinition.Prefixes.Length != numPrefixes || !serialisedDefinition.Skip(currentOffset).Take(numPrefixes).SequenceEqual(redisIndexInfo.IndexDefinition.Prefixes)) + { + return false; + } + + currentOffset += numPrefixes + 1; // step over the prefixes and the keyword that follows them, landing on that keyword's value (or the first schema entry) + + if (redisIndexInfo.IndexDefinition?.Filter is not null && !redisIndexInfo.IndexDefinition.Filter.Equals(serialisedDefinition.ElementAt(currentOffset))) + { + return false; + } + + if (redisIndexInfo.IndexDefinition?.Filter is not null) + { + currentOffset += 2; + } + + if (redisIndexInfo.IndexDefinition?.DefaultLanguage is not null && !redisIndexInfo.IndexDefinition.DefaultLanguage.Equals(serialisedDefinition.ElementAt(currentOffset))) { return false; } - if (redisIndexInfo.IndexDefinition?.Prefixes.FirstOrDefault().Equals(serialisedDefinition[5]) == false) + if (redisIndexInfo.IndexDefinition?.DefaultLanguage is not null) + { + currentOffset += 2; + } + + if (redisIndexInfo.IndexDefinition?.LanguageField is not null && !redisIndexInfo.IndexDefinition.LanguageField.Equals(serialisedDefinition.ElementAt(currentOffset))) { return false; } + if (redisIndexInfo.IndexDefinition?.LanguageField is not null) + { + currentOffset += 2; + } + var target = redisIndexInfo.Attributes?.SelectMany(a => { var attr = new List(); @@ -58,11 +117,81 @@ public static bool IndexDefinitionEquals(this RedisIndexInfo redisIndexInfo, Typ attr.Add("AS"); } + if (!isJson && a.Type is 
not null && a.Type == "VECTOR") + { + attr.Add($"{a.Attribute!}.Vector"); + attr.Add("AS"); + } + attr.Add(a.Attribute!); if (a.Type != null) { attr.Add(a.Type); + if (a.Type == "TAG") + { + attr.Add("SEPARATOR"); + attr.Add(a.Separator ?? "|"); + } + + if (a.Type == "TEXT") + { + if (a.NoStem == true) + { + attr.Add("NOSTEM"); + } + + if (a.Weight is not null && a.Weight != "1") + { + attr.Add("WEIGHT"); + attr.Add(a.Weight); + } + } + + if (a.Type == "VECTOR") + { + if (a.Algorithm is null) + { + throw new InvalidOperationException("Encountered Vector field with no algorithm"); + } + + attr.Add(a.Algorithm); + if (a.VectorType is null) + { + throw new InvalidOperationException("Encountered vector field with no Vector Type"); + } + + attr.Add(NumVectorArgs(a).ToString()); + + attr.Add("TYPE"); + attr.Add(a.VectorType); + + if (a.Dimension is null) + { + throw new InvalidOperationException("Encountered vector field with no dimension"); + } + + attr.Add("DIM"); + attr.Add(a.Dimension); + + if (a.DistanceMetric is not null) + { + attr.Add("DISTANCE_METRIC"); + attr.Add(a.DistanceMetric); + } + + if (a.M is not null) + { + attr.Add("M"); + attr.Add(a.M); + } + + if (a.EfConstruction is not null) + { + attr.Add("EF_CONSTRUCTION"); + attr.Add(a.EfConstruction); + } + } } if (a.Sortable == true) @@ -73,7 +202,21 @@ public static bool IndexDefinitionEquals(this RedisIndexInfo redisIndexInfo, Typ return attr.ToArray(); }); - return target.SequenceEqual(serialisedDefinition.Skip(7)); + return target.SequenceEqual(serialisedDefinition.Skip(currentOffset)); + } + + /// + /// Calculates the number of arguments that would be required to reverse engineer the index based off what + /// is in the Info attribute. + /// + /// The attribute. + /// The number of arguments. + internal static int NumVectorArgs(this RedisIndexInfo.RedisIndexInfoAttribute attr) + { + var numArgs = 6; + numArgs += attr.M is not null ? 2 : 0; + numArgs += attr.EfConstruction is not null ? 
2 : 0; + return numArgs; } /// diff --git a/src/Redis.OM/RedisIndexInfo.cs b/src/Redis.OM/RedisIndexInfo.cs index 334d29eb..dfcd0953 100644 --- a/src/Redis.OM/RedisIndexInfo.cs +++ b/src/Redis.OM/RedisIndexInfo.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Globalization; using System.Linq; +using Redis.OM.Modeling; namespace Redis.OM { @@ -220,6 +221,9 @@ public RedisIndexInfoIndexDefinition(RedisReply redisReply) case "key_type": Identifier = value.ToString(CultureInfo.InvariantCulture); break; case "prefixes": Prefixes = value.ToArray().Select(x => x.ToString(CultureInfo.InvariantCulture)).ToArray(); break; case "default_score": DefaultScore = value.ToString(CultureInfo.InvariantCulture); break; + case "default_language": DefaultLanguage = value.ToString(CultureInfo.InvariantCulture); break; + case "filter": Filter = value.ToString(CultureInfo.InvariantCulture); break; + case "language_field": LanguageField = value.ToString(CultureInfo.InvariantCulture); break; } } } @@ -238,6 +242,21 @@ public RedisIndexInfoIndexDefinition(RedisReply redisReply) /// Gets default_score. /// public string? DefaultScore { get; } + + /// + /// Gets Filter. + /// + public string? Filter { get; } + + /// + /// Gets language. + /// + public string? DefaultLanguage { get; } + + /// + /// Gets LanguageField. + /// + public string? 
LanguageField { get; } } /// @@ -266,9 +285,21 @@ public RedisIndexInfoAttribute(RedisReply redisReply) case "attribute": Attribute = value; break; case "type": Type = value; break; case "SEPARATOR": Separator = value; break; + case "algorithm": Algorithm = value; break; + case "data_type": VectorType = value; break; + case "dim": Dimension = value; break; + case "distance_metric": DistanceMetric = value; break; + case "M": M = value; break; + case "ef_construction": EfConstruction = value; break; + case "WEIGHT": Weight = value; break; } } + if (responseArray.Any(x => ((string)x).Equals("NOSTEM", StringComparison.InvariantCultureIgnoreCase))) + { + NoStem = true; + } + if (responseArray.Select(x => x.ToString()) .Any(x => x.Equals("SORTABLE", StringComparison.InvariantCultureIgnoreCase))) { @@ -300,6 +331,46 @@ public RedisIndexInfoAttribute(RedisReply redisReply) /// Gets SORTABLE. /// public bool? Sortable { get; } + + /// + /// Gets NOSTEM. + /// + public bool? NoStem { get; } + + /// + /// Gets weight. + /// + public string? Weight { get; } + + /// + /// Gets Algorithm. + /// + public string? Algorithm { get; } + + /// + /// Gets the VectorType. + /// + public string? VectorType { get; } + + /// + /// Gets Dimension. + /// + public string? Dimension { get; } + + /// + /// Gets DistanceMetric. + /// + public string? DistanceMetric { get; } + + /// + /// Gets M. + /// + public string? M { get; } + + /// + /// Gets EF constructor. + /// + public string? 
EfConstruction { get; } } /// diff --git a/test/Redis.OM.Unit.Tests/RediSearchTests/RedisIndexTests.cs b/test/Redis.OM.Unit.Tests/RediSearchTests/RedisIndexTests.cs index fc1b7785..045989e7 100644 --- a/test/Redis.OM.Unit.Tests/RediSearchTests/RedisIndexTests.cs +++ b/test/Redis.OM.Unit.Tests/RediSearchTests/RedisIndexTests.cs @@ -1,8 +1,10 @@ using System; using System.Linq; +using System.Runtime.InteropServices; using System.Threading.Tasks; using Redis.OM.Contracts; using Redis.OM.Modeling; +using Redis.OM.Vectorizers; using Xunit; namespace Redis.OM.Unit.Tests.RediSearchTests @@ -10,6 +12,18 @@ namespace Redis.OM.Unit.Tests.RediSearchTests public class RedisIndexTests { + [Document] + public class TestNoExtras + { + [Searchable] public string Name { get; set; } + [Indexed] public int Age { get; set; } + [Indexed] public double Height { get; set; } + [Indexed] public string[] NickNames { get; set; } + [Indexed] public string Tag { get; set; } + [Indexed] public GeoLoc GeoLoc { get; set; } + [Indexed] [OpenAIVectorizer]public Vector VectorField { get; set; } + } + [Document(IndexName = "TestPersonClassHappyPath-idx", StorageType = StorageType.Hash)] public class TestPersonClassHappyPath { @@ -33,22 +47,62 @@ public class TestPersonClassHappyPathWithMutatedDefinition [Document(IndexName = "SerialisedJson-idx", Prefixes = new []{"Simple"}, StorageType = StorageType.Json)] public class SerialisedJsonType { - [Searchable(Sortable = true)] + [Searchable(Sortable = true, NoStem = true, Weight = .8)] public string Name { get; set; } + [Indexed(Sortable = true)] + public string Tag { get; set; } + + [Indexed] public GeoLoc GeoField { get; set; } + + [Indexed(M = 40, EfConstructor = 250, Algorithm = VectorAlgorithm.HNSW)] [OpenAIVectorizer]public Vector VectorField { get; set; } + public int Age { get; set; } } [Document(IndexName = "SerialisedJson-idx", Prefixes = new []{"Simple"}, StorageType = StorageType.Json)] public class SerialisedJsonTypeNotMatch { - 
[Searchable(Sortable = true)] + [Searchable(Sortable = true, NoStem = true, Weight = .8)] public string Name { get; set; } + [Indexed(Sortable = true)] + public string Tag { get; set; } + + [Indexed] public GeoLoc GeoField { get; set; } + + [Indexed(M = 40, EfConstructor = 250, Algorithm = VectorAlgorithm.HNSW)] [OpenAIVectorizer]public Vector VectorField { get; set; } + [Indexed(Sortable = true)] public int Age { get; set; } } + [Document(IndexName = "Uncheckable-idx", Prefixes = new[] { "Simple" }, StorageType = StorageType.Json)] + public class UncheckableIndex + { + [Indexed(M = 40, EfConstructor = 250, Algorithm = VectorAlgorithm.HNSW, Epsilon = .02, EfRuntime = 11)] [OpenAIVectorizer]public Vector VectorField { get; set; } + } + + [Document(IndexName = "Uncheckable-idx", Prefixes = new[] { "Simple" }, StorageType = StorageType.Json)] + public class UncheckableIndexPhoneticMatcher + { + [Searchable(PhoneticMatcher = "dm:fr")]public string Name { get; set; } + } + + [Document(IndexName = "kitchen-sink", Prefixes = new []{"prefix1", "prefix2", "prefix3"}, Language = "norwegian", LanguageField = nameof(Lang), Filter = "@Name == 'foo'")] + public class KitchenSinkDocumentIndex + { + [Indexed]public string Name { get; set; } + public string Lang { get; set; } + } + + [Document(IndexName = "kitchen-sink", Prefixes = new []{"prefix1", "prefix2", "prefix3"}, Language = "norwegian", LanguageField = nameof(Lang), Filter = "@Name == 'foo'", Stopwords = new []{"break"})] + public class KitchenSinkDocumentIndexFailForStopwords + { + [Indexed]public string Name { get; set; } + public string Lang { get; set; } + } + [Document(IndexName = "TestPersonClassHappyPath-idx", StorageType = StorageType.Hash, Prefixes = new []{"Person:"})] public class TestPersonClassOverridenPrefix { @@ -285,5 +339,75 @@ public async Task TestGetIndexInfoWhichDoesNotMatchExistingJson() Assert.False(indexInfo.IndexDefinitionEquals(typeof(SerialisedJsonTypeNotMatch))); 
Assert.True(indexInfo.IndexDefinitionEquals(typeof(SerialisedJsonType))); } + + [Fact] + public async Task TestArgumentExceptionOnUncheckableIndexType() + { + var host = Environment.GetEnvironmentVariable("STANDALONE_HOST_PORT") ?? "localhost"; + var provider = new RedisConnectionProvider($"redis://{host}"); + var connection = provider.Connection; + + await connection.DropIndexAsync(typeof(UncheckableIndex)); + await connection.CreateIndexAsync(typeof(UncheckableIndex)); + var indexInfo = await connection.GetIndexInfoAsync(typeof(UncheckableIndex)); + + Assert.Throws(()=>indexInfo.IndexDefinitionEquals(typeof(UncheckableIndex))); + } + + [Fact] + public async Task TestArgumentExceptionOnUncheckableIndexTypePhonetics() + { + var host = Environment.GetEnvironmentVariable("STANDALONE_HOST_PORT") ?? "localhost"; + var provider = new RedisConnectionProvider($"redis://{host}"); + var connection = provider.Connection; + + await connection.DropIndexAsync(typeof(UncheckableIndexPhoneticMatcher)); + await connection.CreateIndexAsync(typeof(UncheckableIndexPhoneticMatcher)); + var indexInfo = await connection.GetIndexInfoAsync(typeof(UncheckableIndexPhoneticMatcher)); + + Assert.Throws(()=>indexInfo.IndexDefinitionEquals(typeof(UncheckableIndexPhoneticMatcher))); + } + + [Fact] + public async Task TestKitchenSinkEquality() + { + var host = Environment.GetEnvironmentVariable("STANDALONE_HOST_PORT") ?? 
"localhost"; + var provider = new RedisConnectionProvider($"redis://{host}"); + var connection = provider.Connection; + + await connection.DropIndexAsync(typeof(KitchenSinkDocumentIndex)); + await connection.CreateIndexAsync(typeof(KitchenSinkDocumentIndex)); + var indexInfo = await connection.GetIndexInfoAsync(typeof(KitchenSinkDocumentIndex)); + + Assert.True(indexInfo.IndexDefinitionEquals(typeof(KitchenSinkDocumentIndex))); + } + + [Fact] + public async Task TestKitchenSinkEqualityFailForStopwords() + { + var host = Environment.GetEnvironmentVariable("STANDALONE_HOST_PORT") ?? "localhost"; + var provider = new RedisConnectionProvider($"redis://{host}"); + var connection = provider.Connection; + + await connection.DropIndexAsync(typeof(KitchenSinkDocumentIndexFailForStopwords)); + await connection.CreateIndexAsync(typeof(KitchenSinkDocumentIndexFailForStopwords)); + var indexInfo = await connection.GetIndexInfoAsync(typeof(KitchenSinkDocumentIndexFailForStopwords)); + + Assert.Throws(()=>indexInfo.IndexDefinitionEquals(typeof(KitchenSinkDocumentIndexFailForStopwords))); + } + + [Fact] + public async Task TestNoExtraStuffInIndex() + { + var host = Environment.GetEnvironmentVariable("STANDALONE_HOST_PORT") ?? "localhost"; + var provider = new RedisConnectionProvider($"redis://{host}"); + var connection = provider.Connection; + + await connection.DropIndexAsync(typeof(TestNoExtras)); + await connection.CreateIndexAsync(typeof(TestNoExtras)); + var indexInfo = await connection.GetIndexInfoAsync(typeof(TestNoExtras)); + + Assert.True(indexInfo.IndexDefinitionEquals(typeof(TestNoExtras))); + } } }