diff --git a/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/BurrowsWheelerTransform.Tests.csproj b/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/BurrowsWheelerTransform.Tests.csproj new file mode 100644 index 0000000..56dba5b --- /dev/null +++ b/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/BurrowsWheelerTransform.Tests.csproj @@ -0,0 +1,28 @@ + + + + net9.0 + latest + enable + enable + + true + + + + + + + + + + + + + + + + + + + diff --git a/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/GlobalSuppressions.cs b/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/GlobalSuppressions.cs new file mode 100644 index 0000000..eeeaf17 --- /dev/null +++ b/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/GlobalSuppressions.cs @@ -0,0 +1,8 @@ +// This file is used by Code Analysis to maintain SuppressMessage +// attributes that are applied to this project. +// Project-level suppressions either have no target or are given +// a specific target and scoped to a namespace, type, member, etc. 
+ +using System.Diagnostics.CodeAnalysis; + +[assembly: SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1600:Elements should be documented", Justification = "This is tests project")] diff --git a/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/TransformResultTests.cs b/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/TransformResultTests.cs new file mode 100644 index 0000000..30d1979 --- /dev/null +++ b/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/TransformResultTests.cs @@ -0,0 +1,21 @@ +namespace BurrowsWheelerTransform.Tests; + +public class TransformResultTests +{ + [Test] + public void TransformResultConstructor_ShouldNotThrow() + { + Assert.DoesNotThrow(() => new TransformResult(string.Empty, -1)); + Assert.DoesNotThrow(() => new TransformResult("A", 0)); + Assert.DoesNotThrow(() => new TransformResult("ABCDEF", 3)); + Assert.DoesNotThrow(() => new TransformResult("QWERTY", 5)); + } + + [Test] + public void TransformResultConstructor_ShouldThrow() + { + Assert.Throws(() => new TransformResult(string.Empty, 0)); + Assert.Throws(() => new TransformResult("ABCD", -1)); + Assert.Throws(() => new TransformResult("ABCD", 4)); + } +} diff --git a/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/TransformTests.cs b/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/TransformTests.cs new file mode 100644 index 0000000..605cd67 --- /dev/null +++ b/BurrowsWheelerTransform/BurrowsWheelerTransform.Tests/TransformTests.cs @@ -0,0 +1,45 @@ +namespace BurrowsWheelerTransform.Tests; + +public class TransformTests +{ + private static readonly List TestData = + [ + string.Empty, + + "A", + "BB", + "CCCCCC", + "ABACABA", + "ABABABABAB", + + ..GetRandomStrings() + ]; + + [Test] + public void InverseTransform_ShouldBe_SameAs_Input([ValueSource(nameof(TestData))] string input) + { + var result = Transform.ForwardTransform(input); + var reconstructed = Transform.InverseTransform(result); + Assert.That(input.SequenceEqual(reconstructed), Is.True); 
+ } + + private static IEnumerable GetRandomStrings() + { + int seed = 1743658243; + var random = new Random(seed); + + int steps = 16; + int length = 256; + var buffer = new char[length]; + + for (int i = 0; i < steps; i++) + { + for (int j = 0; j < length; j++) + { + buffer[j] = (char)random.Next(' ', '~' + 1); + } + + yield return new(buffer); + } + } +} diff --git a/BurrowsWheelerTransform/BurrowsWheelerTransform.sln b/BurrowsWheelerTransform/BurrowsWheelerTransform.sln new file mode 100644 index 0000000..efaec90 --- /dev/null +++ b/BurrowsWheelerTransform/BurrowsWheelerTransform.sln @@ -0,0 +1,48 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BurrowsWheelerTransform", "BurrowsWheelerTransform\BurrowsWheelerTransform.csproj", "{1DA138D9-04B6-498B-9C33-05E9684F1D5C}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "BurrowsWheelerTransform.Tests", "BurrowsWheelerTransform.Tests\BurrowsWheelerTransform.Tests.csproj", "{FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Debug|Any CPU.Build.0 = Debug|Any CPU + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Debug|x64.ActiveCfg = Debug|Any CPU + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Debug|x64.Build.0 = Debug|Any CPU + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Debug|x86.ActiveCfg = Debug|Any CPU + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Debug|x86.Build.0 = Debug|Any CPU + 
{1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Release|Any CPU.ActiveCfg = Release|Any CPU + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Release|Any CPU.Build.0 = Release|Any CPU + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Release|x64.ActiveCfg = Release|Any CPU + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Release|x64.Build.0 = Release|Any CPU + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Release|x86.ActiveCfg = Release|Any CPU + {1DA138D9-04B6-498B-9C33-05E9684F1D5C}.Release|x86.Build.0 = Release|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Debug|x64.ActiveCfg = Debug|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Debug|x64.Build.0 = Debug|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Debug|x86.ActiveCfg = Debug|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Debug|x86.Build.0 = Debug|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Release|Any CPU.ActiveCfg = Release|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Release|Any CPU.Build.0 = Release|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Release|x64.ActiveCfg = Release|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Release|x64.Build.0 = Release|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Release|x86.ActiveCfg = Release|Any CPU + {FDF7CD5C-DC17-4A89-B7B8-12B1A75EF86D}.Release|x86.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/BurrowsWheelerTransform/BurrowsWheelerTransform/BurrowsWheelerTransform.csproj b/BurrowsWheelerTransform/BurrowsWheelerTransform/BurrowsWheelerTransform.csproj new file mode 100644 index 0000000..bf05b58 --- /dev/null +++ b/BurrowsWheelerTransform/BurrowsWheelerTransform/BurrowsWheelerTransform.csproj @@ -0,0 +1,10 @@ + + + + Library + net9.0 + enable + enable + + + diff --git 
namespace BurrowsWheelerTransform;

using System.Diagnostics;

/// <summary>
/// Burrows-Wheeler transform implementation.
/// </summary>
public static class Transform
{
    /// <summary>
    /// Transforms given string using Burrows-Wheeler algorithm.
    /// </summary>
    /// <remarks>
    /// Rotations are ordered by ordinal <see cref="char"/> value. Each comparison may scan the whole
    /// input, so highly repetitive inputs are noticeably slower to transform than random ones.
    /// </remarks>
    /// <param name="input">Input string.</param>
    /// <returns>
    /// The last column of the sorted rotation table together with the row at which the unrotated
    /// input ended up. An empty input yields an empty string and an identity index of -1.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is <see langword="null"/>.</exception>
    public static TransformResult ForwardTransform(string input)
    {
        ArgumentNullException.ThrowIfNull(input);

        int length = input.Length;
        if (length == 0)
        {
            return new(string.Empty, -1);
        }

        // Heap buffers on purpose: their size follows the caller's input, and an unbounded
        // stackalloc would take the process down with an uncatchable stack overflow.
        var offsets = new int[length];
        for (int i = 0; i < length; i++)
        {
            offsets[i] = i;
        }

        // Orders two rotations, each identified by its start offset in the input.
        int CompareRotations(int x, int y)
        {
            for (int i = 0; i < length; i++)
            {
                int difference = input[(i + x) % length] - input[(i + y) % length];
                if (difference != 0)
                {
                    return difference;
                }
            }

            // Identical rotations occur for periodic input. Falling back to the offset keeps
            // the identity index independent of how the (unstable) sort treats equal elements.
            return x.CompareTo(y);
        }

        Array.Sort(offsets, CompareRotations);

        int identityPosition = -1;
        var lastColumn = new char[length];
        for (int i = 0; i < length; i++)
        {
            if (offsets[i] == 0)
            {
                identityPosition = i;
            }

            // The last character of a rotation is the one that precedes its start offset.
            lastColumn[i] = input[(offsets[i] + length - 1) % length];
        }

        Debug.Assert(identityPosition >= 0, "Identity position not found");

        return new(new string(lastColumn), identityPosition);
    }

    /// <summary>
    /// Reconstructs string transformed with Burrows-Wheeler algorithm.
    /// </summary>
    /// <param name="result">Transformed string.</param>
    /// <returns>
    /// Reconstructed string. An empty string is returned when <paramref name="result"/> carries no
    /// characters, which includes <c>default(TransformResult)</c>.
    /// </returns>
    public static string InverseTransform(TransformResult result)
    {
        string transformed = result.Value;

        // default(TransformResult) bypasses the constructor, leaving Value null and IdentityIndex 0,
        // so the emptiness check has to look at the string rather than at the index.
        if (string.IsNullOrEmpty(transformed))
        {
            return string.Empty;
        }

        int length = transformed.Length;

        // ranks[i] is the number of earlier occurrences of transformed[i] in the transformed string.
        var ranks = new int[length];
        var charCounts = new SortedDictionary<char, int>();
        for (int i = 0; i < length; i++)
        {
            char current = transformed[i];
            charCounts.TryGetValue(current, out int seen);
            ranks[i] = seen;
            charCounts[current] = seen + 1;
        }

        // For every character, the number of characters in the string that sort strictly before it,
        // i.e. the first row of the sorted table that starts with that character.
        var firstRowOf = new Dictionary<char, int>(charCounts.Count);
        int smallerTotal = 0;
        foreach (var (character, count) in charCounts)
        {
            firstRowOf[character] = smallerTotal;
            smallerTotal += count;
        }

        // Walk backwards from the identity row: each row's last character is the one that
        // precedes the previously recovered character in the original string.
        var reconstructed = new char[length];
        int row = result.IdentityIndex;
        for (int i = length - 1; i >= 0; i--)
        {
            char current = transformed[row];
            reconstructed[i] = current;
            row = firstRowOf[current] + ranks[row];
        }

        return new string(reconstructed);
    }
}

/// <summary>
/// Result of Burrows-Wheeler Transform.
/// </summary>
public readonly struct TransformResult
{
    /// <summary>
    /// Initializes a new instance of the <see cref="TransformResult"/> struct.
    /// </summary>
    /// <param name="value">Transformed string.</param>
    /// <param name="identityIndex">Index that is used to reconstruct string.</param>
    /// <exception cref="ArgumentNullException"><paramref name="value"/> is <see langword="null"/>.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="identityIndex"/> is out of range (less than 0 or greater or equal to <paramref name="value"/>.Length)
    /// or not equal to -1 if <paramref name="value"/> is <see cref="string.Empty"/>.
    /// </exception>
    public TransformResult(string value, int identityIndex)
    {
        ArgumentNullException.ThrowIfNull(value);

        if (value.Length == 0)
        {
            // An empty string has no rows, so -1 is the only index that makes sense for it.
            if (identityIndex != -1)
            {
                throw new ArgumentOutOfRangeException(nameof(identityIndex), "Identity index of an empty string must be equal to -1");
            }
        }
        else
        {
            ArgumentOutOfRangeException.ThrowIfNegative(identityIndex, nameof(identityIndex));
            ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(identityIndex, value.Length, nameof(identityIndex));
        }

        Value = value;
        IdentityIndex = identityIndex;
    }

    /// <summary>
    /// Gets transformed string.
    /// </summary>
    public string Value { get; }

    /// <summary>
    /// Gets identity index.
    /// </summary>
    public int IdentityIndex { get; }
}