From e1783eb7ddcf61d19834ae0350fc04f3fa20f5ae Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 2 Mar 2025 07:36:29 +0300 Subject: [PATCH 01/57] Homework 3 - LZW (wip) Added arbitrary bit length writer/reader --- Zipper/Zipper.sln | 48 ++++++++++++++ Zipper/Zipper/ArbitraryBitReader.cs | 67 ++++++++++++++++++++ Zipper/Zipper/ArbitraryBitWriter.cs | 97 +++++++++++++++++++++++++++++ Zipper/Zipper/Zipper.csproj | 14 +++++ 4 files changed, 226 insertions(+) create mode 100644 Zipper/Zipper.sln create mode 100644 Zipper/Zipper/ArbitraryBitReader.cs create mode 100644 Zipper/Zipper/ArbitraryBitWriter.cs create mode 100644 Zipper/Zipper/Zipper.csproj diff --git a/Zipper/Zipper.sln b/Zipper/Zipper.sln new file mode 100644 index 0000000..213ddd2 --- /dev/null +++ b/Zipper/Zipper.sln @@ -0,0 +1,48 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper", "Zipper\Zipper.csproj", "{9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper.Tests", "Zipper.Tests\Zipper.Tests.csproj", "{10C8B0BF-1C7E-4169-8DBB-3B138722C444}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x64.ActiveCfg = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x64.Build.0 = Debug|Any CPU + 
{9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x86.ActiveCfg = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x86.Build.0 = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|Any CPU.Build.0 = Release|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x64.ActiveCfg = Release|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x64.Build.0 = Release|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x86.ActiveCfg = Release|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x86.Build.0 = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|Any CPU.Build.0 = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x64.ActiveCfg = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x64.Build.0 = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x86.ActiveCfg = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x86.Build.0 = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|Any CPU.ActiveCfg = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|Any CPU.Build.0 = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x64.ActiveCfg = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x64.Build.0 = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x86.ActiveCfg = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x86.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs new file mode 100644 index 0000000..fec8c46 --- /dev/null +++ b/Zipper/Zipper/ArbitraryBitReader.cs @@ -0,0 +1,67 @@ +namespace Zipper; + +using System.Diagnostics; + +/// +/// Reads integers of 
arbitrary width. +/// +internal class ArbitraryBitReader +{ + private readonly Stream stream; + private readonly int width; + private byte buffer; + private int? bitsReadFromBuffer; + + /// + /// Initializes a new instance of the class. + /// + /// Stream to write to. + /// Width of integers between 4 and 32 bits. + public ArbitraryBitReader(Stream stream, int width) + { + ArgumentOutOfRangeException.ThrowIfLessThan(width, 4, nameof(width)); + ArgumentOutOfRangeException.ThrowIfGreaterThan(width, 32, nameof(width)); + + this.stream = stream; + this.width = width; + bitsReadFromBuffer = null; + } + + /// + /// Reads number from underlying stream and stores it in the . + /// + /// When this method returns, contains the value that was read, if read successfully; otherwise, zero. + /// if was successfuly read, otherwise. + public bool ReadNext(out int number) + { + number = 0; + + int remainingWidth = width; + while (remainingWidth > 0) + { + if (bitsReadFromBuffer is null or 0 or >= 8) + { + int readByte = stream.ReadByte(); + if (readByte == -1) + { + return false; + } + + buffer = (byte)readByte; + bitsReadFromBuffer = 0; + } + + int remainingBitsToRead = 8 - bitsReadFromBuffer.Value; + int mask = 0xFF >> bitsReadFromBuffer.Value; + int toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth); + + int previousRemainingWidth = remainingWidth; + remainingWidth -= remainingBitsToRead; + remainingWidth = Math.Max(0, remainingWidth); + bitsReadFromBuffer += previousRemainingWidth - remainingWidth; + number |= toWrite << remainingWidth; + } + + return true; + } +} diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs new file mode 100644 index 0000000..c6dde81 --- /dev/null +++ b/Zipper/Zipper/ArbitraryBitWriter.cs @@ -0,0 +1,97 @@ +namespace Zipper; + +using System.Diagnostics; + +/// +/// Writes integers of arbitrary width. 
+/// +internal class ArbitraryBitWriter : IDisposable +{ + private readonly Stream stream; + private readonly int width; + private readonly byte[] buffer; + private int bitsWrittenInBuffer; + private bool disposed = false; + + /// + /// Initializes a new instance of the class. + /// + /// Stream to write to. + /// Width of integers between 4 and 32 bits. + public ArbitraryBitWriter(Stream stream, int width) + { + ArgumentOutOfRangeException.ThrowIfLessThan(width, 4, nameof(width)); + ArgumentOutOfRangeException.ThrowIfGreaterThan(width, 32, nameof(width)); + + this.stream = stream; + this.width = width; + buffer = new byte[width * 8]; + bitsWrittenInBuffer = 0; + } + + /// + /// Writes to the underlying stream. + /// + /// Number to write. + public void Write(int number) + { + ObjectDisposedException.ThrowIf(disposed, this); + + number &= (int)(0xFFFFFFFF >> (32 - width)); + + int remainingWidth = width; + while (remainingWidth > 0) + { + int bufferOffset = bitsWrittenInBuffer / 8; + int bitsWrittenToCurrentByte = bitsWrittenInBuffer % 8; + int bitsRemainingInCurrentByte = 8 - bitsWrittenToCurrentByte; + + int toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte); + int remainingWidthInCurrentByte = Math.Max(0, 8 - remainingWidth); + buffer[bufferOffset] |= (byte)(toWrite << remainingWidthInCurrentByte); + + remainingWidth -= bitsRemainingInCurrentByte; + bitsWrittenInBuffer += bitsRemainingInCurrentByte - remainingWidthInCurrentByte; + } + + if (bitsWrittenInBuffer == buffer.Length * 8) + { + stream.Write(buffer); + } + } + + /// + /// Releases all resources used by the current instance of the class. + /// + public void Dispose() + { + Dispose(true); + } + + /// + /// Releases the unmanaged resources used by the and optionally releases the managed resources. + /// + /// to release both managed and unmanaged resources; to release only unmanaged resources. 
+ protected virtual void Dispose(bool disposing) + { + if (disposed) + { + return; + } + + disposed = true; + + if (disposing) + { + return; + } + + if (bitsWrittenInBuffer == 0) + { + return; + } + + int bytesWrittenInBuffer = bitsWrittenInBuffer / 8; + stream.Write(buffer.AsSpan()[..(bytesWrittenInBuffer + 1)]); + } +} diff --git a/Zipper/Zipper/Zipper.csproj b/Zipper/Zipper/Zipper.csproj new file mode 100644 index 0000000..a258532 --- /dev/null +++ b/Zipper/Zipper/Zipper.csproj @@ -0,0 +1,14 @@ + + + + Library + net9.0 + enable + enable + + + + + + + From 3bce49265ebf59920559e7dc421fd102f6ed40bf Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 2 Mar 2025 07:42:14 +0300 Subject: [PATCH 02/57] Added tests project (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/Zipper.Tests.csproj | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 Zipper/Zipper.Tests/Zipper.Tests.csproj diff --git a/Zipper/Zipper.Tests/Zipper.Tests.csproj b/Zipper/Zipper.Tests/Zipper.Tests.csproj new file mode 100644 index 0000000..78b1af6 --- /dev/null +++ b/Zipper/Zipper.Tests/Zipper.Tests.csproj @@ -0,0 +1,28 @@ + + + + net9.0 + latest + enable + enable + + true + + + + + + + + + + + + + + + + + + + From fc6d5c45b2008d1a750588227c7b741edf92c786 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 2 Mar 2025 13:25:05 +0300 Subject: [PATCH 03/57] Fixed writer and reader (hw3 - lzw) (wip) --- Zipper/Zipper/ArbitraryBitReader.cs | 4 +--- Zipper/Zipper/ArbitraryBitWriter.cs | 25 ++++++++++++++----------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs index fec8c46..c284edb 100644 --- a/Zipper/Zipper/ArbitraryBitReader.cs +++ b/Zipper/Zipper/ArbitraryBitReader.cs @@ -1,7 +1,5 @@ namespace Zipper; -using System.Diagnostics; - /// /// Reads integers of arbitrary width. 
/// @@ -39,7 +37,7 @@ public bool ReadNext(out int number) int remainingWidth = width; while (remainingWidth > 0) { - if (bitsReadFromBuffer is null or 0 or >= 8) + if (bitsReadFromBuffer is null or >= 8) { int readByte = stream.ReadByte(); if (readByte == -1) diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs index c6dde81..a4b845d 100644 --- a/Zipper/Zipper/ArbitraryBitWriter.cs +++ b/Zipper/Zipper/ArbitraryBitWriter.cs @@ -1,7 +1,5 @@ namespace Zipper; -using System.Diagnostics; - /// /// Writes integers of arbitrary width. /// @@ -25,7 +23,7 @@ public ArbitraryBitWriter(Stream stream, int width) this.stream = stream; this.width = width; - buffer = new byte[width * 8]; + buffer = new byte[width]; bitsWrittenInBuffer = 0; } @@ -47,16 +45,21 @@ public void Write(int number) int bitsRemainingInCurrentByte = 8 - bitsWrittenToCurrentByte; int toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte); - int remainingWidthInCurrentByte = Math.Max(0, 8 - remainingWidth); - buffer[bufferOffset] |= (byte)(toWrite << remainingWidthInCurrentByte); - + int previousRemainingWidth = remainingWidth; remainingWidth -= bitsRemainingInCurrentByte; - bitsWrittenInBuffer += bitsRemainingInCurrentByte - remainingWidthInCurrentByte; + remainingWidth = Math.Max(0, remainingWidth); + + int bitsToBeWritten = previousRemainingWidth - remainingWidth; + int bitsToBeLeftInCurrentByte = bitsRemainingInCurrentByte - bitsToBeWritten; + buffer[bufferOffset] |= (byte)(toWrite << bitsToBeLeftInCurrentByte); + bitsWrittenInBuffer += bitsToBeWritten; } - if (bitsWrittenInBuffer == buffer.Length * 8) + if (bitsWrittenInBuffer >= buffer.Length * 8) { stream.Write(buffer); + Array.Clear(buffer); + bitsWrittenInBuffer = 0; } } @@ -81,7 +84,7 @@ protected virtual void Dispose(bool disposing) disposed = true; - if (disposing) + if (!disposing) { return; } @@ -91,7 +94,7 @@ protected virtual void Dispose(bool disposing) return; } - int 
bytesWrittenInBuffer = bitsWrittenInBuffer / 8; - stream.Write(buffer.AsSpan()[..(bytesWrittenInBuffer + 1)]); + int bytesWrittenInBuffer = (int)Math.Ceiling(bitsWrittenInBuffer / 8f); + stream.Write(buffer.AsSpan()[..bytesWrittenInBuffer]); } } From a797044a159812be1e61a02042d00be7a4e1b8c4 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 2 Mar 2025 13:25:45 +0300 Subject: [PATCH 04/57] Fixed InternalsVisibleTo (hw3 - lzw) (wip) --- Zipper/Zipper/Zipper.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zipper/Zipper/Zipper.csproj b/Zipper/Zipper/Zipper.csproj index a258532..036d040 100644 --- a/Zipper/Zipper/Zipper.csproj +++ b/Zipper/Zipper/Zipper.csproj @@ -8,7 +8,7 @@ - + From 686e43bb765048131f43ff1616cd58d208720f4a Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 2 Mar 2025 13:27:07 +0300 Subject: [PATCH 05/57] Added tests for writer and reader (hw3 - lzw) (wip) --- .../ArbitraryBitReaderWriterTests.cs | 96 +++++++++++++++++++ .../Zipper.Tests/ArbitraryBitWriterTests.cs | 83 ++++++++++++++++ Zipper/Zipper.Tests/GlobalSuppressions.cs | 8 ++ 3 files changed, 187 insertions(+) create mode 100644 Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs create mode 100644 Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs create mode 100644 Zipper/Zipper.Tests/GlobalSuppressions.cs diff --git a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs new file mode 100644 index 0000000..ae3c5a6 --- /dev/null +++ b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs @@ -0,0 +1,96 @@ +namespace Zipper.Tests; + +public class ArbitraryBitReaderWriterTests +{ + private static readonly TestData[] TestDataSource = GenerateData(); + + [Test] + public void Reader_ShouldReadValues_WrittenBy_Writer_Correctly([ValueSource(nameof(TestDataSource))] TestData data) + { + int memorySize = 
(int)Math.Ceiling(data.Width * data.Numbers.Length / 8f); + var backingMemory = new byte[memorySize]; + + using (var memory = new MemoryStream(backingMemory)) + { + using var writer = new ArbitraryBitWriter(memory, data.Width); + for (int i = 0; i < data.Numbers.Length; i++) + { + writer.Write(data.Numbers[i]); + } + } + + using (var memory = new MemoryStream(backingMemory)) + { + var reader = new ArbitraryBitReader(memory, data.Width); + for (int i = 0; i < data.Numbers.Length; i++) + { + Assert.Multiple(() => + { + Assert.That(reader.ReadNext(out int number), Is.True); + Assert.That(number, Is.EqualTo(data.Numbers[i])); + }); + } + } + } + + [Test] + public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable() + { + int width = 7; + int numbersCount = 5; + int memorySize = (int)Math.Ceiling(width * numbersCount / 8f); + var backingMemory = new byte[memorySize]; + + using (var memory = new MemoryStream(backingMemory)) + { + using var writer = new ArbitraryBitWriter(memory, width); + for (int i = 0; i < numbersCount; i++) + { + writer.Write(i * i); + } + } + + using (var memory = new MemoryStream(backingMemory)) + { + var reader = new ArbitraryBitReader(memory, width); + for (int i = 0; i < numbersCount; i++) + { + Assert.Multiple(() => + { + Assert.That(reader.ReadNext(out int number), Is.True); + Assert.That(number, Is.EqualTo(i * i)); + }); + } + + Assert.That(reader.ReadNext(out _), Is.False); + } + } + + private static TestData[] GenerateData() + { + var random = new Random(872375823); + + int minWidth = 4; + int maxWidth = 32; + int numbersLength = 21; + + var result = new TestData[maxWidth - minWidth + 1]; + for (int i = 0; i < result.Length; i++) + { + int width = i + minWidth; + int upperBound = (int)Math.Min(1ul << width, int.MaxValue); + + var numbers = new int[numbersLength]; + for (int j = 0; j < numbersLength; j++) + { + numbers[j] = random.Next(upperBound); + } + + result[i] = new(width, numbers); + } + + return result; + } + + public readonly 
record struct TestData(int Width, int[] Numbers); +} diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs new file mode 100644 index 0000000..fc8e120 --- /dev/null +++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs @@ -0,0 +1,83 @@ +namespace Zipper.Tests; + +public class ArbitraryBitWriterTests +{ + private readonly int width = 11; + private MemoryStream memory; + private ArbitraryBitWriter writer; + + [SetUp] + public void Setup() + { + memory = new MemoryStream(); + writer = new(memory, width); + } + + [TearDown] + public void Teardown() + { + writer.Dispose(); + memory.Dispose(); + } + + [Test] + public void Writer_Dispose_ShouldDoNothing_IfCalledTwice() + { + writer.Write(123); + + writer.Dispose(); + var position = memory.Position; + + writer.Dispose(); + Assert.That(memory.Position, Is.EqualTo(position)); + } + + [Test] + public void Writer_Dispose_ShouldDoNothing_IfBufferIsEmpty() + { + // buffer is filled on every eight Write() + for (int i = 0; i < 8; i++) + { + writer.Write(i * i); + } + + var position = memory.Position; + + writer.Dispose(); + Assert.That(memory.Position, Is.EqualTo(position)); + } + + [Test] + public void Writer_Dispose_ShouldDoNothing_IfCalledWithFalse() + { + var writer = new TestWriter(memory, width); + + for (int i = 0; i < 5; i++) + { + writer.Write(i * i); + } + + var position = memory.Position; + + writer.Dispose(); + Assert.That(memory.Position, Is.EqualTo(position)); + } + + [Test] + public void Writer_Write_ShouldThrow_IfDisposed() + { + writer.Write(123); + + writer.Dispose(); + + Assert.Throws(() => writer.Write(456)); + } + + private class TestWriter(Stream stream, int width) : ArbitraryBitWriter(stream, width) + { + protected override void Dispose(bool disposing) + { + base.Dispose(false); + } + } +} diff --git a/Zipper/Zipper.Tests/GlobalSuppressions.cs b/Zipper/Zipper.Tests/GlobalSuppressions.cs new file mode 100644 index 0000000..eeeaf17 --- /dev/null +++ 
b/Zipper/Zipper.Tests/GlobalSuppressions.cs @@ -0,0 +1,8 @@ +// This file is used by Code Analysis to maintain SuppressMessage +// attributes that are applied to this project. +// Project-level suppressions either have no target or are given +// a specific target and scoped to a namespace, type, member, etc. + +using System.Diagnostics.CodeAnalysis; + +[assembly: SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1600:Elements should be documented", Justification = "This is tests project")] From 3cccd8a086da039b1a4f630042c2ca23afd59da1 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 2 Mar 2025 13:28:36 +0300 Subject: [PATCH 06/57] Added BWT (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/BWTTests.cs | 48 ++++++++++++ Zipper/Zipper/BWT.cs | 126 ++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 Zipper/Zipper.Tests/BWTTests.cs create mode 100644 Zipper/Zipper/BWT.cs diff --git a/Zipper/Zipper.Tests/BWTTests.cs b/Zipper/Zipper.Tests/BWTTests.cs new file mode 100644 index 0000000..1274afa --- /dev/null +++ b/Zipper/Zipper.Tests/BWTTests.cs @@ -0,0 +1,48 @@ +namespace Zipper.Tests; + +public class BWTTests +{ + private static readonly string[] StringTestData = + [ + string.Empty, + + "A", + "BB", + "CCCCCC", + "ABACABA", + "ABABABABAB", + ]; + + private static readonly byte[][] TestData = + [ + .. 
StringTestData.Select(System.Text.Encoding.UTF8.GetBytes), + ..GetRandomStrings() + ]; + + [Test] + public void InverseTransform_ShouldBe_SameAs_Input([ValueSource(nameof(TestData))] byte[] input) + { + Span transformed = stackalloc byte[input.Length]; + var index = BWT.ForwardTransform(input, transformed); + + Span reconstructed = stackalloc byte[input.Length]; + BWT.InverseTransform(transformed, index, reconstructed); + Assert.That(reconstructed.SequenceEqual(input), Is.True); + } + + private static IEnumerable GetRandomStrings() + { + int seed = 74687324; + var random = new Random(seed); + + int steps = 16; + int length = 256; + + for (int i = 0; i < steps; i++) + { + var buffer = new byte[length]; + random.NextBytes(buffer); + yield return buffer; + } + } +} diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs new file mode 100644 index 0000000..008e7dc --- /dev/null +++ b/Zipper/Zipper/BWT.cs @@ -0,0 +1,126 @@ +namespace Zipper; + +using System.Diagnostics; + +/// +/// Burrows-Wheeler transform implementation. +/// +internal static class BWT +{ + /// + /// Transforms given byte sequence using Burrows-Wheeler algorithm. + /// + /// Input byte sequence. + /// Span to write transofrmed input to. + /// Index that is used to reconstruct byte sequence. + public static int ForwardTransform(Memory input, Span output) + { + Debug.Assert(input.Length == output.Length, "Length of input and output should be the same"); + int length = input.Length; + + if (length == 0) + { + return -1; + } + + Span offsets = stackalloc int[length]; + for (int i = 0; i < length; i++) + { + offsets[i] = i; + } + + int Compare(int x, int y) + { + var inputSpan = input.Span; + for (int i = 0; i < length; i++) + { + int compare = inputSpan[(i + x) % length] - inputSpan[(i + y) % length]; + if (compare != 0) + { + return compare; + } + } + + return 0; + } + + offsets.Sort(Compare); + + var inputSpan = input.Span; + int? 
identityPosition = null; + for (int i = 0; i < length; i++) + { + if (offsets[i] == 0) + { + identityPosition = i; + } + + output[i] = inputSpan[(offsets[i] + length - 1) % length]; + } + + Debug.Assert(identityPosition.HasValue, "Identity position not found"); + + return identityPosition.Value; + } + + /// + /// Reconstructs byte sequence transformed with Burrows-Wheeler algorithm. + /// + /// Transformed byte sequence. + /// Index that is used to reconstruct byte sequence. + /// Span to write reconstructed byte sequence to. + public static void InverseTransform(Span input, int identityIndex, Span output) + { + Debug.Assert(input.Length == output.Length, "Length of input and output should be the same"); + + if (identityIndex == -1) + { + return; + } + + int length = input.Length; + + Span appearances = stackalloc int[length]; + Span lastAppearances = stackalloc int[256]; + Span byteCounter = stackalloc int[256]; + + for (int i = 0; i < 256; i++) + { + lastAppearances[i] = -1; + } + + for (int i = 0; i < length; i++) + { + byte currentByte = input[i]; + byteCounter[currentByte]++; + + int lastAppearance = lastAppearances[currentByte]; + appearances[i] = lastAppearance == -1 ? 
0 : appearances[lastAppearance] + 1; + lastAppearances[currentByte] = i; + } + + Span lesserBytesCounter = stackalloc int[256]; + int previousCount = 0; + for (int i = 0; i < 256; i++) + { + if (byteCounter[i] == 0) + { + continue; + } + + lesserBytesCounter[i] = previousCount; + previousCount += byteCounter[i]; + } + + int lastIdentityIndex = identityIndex; + byte lastByte = input[lastIdentityIndex]; + output[^1] = input[identityIndex]; + + for (int i = 1; i < length; i++) + { + lastIdentityIndex = appearances[lastIdentityIndex] + lesserBytesCounter[lastByte]; + lastByte = input[lastIdentityIndex]; + output[^(i + 1)] = lastByte; + } + } +} From 44a95c191f2a73b7ec96b0f50d16c01149e4e66b Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Mon, 3 Mar 2025 22:43:31 +0300 Subject: [PATCH 07/57] Switched writer and reader to uint (hw3 - lzw) (wip) --- .../Zipper.Tests/ArbitraryBitReaderWriterTests.cs | 14 +++++++------- Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs | 4 ++-- Zipper/Zipper/ArbitraryBitReader.cs | 8 ++++---- Zipper/Zipper/ArbitraryBitWriter.cs | 8 ++++---- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs index ae3c5a6..e45222f 100644 --- a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs +++ b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs @@ -26,7 +26,7 @@ public void Reader_ShouldReadValues_WrittenBy_Writer_Correctly([ValueSource(name { Assert.Multiple(() => { - Assert.That(reader.ReadNext(out int number), Is.True); + Assert.That(reader.ReadNext(out uint number), Is.True); Assert.That(number, Is.EqualTo(data.Numbers[i])); }); } @@ -46,7 +46,7 @@ public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable() using var writer = new ArbitraryBitWriter(memory, width); for (int i = 0; i < numbersCount; i++) { - writer.Write(i * i); + writer.Write((uint)(i * i)); } } @@ -57,7 
+57,7 @@ public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable() { Assert.Multiple(() => { - Assert.That(reader.ReadNext(out int number), Is.True); + Assert.That(reader.ReadNext(out uint number), Is.True); Assert.That(number, Is.EqualTo(i * i)); }); } @@ -78,12 +78,12 @@ private static TestData[] GenerateData() for (int i = 0; i < result.Length; i++) { int width = i + minWidth; - int upperBound = (int)Math.Min(1ul << width, int.MaxValue); + long upperBound = 1L << width; - var numbers = new int[numbersLength]; + var numbers = new uint[numbersLength]; for (int j = 0; j < numbersLength; j++) { - numbers[j] = random.Next(upperBound); + numbers[j] = (uint)random.NextInt64(upperBound); } result[i] = new(width, numbers); @@ -92,5 +92,5 @@ private static TestData[] GenerateData() return result; } - public readonly record struct TestData(int Width, int[] Numbers); + public readonly record struct TestData(int Width, uint[] Numbers); } diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs index fc8e120..915e168 100644 --- a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs +++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs @@ -38,7 +38,7 @@ public void Writer_Dispose_ShouldDoNothing_IfBufferIsEmpty() // buffer is filled on every eight Write() for (int i = 0; i < 8; i++) { - writer.Write(i * i); + writer.Write((uint)(i * i)); } var position = memory.Position; @@ -54,7 +54,7 @@ public void Writer_Dispose_ShouldDoNothing_IfCalledWithFalse() for (int i = 0; i < 5; i++) { - writer.Write(i * i); + writer.Write((uint)(i * i)); } var position = memory.Position; diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs index c284edb..9e35d95 100644 --- a/Zipper/Zipper/ArbitraryBitReader.cs +++ b/Zipper/Zipper/ArbitraryBitReader.cs @@ -1,7 +1,7 @@ namespace Zipper; /// -/// Reads integers of arbitrary width. +/// Reads unsigned integers of arbitrary width. 
/// internal class ArbitraryBitReader { @@ -30,7 +30,7 @@ public ArbitraryBitReader(Stream stream, int width) /// /// When this method returns, contains the value that was read, if read successfully; otherwise, zero. /// if was successfuly read, otherwise. - public bool ReadNext(out int number) + public bool ReadNext(out uint number) { number = 0; @@ -50,8 +50,8 @@ public bool ReadNext(out int number) } int remainingBitsToRead = 8 - bitsReadFromBuffer.Value; - int mask = 0xFF >> bitsReadFromBuffer.Value; - int toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth); + uint mask = 0xFFu >> bitsReadFromBuffer.Value; + uint toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth); int previousRemainingWidth = remainingWidth; remainingWidth -= remainingBitsToRead; diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs index a4b845d..d287c97 100644 --- a/Zipper/Zipper/ArbitraryBitWriter.cs +++ b/Zipper/Zipper/ArbitraryBitWriter.cs @@ -1,7 +1,7 @@ namespace Zipper; /// -/// Writes integers of arbitrary width. +/// Writes unsigned integers of arbitrary width. /// internal class ArbitraryBitWriter : IDisposable { @@ -31,11 +31,11 @@ public ArbitraryBitWriter(Stream stream, int width) /// Writes to the underlying stream. /// /// Number to write. 
- public void Write(int number) + public void Write(uint number) { ObjectDisposedException.ThrowIf(disposed, this); - number &= (int)(0xFFFFFFFF >> (32 - width)); + number &= 0xFFFFFFFF >> (32 - width); int remainingWidth = width; while (remainingWidth > 0) @@ -44,7 +44,7 @@ public void Write(int number) int bitsWrittenToCurrentByte = bitsWrittenInBuffer % 8; int bitsRemainingInCurrentByte = 8 - bitsWrittenToCurrentByte; - int toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte); + uint toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte); int previousRemainingWidth = remainingWidth; remainingWidth -= bitsRemainingInCurrentByte; remainingWidth = Math.Max(0, remainingWidth); From f80bcc9bee5f27969bd773da9b7553a77a2f2acb Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Mon, 3 Mar 2025 23:12:41 +0300 Subject: [PATCH 08/57] Added leaveOpen option and Flush() to writer (hw3 - lzw) (wip) --- .../Zipper.Tests/ArbitraryBitWriterTests.cs | 62 ++++++++++++++++++- Zipper/Zipper/ArbitraryBitWriter.cs | 46 +++++++++----- 2 files changed, 92 insertions(+), 16 deletions(-) diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs index 915e168..f5eb8a2 100644 --- a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs +++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs @@ -10,7 +10,7 @@ public class ArbitraryBitWriterTests public void Setup() { memory = new MemoryStream(); - writer = new(memory, width); + writer = new(memory, width, true); } [TearDown] @@ -47,6 +47,56 @@ public void Writer_Dispose_ShouldDoNothing_IfBufferIsEmpty() Assert.That(memory.Position, Is.EqualTo(position)); } + [Test] + public void Writer_Flush_ShouldDoNothing_IfBufferIsEmpty() + { + // buffer is filled on every eight Write() + for (int i = 0; i < 8; i++) + { + writer.Write((uint)(i * i)); + } + + var position = memory.Position; + + writer.Flush(); + 
Assert.That(memory.Position, Is.EqualTo(position)); + } + + [Test] + public void Writer_Flush_ShouldFlushHalfFilledBuffer() + { + // buffer is filled on every eight Write(), so write only 4 numbers + for (int i = 0; i < 4; i++) + { + writer.Write((uint)(i * i)); + } + + var position = memory.Position; + + writer.Flush(); + Assert.That(memory.Position, Is.Not.EqualTo(position)); + } + + [Test] + public void Writer_Dispose_ShouldDisposeStream_IfLeaveOpenWasInitializedWith_False() + { + var closingWriter = new ArbitraryBitWriter(memory, width, false); + + closingWriter.Dispose(); + + // getting any property should throw if stream was disposed + Assert.Throws(() => _ = memory.Position); + } + + [Test] + public void Writer_Dispose_ShouldNotDisposeStream_IfLeaveOpenWasInitializedWith_True() + { + writer.Dispose(); + + // getting any property should not throw if stream was disposed + Assert.DoesNotThrow(() => _ = memory.Position); + } + [Test] public void Writer_Dispose_ShouldDoNothing_IfCalledWithFalse() { @@ -73,6 +123,16 @@ public void Writer_Write_ShouldThrow_IfDisposed() Assert.Throws(() => writer.Write(456)); } + [Test] + public void Writer_Flush_ShouldThrow_IfDisposed() + { + writer.Write(123); + + writer.Dispose(); + + Assert.Throws(writer.Flush); + } + private class TestWriter(Stream stream, int width) : ArbitraryBitWriter(stream, width) { protected override void Dispose(bool disposing) diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs index d287c97..e6d9d5b 100644 --- a/Zipper/Zipper/ArbitraryBitWriter.cs +++ b/Zipper/Zipper/ArbitraryBitWriter.cs @@ -8,6 +8,7 @@ internal class ArbitraryBitWriter : IDisposable private readonly Stream stream; private readonly int width; private readonly byte[] buffer; + private readonly bool leaveOpen; private int bitsWrittenInBuffer; private bool disposed = false; @@ -16,13 +17,15 @@ internal class ArbitraryBitWriter : IDisposable /// /// Stream to write to. 
/// Width of integers between 4 and 32 bits. - public ArbitraryBitWriter(Stream stream, int width) + /// to leave the open after disposing the object, otherwise. + public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false) { ArgumentOutOfRangeException.ThrowIfLessThan(width, 4, nameof(width)); ArgumentOutOfRangeException.ThrowIfGreaterThan(width, 32, nameof(width)); this.stream = stream; this.width = width; + this.leaveOpen = leaveOpen; buffer = new byte[width]; bitsWrittenInBuffer = 0; } @@ -57,12 +60,29 @@ public void Write(uint number) if (bitsWrittenInBuffer >= buffer.Length * 8) { - stream.Write(buffer); - Array.Clear(buffer); - bitsWrittenInBuffer = 0; + Flush(); } } + /// + /// Flushes the internal buffer. + /// + public void Flush() + { + ObjectDisposedException.ThrowIf(disposed, this); + + if (bitsWrittenInBuffer == 0) + { + return; + } + + int bytesWrittenInBuffer = (int)Math.Ceiling(bitsWrittenInBuffer / 8f); + stream.Write(buffer.AsSpan()[..bytesWrittenInBuffer]); + + Array.Clear(buffer); + bitsWrittenInBuffer = 0; + } + /// /// Releases all resources used by the current instance of the class. 
/// @@ -82,19 +102,15 @@ protected virtual void Dispose(bool disposing) return; } - disposed = true; - - if (!disposing) + if (disposing) { - return; - } + Flush(); + if (!leaveOpen) + { + stream.Dispose(); + } - if (bitsWrittenInBuffer == 0) - { - return; + disposed = true; } - - int bytesWrittenInBuffer = (int)Math.Ceiling(bitsWrittenInBuffer / 8f); - stream.Write(buffer.AsSpan()[..bytesWrittenInBuffer]); } } From e76291122720680ae34fd7806b13b8b35dd5d88e Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Tue, 4 Mar 2025 00:23:33 +0300 Subject: [PATCH 09/57] Removed unnecessary casting to Span in Writer.Flush() (hw3 - lzw) (wip) --- Zipper/Zipper/ArbitraryBitWriter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs index e6d9d5b..4789cbb 100644 --- a/Zipper/Zipper/ArbitraryBitWriter.cs +++ b/Zipper/Zipper/ArbitraryBitWriter.cs @@ -77,7 +77,7 @@ public void Flush() } int bytesWrittenInBuffer = (int)Math.Ceiling(bitsWrittenInBuffer / 8f); - stream.Write(buffer.AsSpan()[..bytesWrittenInBuffer]); + stream.Write(buffer, 0, bytesWrittenInBuffer); Array.Clear(buffer); bitsWrittenInBuffer = 0; From 17fe1fabfc1fa212df2dba4e4111089f5fbbc14b Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Mon, 3 Mar 2025 23:37:29 +0300 Subject: [PATCH 10/57] Replaced stackalloc with ArrayPool (hw3 - lzw) (wip) --- Zipper/Zipper/BWT.cs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs index 008e7dc..499cca9 100644 --- a/Zipper/Zipper/BWT.cs +++ b/Zipper/Zipper/BWT.cs @@ -1,5 +1,6 @@ namespace Zipper; +using System.Buffers; using System.Diagnostics; /// @@ -23,7 +24,7 @@ public static int ForwardTransform(Memory input, Span output) return -1; } - Span offsets = stackalloc int[length]; + int[] offsets = 
ArrayPool.Shared.Rent(length); for (int i = 0; i < length; i++) { offsets[i] = i; @@ -44,7 +45,7 @@ int Compare(int x, int y) return 0; } - offsets.Sort(Compare); + Array.Sort(offsets, Compare); var inputSpan = input.Span; int? identityPosition = null; @@ -58,6 +59,8 @@ int Compare(int x, int y) output[i] = inputSpan[(offsets[i] + length - 1) % length]; } + ArrayPool.Shared.Return(offsets); + Debug.Assert(identityPosition.HasValue, "Identity position not found"); return identityPosition.Value; @@ -80,7 +83,7 @@ public static void InverseTransform(Span input, int identityIndex, Span appearances = stackalloc int[length]; + int[] appearances = ArrayPool.Shared.Rent(length); Span lastAppearances = stackalloc int[256]; Span byteCounter = stackalloc int[256]; @@ -122,5 +125,7 @@ public static void InverseTransform(Span input, int identityIndex, Span.Shared.Return(appearances); } } From 89a2749aa3f5e385c9146d389f0cc02bc35715c6 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Tue, 4 Mar 2025 00:17:50 +0300 Subject: [PATCH 11/57] Fixed sorting error in BWT.ForwardTransform() (hw3 - lzw) (wip) --- Zipper/Zipper/BWT.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs index 499cca9..45b5b78 100644 --- a/Zipper/Zipper/BWT.cs +++ b/Zipper/Zipper/BWT.cs @@ -45,7 +45,9 @@ int Compare(int x, int y) return 0; } - Array.Sort(offsets, Compare); + var offsetsSpan = offsets.AsSpan(0, length); + + offsetsSpan.Sort(Compare); var inputSpan = input.Span; int? 
identityPosition = null; From ae7ecc6dfa64a1ce6f06f5a78758999daea3f41a Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Tue, 4 Mar 2025 00:19:18 +0300 Subject: [PATCH 12/57] Moved random byte sequence generation for tests to its own class (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/BWTTests.cs | 18 +----------------- Zipper/Zipper.Tests/TestUtil.cs | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 17 deletions(-) create mode 100644 Zipper/Zipper.Tests/TestUtil.cs diff --git a/Zipper/Zipper.Tests/BWTTests.cs b/Zipper/Zipper.Tests/BWTTests.cs index 1274afa..62c0db8 100644 --- a/Zipper/Zipper.Tests/BWTTests.cs +++ b/Zipper/Zipper.Tests/BWTTests.cs @@ -16,7 +16,7 @@ public class BWTTests private static readonly byte[][] TestData = [ .. StringTestData.Select(System.Text.Encoding.UTF8.GetBytes), - ..GetRandomStrings() + .. TestUtil.GetRandomStrings() ]; [Test] @@ -29,20 +29,4 @@ public void InverseTransform_ShouldBe_SameAs_Input([ValueSource(nameof(TestData) BWT.InverseTransform(transformed, index, reconstructed); Assert.That(reconstructed.SequenceEqual(input), Is.True); } - - private static IEnumerable GetRandomStrings() - { - int seed = 74687324; - var random = new Random(seed); - - int steps = 16; - int length = 256; - - for (int i = 0; i < steps; i++) - { - var buffer = new byte[length]; - random.NextBytes(buffer); - yield return buffer; - } - } } diff --git a/Zipper/Zipper.Tests/TestUtil.cs b/Zipper/Zipper.Tests/TestUtil.cs new file mode 100644 index 0000000..00b8ccd --- /dev/null +++ b/Zipper/Zipper.Tests/TestUtil.cs @@ -0,0 +1,20 @@ +namespace Zipper.Tests; + +public static class TestUtil +{ + public static IEnumerable GetRandomStrings() + { + int seed = 74687324; + var random = new Random(seed); + + int steps = 16; + int length = 256; + + for (int i = 0; i < steps; i++) + { + var buffer = new byte[length]; + random.NextBytes(buffer); + yield return buffer; + } + } +} From 
87ef171e6b87a202d7fdd7779c5d1ab103d82212 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Tue, 4 Mar 2025 00:19:58 +0300 Subject: [PATCH 13/57] Added trie (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/TrieTests.cs | 93 ++++++++++++++++++ Zipper/Zipper/Trie.cs | 159 +++++++++++++++++++++++++++++++ 2 files changed, 252 insertions(+) create mode 100644 Zipper/Zipper.Tests/TrieTests.cs create mode 100644 Zipper/Zipper/Trie.cs diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs new file mode 100644 index 0000000..0f4d818 --- /dev/null +++ b/Zipper/Zipper.Tests/TrieTests.cs @@ -0,0 +1,93 @@ +namespace Zipper.Tests; + +public class TrieTests +{ + private static readonly byte[][] TestStrings = + [ + .. new string[] + { + string.Empty, + "A", + "AB", + "ABC", + "ABCD", + } + .Select(System.Text.Encoding.UTF8.GetBytes) + ]; + + private static readonly byte[][][] TestStringsSet = + [ + TestStrings, + [.. TestUtil.GetRandomStrings()] + ]; + + private Trie trie; + + [SetUp] + public void Setup() + { + trie = new(); + } + + [Test] + public void TrieAdd_And_TrieRemove_ReturnsCorrectly_SingleValue([ValueSource(nameof(TestStrings))] byte[] item) + { + Assert.That(() => trie.Add(item), Is.True); + Assert.That(() => trie.Add(item), Is.False); + Assert.That(trie.Size, Is.EqualTo(1)); + + Assert.That(() => trie.Remove(item), Is.True); + Assert.That(() => trie.Remove(item), Is.False); + Assert.That(trie.Size, Is.EqualTo(0)); + } + + [Test] + public void TrieAdd_And_TrieRemove_ReturnsCorrectly_MultipleValues([ValueSource(nameof(TestStringsSet))] byte[][] strings) + { + for (int i = 0; i < strings.Length; i++) + { + var item = strings[i]; + Assert.That(() => trie.Add(item), Is.True); + Assert.That(() => trie.Add(item), Is.False); + Assert.That(trie.Size, Is.EqualTo(i + 1)); + } + + for (int i = strings.Length - 1; i >= 0; i--) + { + var item = strings[i]; + Assert.That(() => trie.Remove(item), Is.True); + 
Assert.That(() => trie.Remove(item), Is.False); + Assert.That(trie.Size, Is.EqualTo(i)); + } + } + + [Test] + public void TrieContains_IsCorrect([ValueSource(nameof(TestStrings))] byte[] item) + { + Assert.That(() => trie.Contains(item), Is.False); + trie.Add(item); + Assert.That(() => trie.Contains(item), Is.True); + trie.Remove(item); + Assert.That(() => trie.Contains(item), Is.False); + } + + [Test] + public void TrieHowManyStartsWithPrefix_IsCorrect() + { + int length = TestStrings.Length; + for (int i = 0; i < length; i++) + { + var item = TestStrings[i]; + Assert.That(() => trie.Add(item), Is.True); + Assert.That(() => trie.HowManyStartsWithPrefix(item), Is.EqualTo(1)); + } + + for (int i = 0; i < length; i++) + { + var item = TestStrings[i]; + Assert.That(() => trie.HowManyStartsWithPrefix(item), Is.EqualTo(length - i)); + } + + Assert.That(() => trie.HowManyStartsWithPrefix("random_prefix"u8), Is.Zero); + } +} diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs new file mode 100644 index 0000000..8166f6a --- /dev/null +++ b/Zipper/Zipper/Trie.cs @@ -0,0 +1,159 @@ +namespace Zipper; + +using System.Diagnostics.CodeAnalysis; + +/// +/// Trie data structure, also known as prefix tree. +/// +public class Trie +{ + private readonly Node rootNode = new(null, 0); + + /// + /// Gets count of all strings stored in this trie. + /// + public int Size => rootNode.TotalDescendants; + + /// + /// Adds to this trie. + /// + /// The byte sequence to add. + /// if wasn't present in trie before adding it, otherwise. + public bool Add(ReadOnlySpan item) + { + var lastNode = rootNode; + foreach (var character in item) + { + lastNode = lastNode.GetOrCreateChild(character); + } + + if (lastNode.EndOfWord) + { + return false; + } + + lastNode.MarkAsEndOfWord(); + + return true; + } + + /// + /// Checks if this trie contains . + /// + /// The byte sequence to seek. + /// if is present in trie, otherwise. 
+ public bool Contains(ReadOnlySpan item) + => GetNode(item, out var node) && node.EndOfWord; + + /// + /// Removes from this trie. + /// + /// The byte sequence to remove. + /// if was present in trie before removing it, otherwise. + public bool Remove(ReadOnlySpan item) + { + if (!GetNode(item, out var node) || node.EndOfWord == false) + { + return false; + } + + node.RemoveSelf(); + + return true; + } + + /// + /// Gets count of byte sequences stored in this trie that start with . + /// + /// Prefix to check against. + /// Count of byte sequences stored in this trie that start with . + public int HowManyStartsWithPrefix(ReadOnlySpan prefix) + => GetNode(prefix, out var node) ? node.TotalDescendants : 0; + + private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node node) + { + node = rootNode; + foreach (var character in prefix) + { + if (!node.TryGetChild(character, out node)) + { + return false; + } + } + + return true; + } + + private class Node + { + private readonly Dictionary children = []; + private readonly Node? parent; + private readonly byte value; + + internal Node(Node? parent, byte value) + { + this.parent = parent; + this.value = value; + } + + public bool EndOfWord { get; private set; } + + // node itself is counted as descendant if marked as end of word + public int TotalDescendants { get; set; } + + public Node GetOrCreateChild(byte value) + { + if (!children.TryGetValue(value, out Node? 
node)) + { + node = new(this, value); + children[value] = node; + + return node; + } + + return node; + } + + public void RemoveSelf() + { + EndOfWord = false; + TotalDescendants--; + + if (parent == null) + { + return; + } + + var lastNode = parent; + var lastValue = value; + while (lastNode.parent != null && lastNode.TotalDescendants == 1) + { + lastValue = lastNode.value; + lastNode = lastNode.parent; + } + + lastNode.children.Remove(lastValue); + + while (lastNode != null) + { + lastNode.TotalDescendants--; + lastNode = lastNode.parent; + } + } + + public bool TryGetChild(byte value, [MaybeNullWhen(false)] out Node node) => children.TryGetValue(value, out node); + + public void MarkAsEndOfWord() + { + EndOfWord = true; + TotalDescendants++; + + var lastNode = parent; + while (lastNode != null) + { + lastNode.TotalDescendants++; + lastNode = lastNode.parent; + } + } + } +} From ae3e24f2e6c7a2f2b5b5d944c85d2757fe8c7b95 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Tue, 4 Mar 2025 01:19:30 +0300 Subject: [PATCH 14/57] Rephrased api docs for Reader.ReadNext() for consistency (hw3 - lzw) (wip) --- Zipper/Zipper/ArbitraryBitReader.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs index 9e35d95..f9e4c51 100644 --- a/Zipper/Zipper/ArbitraryBitReader.cs +++ b/Zipper/Zipper/ArbitraryBitReader.cs @@ -28,7 +28,7 @@ public ArbitraryBitReader(Stream stream, int width) /// /// Reads number from underlying stream and stores it in the . /// - /// When this method returns, contains the value that was read, if read successfully; otherwise, zero. + /// When this method returns, contains the value that was read, if read successfully, zero otherwise. /// if was successfuly read, otherwise. 
public bool ReadNext(out uint number) { From aa38e5145193ad84519d17572f66307079654be5 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Tue, 4 Mar 2025 01:19:56 +0300 Subject: [PATCH 15/57] Fixed api docs for Reader.ReadNext() (hw3 - lzw) (wip) --- Zipper/Zipper/ArbitraryBitReader.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs index f9e4c51..5286ba3 100644 --- a/Zipper/Zipper/ArbitraryBitReader.cs +++ b/Zipper/Zipper/ArbitraryBitReader.cs @@ -29,7 +29,7 @@ public ArbitraryBitReader(Stream stream, int width) /// Reads number from underlying stream and stores it in the . /// /// When this method returns, contains the value that was read, if read successfully, zero otherwise. - /// if was successfuly read, otherwise. + /// if was successfuly read, otherwise. public bool ReadNext(out uint number) { number = 0; From 80bb5179a5fb54cc186d23b4a6d2a37038184fb9 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Tue, 4 Mar 2025 01:20:34 +0300 Subject: [PATCH 16/57] Removed unnecessary features in trie (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/TrieTests.cs | 62 ------------------- Zipper/Zipper/Trie.cs | 100 ++----------------------------- 2 files changed, 6 insertions(+), 156 deletions(-) diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs index 0f4d818..0a2a269 100644 --- a/Zipper/Zipper.Tests/TrieTests.cs +++ b/Zipper/Zipper.Tests/TrieTests.cs @@ -28,66 +28,4 @@ public void Setup() { trie = new(); } - - [Test] - public void TrieAdd_And_TrieRemove_ReturnsCorrectly_SingleValue([ValueSource(nameof(TestStrings))] byte[] item) - { - Assert.That(() => trie.Add(item), Is.True); - Assert.That(() => trie.Add(item), Is.False); - Assert.That(trie.Size, Is.EqualTo(1)); - - Assert.That(() => trie.Remove(item), Is.True); - Assert.That(() => trie.Remove(item), 
Is.False); - Assert.That(trie.Size, Is.EqualTo(0)); - } - - [Test] - public void TrieAdd_And_TrieRemove_ReturnsCorrectly_MultipleValues([ValueSource(nameof(TestStringsSet))] byte[][] strings) - { - for (int i = 0; i < strings.Length; i++) - { - var item = strings[i]; - Assert.That(() => trie.Add(item), Is.True); - Assert.That(() => trie.Add(item), Is.False); - Assert.That(trie.Size, Is.EqualTo(i + 1)); - } - - for (int i = strings.Length - 1; i >= 0; i--) - { - var item = strings[i]; - Assert.That(() => trie.Remove(item), Is.True); - Assert.That(() => trie.Remove(item), Is.False); - Assert.That(trie.Size, Is.EqualTo(i)); - } - } - - [Test] - public void TrieContains_IsCorrect([ValueSource(nameof(TestStrings))] byte[] item) - { - Assert.That(() => trie.Contains(item), Is.False); - trie.Add(item); - Assert.That(() => trie.Contains(item), Is.True); - trie.Remove(item); - Assert.That(() => trie.Contains(item), Is.False); - } - - [Test] - public void TrieHowManyStartsWithPrefix_IsCorrect() - { - int length = TestStrings.Length; - for (int i = 0; i < length; i++) - { - var item = TestStrings[i]; - Assert.That(() => trie.Add(item), Is.True); - Assert.That(() => trie.HowManyStartsWithPrefix(item), Is.EqualTo(1)); - } - - for (int i = 0; i < length; i++) - { - var item = TestStrings[i]; - Assert.That(() => trie.HowManyStartsWithPrefix(item), Is.EqualTo(length - i)); - } - - Assert.That(() => trie.HowManyStartsWithPrefix("random_prefix"u8), Is.Zero); - } } diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs index 8166f6a..974f806 100644 --- a/Zipper/Zipper/Trie.cs +++ b/Zipper/Zipper/Trie.cs @@ -7,12 +7,7 @@ namespace Zipper; /// public class Trie { - private readonly Node rootNode = new(null, 0); - - /// - /// Gets count of all strings stored in this trie. - /// - public int Size => rootNode.TotalDescendants; + private readonly Node rootNode = new(); /// /// Adds to this trie. 
@@ -32,44 +27,11 @@ public bool Add(ReadOnlySpan item) return false; } - lastNode.MarkAsEndOfWord(); - - return true; - } - - /// - /// Checks if this trie contains . - /// - /// The byte sequence to seek. - /// if is present in trie, otherwise. - public bool Contains(ReadOnlySpan item) - => GetNode(item, out var node) && node.EndOfWord; - - /// - /// Removes from this trie. - /// - /// The byte sequence to remove. - /// if was present in trie before removing it, otherwise. - public bool Remove(ReadOnlySpan item) - { - if (!GetNode(item, out var node) || node.EndOfWord == false) - { - return false; - } - - node.RemoveSelf(); + lastNode.EndOfWord = true; return true; } - /// - /// Gets count of byte sequences stored in this trie that start with . - /// - /// Prefix to check against. - /// Count of byte sequences stored in this trie that start with . - public int HowManyStartsWithPrefix(ReadOnlySpan prefix) - => GetNode(prefix, out var node) ? node.TotalDescendants : 0; - private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node node) { node = rootNode; @@ -87,25 +49,14 @@ private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node private class Node { private readonly Dictionary children = []; - private readonly Node? parent; - private readonly byte value; - internal Node(Node? parent, byte value) - { - this.parent = parent; - this.value = value; - } - - public bool EndOfWord { get; private set; } - - // node itself is counted as descendant if marked as end of word - public int TotalDescendants { get; set; } + public bool EndOfWord { get; set; } public Node GetOrCreateChild(byte value) { if (!children.TryGetValue(value, out Node? 
node)) { - node = new(this, value); + node = new(); children[value] = node; return node; @@ -114,46 +65,7 @@ public Node GetOrCreateChild(byte value) return node; } - public void RemoveSelf() - { - EndOfWord = false; - TotalDescendants--; - - if (parent == null) - { - return; - } - - var lastNode = parent; - var lastValue = value; - while (lastNode.parent != null && lastNode.TotalDescendants == 1) - { - lastValue = lastNode.value; - lastNode = lastNode.parent; - } - - lastNode.children.Remove(lastValue); - - while (lastNode != null) - { - lastNode.TotalDescendants--; - lastNode = lastNode.parent; - } - } - - public bool TryGetChild(byte value, [MaybeNullWhen(false)] out Node node) => children.TryGetValue(value, out node); - - public void MarkAsEndOfWord() - { - EndOfWord = true; - TotalDescendants++; - - var lastNode = parent; - while (lastNode != null) - { - lastNode.TotalDescendants++; - lastNode = lastNode.parent; - } - } + public bool TryGetChild(byte value, [MaybeNullWhen(false)] out Node node) + => children.TryGetValue(value, out node); } } From 5ef617b950b714eac5925827b9fba110ef09fe82 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Tue, 4 Mar 2025 01:36:18 +0300 Subject: [PATCH 17/57] Converted trie to dictionary (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/TrieTests.cs | 32 +++++++++++++++++++++++++++++++ Zipper/Zipper/Trie.cs | 33 ++++++++++++++++++++++++++------ 2 files changed, 59 insertions(+), 6 deletions(-) diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs index 0a2a269..8344639 100644 --- a/Zipper/Zipper.Tests/TrieTests.cs +++ b/Zipper/Zipper.Tests/TrieTests.cs @@ -28,4 +28,36 @@ public void Setup() { trie = new(); } + + [Test] + public void TrieAdd_And_TrieTryGetValue_ReturnsCorrectly_SingleValue([ValueSource(nameof(TestStrings))] byte[] bytes) + { + TestAddValue(trie, bytes); + } + + [Test] + public void 
TrieAdd_And_TrieTryGetValue_ReturnsCorrectly_MultipleValues([ValueSource(nameof(TestStringsSet))] byte[][] strings) + { + foreach (var bytes in strings) + { + TestAddValue(trie, bytes); + } + } + + private static void TestAddValue(Trie trie, byte[] bytes) + { + Assert.Multiple(() => + { + Assert.That(trie.TryGetValue(bytes, out _), Is.False); + + Assert.That(Add(trie, bytes), Is.True); + Assert.That(Add(trie, bytes), Is.False); + + Assert.That(trie.TryGetValue(bytes, out int value), Is.True); + Assert.That(value, Is.EqualTo(bytes.Length)); + }); + } + + private static bool Add(Trie trie, byte[] bytes) + => trie.Add(bytes, bytes.Length); } diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs index 974f806..def50b4 100644 --- a/Zipper/Zipper/Trie.cs +++ b/Zipper/Zipper/Trie.cs @@ -3,21 +3,22 @@ namespace Zipper; using System.Diagnostics.CodeAnalysis; /// -/// Trie data structure, also known as prefix tree. +/// Trie data structure, also known as prefix tree, implemented as dictionary. /// public class Trie { private readonly Node rootNode = new(); /// - /// Adds to this trie. + /// Adds associated with to this trie. /// - /// The byte sequence to add. - /// if wasn't present in trie before adding it, otherwise. - public bool Add(ReadOnlySpan item) + /// The byte sequence to as key. + /// The number to add as value. + /// if wasn't present in trie before adding it, otherwise. + public bool Add(ReadOnlySpan key, int value) { var lastNode = rootNode; - foreach (var character in item) + foreach (var character in key) { lastNode = lastNode.GetOrCreateChild(character); } @@ -28,10 +29,28 @@ public bool Add(ReadOnlySpan item) } lastNode.EndOfWord = true; + lastNode.Value = value; return true; } + /// + /// Tries to get value associated with . + /// + /// The key of the value to get. + /// + /// When this method returns, contains the value associated with , if is found, zero otherwise. + /// + /// if is found, otherwise. 
+ public bool TryGetValue(ReadOnlySpan key, out int value) + { + var nodeExistsAndHasValue = GetNode(key, out var node) && node.EndOfWord; + + value = node?.Value ?? 0; + + return nodeExistsAndHasValue; + } + private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node node) { node = rootNode; @@ -52,6 +71,8 @@ private class Node public bool EndOfWord { get; set; } + public int Value { get; set; } + public Node GetOrCreateChild(byte value) { if (!children.TryGetValue(value, out Node? node)) From 33afe78e68328016b30d9f4661eecc8cf5a325c2 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Tue, 4 Mar 2025 22:12:30 +0300 Subject: [PATCH 18/57] Switched trie to store uint as value instead of int (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/TrieTests.cs | 4 ++-- Zipper/Zipper/Trie.cs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs index 8344639..190e047 100644 --- a/Zipper/Zipper.Tests/TrieTests.cs +++ b/Zipper/Zipper.Tests/TrieTests.cs @@ -53,11 +53,11 @@ private static void TestAddValue(Trie trie, byte[] bytes) Assert.That(Add(trie, bytes), Is.True); Assert.That(Add(trie, bytes), Is.False); - Assert.That(trie.TryGetValue(bytes, out int value), Is.True); + Assert.That(trie.TryGetValue(bytes, out uint value), Is.True); Assert.That(value, Is.EqualTo(bytes.Length)); }); } private static bool Add(Trie trie, byte[] bytes) - => trie.Add(bytes, bytes.Length); + => trie.Add(bytes, (uint)bytes.Length); } diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs index def50b4..79cd6a3 100644 --- a/Zipper/Zipper/Trie.cs +++ b/Zipper/Zipper/Trie.cs @@ -15,7 +15,7 @@ public class Trie /// The byte sequence to as key. /// The number to add as value. /// if wasn't present in trie before adding it, otherwise. 
- public bool Add(ReadOnlySpan key, int value) + public bool Add(ReadOnlySpan key, uint value) { var lastNode = rootNode; foreach (var character in key) @@ -42,7 +42,7 @@ public bool Add(ReadOnlySpan key, int value) /// When this method returns, contains the value associated with , if is found, zero otherwise. /// /// if is found, otherwise. - public bool TryGetValue(ReadOnlySpan key, out int value) + public bool TryGetValue(ReadOnlySpan key, out uint value) { var nodeExistsAndHasValue = GetNode(key, out var node) && node.EndOfWord; @@ -71,7 +71,7 @@ private class Node public bool EndOfWord { get; set; } - public int Value { get; set; } + public uint Value { get; set; } public Node GetOrCreateChild(byte value) { From d040fa1e1393ac171798b2f1648fbcdaa0d2103f Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Wed, 5 Mar 2025 02:21:58 +0300 Subject: [PATCH 19/57] Introduced constants for min and max width of numbers in writer and reader (hw3 - lzw) (wip) --- Zipper/Zipper/ArbitraryBitReader.cs | 12 +++++++++--- Zipper/Zipper/ArbitraryBitWriter.cs | 16 +++++++++++++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs index 5286ba3..973ad03 100644 --- a/Zipper/Zipper/ArbitraryBitReader.cs +++ b/Zipper/Zipper/ArbitraryBitReader.cs @@ -5,6 +5,12 @@ namespace Zipper; /// internal class ArbitraryBitReader { + /// + public const int MinWidth = ArbitraryBitWriter.MinWidth; + + /// + public const int MaxWidth = ArbitraryBitWriter.MaxWidth; + private readonly Stream stream; private readonly int width; private byte buffer; @@ -14,11 +20,11 @@ internal class ArbitraryBitReader /// Initializes a new instance of the class. /// /// Stream to write to. - /// Width of integers between 4 and 32 bits. + /// Width of integers between and bits. 
public ArbitraryBitReader(Stream stream, int width) { - ArgumentOutOfRangeException.ThrowIfLessThan(width, 4, nameof(width)); - ArgumentOutOfRangeException.ThrowIfGreaterThan(width, 32, nameof(width)); + ArgumentOutOfRangeException.ThrowIfLessThan(width, MinWidth, nameof(width)); + ArgumentOutOfRangeException.ThrowIfGreaterThan(width, MaxWidth, nameof(width)); this.stream = stream; this.width = width; diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs index 4789cbb..5e09717 100644 --- a/Zipper/Zipper/ArbitraryBitWriter.cs +++ b/Zipper/Zipper/ArbitraryBitWriter.cs @@ -5,6 +5,16 @@ namespace Zipper; /// internal class ArbitraryBitWriter : IDisposable { + /// + /// Smallest allowed width of numbers. + /// + public const int MinWidth = 4; + + /// + /// Largest allowed width of numbers. + /// + public const int MaxWidth = 32; + private readonly Stream stream; private readonly int width; private readonly byte[] buffer; @@ -16,12 +26,12 @@ internal class ArbitraryBitWriter : IDisposable /// Initializes a new instance of the class. /// /// Stream to write to. - /// Width of integers between 4 and 32 bits. + /// Width of integers between and bits. /// to leave the open after disposing the object, otherwise. 
public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false) { - ArgumentOutOfRangeException.ThrowIfLessThan(width, 4, nameof(width)); - ArgumentOutOfRangeException.ThrowIfGreaterThan(width, 32, nameof(width)); + ArgumentOutOfRangeException.ThrowIfLessThan(width, MinWidth, nameof(width)); + ArgumentOutOfRangeException.ThrowIfGreaterThan(width, MaxWidth, nameof(width)); this.stream = stream; this.width = width; From ccf6ce246f523dbf382e8a89e29b96e875c304bb Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Wed, 5 Mar 2025 02:40:02 +0300 Subject: [PATCH 20/57] Added checks for stream read-/writeabiltiy to writer and reader (hw3 - lzw) (wip) --- Zipper/Zipper/ArbitraryBitReader.cs | 5 +++++ Zipper/Zipper/ArbitraryBitWriter.cs | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs index 973ad03..2308421 100644 --- a/Zipper/Zipper/ArbitraryBitReader.cs +++ b/Zipper/Zipper/ArbitraryBitReader.cs @@ -26,6 +26,11 @@ public ArbitraryBitReader(Stream stream, int width) ArgumentOutOfRangeException.ThrowIfLessThan(width, MinWidth, nameof(width)); ArgumentOutOfRangeException.ThrowIfGreaterThan(width, MaxWidth, nameof(width)); + if (!stream.CanRead) + { + throw new ArgumentException("Stream does not support reading", nameof(stream)); + } + this.stream = stream; this.width = width; bitsReadFromBuffer = null; diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs index 5e09717..4c4edfe 100644 --- a/Zipper/Zipper/ArbitraryBitWriter.cs +++ b/Zipper/Zipper/ArbitraryBitWriter.cs @@ -33,6 +33,11 @@ public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false) ArgumentOutOfRangeException.ThrowIfLessThan(width, MinWidth, nameof(width)); ArgumentOutOfRangeException.ThrowIfGreaterThan(width, MaxWidth, nameof(width)); + if (!stream.CanWrite) + { + throw new ArgumentException("Stream does not support 
writing", nameof(stream)); + } + this.stream = stream; this.width = width; this.leaveOpen = leaveOpen; From 3b73ae867f28425e0748fadd6475276e0f0b1b5a Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Wed, 5 Mar 2025 02:50:42 +0300 Subject: [PATCH 21/57] Added more tests for writer and reader (hw3 - lzw) (wip) --- .../ArbitraryBitReaderWriterTests.cs | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs index e45222f..e67f8a1 100644 --- a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs +++ b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs @@ -4,6 +4,24 @@ public class ArbitraryBitReaderWriterTests { private static readonly TestData[] TestDataSource = GenerateData(); + [Test] + public void ReaderAndWriter_ShouldThrowIf_InitializedWith_Width_InDisallowedRange() + { + Assert.Throws(() => new ArbitraryBitReader(Stream.Null, ArbitraryBitReader.MinWidth - 1)); + Assert.Throws(() => new ArbitraryBitReader(Stream.Null, ArbitraryBitReader.MaxWidth + 1)); + + Assert.Throws(() => new ArbitraryBitWriter(Stream.Null, ArbitraryBitWriter.MinWidth - 1)); + Assert.Throws(() => new ArbitraryBitWriter(Stream.Null, ArbitraryBitWriter.MaxWidth + 1)); + } + + [Test] + public void Reader_ShouldThrowIf_StreamCanNotRead() + => Assert.Throws(() => new ArbitraryBitReader(new TestStream(), ArbitraryBitReader.MinWidth)); + + [Test] + public void Writer_ShouldThrowIf_StreamCanNotWrite() + => Assert.Throws(() => new ArbitraryBitWriter(new TestStream(), ArbitraryBitWriter.MinWidth)); + [Test] public void Reader_ShouldReadValues_WrittenBy_Writer_Correctly([ValueSource(nameof(TestDataSource))] TestData data) { @@ -93,4 +111,12 @@ private static TestData[] GenerateData() } public readonly record struct TestData(int Width, uint[] Numbers); + + // use MemoryStream, because implementing all Stream's abstract membrers 
leads to bad code coverage + private class TestStream : MemoryStream + { + public override bool CanRead => false; + + public override bool CanWrite => false; + } } From fc3f02d2a24423c130314d07f791166e2f041820 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Wed, 5 Mar 2025 03:05:32 +0300 Subject: [PATCH 22/57] Use ArrayPool in writer (hw3 - lzw) (wip) --- Zipper/Zipper/ArbitraryBitWriter.cs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs index 4c4edfe..eba0fcb 100644 --- a/Zipper/Zipper/ArbitraryBitWriter.cs +++ b/Zipper/Zipper/ArbitraryBitWriter.cs @@ -1,5 +1,7 @@ namespace Zipper; +using System.Buffers; + /// /// Writes unsigned integers of arbitrary width. /// @@ -15,6 +17,8 @@ internal class ArbitraryBitWriter : IDisposable /// public const int MaxWidth = 32; + private static readonly ArrayPool BufferPool = ArrayPool.Create(); + private readonly Stream stream; private readonly int width; private readonly byte[] buffer; @@ -41,7 +45,7 @@ public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false) this.stream = stream; this.width = width; this.leaveOpen = leaveOpen; - buffer = new byte[width]; + buffer = BufferPool.Rent(width); bitsWrittenInBuffer = 0; } @@ -73,7 +77,7 @@ public void Write(uint number) bitsWrittenInBuffer += bitsToBeWritten; } - if (bitsWrittenInBuffer >= buffer.Length * 8) + if (bitsWrittenInBuffer >= width * 8) { Flush(); } @@ -120,6 +124,8 @@ protected virtual void Dispose(bool disposing) if (disposing) { Flush(); + BufferPool.Return(buffer); + if (!leaveOpen) { stream.Dispose(); From 0c4c56f8ba8496c4f7f564f9b5b2d1351be29229 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Wed, 5 Mar 2025 03:07:32 +0300 Subject: [PATCH 23/57] Use local ArrayPool instead of shared in BWT (hw3 - lzw) (wip) --- Zipper/Zipper/BWT.cs | 10 
++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs index 45b5b78..eabe9ce 100644 --- a/Zipper/Zipper/BWT.cs +++ b/Zipper/Zipper/BWT.cs @@ -8,6 +8,8 @@ namespace Zipper; /// internal static class BWT { + private static readonly ArrayPool Pool = ArrayPool.Create(); + /// /// Transforms given byte sequence using Burrows-Wheeler algorithm. /// @@ -24,7 +26,7 @@ public static int ForwardTransform(Memory input, Span output) return -1; } - int[] offsets = ArrayPool.Shared.Rent(length); + int[] offsets = Pool.Rent(length); for (int i = 0; i < length; i++) { offsets[i] = i; @@ -61,7 +63,7 @@ int Compare(int x, int y) output[i] = inputSpan[(offsets[i] + length - 1) % length]; } - ArrayPool.Shared.Return(offsets); + Pool.Return(offsets); Debug.Assert(identityPosition.HasValue, "Identity position not found"); @@ -85,7 +87,7 @@ public static void InverseTransform(Span input, int identityIndex, Span.Shared.Rent(length); + int[] appearances = Pool.Rent(length); Span lastAppearances = stackalloc int[256]; Span byteCounter = stackalloc int[256]; @@ -128,6 +130,6 @@ public static void InverseTransform(Span input, int identityIndex, Span.Shared.Return(appearances); + Pool.Return(appearances); } } From 1ce4bec36bf3d7f041ec557165e770f6e76f603c Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 6 Mar 2025 01:13:29 +0300 Subject: [PATCH 24/57] Switched trie to use generic type as value (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/TrieTests.cs | 10 +++++----- Zipper/Zipper/Trie.cs | 23 ++++++++++++++--------- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs index 190e047..ee12c05 100644 --- a/Zipper/Zipper.Tests/TrieTests.cs +++ b/Zipper/Zipper.Tests/TrieTests.cs @@ -21,7 +21,7 @@ public class TrieTests [.. 
TestUtil.GetRandomStrings()] ]; - private Trie trie; + private Trie trie; [SetUp] public void Setup() @@ -44,7 +44,7 @@ public void TrieAdd_And_TrieTryGetValue_ReturnsCorrectly_MultipleValues([ValueSo } } - private static void TestAddValue(Trie trie, byte[] bytes) + private static void TestAddValue(Trie trie, byte[] bytes) { Assert.Multiple(() => { @@ -53,11 +53,11 @@ private static void TestAddValue(Trie trie, byte[] bytes) Assert.That(Add(trie, bytes), Is.True); Assert.That(Add(trie, bytes), Is.False); - Assert.That(trie.TryGetValue(bytes, out uint value), Is.True); + Assert.That(trie.TryGetValue(bytes, out int value), Is.True); Assert.That(value, Is.EqualTo(bytes.Length)); }); } - private static bool Add(Trie trie, byte[] bytes) - => trie.Add(bytes, (uint)bytes.Length); + private static bool Add(Trie trie, byte[] bytes) + => trie.Add(bytes, bytes.Length); } diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs index 79cd6a3..4202ea8 100644 --- a/Zipper/Zipper/Trie.cs +++ b/Zipper/Zipper/Trie.cs @@ -5,7 +5,9 @@ namespace Zipper; /// /// Trie data structure, also known as prefix tree, implemented as dictionary. /// -public class Trie +/// Type of values. +public class Trie + where T : struct { private readonly Node rootNode = new(); @@ -15,7 +17,7 @@ public class Trie /// The byte sequence to as key. /// The number to add as value. /// if wasn't present in trie before adding it, otherwise. - public bool Add(ReadOnlySpan key, uint value) + public bool Add(ReadOnlySpan key, T value) { var lastNode = rootNode; foreach (var character in key) @@ -39,16 +41,19 @@ public bool Add(ReadOnlySpan key, uint value) /// /// The key of the value to get. /// - /// When this method returns, contains the value associated with , if is found, zero otherwise. + /// When this method returns, contains the value associated with , if is found, otherwise. /// /// if is found, otherwise. 
- public bool TryGetValue(ReadOnlySpan key, out uint value) + public bool TryGetValue(ReadOnlySpan key, out T value) { - var nodeExistsAndHasValue = GetNode(key, out var node) && node.EndOfWord; - - value = node?.Value ?? 0; + value = default; + if (GetNode(key, out var node) && node.EndOfWord) + { + value = node.Value; + return true; + } - return nodeExistsAndHasValue; + return false; } private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node node) @@ -71,7 +76,7 @@ private class Node public bool EndOfWord { get; set; } - public uint Value { get; set; } + public T Value { get; set; } public Node GetOrCreateChild(byte value) { From 8cb223b36beeca490cdcff3772ff73fd2bc9e7d9 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 6 Mar 2025 01:23:00 +0300 Subject: [PATCH 25/57] Switched Trie to be internal instead of public (hw3 - lzw) (wip) --- Zipper/Zipper/Trie.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs index 4202ea8..e46ba1c 100644 --- a/Zipper/Zipper/Trie.cs +++ b/Zipper/Zipper/Trie.cs @@ -6,7 +6,7 @@ namespace Zipper; /// Trie data structure, also known as prefix tree, implemented as dictionary. /// /// Type of values. 
-public class Trie +internal class Trie where T : struct { private readonly Node rootNode = new(); From 20a8e95ae3bebbd41979b911bb4a8a42d30e64e7 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 9 Mar 2025 15:11:30 +0300 Subject: [PATCH 26/57] Removed Dispose(bool) from writer (hw3 - lzw) (wip) --- .../Zipper.Tests/ArbitraryBitWriterTests.cs | 24 ----------------- Zipper/Zipper/ArbitraryBitWriter.cs | 26 +++++-------------- 2 files changed, 7 insertions(+), 43 deletions(-) diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs index f5eb8a2..05daf30 100644 --- a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs +++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs @@ -97,22 +97,6 @@ public void Writer_Dispose_ShouldNotDisposeStream_IfLeaveOpenWasInitializedWith_ Assert.DoesNotThrow(() => _ = memory.Position); } - [Test] - public void Writer_Dispose_ShouldDoNothing_IfCalledWithFalse() - { - var writer = new TestWriter(memory, width); - - for (int i = 0; i < 5; i++) - { - writer.Write((uint)(i * i)); - } - - var position = memory.Position; - - writer.Dispose(); - Assert.That(memory.Position, Is.EqualTo(position)); - } - [Test] public void Writer_Write_ShouldThrow_IfDisposed() { @@ -132,12 +116,4 @@ public void Writer_Flush_ShouldThrow_IfDisposed() Assert.Throws(writer.Flush); } - - private class TestWriter(Stream stream, int width) : ArbitraryBitWriter(stream, width) - { - protected override void Dispose(bool disposing) - { - base.Dispose(false); - } - } } diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs index eba0fcb..72ad46f 100644 --- a/Zipper/Zipper/ArbitraryBitWriter.cs +++ b/Zipper/Zipper/ArbitraryBitWriter.cs @@ -106,32 +106,20 @@ public void Flush() /// Releases all resources used by the current instance of the class. 
/// public void Dispose() - { - Dispose(true); - } - - /// - /// Releases the unmanaged resources used by the and optionally releases the managed resources. - /// - /// to release both managed and unmanaged resources; to release only unmanaged resources. - protected virtual void Dispose(bool disposing) { if (disposed) { return; } - if (disposing) - { - Flush(); - BufferPool.Return(buffer); - - if (!leaveOpen) - { - stream.Dispose(); - } + Flush(); + BufferPool.Return(buffer); - disposed = true; + if (!leaveOpen) + { + stream.Dispose(); } + + disposed = true; } } From ce7a127dc08082f1f9fe618ed4bc859335dad5d2 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 9 Mar 2025 15:49:39 +0300 Subject: [PATCH 27/57] Changed trie to use char-by-char mode (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/TrieTests.cs | 98 +++++++++++++++++++++++---- Zipper/Zipper/Trie.cs | 113 ++++++++++++++----------------- 2 files changed, 135 insertions(+), 76 deletions(-) diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs index ee12c05..cf7b353 100644 --- a/Zipper/Zipper.Tests/TrieTests.cs +++ b/Zipper/Zipper.Tests/TrieTests.cs @@ -21,6 +21,9 @@ public class TrieTests [.. 
TestUtil.GetRandomStrings()] ]; + private readonly byte testKey = 157; + private readonly int testValue = 252354; + private Trie trie; [SetUp] @@ -30,34 +33,99 @@ public void Setup() } [Test] - public void TrieAdd_And_TrieTryGetValue_ReturnsCorrectly_SingleValue([ValueSource(nameof(TestStrings))] byte[] bytes) + public void AddChild_ShouldReturnTrue_IfChildDidNotExists() { - TestAddValue(trie, bytes); + Assert.Multiple(() => + { + Assert.That(trie.HasChild(testKey), Is.False); + Assert.That(trie.AddChild(testKey, testValue), Is.True); + }); } [Test] - public void TrieAdd_And_TrieTryGetValue_ReturnsCorrectly_MultipleValues([ValueSource(nameof(TestStringsSet))] byte[][] strings) + public void HasChild_ShouldReturnTrue_IfAddedChild() { - foreach (var bytes in strings) - { - TestAddValue(trie, bytes); - } + trie.AddChild(testKey, testValue); + Assert.That(trie.HasChild(testKey), Is.True); } - private static void TestAddValue(Trie trie, byte[] bytes) + [Test] + public void HasChild_ShouldReturnFalse_IfChildDoesNotExist() + { + Assert.That(trie.HasChild(testKey), Is.False); + } + + [Test] + public void AddChild_ShouldReturnFalse_IfChildExisted() + { + trie.AddChild(testKey, testValue); + Assert.That(trie.AddChild(testKey, testValue), Is.False); + } + + [Test] + public void Add_ShouldNotMove() { Assert.Multiple(() => { - Assert.That(trie.TryGetValue(bytes, out _), Is.False); + Assert.That(trie.AtRoot, Is.True); + Assert.That(trie.AddChild(testKey, testValue), Is.True); + Assert.That(trie.AtRoot, Is.True); + }); + } + + [Test] + public void MoveForward_ShouldReturnTrue_IfMovingToAddedChild() + { + trie.AddChild(testKey, testValue); + Assert.That(trie.MoveForward(testKey), Is.True); + } - Assert.That(Add(trie, bytes), Is.True); - Assert.That(Add(trie, bytes), Is.False); + [Test] + public void MoveForward_ShouldReturnFalse_IfChildDoesNotExist() + { + Assert.That(trie.MoveForward(testKey), Is.False); + } - Assert.That(trie.TryGetValue(bytes, out int value), Is.True); - 
Assert.That(value, Is.EqualTo(bytes.Length)); + [Test] + public void MoveForward_ShouldMove() + { + trie.AddChild(testKey, testValue); + Assert.Multiple(() => + { + Assert.That(trie.AtRoot, Is.True); + Assert.That(trie.MoveForward(testKey), Is.True); + Assert.That(trie.AtRoot, Is.False); }); } - private static bool Add(Trie trie, byte[] bytes) - => trie.Add(bytes, bytes.Length); + [Test] + public void Reset_ShouldReset_IfMoved() + { + Assert.That(trie.AtRoot, Is.True); + + trie.AddChild(testKey, testValue); + trie.MoveForward(testKey); + Assert.That(trie.AtRoot, Is.False); + + trie.Reset(); + Assert.That(trie.AtRoot, Is.True); + } + + [Test] + public void AddChild_ShouldAdd_Once() + { + int valueA = 3463235; + int valueB = 73334536; + + trie.AddChild(testKey, valueA); + Assert.That(trie.AddChild(testKey, valueB), Is.False); + } + + [Test] + public void CurrentValue_ShouldReturnAddedValue() + { + trie.AddChild(testKey, testValue); + trie.MoveForward(testKey); + Assert.That(trie.CurrentValue, Is.EqualTo(testValue)); + } } diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs index e46ba1c..9145553 100644 --- a/Zipper/Zipper/Trie.cs +++ b/Zipper/Zipper/Trie.cs @@ -1,97 +1,88 @@ namespace Zipper; -using System.Diagnostics.CodeAnalysis; - /// -/// Trie data structure, also known as prefix tree, implemented as dictionary. +/// Trie data structure, also known as prefix tree, that can be traversed through. /// /// Type of values. internal class Trie where T : struct { - private readonly Node rootNode = new(); + private readonly Node rootNode = new(default); + private Node lastNode; /// - /// Adds associated with to this trie. + /// Initializes a new instance of the class. /// - /// The byte sequence to as key. - /// The number to add as value. - /// if wasn't present in trie before adding it, otherwise. 
- public bool Add(ReadOnlySpan key, T value) + public Trie() { - var lastNode = rootNode; - foreach (var character in key) - { - lastNode = lastNode.GetOrCreateChild(character); - } + lastNode = rootNode; + } - if (lastNode.EndOfWord) + /// + /// Gets value stored at current position in trie. + /// + public T CurrentValue => lastNode.Value; + + /// + /// Gets a value indicating whether current position is root. + /// + public bool AtRoot => lastNode == rootNode; + + /// + /// Resets position to root. + /// + public void Reset() + { + lastNode = rootNode; + } + + /// + /// Adds child at current position with specified and . + /// + /// Key of child to add. + /// Value of child to add. + /// if child with specified key did not exist at current position, otherwise. + public bool AddChild(byte key, T value) + { + if (HasChild(key)) { return false; } - lastNode.EndOfWord = true; - lastNode.Value = value; + var node = new Node(value); + lastNode.Children[key] = node; return true; } /// - /// Tries to get value associated with . + /// Moves forward if is found, otherwise doesn't move. /// - /// The key of the value to get. - /// - /// When this method returns, contains the value associated with , if is found, otherwise. - /// - /// if is found, otherwise. - public bool TryGetValue(ReadOnlySpan key, out T value) + /// Key to search for. + /// if moved forward, otherwise. + public bool MoveForward(byte key) { - value = default; - if (GetNode(key, out var node) && node.EndOfWord) + if (lastNode.Children.TryGetValue(key, out var existingNode)) { - value = node.Value; + lastNode = existingNode; return true; } return false; } - private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node node) - { - node = rootNode; - foreach (var character in prefix) - { - if (!node.TryGetChild(character, out node)) - { - return false; - } - } - - return true; - } + /// + /// Checks if child with specified exists at current position. + /// + /// Key to search for. 
+ /// if child with specified key did not exist at current position, otherwise. + public bool HasChild(byte key) + => lastNode.Children.ContainsKey(key); - private class Node + private class Node(T value) { - private readonly Dictionary children = []; - - public bool EndOfWord { get; set; } - - public T Value { get; set; } - - public Node GetOrCreateChild(byte value) - { - if (!children.TryGetValue(value, out Node? node)) - { - node = new(); - children[value] = node; - - return node; - } - - return node; - } + public Dictionary Children { get; } = []; - public bool TryGetChild(byte value, [MaybeNullWhen(false)] out Node node) - => children.TryGetValue(value, out node); + public T Value { get; } = value; } } From 0e34d48e6c69ad6a95225849cec56560338ac61a Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 9 Mar 2025 15:52:20 +0300 Subject: [PATCH 28/57] Removed unnecessary test data from trie tests (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/TrieTests.cs | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs index cf7b353..ea98c39 100644 --- a/Zipper/Zipper.Tests/TrieTests.cs +++ b/Zipper/Zipper.Tests/TrieTests.cs @@ -2,25 +2,6 @@ namespace Zipper.Tests; public class TrieTests { - private static readonly byte[][] TestStrings = - [ - .. new string[] - { - string.Empty, - "A", - "AB", - "ABC", - "ABCD", - } - .Select(System.Text.Encoding.UTF8.GetBytes) - ]; - - private static readonly byte[][][] TestStringsSet = - [ - TestStrings, - [.. 
TestUtil.GetRandomStrings()] - ]; - private readonly byte testKey = 157; private readonly int testValue = 252354; From 73918405aa2d7927cf7aa75cebc323dd757f83ec Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 9 Mar 2025 16:03:47 +0300 Subject: [PATCH 29/57] Switched writer and reader back to int (hw3 - lzw) (wip) --- Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs | 12 ++++++------ Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs | 6 +++--- Zipper/Zipper/ArbitraryBitReader.cs | 8 ++++---- Zipper/Zipper/ArbitraryBitWriter.cs | 8 ++++---- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs index e67f8a1..05e8732 100644 --- a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs +++ b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs @@ -44,7 +44,7 @@ public void Reader_ShouldReadValues_WrittenBy_Writer_Correctly([ValueSource(name { Assert.Multiple(() => { - Assert.That(reader.ReadNext(out uint number), Is.True); + Assert.That(reader.ReadNext(out int number), Is.True); Assert.That(number, Is.EqualTo(data.Numbers[i])); }); } @@ -64,7 +64,7 @@ public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable() using var writer = new ArbitraryBitWriter(memory, width); for (int i = 0; i < numbersCount; i++) { - writer.Write((uint)(i * i)); + writer.Write(i * i); } } @@ -75,7 +75,7 @@ public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable() { Assert.Multiple(() => { - Assert.That(reader.ReadNext(out uint number), Is.True); + Assert.That(reader.ReadNext(out int number), Is.True); Assert.That(number, Is.EqualTo(i * i)); }); } @@ -98,10 +98,10 @@ private static TestData[] GenerateData() int width = i + minWidth; long upperBound = 1L << width; - var numbers = new uint[numbersLength]; + var numbers = new int[numbersLength]; for (int j = 0; j < numbersLength; j++) { - numbers[j] = 
(uint)random.NextInt64(upperBound); + numbers[j] = (int)random.NextInt64(upperBound); } result[i] = new(width, numbers); @@ -110,7 +110,7 @@ private static TestData[] GenerateData() return result; } - public readonly record struct TestData(int Width, uint[] Numbers); + public readonly record struct TestData(int Width, int[] Numbers); // use MemoryStream, because implementing all Stream's abstract membrers leads to bad code coverage private class TestStream : MemoryStream diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs index 05daf30..6a44a5e 100644 --- a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs +++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs @@ -38,7 +38,7 @@ public void Writer_Dispose_ShouldDoNothing_IfBufferIsEmpty() // buffer is filled on every eight Write() for (int i = 0; i < 8; i++) { - writer.Write((uint)(i * i)); + writer.Write(i * i); } var position = memory.Position; @@ -53,7 +53,7 @@ public void Writer_Flush_ShouldDoNothing_IfBufferIsEmpty() // buffer is filled on every eight Write() for (int i = 0; i < 8; i++) { - writer.Write((uint)(i * i)); + writer.Write(i * i); } var position = memory.Position; @@ -68,7 +68,7 @@ public void Writer_Flush_ShouldFlushHalfFilledBuffer() // buffer is filled on every eight Write(), so write only 4 numbers for (int i = 0; i < 4; i++) { - writer.Write((uint)(i * i)); + writer.Write(i * i); } var position = memory.Position; diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs index 2308421..3b5857a 100644 --- a/Zipper/Zipper/ArbitraryBitReader.cs +++ b/Zipper/Zipper/ArbitraryBitReader.cs @@ -1,7 +1,7 @@ namespace Zipper; /// -/// Reads unsigned integers of arbitrary width. +/// Reads integers of arbitrary width. 
/// internal class ArbitraryBitReader { @@ -41,7 +41,7 @@ public ArbitraryBitReader(Stream stream, int width) /// /// When this method returns, contains the value that was read, if read successfully, zero otherwise. /// if was successfuly read, otherwise. - public bool ReadNext(out uint number) + public bool ReadNext(out int number) { number = 0; @@ -61,8 +61,8 @@ public bool ReadNext(out uint number) } int remainingBitsToRead = 8 - bitsReadFromBuffer.Value; - uint mask = 0xFFu >> bitsReadFromBuffer.Value; - uint toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth); + int mask = 0xFF >> bitsReadFromBuffer.Value; + int toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth); int previousRemainingWidth = remainingWidth; remainingWidth -= remainingBitsToRead; diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs index 72ad46f..eda32c2 100644 --- a/Zipper/Zipper/ArbitraryBitWriter.cs +++ b/Zipper/Zipper/ArbitraryBitWriter.cs @@ -3,7 +3,7 @@ namespace Zipper; using System.Buffers; /// -/// Writes unsigned integers of arbitrary width. +/// Writes integers of arbitrary width. /// internal class ArbitraryBitWriter : IDisposable { @@ -53,11 +53,11 @@ public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false) /// Writes to the underlying stream. /// /// Number to write. 
- public void Write(uint number) + public void Write(int number) { ObjectDisposedException.ThrowIf(disposed, this); - number &= 0xFFFFFFFF >> (32 - width); + number &= (int)(0xFFFFFFFF >> (32 - width)); int remainingWidth = width; while (remainingWidth > 0) @@ -66,7 +66,7 @@ public void Write(uint number) int bitsWrittenToCurrentByte = bitsWrittenInBuffer % 8; int bitsRemainingInCurrentByte = 8 - bitsWrittenToCurrentByte; - uint toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte); + int toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte); int previousRemainingWidth = remainingWidth; remainingWidth -= bitsRemainingInCurrentByte; remainingWidth = Math.Max(0, remainingWidth); From cfc13a102feacd4a872712b0f04f2b9f8c7feaad Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 9 Mar 2025 16:31:12 +0300 Subject: [PATCH 30/57] LZW implementation (hw3 - lzw) --- Zipper/Zipper/LZW/BlockType.cs | 22 ++++ Zipper/Zipper/LZW/LZWReader.cs | 178 ++++++++++++++++++++++++++++ Zipper/Zipper/LZW/LZWStream.cs | 206 +++++++++++++++++++++++++++++++++ Zipper/Zipper/LZW/LZWWriter.cs | 193 ++++++++++++++++++++++++++++++ Zipper/Zipper/ZipperMode.cs | 17 +++ 5 files changed, 616 insertions(+) create mode 100644 Zipper/Zipper/LZW/BlockType.cs create mode 100644 Zipper/Zipper/LZW/LZWReader.cs create mode 100644 Zipper/Zipper/LZW/LZWStream.cs create mode 100644 Zipper/Zipper/LZW/LZWWriter.cs create mode 100644 Zipper/Zipper/ZipperMode.cs diff --git a/Zipper/Zipper/LZW/BlockType.cs b/Zipper/Zipper/LZW/BlockType.cs new file mode 100644 index 0000000..ab4b2ce --- /dev/null +++ b/Zipper/Zipper/LZW/BlockType.cs @@ -0,0 +1,22 @@ +namespace Zipper.LZW; + +/// +/// Block type used to mark blocks in . +/// +internal enum BlockType : byte +{ + /// + /// Treat block as usual. + /// + Default = 0, + + /// + /// All blocks after this one should not expand code table. 
+ /// + FixCodeTableSize = 1, + + /// + /// This block was written after and is the last one to be read. + /// + EndOfStream = 2, +} diff --git a/Zipper/Zipper/LZW/LZWReader.cs b/Zipper/Zipper/LZW/LZWReader.cs new file mode 100644 index 0000000..90d0d42 --- /dev/null +++ b/Zipper/Zipper/LZW/LZWReader.cs @@ -0,0 +1,178 @@ +namespace Zipper.LZW; + +using System.Buffers; +using System.Buffers.Binary; +using System.Diagnostics; + +/// +/// Internal class used to read compressed data from stream. +/// +internal class LZWReader : IDisposable +{ + private static readonly ArrayPool BlockPool = ArrayPool.Create(); + + private readonly Stream stream; + private readonly Dictionary storedCodes; + private MemoryStream? memory; + private byte[]? block; + private int blockSize; + private bool endOfStreamReached; + + private byte[]? word; + private int wordPosition; + + private ArbitraryBitReader? reader; + private int lastWordCode; + private int maxCodesCount; + + /// + /// Initializes a new instance of the class. + /// + /// Stream to read from. + public LZWReader(Stream stream) + { + this.stream = stream; + + storedCodes = []; + for (int i = 0; i < 256; i++) + { + storedCodes[i] = [(byte)i]; + } + + lastWordCode = 256; + maxCodesCount = int.MaxValue; + } + + /// + /// Reads data from underlying stream, decompresses it and writes to . + /// + /// Buffer to write decompressed data to. + /// Count of read bytes. + /// Unexpected end of stream. + /// Invalid data stream. 
+ public int Read(Span buffer) + { + int bufferPosition = 0; + + while (bufferPosition < buffer.Length) + { + // write leftover word from previous (iteration) or (Read() call) + if (word != null) + { + int wordLength = Math.Min(buffer.Length - bufferPosition, word.Length - wordPosition); + word.AsSpan().Slice(wordPosition, wordLength).CopyTo(buffer[bufferPosition..]); + + bufferPosition += wordLength; + wordPosition += wordLength; + if (bufferPosition >= buffer.Length) + { + break; + } + + word = null; + } + + if (block == null && !TryReadBuffer()) + { + return 0; + } + + Debug.Assert(block != null, "Block is null"); + Debug.Assert(memory != null, "Memory is null"); + Debug.Assert(reader != null, "Reader is null"); + + if (!reader.ReadNext(out int code)) + { + if (endOfStreamReached) + { + break; + } + + throw new EndOfStreamException(); + } + + if (!storedCodes.TryGetValue(code, out var readWord)) + { + throw new InvalidDataException(); + } + + word = readWord; + wordPosition = 0; + + if (lastWordCode <= maxCodesCount) + { + if (storedCodes.TryGetValue(lastWordCode, out var incompleteWord)) + { + incompleteWord[^1] = word[0]; + lastWordCode++; + } + + var newWord = new byte[word.Length + 1]; + word.CopyTo(newWord, 0); + storedCodes[lastWordCode] = newWord; + } + + if (memory.Position >= blockSize) + { + BlockPool.Return(block); + block = null; + } + } + + return bufferPosition; + } + + /// + /// Disposes internal buffers. 
+ /// + public void Dispose() + { + if (block != null) + { + BlockPool.Return(block); + } + } + + private bool TryReadBuffer() + { + int headerSize = 4; + Span header = stackalloc byte[headerSize]; + if (stream.Read(header) != headerSize) + { + return false; + } + + var blockType = (BlockType)header[0]; + var codeWidth = header[1]; + + blockSize = BinaryPrimitives.ReadUInt16LittleEndian(header[2..4]); + block = BlockPool.Rent(blockSize); + + if (stream.Read(block, 0, blockSize) != blockSize) + { + throw new EndOfStreamException(); + } + + switch (blockType) + { + case BlockType.Default: + break; + + case BlockType.FixCodeTableSize: + maxCodesCount = BinaryPrimitives.ReadInt32LittleEndian(block); + return TryReadBuffer(); + + case BlockType.EndOfStream: + endOfStreamReached = true; + break; + + default: + throw new InvalidDataException(); + } + + memory = new(block); + reader = new(memory, codeWidth); + + return true; + } +} diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs new file mode 100644 index 0000000..44c566e --- /dev/null +++ b/Zipper/Zipper/LZW/LZWStream.cs @@ -0,0 +1,206 @@ +namespace Zipper.LZW; + +using System.Diagnostics; + +/// +/// Provides methods and properties used to compress and decompress streams by using the LZW algorithm. +/// +public class LZWStream : Stream +{ + /// + /// Smallest allowed Block length. + /// + public const int MinBlockSize = 256; + + /// + /// Largest allowed Block length. + /// + public const int MaxBlockSize = 64 * 1024; + + private const int DefaultBlockSize = 1024; + + private readonly Stream stream; + private readonly ZipperMode mode; + private readonly bool leaveOpen; + + private readonly LZWWriter? writer; + private readonly LZWReader? reader; + + private bool disposed; + + /// + public LZWStream(Stream stream, ZipperMode mode = ZipperMode.Compress, bool leaveOpen = false) + : this(stream, DefaultBlockSize, mode, leaveOpen) + { + } + + /// + /// Initializes a new instance of the class. 
+ /// + /// The stream to which compressed data is written or from which data to uncompress is read. + /// The internal block size to use, should be between and . + /// that determines whether to compress or uncompress data. + /// + /// The value indicating whether should be disposed along with this instance, + /// if is . + /// + public LZWStream(Stream stream, int blockSize, ZipperMode mode = ZipperMode.Compress, bool leaveOpen = false) + { + ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize); + ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize); + + if (!Enum.IsDefined(mode)) + { + throw new ArgumentException($"Value was neither {ZipperMode.Compress} nor {ZipperMode.Decompress}", nameof(mode)); + } + + if (mode == ZipperMode.Compress) + { + writer = new(stream, blockSize); + } + else + { + reader = new(stream); + } + + this.stream = stream; + this.mode = mode; + this.leaveOpen = leaveOpen; + disposed = false; + } + + /// + /// Gets a value indicating whether the stream supports reading. + /// + /// + public override bool CanRead => mode == ZipperMode.Decompress && stream.CanRead; + + /// + /// Gets a value indicating whether the stream supports reading. + /// + /// + public override bool CanWrite => mode == ZipperMode.Compress && stream.CanWrite; + + /// + /// Gets a value indicating whether the stream supports reading. + /// + /// + public override bool CanSeek => false; + + /// + /// This property is not supported and always throws a . + /// + /// + public override long Length => throw new NotSupportedException(); + + /// + /// This property is not supported and always throws a . + /// + /// + public override long Position + { + get => throw new NotSupportedException(); + set => throw new NotSupportedException(); + } + + /// + /// This method is not supported and always throws a . 
+ /// + /// + public override long Seek(long offset, SeekOrigin origin) + => throw new NotSupportedException(); + + /// + /// This method is not supported and always throws a . + /// + /// + public override void SetLength(long value) + => throw new NotSupportedException(); + + /// + /// Flushes the internal buffers. + /// + /// + public override void Flush() + { + EnsureNotClosed(); + if (mode == ZipperMode.Compress) + { + Debug.Assert(writer != null, "Writer is null"); + writer.Flush(); + } + } + + /// + public override int Read(byte[] buffer, int offset, int count) + => Read(buffer.AsSpan(offset, count)); + + /// + public override void Write(byte[] buffer, int offset, int count) + => Write(buffer.AsSpan(offset, count)); + + /// + public override void Write(ReadOnlySpan buffer) + { + EnsureNotClosed(); + EnsureMode(ZipperMode.Compress); + + Debug.Assert(writer != null, "Writer is null"); + + writer.Write(buffer); + } + + /// + public override int Read(Span buffer) + { + EnsureNotClosed(); + EnsureMode(ZipperMode.Decompress); + + Debug.Assert(reader != null, "Reader is null"); + + return reader.Read(buffer); + } + + /// + protected override void Dispose(bool disposing) + { + if (disposed) + { + return; + } + + if (disposing) + { + if (mode == ZipperMode.Compress) + { + Debug.Assert(writer != null, "Writer is null"); + writer.Dispose(); + } + else + { + Debug.Assert(reader != null, "Reader is null"); + reader.Dispose(); + } + + if (!leaveOpen) + { + stream.Dispose(); + } + + disposed = true; + } + } + + private void EnsureMode(ZipperMode mode) + { + if (this.mode != mode) + { + throw new InvalidOperationException(); + } + } + + private void EnsureNotClosed() + { + ObjectDisposedException.ThrowIf(disposed, this); + } +} diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs new file mode 100644 index 0000000..61d8e27 --- /dev/null +++ b/Zipper/Zipper/LZW/LZWWriter.cs @@ -0,0 +1,193 @@ +namespace Zipper.LZW; + +using System.Buffers; +using 
System.Buffers.Binary; +using System.Diagnostics; + +/// +/// Internal class used to write compressed data to stream. +/// +internal class LZWWriter : IDisposable +{ + private const int DataOffset = 4; + private const int MaxCodesCount = (320 * 1024) - 1; + + private static readonly ArrayPool BlockPool = ArrayPool.Create(); + + private readonly Stream stream; + private readonly int blockSize; + private readonly byte[] block; + private readonly MemoryStream memory; + private readonly Trie trie; + + private ArbitraryBitWriter writer; + private int bitsWrittenInBlock; + private bool disableCodeTableExpansion; + + private int blockIndex = 0; + private int bytesRead = 0; + + private int codeWidth; + private int codesCount; + + /// + /// Initializes a new instance of the class. + /// + /// Stream to write to. + /// The internal block size to use. + public LZWWriter(Stream stream, int blockSize) + { + ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, LZWStream.MinBlockSize); + ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, LZWStream.MaxBlockSize); + + this.stream = stream; + this.blockSize = blockSize; + + block = BlockPool.Rent(blockSize); + memory = new(block); + memory.Seek(DataOffset, SeekOrigin.Begin); + bitsWrittenInBlock = DataOffset * 8; + + codeWidth = 8; + codesCount = 1 << codeWidth; + + trie = new(); + for (int i = 0; i < codesCount; i++) + { + trie.AddChild((byte)i, i); + } + + writer = new(memory, codeWidth, true); + } + + /// + /// Compresses data in and writes it to underlying stream. + /// + /// Buffer to read data from. 
+ public void Write(ReadOnlySpan buffer) + { + for (int i = 0; i < buffer.Length; i++) + { + bytesRead++; + byte value = buffer[i]; + if (trie.AtRoot) + { + trie.MoveForward(value); + continue; + } + + if (!trie.HasChild(value)) + { + if (!disableCodeTableExpansion) + { + trie.AddChild(value, codesCount); + } + + bool bufferOverflow = bitsWrittenInBlock + codeWidth > blockSize * 8; + + bool shouldIncrementCodeWidth = false; + bool shouldDisableCodeTableExpansion = false; + if (!disableCodeTableExpansion) + { + codesCount++; + shouldDisableCodeTableExpansion = codesCount >= MaxCodesCount; + shouldIncrementCodeWidth = codesCount >= 1 << codeWidth; + } + + if (shouldDisableCodeTableExpansion) + { + disableCodeTableExpansion = true; + FlushInternal(BlockType.Default); + FlushInternal(BlockType.FixCodeTableSize); + } + else if (bufferOverflow || shouldIncrementCodeWidth) + { + FlushInternal(BlockType.Default); + } + + if (shouldIncrementCodeWidth) + { + codeWidth++; + + writer.Dispose(); + writer = new(memory, codeWidth, true); + } + + writer.Write(trie.CurrentValue); + bitsWrittenInBlock += codeWidth; + + trie.Reset(); + } + + trie.MoveForward(value); + } + } + + /// + /// Writes all pending data to the underlying stream. + /// + public void Flush() + { + FlushInternal(BlockType.Default); + + writer.Write(trie.CurrentValue); + bitsWrittenInBlock += codeWidth; + + trie.Reset(); + + FlushInternal(BlockType.Default); + } + + /// + /// Writes all pending data to the underlying stream and disposes internal buffers. 
+ /// + public void Dispose() + { + Flush(); + FlushInternal(BlockType.EndOfStream); + + memory.Dispose(); + BlockPool.Return(block); + } + + private void FlushInternal(BlockType type) + { + Debug.Assert(codeWidth <= 32, "Code width too large"); + Debug.Assert(Enum.IsDefined(type), $"Unknown {nameof(BlockType)} parameter"); + + if (type == BlockType.FixCodeTableSize) + { + var binWriter = new BinaryWriter(memory); + binWriter.Write(MaxCodesCount); + binWriter.Flush(); + + Console.WriteLine($"disable code table expansion @ block #{blockIndex}"); + } + + writer.Flush(); + int length = (int)memory.Position; + ushort dataLength = (ushort)(length - DataOffset); + + if (!(dataLength == 0 && type == BlockType.Default)) + { + block[0] = (byte)type; + block[1] = (byte)codeWidth; + BinaryPrimitives.WriteUInt16LittleEndian(block.AsSpan()[2..4], dataLength); + + stream.Write(block, 0, length); + stream.Flush(); + + if (blockIndex < 20 || (blockIndex % 1024) == 0 || type == BlockType.FixCodeTableSize) + { + Console.WriteLine($"wrote block #{blockIndex}, read: {bytesRead / 1024.0:0.00} K, block size: {dataLength}, codeWidth: {codeWidth}, used codes: {codesCount}"); + } + + blockIndex++; + } + + Array.Clear(block); + + memory.Seek(DataOffset, SeekOrigin.Begin); + bitsWrittenInBlock = DataOffset * 8; + } +} diff --git a/Zipper/Zipper/ZipperMode.cs b/Zipper/Zipper/ZipperMode.cs new file mode 100644 index 0000000..9df19ad --- /dev/null +++ b/Zipper/Zipper/ZipperMode.cs @@ -0,0 +1,17 @@ +namespace Zipper; + +/// +/// Specifies whether to compress data to or decompress data from the underlying stream. +/// +public enum ZipperMode +{ + /// + /// Compress data to the underlying stream. + /// + Compress, + + /// + /// Decompress data from the underlying stream. 
+ /// + Decompress, +} From 5ae229c9bf41b899fbf61e6f49c0b8dac95a426b Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 9 Mar 2025 16:55:21 +0300 Subject: [PATCH 31/57] Switch input parameter in BWT.ForwardTransform to Span (hw3 - lzw) --- Zipper/Zipper/BWT.cs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs index eabe9ce..607fb75 100644 --- a/Zipper/Zipper/BWT.cs +++ b/Zipper/Zipper/BWT.cs @@ -9,6 +9,7 @@ namespace Zipper; internal static class BWT { private static readonly ArrayPool Pool = ArrayPool.Create(); + private static readonly ArrayPool InputPool = ArrayPool.Create(); /// /// Transforms given byte sequence using Burrows-Wheeler algorithm. @@ -16,7 +17,7 @@ internal static class BWT /// Input byte sequence. /// Span to write transofrmed input to. /// Index that is used to reconstruct byte sequence. - public static int ForwardTransform(Memory input, Span output) + public static int ForwardTransform(Span input, Span output) { Debug.Assert(input.Length == output.Length, "Length of input and output should be the same"); int length = input.Length; @@ -32,12 +33,14 @@ public static int ForwardTransform(Memory input, Span output) offsets[i] = i; } + var inputCopy = InputPool.Rent(length); + input.CopyTo(inputCopy); + int Compare(int x, int y) { - var inputSpan = input.Span; for (int i = 0; i < length; i++) { - int compare = inputSpan[(i + x) % length] - inputSpan[(i + y) % length]; + int compare = inputCopy[(i + x) % length] - inputCopy[(i + y) % length]; if (compare != 0) { return compare; @@ -51,7 +54,6 @@ int Compare(int x, int y) offsetsSpan.Sort(Compare); - var inputSpan = input.Span; int? 
identityPosition = null; for (int i = 0; i < length; i++) { @@ -60,10 +62,11 @@ int Compare(int x, int y) identityPosition = i; } - output[i] = inputSpan[(offsets[i] + length - 1) % length]; + output[i] = inputCopy[(offsets[i] + length - 1) % length]; } Pool.Return(offsets); + InputPool.Return(inputCopy); Debug.Assert(identityPosition.HasValue, "Identity position not found"); From 3ec0ab3fe3c2816cccff3704558c90028ebfeec4 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 9 Mar 2025 16:56:43 +0300 Subject: [PATCH 32/57] Switch input parameter in forward and inverse transform in BWT to be readonly (hw3 - lzw) --- Zipper/Zipper/BWT.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs index 607fb75..099f06c 100644 --- a/Zipper/Zipper/BWT.cs +++ b/Zipper/Zipper/BWT.cs @@ -17,7 +17,7 @@ internal static class BWT /// Input byte sequence. /// Span to write transofrmed input to. /// Index that is used to reconstruct byte sequence. - public static int ForwardTransform(Span input, Span output) + public static int ForwardTransform(ReadOnlySpan input, Span output) { Debug.Assert(input.Length == output.Length, "Length of input and output should be the same"); int length = input.Length; @@ -79,7 +79,7 @@ int Compare(int x, int y) /// Transformed byte sequence. /// Index that is used to reconstruct byte sequence. /// Span to write reconstructed byte sequence to. 
- public static void InverseTransform(Span input, int identityIndex, Span output) + public static void InverseTransform(ReadOnlySpan input, int identityIndex, Span output) { Debug.Assert(input.Length == output.Length, "Length of input and output should be the same"); From 032cc484f5065308d8e949faa1644783ce32c530 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 9 Mar 2025 18:39:23 +0300 Subject: [PATCH 33/57] Disable debug logging in LZWWriter (hw3 - lzw) --- Zipper/Zipper/LZW/LZWWriter.cs | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs index 61d8e27..59a7877 100644 --- a/Zipper/Zipper/LZW/LZWWriter.cs +++ b/Zipper/Zipper/LZW/LZWWriter.cs @@ -24,9 +24,6 @@ internal class LZWWriter : IDisposable private int bitsWrittenInBlock; private bool disableCodeTableExpansion; - private int blockIndex = 0; - private int bytesRead = 0; - private int codeWidth; private int codesCount; @@ -68,7 +65,6 @@ public void Write(ReadOnlySpan buffer) { for (int i = 0; i < buffer.Length; i++) { - bytesRead++; byte value = buffer[i]; if (trie.AtRoot) { @@ -160,8 +156,6 @@ private void FlushInternal(BlockType type) var binWriter = new BinaryWriter(memory); binWriter.Write(MaxCodesCount); binWriter.Flush(); - - Console.WriteLine($"disable code table expansion @ block #{blockIndex}"); } writer.Flush(); @@ -176,13 +170,6 @@ private void FlushInternal(BlockType type) stream.Write(block, 0, length); stream.Flush(); - - if (blockIndex < 20 || (blockIndex % 1024) == 0 || type == BlockType.FixCodeTableSize) - { - Console.WriteLine($"wrote block #{blockIndex}, read: {bytesRead / 1024.0:0.00} K, block size: {dataLength}, codeWidth: {codeWidth}, used codes: {codesCount}"); - } - - blockIndex++; } Array.Clear(block); From ca5ed633a6e66e39ddaf5cc6d130921b2f901c94 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Sun, 9 
Mar 2025 19:30:32 +0300 Subject: [PATCH 34/57] Use non-nullable int in bit writer for good code coverage (hw3 - lzw) --- Zipper/Zipper/ArbitraryBitReader.cs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs index 3b5857a..3f53ad6 100644 --- a/Zipper/Zipper/ArbitraryBitReader.cs +++ b/Zipper/Zipper/ArbitraryBitReader.cs @@ -14,7 +14,7 @@ internal class ArbitraryBitReader private readonly Stream stream; private readonly int width; private byte buffer; - private int? bitsReadFromBuffer; + private int bitsReadFromBuffer; /// /// Initializes a new instance of the class. @@ -33,7 +33,7 @@ public ArbitraryBitReader(Stream stream, int width) this.stream = stream; this.width = width; - bitsReadFromBuffer = null; + bitsReadFromBuffer = int.MaxValue; } /// @@ -48,7 +48,7 @@ public bool ReadNext(out int number) int remainingWidth = width; while (remainingWidth > 0) { - if (bitsReadFromBuffer is null or >= 8) + if (bitsReadFromBuffer >= 8) { int readByte = stream.ReadByte(); if (readByte == -1) @@ -60,8 +60,8 @@ public bool ReadNext(out int number) bitsReadFromBuffer = 0; } - int remainingBitsToRead = 8 - bitsReadFromBuffer.Value; - int mask = 0xFF >> bitsReadFromBuffer.Value; + int remainingBitsToRead = 8 - bitsReadFromBuffer; + int mask = 0xFF >> bitsReadFromBuffer; int toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth); int previousRemainingWidth = remainingWidth; From 8d99900d508c54cdfea7ac6a5634df748c1393a7 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:29:59 +0300 Subject: [PATCH 35/57] Updated api docs in LZWStream (hw3 - lzw) --- Zipper/Zipper/LZW/LZWStream.cs | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs index 44c566e..d0933c4 100644 --- 
a/Zipper/Zipper/LZW/LZWStream.cs +++ b/Zipper/Zipper/LZW/LZWStream.cs @@ -44,6 +44,8 @@ public LZWStream(Stream stream, ZipperMode mode = ZipperMode.Compress, bool leav /// The value indicating whether should be disposed along with this instance, /// if is . /// + /// is not nor . + /// is out of range. public LZWStream(Stream stream, int blockSize, ZipperMode mode = ZipperMode.Compress, bool leaveOpen = false) { ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize); @@ -120,7 +122,7 @@ public override void SetLength(long value) /// /// Flushes the internal buffers. /// - /// + /// Stream is disposed. public override void Flush() { EnsureNotClosed(); @@ -131,15 +133,32 @@ public override void Flush() } } - /// + /// + /// Reads data from underlying stream, decompresses it and writes to . + /// + /// Buffer to write decompressed data to. + /// How many bytes to skip before reading from . + /// How many bytes to read from . + /// Count of read bytes. + /// Unexpected end of stream. + /// Invalid data stream. + /// Stream is set to mode. + /// Stream is disposed. public override int Read(byte[] buffer, int offset, int count) => Read(buffer.AsSpan(offset, count)); - /// + /// + /// Reads data from underlying stream, decompresses it and writes to . + /// + /// Buffer to write decompressed data to. + /// How many bytes to skip before reading from . + /// How many bytes to read from . + /// Stream is set to mode. + /// Stream is disposed. 
public override void Write(byte[] buffer, int offset, int count) => Write(buffer.AsSpan(offset, count)); - /// + /// public override void Write(ReadOnlySpan buffer) { EnsureNotClosed(); @@ -150,7 +169,7 @@ public override void Write(ReadOnlySpan buffer) writer.Write(buffer); } - /// + /// public override int Read(Span buffer) { EnsureNotClosed(); From 4de2514be88809c721774247a76b8d902a8eca39 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:32:07 +0300 Subject: [PATCH 36/57] Swapped Read() and Write() in LZWStream for consistency (hw3 - lzw) --- Zipper/Zipper/LZW/LZWStream.cs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs index d0933c4..9323110 100644 --- a/Zipper/Zipper/LZW/LZWStream.cs +++ b/Zipper/Zipper/LZW/LZWStream.cs @@ -139,13 +139,10 @@ public override void Flush() /// Buffer to write decompressed data to. /// How many bytes to skip before reading from . /// How many bytes to read from . - /// Count of read bytes. - /// Unexpected end of stream. - /// Invalid data stream. - /// Stream is set to mode. + /// Stream is set to mode. /// Stream is disposed. - public override int Read(byte[] buffer, int offset, int count) - => Read(buffer.AsSpan(offset, count)); + public override void Write(byte[] buffer, int offset, int count) + => Write(buffer.AsSpan(offset, count)); /// /// Reads data from underlying stream, decompresses it and writes to . @@ -153,10 +150,13 @@ public override int Read(byte[] buffer, int offset, int count) /// Buffer to write decompressed data to. /// How many bytes to skip before reading from . /// How many bytes to read from . - /// Stream is set to mode. + /// Count of read bytes. + /// Unexpected end of stream. + /// Invalid data stream. + /// Stream is set to mode. /// Stream is disposed. 
- public override void Write(byte[] buffer, int offset, int count) - => Write(buffer.AsSpan(offset, count)); + public override int Read(byte[] buffer, int offset, int count) + => Read(buffer.AsSpan(offset, count)); /// public override void Write(ReadOnlySpan buffer) From 2a0d6ec2f9067fd29f8c21fd88cdc30b87a4e3d7 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:36:55 +0300 Subject: [PATCH 37/57] Made ZipperMode required parameter in LZWStream constructor (hw3 - lzw) --- Zipper/Zipper/LZW/LZWStream.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs index 9323110..98d0759 100644 --- a/Zipper/Zipper/LZW/LZWStream.cs +++ b/Zipper/Zipper/LZW/LZWStream.cs @@ -29,7 +29,7 @@ public class LZWStream : Stream private bool disposed; /// - public LZWStream(Stream stream, ZipperMode mode = ZipperMode.Compress, bool leaveOpen = false) + public LZWStream(Stream stream, ZipperMode mode, bool leaveOpen = false) : this(stream, DefaultBlockSize, mode, leaveOpen) { } @@ -46,7 +46,7 @@ public LZWStream(Stream stream, ZipperMode mode = ZipperMode.Compress, bool leav /// /// is not nor . /// is out of range. 
- public LZWStream(Stream stream, int blockSize, ZipperMode mode = ZipperMode.Compress, bool leaveOpen = false) + public LZWStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) { ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize); ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize); From 027102c9eabbb1dddaf38e20dd97670aa3cf87ad Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:41:50 +0300 Subject: [PATCH 38/57] Updated 'uncompress' -> 'decompress' in api docs for consistency (hw3 - lzw) --- Zipper/Zipper/LZW/LZWStream.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs index 98d0759..631d0ee 100644 --- a/Zipper/Zipper/LZW/LZWStream.cs +++ b/Zipper/Zipper/LZW/LZWStream.cs @@ -37,9 +37,9 @@ public LZWStream(Stream stream, ZipperMode mode, bool leaveOpen = false) /// /// Initializes a new instance of the class. /// - /// The stream to which compressed data is written or from which data to uncompress is read. + /// The stream to which compressed data is written or from which data to decompress is read. /// The internal block size to use, should be between and . - /// that determines whether to compress or uncompress data. + /// that determines whether to compress or decompress data. /// /// The value indicating whether should be disposed along with this instance, /// if is . 
From 6fb71ead89bbc192328d03f3c9827f70747c4182 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Mon, 10 Mar 2025 16:03:27 +0300 Subject: [PATCH 39/57] Added checks for stream read-/writeabiltiy to LZW reader and writer (hw3 - lzw) --- Zipper/Zipper/LZW/LZWReader.cs | 5 +++++ Zipper/Zipper/LZW/LZWWriter.cs | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/Zipper/Zipper/LZW/LZWReader.cs b/Zipper/Zipper/LZW/LZWReader.cs index 90d0d42..27a35d8 100644 --- a/Zipper/Zipper/LZW/LZWReader.cs +++ b/Zipper/Zipper/LZW/LZWReader.cs @@ -31,6 +31,11 @@ internal class LZWReader : IDisposable /// Stream to read from. public LZWReader(Stream stream) { + if (!stream.CanRead) + { + throw new ArgumentException("Stream does not support reading", nameof(stream)); + } + this.stream = stream; storedCodes = []; diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs index 59a7877..bc83168 100644 --- a/Zipper/Zipper/LZW/LZWWriter.cs +++ b/Zipper/Zipper/LZW/LZWWriter.cs @@ -37,6 +37,11 @@ public LZWWriter(Stream stream, int blockSize) ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, LZWStream.MinBlockSize); ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, LZWStream.MaxBlockSize); + if (!stream.CanWrite) + { + throw new ArgumentException("Stream does not support writing", nameof(stream)); + } + this.stream = stream; this.blockSize = blockSize; From 53ded80e7900c5de6c5203ba2e516ec2366196ad Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Wed, 12 Mar 2025 01:52:55 +0300 Subject: [PATCH 40/57] Added BlockType.Flush and changed LZWStream.Flush behavior (hw3 - lzw) --- Zipper/Zipper/LZW/BlockType.cs | 5 +++++ Zipper/Zipper/LZW/LZWReader.cs | 21 ++++++++++++++++++--- Zipper/Zipper/LZW/LZWWriter.cs | 11 +++++++---- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/Zipper/Zipper/LZW/BlockType.cs b/Zipper/Zipper/LZW/BlockType.cs index 
ab4b2ce..42e7683 100644 --- a/Zipper/Zipper/LZW/BlockType.cs +++ b/Zipper/Zipper/LZW/BlockType.cs @@ -19,4 +19,9 @@ internal enum BlockType : byte /// This block was written after and is the last one to be read. /// EndOfStream = 2, + + /// + /// This block was written after . + /// + Flush = 3, } diff --git a/Zipper/Zipper/LZW/LZWReader.cs b/Zipper/Zipper/LZW/LZWReader.cs index 27a35d8..331fe0c 100644 --- a/Zipper/Zipper/LZW/LZWReader.cs +++ b/Zipper/Zipper/LZW/LZWReader.cs @@ -17,6 +17,7 @@ internal class LZWReader : IDisposable private byte[]? block; private int blockSize; private bool endOfStreamReached; + private bool flushed; private byte[]? word; private int wordPosition; @@ -93,6 +94,12 @@ public int Read(Span buffer) break; } + if (blockSize == 0) + { + block = null; + continue; + } + throw new EndOfStreamException(); } @@ -112,9 +119,12 @@ public int Read(Span buffer) lastWordCode++; } - var newWord = new byte[word.Length + 1]; - word.CopyTo(newWord, 0); - storedCodes[lastWordCode] = newWord; + if (!flushed) + { + var newWord = new byte[word.Length + 1]; + word.CopyTo(newWord, 0); + storedCodes[lastWordCode] = newWord; + } } if (memory.Position >= blockSize) @@ -161,6 +171,7 @@ private bool TryReadBuffer() switch (blockType) { case BlockType.Default: + flushed = false; break; case BlockType.FixCodeTableSize: @@ -171,6 +182,10 @@ private bool TryReadBuffer() endOfStreamReached = true; break; + case BlockType.Flush: + flushed = true; + break; + default: throw new InvalidDataException(); } diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs index bc83168..e75b581 100644 --- a/Zipper/Zipper/LZW/LZWWriter.cs +++ b/Zipper/Zipper/LZW/LZWWriter.cs @@ -131,12 +131,15 @@ public void Flush() { FlushInternal(BlockType.Default); - writer.Write(trie.CurrentValue); - bitsWrittenInBlock += codeWidth; + if (!trie.AtRoot) + { + writer.Write(trie.CurrentValue); + bitsWrittenInBlock += codeWidth; - trie.Reset(); + trie.Reset(); - 
FlushInternal(BlockType.Default); + FlushInternal(BlockType.Flush); + } } /// From 16267da17f1dfbe9bf7d3c39d79d2f536b4e7460 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Wed, 12 Mar 2025 01:53:38 +0300 Subject: [PATCH 41/57] Added tests for LZWStream (hw3 - lzw) --- .../LZW/LZWStreamExceptionsTests.cs | 152 ++++++++++++++++++ .../LZW/LZWStreamReadWriteTests.cs | 78 +++++++++ Zipper/Zipper.Tests/LZW/LZWTestsSource.cs | 22 +++ 3 files changed, 252 insertions(+) create mode 100644 Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs create mode 100644 Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs create mode 100644 Zipper/Zipper.Tests/LZW/LZWTestsSource.cs diff --git a/Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs b/Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs new file mode 100644 index 0000000..02ad3d5 --- /dev/null +++ b/Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs @@ -0,0 +1,152 @@ +namespace Zipper.Tests.LZW; + +using Zipper.LZW; + +public class LZWStreamExceptionsTests +{ + private LZWStream compressor; + private LZWStream decompressor; + + [SetUp] + public void Setup() + { + compressor = new LZWStream(Stream.Null, ZipperMode.Compress); + decompressor = new LZWStream(Stream.Null, ZipperMode.Decompress); + } + + [TearDown] + public void Teardown() + { + compressor.Dispose(); + decompressor.Dispose(); + } + + [Test] + public void Constructor_ShouldThrowIf_BlockSize_IsIncorrect() + { + Assert.Throws(() => new LZWStream(Stream.Null, LZWStream.MinBlockSize - 1, ZipperMode.Compress)); + Assert.Throws(() => new LZWStream(Stream.Null, LZWStream.MaxBlockSize + 1, ZipperMode.Compress)); + } + + [Test] + public void Constructor_ShouldThrowIf_Mode_IsNotDefined() + { + Assert.Throws(() => new LZWStream(Stream.Null, ZipperMode.Compress + 10)); + Assert.Throws(() => new LZWStream(Stream.Null, ZipperMode.Decompress + 100)); + } + + [Test] + public void 
Constructor_ShouldThrowIf_Mode_IsCompress_And_Stream_CanNotWrite() + { + Assert.Throws(() => new LZWStream(new UnwriteableStream(), ZipperMode.Compress)); + } + + [Test] + public void Constructor_ShouldThrowIf_Mode_IsDecompress_And_Stream_CanNotRead() + { + Assert.Throws(() => new LZWStream(new UnreadableStream(), ZipperMode.Decompress)); + } + + [Test] + public void UnsupportedPropertiesAndMethods_ShouldThrow() + { + Assert.Throws(() => _ = compressor.Length); + + Assert.Throws(() => _ = compressor.Position); + Assert.Throws(() => compressor.Position = 0); + + Assert.Throws(() => compressor.Seek(0, SeekOrigin.Begin)); + Assert.Throws(() => compressor.SetLength(0)); + } + + [Test] + public void CanRead_ShouldReturnFalse_AndCanWrite_ShouldReturnTrue_IfModeIs_Compress() + { + Assert.Multiple(() => + { + Assert.That(compressor.CanRead, Is.False); + Assert.That(compressor.CanWrite, Is.True); + }); + } + + [Test] + public void CanRead_ShouldReturnTrue_And_CanWrite_ShouldReturnFalse_IfModeIs_Decompress() + { + Assert.Multiple(() => + { + Assert.That(decompressor.CanRead, Is.True); + Assert.That(decompressor.CanWrite, Is.False); + }); + } + + [Test] + public void CanSeek_ShouldReturnFalse() + { + Assert.Multiple(() => + { + Assert.That(compressor.CanSeek, Is.False); + Assert.That(decompressor.CanSeek, Is.False); + }); + } + + [Test] + public void Write_ShouldThrow_IfModeIs_Decompress() + { + Assert.Throws(() => decompressor.Write([])); + Assert.Throws(() => decompressor.Write([], 0, 0)); + } + + [Test] + public void Read_ShouldThrow_IfModeIs_Compress() + { + Assert.Throws(() => _ = compressor.Read([])); + Assert.Throws(() => _ = compressor.Read([], 0, 0)); + } + + [Test] + public void AllMethodsShouldThrow_IfDisposed() + { + compressor.Dispose(); + decompressor.Dispose(); + + Assert.Throws(() => compressor.Write([])); + Assert.Throws(() => compressor.Write([], 0, 0)); + + Assert.Throws(() => _ = compressor.Read([])); + Assert.Throws(() => _ = compressor.Read([], 0, 0)); + 
+ Assert.Throws(compressor.Flush); + } + + [Test] + public void ReadAndWrite_ShouldThrow_IfArgumentsAreIncorrect() + { + int length = 8; + var buffer = new byte[length]; + + Test(buffer, -1, length); + Test(buffer, length, length); + + Test(buffer, 0, -1); + Test(buffer, 0, length + 1); + + Test(buffer, length / 2, length); + Test(buffer, 0, -1); + + void Test(byte[] buffer, int offset, int count) + { + Assert.Throws(() => compressor.Write(buffer, offset, count)); + Assert.Throws(() => _ = decompressor.Read(buffer, offset, count)); + } + } + + private class UnwriteableStream : MemoryStream + { + public override bool CanWrite => false; + } + + private class UnreadableStream : MemoryStream + { + public override bool CanRead => false; + } +} diff --git a/Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs b/Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs new file mode 100644 index 0000000..9472400 --- /dev/null +++ b/Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs @@ -0,0 +1,78 @@ +namespace Zipper.Tests.LZW; + +using System.Text; +using Zipper.LZW; + +public class LZWStreamReadWriteTests +{ + private static readonly int[] BufferSizes = [1, 2, 3, 7, 14, 19, 31, 63, 127, 255, 1023]; + + private MemoryStream stream; + + [SetUp] + public void Setup() + { + stream = new(); + } + + [Test] + public void Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(BufferSizes))] int readBufferSize) + { + var testData = LZWTestsSource.Data; + + using (var compressor = new LZWStream(stream, ZipperMode.Compress, true)) + { + compressor.Write(testData); + } + + DecompressData_And_AssertThat_ItIsCorrect(testData, readBufferSize); + } + + [Test] + public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes))] int readWriteBufferSize) + { + var testData = LZWTestsSource.Data.AsSpan(); + + using (var compressor = new LZWStream(stream, ZipperMode.Compress, true)) + { + for (int offset = 0; offset < testData.Length; offset += readWriteBufferSize) + { + var 
dataSlice = testData.Slice(offset, Math.Min(readWriteBufferSize, testData.Length - offset)); + compressor.Write(dataSlice); + compressor.Flush(); + } + } + + DecompressData_And_AssertThat_ItIsCorrect(testData, readWriteBufferSize); + } + + private void DecompressData_And_AssertThat_ItIsCorrect(ReadOnlySpan testData, int readBufferSize) + { + stream.Seek(0, SeekOrigin.Begin); + + using var decompressor = new LZWStream(stream, ZipperMode.Decompress, true); + int offset = 0; + Span buffer = stackalloc byte[readBufferSize]; + + while (true) + { + int bytesRead = decompressor.Read(buffer); + + Assert.That(bytesRead, Is.LessThanOrEqualTo(readBufferSize)); + + if (bytesRead == 0) + { + break; + } + + Assert.That(offset + bytesRead, Is.LessThanOrEqualTo(testData.Length)); + + var slicedData = testData.Slice(offset, bytesRead); + var slicedBuffer = buffer[..bytesRead]; + + Assert.That(slicedBuffer.SequenceEqual(slicedData), Is.True); + + offset += bytesRead; + } + } +} diff --git a/Zipper/Zipper.Tests/LZW/LZWTestsSource.cs b/Zipper/Zipper.Tests/LZW/LZWTestsSource.cs new file mode 100644 index 0000000..7efa90a --- /dev/null +++ b/Zipper/Zipper.Tests/LZW/LZWTestsSource.cs @@ -0,0 +1,22 @@ +namespace Zipper.Tests.LZW; + +[SetUpFixture] +public class LZWTestsSource +{ + // "https://en.wikipedia.org/wiki/Lempel–Ziv–Welch"; + private const string DataUrl = "https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch"; + + public static byte[] Data { get; private set; } + + [OneTimeSetUp] + public static void DataSetup() + { + using var client = new HttpClient(); + var response = client.Send(new HttpRequestMessage(HttpMethod.Get, DataUrl)); + response.EnsureSuccessStatusCode(); + using var content = response.Content.ReadAsStream(); + using var memory = new MemoryStream(); + content.CopyTo(memory); + Data = memory.ToArray(); + } +} From e235467c91df6e5d76ae2a4e31929263903d1d75 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: 
Thu, 13 Mar 2025 01:20:11 +0300 Subject: [PATCH 42/57] Fixed api docs in LZWStream (hw3 - lzw) --- Zipper/Zipper/LZW/LZWStream.cs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs index 631d0ee..5783a8e 100644 --- a/Zipper/Zipper/LZW/LZWStream.cs +++ b/Zipper/Zipper/LZW/LZWStream.cs @@ -8,12 +8,12 @@ namespace Zipper.LZW; public class LZWStream : Stream { /// - /// Smallest allowed Block length. + /// Smallest allowed block length. /// public const int MinBlockSize = 256; /// - /// Largest allowed Block length. + /// Largest allowed block length. /// public const int MaxBlockSize = 64 * 1024; @@ -78,13 +78,13 @@ public LZWStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = public override bool CanRead => mode == ZipperMode.Decompress && stream.CanRead; /// - /// Gets a value indicating whether the stream supports reading. + /// Gets a value indicating whether the stream supports writing. /// /// public override bool CanWrite => mode == ZipperMode.Compress && stream.CanWrite; /// - /// Gets a value indicating whether the stream supports reading. + /// Gets a value indicating whether the stream supports seeking. /// /// public override bool CanSeek => false; @@ -134,9 +134,9 @@ public override void Flush() } /// - /// Reads data from underlying stream, decompresses it and writes to . + /// Reads data from , compresses it and writes it to the underlying stream. /// - /// Buffer to write decompressed data to. + /// Buffer that contains data to be compressed. /// How many bytes to skip before reading from . /// How many bytes to read from . /// Stream is set to mode. @@ -145,12 +145,12 @@ public override void Write(byte[] buffer, int offset, int count) => Write(buffer.AsSpan(offset, count)); /// - /// Reads data from underlying stream, decompresses it and writes to . + /// Reads data from the underlying stream, decompresses it and writes to . 
/// /// Buffer to write decompressed data to. - /// How many bytes to skip before reading from . - /// How many bytes to read from . - /// Count of read bytes. + /// How many bytes to skip before writing to . + /// How many bytes to write to . + /// Count of read bytes, can be less than . /// Unexpected end of stream. /// Invalid data stream. /// Stream is set to mode. From ef78f30aebb7a67ae8987a82c5276081b5165594 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 13 Mar 2025 17:48:01 +0300 Subject: [PATCH 43/57] Added BWTStream and BWTMode (hw3 - lzw) --- Zipper/Zipper/BWTMode.cs | 17 ++ Zipper/Zipper/BWTStream.cs | 323 +++++++++++++++++++++++++++++++++++++ 2 files changed, 340 insertions(+) create mode 100644 Zipper/Zipper/BWTMode.cs create mode 100644 Zipper/Zipper/BWTStream.cs diff --git a/Zipper/Zipper/BWTMode.cs b/Zipper/Zipper/BWTMode.cs new file mode 100644 index 0000000..d54036b --- /dev/null +++ b/Zipper/Zipper/BWTMode.cs @@ -0,0 +1,17 @@ +namespace Zipper; + +/// +/// Specifies whether to transform data to or reconstruct data from the underlying stream in . +/// +public enum BWTMode +{ + /// + /// Transform data and write it to the underlying stream. + /// + Transform, + + /// + /// Read data from the underlying stream and reconstruct it. + /// + Reconstruct, +} diff --git a/Zipper/Zipper/BWTStream.cs b/Zipper/Zipper/BWTStream.cs new file mode 100644 index 0000000..be8a652 --- /dev/null +++ b/Zipper/Zipper/BWTStream.cs @@ -0,0 +1,323 @@ +namespace Zipper; + +using System.Buffers; +using System.Buffers.Binary; +using System.Diagnostics; + +/// +/// Provides methods and properties used to transform and reconstruct data streams by using the BWT algorithm. +/// +public class BWTStream : Stream +{ + /// + /// Smallest allowed block length. + /// + public const int MinBlockSize = 64; + + /// + /// Largest allowed block length. 
+ /// + public const int MaxBlockSize = 4 * 1024; + + private const int DefaultBlockSize = 512; + + private static readonly ArrayPool BlockPool = ArrayPool.Create(); + + private readonly Stream stream; + private readonly BWTMode mode; + private readonly bool leaveOpen; + + private byte[]? block; + private int blockPosition; + private int blockSize; + + private bool disposed; + + /// + public BWTStream(Stream stream, BWTMode mode, bool leaveOpen = false) + : this(stream, DefaultBlockSize, mode, leaveOpen) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The stream to which transformed data is written or from which data to reconstruct is read. + /// The internal block size to use, should be between and . + /// that determines whether to transform or reconstruct data. + /// + /// The value indicating whether should be disposed along with this instance, + /// if is . + /// + /// is not nor . + /// is out of range. + public BWTStream(Stream stream, int blockSize, BWTMode mode, bool leaveOpen = false) + { + ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize); + ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize); + + if (!Enum.IsDefined(mode)) + { + throw new ArgumentException($"Value was neither {BWTMode.Transform} nor {BWTMode.Reconstruct}", nameof(mode)); + } + + if (mode == BWTMode.Transform) + { + if (!stream.CanWrite) + { + throw new ArgumentException("Stream does not support writing", nameof(stream)); + } + + this.blockSize = blockSize; + } + else + { + if (!stream.CanRead) + { + throw new ArgumentException("Stream does not support reading", nameof(stream)); + } + } + + this.stream = stream; + this.mode = mode; + this.leaveOpen = leaveOpen; + blockPosition = 0; + disposed = false; + } + + /// + /// Gets a value indicating whether the stream supports reading. 
+ /// + /// + public override bool CanRead => mode == BWTMode.Reconstruct && stream.CanRead; + + /// + /// Gets a value indicating whether the stream supports writing. + /// + /// + public override bool CanWrite => mode == BWTMode.Transform && stream.CanWrite; + + /// + /// Gets a value indicating whether the stream supports seeking. + /// + /// + public override bool CanSeek => false; + + /// + /// This property is not supported and always throws a . + /// + /// + public override long Length => throw new NotSupportedException(); + + /// + /// This property is not supported and always throws a . + /// + /// + public override long Position + { + get => throw new NotSupportedException(); + set => throw new NotSupportedException(); + } + + /// + /// This method is not supported and always throws a . + /// + /// + public override long Seek(long offset, SeekOrigin origin) + => throw new NotSupportedException(); + + /// + /// This method is not supported and always throws a . + /// + /// + public override void SetLength(long value) + => throw new NotSupportedException(); + + /// + /// Flushes the internal buffers. + /// + /// Stream is disposed. + public override void Flush() + { + EnsureNotClosed(); + if (mode == BWTMode.Transform) + { + if (block != null) + { + WriteBlock(); + } + + stream.Flush(); + } + } + + /// + /// Reads data from , transforms it and writes it to the underlying stream. + /// + /// Buffer that contains data to be transformed. + /// How many bytes to skip before reading from . + /// How many bytes to read from . + /// Stream is set to mode. + /// Stream is disposed. + public override void Write(byte[] buffer, int offset, int count) + => Write(buffer.AsSpan(offset, count)); + + /// + /// Reads data from the underlying stream, reconstructs it and writes to . + /// + /// Buffer to write reconstructed data to. + /// How many bytes to skip before writing to . + /// How many bytes to write to . + /// Count of read bytes, can be less than . 
+ /// Unexpected end of stream. + /// Invalid data stream. + /// Stream is set to mode. + /// Stream is disposed. + public override int Read(byte[] buffer, int offset, int count) + => Read(buffer.AsSpan(offset, count)); + + /// + public override void Write(ReadOnlySpan buffer) + { + EnsureNotClosed(); + EnsureMode(BWTMode.Transform); + + int bufferPosition = 0; + while (bufferPosition < buffer.Length) + { + if (block == null) + { + block = BlockPool.Rent(blockSize); + blockPosition = 0; + } + + int copyLength = Math.Min(blockSize - blockPosition, buffer.Length - bufferPosition); + buffer.Slice(bufferPosition, copyLength).CopyTo(block.AsSpan().Slice(blockPosition, copyLength)); + + bufferPosition += copyLength; + blockPosition += copyLength; + + if (blockPosition >= blockSize) + { + WriteBlock(); + } + } + } + + /// + public override int Read(Span buffer) + { + EnsureNotClosed(); + EnsureMode(BWTMode.Reconstruct); + + int bufferPosition = 0; + while (bufferPosition < buffer.Length) + { + if (block == null && !ReadBlock()) + { + break; + } + + int copyLength = Math.Min(blockSize - blockPosition, buffer.Length - bufferPosition); + block.AsSpan().Slice(blockPosition, copyLength).CopyTo(buffer.Slice(bufferPosition, copyLength)); + + bufferPosition += copyLength; + blockPosition += copyLength; + + if (blockPosition >= blockSize) + { + block = null; + } + } + + return bufferPosition; + } + + /// + protected override void Dispose(bool disposing) + { + if (disposed) + { + return; + } + + if (disposing) + { + Flush(); + + if (block != null) + { + BlockPool.Return(block); + } + + if (!leaveOpen) + { + stream.Dispose(); + } + + disposed = true; + } + } + + private void WriteBlock() + { + Debug.Assert(block != null, "Block is null"); + + var transformBuffer = BlockPool.Rent(blockPosition); + int identityIndex = BWT.ForwardTransform(block.AsSpan()[0..blockPosition], transformBuffer.AsSpan()[0..blockPosition]); + + Span header = stackalloc byte[8]; + 
BinaryPrimitives.WriteInt32LittleEndian(header[0..4], blockPosition); + BinaryPrimitives.WriteInt32LittleEndian(header[4..8], identityIndex); + + stream.Write(header); + stream.Write(transformBuffer, 0, blockPosition); + + BlockPool.Return(transformBuffer); + BlockPool.Return(block); + + block = null; + } + + private bool ReadBlock() + { + Debug.Assert(block == null, "Block was not returned before reading"); + + Span header = stackalloc byte[8]; + if (stream.Read(header) != 8) + { + return false; + } + + blockSize = BinaryPrimitives.ReadInt32LittleEndian(header[0..4]); + var identityIndex = BinaryPrimitives.ReadInt32LittleEndian(header[4..8]); + + var transformedData = BlockPool.Rent(blockSize); + if (stream.Read(transformedData, 0, blockSize) != blockSize) + { + BlockPool.Return(transformedData); + + return false; + } + + block = BlockPool.Rent(blockSize); + BWT.InverseTransform(transformedData.AsSpan()[0..blockSize], identityIndex, block.AsSpan()[0..blockSize]); + BlockPool.Return(transformedData); + blockPosition = 0; + + return true; + } + + private void EnsureMode(BWTMode mode) + { + if (this.mode != mode) + { + throw new InvalidOperationException(); + } + } + + private void EnsureNotClosed() + { + ObjectDisposedException.ThrowIf(disposed, this); + } +} From 42b16d8d8dc628a71d0967d6466c9352d5ad1435 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 13 Mar 2025 17:59:25 +0300 Subject: [PATCH 44/57] Overhauled LZWStream tests (hw3 - lzw) --- .../Zipper.Tests/Streams/IStreamProvider.cs | 20 +++++++++++ Zipper/Zipper.Tests/Streams/LZWStreamTests.cs | 33 +++++++++++++++++++ .../StreamExceptionsTests.cs} | 28 ++++++++-------- .../StreamReadWriteTests.cs} | 28 ++++++++++------ .../StreamTestsSource.cs} | 4 +-- 5 files changed, 87 insertions(+), 26 deletions(-) create mode 100644 Zipper/Zipper.Tests/Streams/IStreamProvider.cs create mode 100644 Zipper/Zipper.Tests/Streams/LZWStreamTests.cs rename 
Zipper/Zipper.Tests/{LZW/LZWStreamExceptionsTests.cs => Streams/StreamExceptionsTests.cs} (76%) rename Zipper/Zipper.Tests/{LZW/LZWStreamReadWriteTests.cs => Streams/StreamReadWriteTests.cs} (68%) rename Zipper/Zipper.Tests/{LZW/LZWTestsSource.cs => Streams/StreamTestsSource.cs} (91%) diff --git a/Zipper/Zipper.Tests/Streams/IStreamProvider.cs b/Zipper/Zipper.Tests/Streams/IStreamProvider.cs new file mode 100644 index 0000000..2d45fc0 --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/IStreamProvider.cs @@ -0,0 +1,20 @@ +namespace Zipper.Tests.Streams; + +public interface IStreamProvider + where TStream : Stream + where TMode : Enum +{ + public static abstract int MinBlockSize { get; } + + public static abstract int MaxBlockSize { get; } + + public static abstract TMode WritingMode { get; } + + public static abstract TMode ReadingMode { get; } + + public static abstract TMode UndefinedMode { get; } + + public static abstract TStream CreateStream(Stream stream, int blockSize, TMode mode, bool leaveOpen = false); + + public static abstract TStream CreateStream(Stream stream, TMode mode, bool leaveOpen = false); +} diff --git a/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs b/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs new file mode 100644 index 0000000..2d3b672 --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs @@ -0,0 +1,33 @@ +namespace Zipper.Tests.Streams; + +using Zipper.LZW; + +public class LZWStreamTests +{ + public class LZWStreamProvider : IStreamProvider + { + public static int MinBlockSize => LZWStream.MinBlockSize; + + public static int MaxBlockSize => LZWStream.MaxBlockSize; + + public static ZipperMode WritingMode => ZipperMode.Compress; + + public static ZipperMode ReadingMode => ZipperMode.Decompress; + + public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100; + + public static LZWStream CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) + => new(stream, blockSize, mode, leaveOpen); + + 
public static LZWStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false) + => new(stream, mode, leaveOpen); + } + + public class LZWStreamExceptionsTests : StreamExceptionsTests + { + } + + public class LZWStreamReadWriteTests : StreamReadWriteTests + { + } +} diff --git a/Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs b/Zipper/Zipper.Tests/Streams/StreamExceptionsTests.cs similarity index 76% rename from Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs rename to Zipper/Zipper.Tests/Streams/StreamExceptionsTests.cs index 02ad3d5..a67b809 100644 --- a/Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs +++ b/Zipper/Zipper.Tests/Streams/StreamExceptionsTests.cs @@ -1,17 +1,18 @@ -namespace Zipper.Tests.LZW; +namespace Zipper.Tests.Streams; -using Zipper.LZW; - -public class LZWStreamExceptionsTests +public abstract class StreamExceptionsTests + where TStream : Stream + where TMode : Enum + where TProvider : IStreamProvider { - private LZWStream compressor; - private LZWStream decompressor; + private TStream compressor; + private TStream decompressor; [SetUp] public void Setup() { - compressor = new LZWStream(Stream.Null, ZipperMode.Compress); - decompressor = new LZWStream(Stream.Null, ZipperMode.Decompress); + compressor = TProvider.CreateStream(Stream.Null, TProvider.MinBlockSize, TProvider.WritingMode); + decompressor = TProvider.CreateStream(Stream.Null, TProvider.MinBlockSize, TProvider.ReadingMode); } [TearDown] @@ -24,27 +25,26 @@ public void Teardown() [Test] public void Constructor_ShouldThrowIf_BlockSize_IsIncorrect() { - Assert.Throws(() => new LZWStream(Stream.Null, LZWStream.MinBlockSize - 1, ZipperMode.Compress)); - Assert.Throws(() => new LZWStream(Stream.Null, LZWStream.MaxBlockSize + 1, ZipperMode.Compress)); + Assert.Throws(() => TProvider.CreateStream(Stream.Null, TProvider.MinBlockSize - 1, TProvider.WritingMode)); + Assert.Throws(() => TProvider.CreateStream(Stream.Null, TProvider.MaxBlockSize + 1, 
TProvider.WritingMode)); } [Test] public void Constructor_ShouldThrowIf_Mode_IsNotDefined() { - Assert.Throws(() => new LZWStream(Stream.Null, ZipperMode.Compress + 10)); - Assert.Throws(() => new LZWStream(Stream.Null, ZipperMode.Decompress + 100)); + Assert.Throws(() => TProvider.CreateStream(Stream.Null, TProvider.UndefinedMode)); } [Test] public void Constructor_ShouldThrowIf_Mode_IsCompress_And_Stream_CanNotWrite() { - Assert.Throws(() => new LZWStream(new UnwriteableStream(), ZipperMode.Compress)); + Assert.Throws(() => TProvider.CreateStream(new UnwriteableStream(), TProvider.WritingMode)); } [Test] public void Constructor_ShouldThrowIf_Mode_IsDecompress_And_Stream_CanNotRead() { - Assert.Throws(() => new LZWStream(new UnreadableStream(), ZipperMode.Decompress)); + Assert.Throws(() => TProvider.CreateStream(new UnreadableStream(), TProvider.ReadingMode)); } [Test] diff --git a/Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs b/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs similarity index 68% rename from Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs rename to Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs index 9472400..c25ca13 100644 --- a/Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs +++ b/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs @@ -1,9 +1,9 @@ -namespace Zipper.Tests.LZW; +namespace Zipper.Tests.Streams; -using System.Text; -using Zipper.LZW; - -public class LZWStreamReadWriteTests +public abstract class StreamReadWriteTests + where TStream : Stream + where TMode : Enum + where TProvider : IStreamProvider { private static readonly int[] BufferSizes = [1, 2, 3, 7, 14, 19, 31, 63, 127, 255, 1023]; @@ -18,9 +18,9 @@ public void Setup() [Test] public void Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(BufferSizes))] int readBufferSize) { - var testData = LZWTestsSource.Data; + var testData = GetData(readBufferSize); - using (var compressor = new LZWStream(stream, ZipperMode.Compress, true)) + using (var 
compressor = TProvider.CreateStream(stream, TProvider.WritingMode, true)) { compressor.Write(testData); } @@ -31,9 +31,9 @@ public void Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(Bu [Test] public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes))] int readWriteBufferSize) { - var testData = LZWTestsSource.Data.AsSpan(); + var testData = GetData(readWriteBufferSize); - using (var compressor = new LZWStream(stream, ZipperMode.Compress, true)) + using (var compressor = TProvider.CreateStream(stream, TProvider.WritingMode, true)) { for (int offset = 0; offset < testData.Length; offset += readWriteBufferSize) { @@ -46,11 +46,19 @@ public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes)) DecompressData_And_AssertThat_ItIsCorrect(testData, readWriteBufferSize); } + private static ReadOnlySpan GetData(int bufferSize) + { + var data = StreamTestsSource.Data.AsSpan(); + var relativeLength = Math.Clamp(bufferSize / (float)BufferSizes[^1], 0, 1); + + return data[..(int)Math.Ceiling(data.Length * relativeLength)]; + } + private void DecompressData_And_AssertThat_ItIsCorrect(ReadOnlySpan testData, int readBufferSize) { stream.Seek(0, SeekOrigin.Begin); - using var decompressor = new LZWStream(stream, ZipperMode.Decompress, true); + using var decompressor = TProvider.CreateStream(stream, TProvider.ReadingMode, true); int offset = 0; Span buffer = stackalloc byte[readBufferSize]; diff --git a/Zipper/Zipper.Tests/LZW/LZWTestsSource.cs b/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs similarity index 91% rename from Zipper/Zipper.Tests/LZW/LZWTestsSource.cs rename to Zipper/Zipper.Tests/Streams/StreamTestsSource.cs index 7efa90a..32b0013 100644 --- a/Zipper/Zipper.Tests/LZW/LZWTestsSource.cs +++ b/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs @@ -1,7 +1,7 @@ -namespace Zipper.Tests.LZW; +namespace Zipper.Tests.Streams; [SetUpFixture] -public class LZWTestsSource +public class StreamTestsSource { // 
"https://en.wikipedia.org/wiki/Lempel–Ziv–Welch"; private const string DataUrl = "https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch"; From f0975eec14c7d73107546e720e6e1908a228d72a Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 13 Mar 2025 17:59:56 +0300 Subject: [PATCH 45/57] Added tests for BWTStream (hw3 - lzw) --- Zipper/Zipper.Tests/Streams/BWTStreamTests.cs | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 Zipper/Zipper.Tests/Streams/BWTStreamTests.cs diff --git a/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs b/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs new file mode 100644 index 0000000..1343f2f --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs @@ -0,0 +1,31 @@ +namespace Zipper.Tests.Streams; + +public class BWTStreamTests +{ + public class BWTStreamProvider : IStreamProvider + { + public static int MinBlockSize => BWTStream.MinBlockSize; + + public static int MaxBlockSize => BWTStream.MaxBlockSize; + + public static BWTMode WritingMode => BWTMode.Transform; + + public static BWTMode ReadingMode => BWTMode.Reconstruct; + + public static BWTMode UndefinedMode => BWTMode.Reconstruct + 100; + + public static BWTStream CreateStream(Stream stream, int blockSize, BWTMode mode, bool leaveOpen = false) + => new(stream, blockSize, mode, leaveOpen); + + public static BWTStream CreateStream(Stream stream, BWTMode mode, bool leaveOpen = false) + => new(stream, mode, leaveOpen); + } + + public class BWTStreamExceptionsTests : StreamExceptionsTests + { + } + + public class BWTStreamReadWriteTests : StreamReadWriteTests + { + } +} From 0b63462f2fb95af3dcfd0fcde9d7ed19caa56c87 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 13 Mar 2025 20:18:07 +0300 Subject: [PATCH 46/57] Added some checks in BWTStream (hw3 - lzw) --- Zipper/Zipper/BWTStream.cs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 
deletions(-) diff --git a/Zipper/Zipper/BWTStream.cs b/Zipper/Zipper/BWTStream.cs index be8a652..9e0adca 100644 --- a/Zipper/Zipper/BWTStream.cs +++ b/Zipper/Zipper/BWTStream.cs @@ -245,10 +245,7 @@ protected override void Dispose(bool disposing) { Flush(); - if (block != null) - { - BlockPool.Return(block); - } + Debug.Assert(block == null, "Block was not returned"); if (!leaveOpen) { @@ -292,12 +289,17 @@ private bool ReadBlock() blockSize = BinaryPrimitives.ReadInt32LittleEndian(header[0..4]); var identityIndex = BinaryPrimitives.ReadInt32LittleEndian(header[4..8]); + if (identityIndex < 0 || identityIndex >= blockSize) + { + throw new InvalidDataException(); + } + var transformedData = BlockPool.Rent(blockSize); if (stream.Read(transformedData, 0, blockSize) != blockSize) { BlockPool.Return(transformedData); - return false; + throw new EndOfStreamException(); } block = BlockPool.Rent(blockSize); From 2e449db1ac248623e21a16142a910642f86df74f Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 13 Mar 2025 20:19:16 +0300 Subject: [PATCH 47/57] Added image data as test source (hw3 - lzw) --- .../Streams/StreamReadWriteTests.cs | 7 ++- .../Zipper.Tests/Streams/StreamTestsSource.cs | 46 ++++++++++++++++--- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs b/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs index c25ca13..4927f45 100644 --- a/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs +++ b/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs @@ -18,7 +18,7 @@ public void Setup() [Test] public void Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(BufferSizes))] int readBufferSize) { - var testData = GetData(readBufferSize); + var testData = GetData(StreamTestsSource.ImageData, readBufferSize); using (var compressor = TProvider.CreateStream(stream, TProvider.WritingMode, true)) { @@ -31,7 +31,7 @@ public void 
Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(Bu [Test] public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes))] int readWriteBufferSize) { - var testData = GetData(readWriteBufferSize); + var testData = GetData(StreamTestsSource.TextData, readWriteBufferSize); using (var compressor = TProvider.CreateStream(stream, TProvider.WritingMode, true)) { @@ -46,9 +46,8 @@ public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes)) DecompressData_And_AssertThat_ItIsCorrect(testData, readWriteBufferSize); } - private static ReadOnlySpan GetData(int bufferSize) + private static ReadOnlySpan GetData(ReadOnlySpan data, int bufferSize) { - var data = StreamTestsSource.Data.AsSpan(); var relativeLength = Math.Clamp(bufferSize / (float)BufferSizes[^1], 0, 1); return data[..(int)Math.Ceiling(data.Length * relativeLength)]; diff --git a/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs b/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs index 32b0013..e970322 100644 --- a/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs +++ b/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs @@ -1,22 +1,56 @@ namespace Zipper.Tests.Streams; +using System.Diagnostics.CodeAnalysis; + [SetUpFixture] +[ExcludeFromCodeCoverage] public class StreamTestsSource { - // "https://en.wikipedia.org/wiki/Lempel–Ziv–Welch"; - private const string DataUrl = "https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch"; + // "https://filesamples.com/samples/image/bmp/sample_640×426.bmp"; + private const string ImageDataUrl = "https://filesamples.com/samples/image/bmp/sample_640%C3%97426.bmp"; + + // "https://neerc.ifmo.ru/wiki/index.php?title=Алгоритм_LZW"; + private const string TextDataUrl = "https://neerc.ifmo.ru/wiki/index.php?title=%D0%90%D0%BB%D0%B3%D0%BE%D1%80%D0%B8%D1%82%D0%BC_LZW"; - public static byte[] Data { get; private set; } + public static byte[] TextData { get; private set; } + + public static byte[] ImageData { get; private set; } 
[OneTimeSetUp] public static void DataSetup() { + TextData = DownloadOrReuse(TextDataUrl); + ImageData = DownloadOrReuse(ImageDataUrl); + } + + private static byte[] DownloadOrReuse(string url) + { + string testFilesDirectory = "test_files"; + if (!Directory.Exists(testFilesDirectory)) + { + Directory.CreateDirectory(testFilesDirectory); + } + + var filename = $"test_file_{string.Concat(url.Select(c => char.IsAsciiLetterOrDigit(c) ? c : '_'))}"; + var filePath = Path.Combine(testFilesDirectory, filename); + + if (File.Exists(filePath)) + { + return File.ReadAllBytes(filePath); + } + using var client = new HttpClient(); - var response = client.Send(new HttpRequestMessage(HttpMethod.Get, DataUrl)); + using var memory = new MemoryStream(); + + var response = client.Send(new HttpRequestMessage(HttpMethod.Get, url)); response.EnsureSuccessStatusCode(); + using var content = response.Content.ReadAsStream(); - using var memory = new MemoryStream(); content.CopyTo(memory); - Data = memory.ToArray(); + + var data = memory.ToArray(); + File.WriteAllBytes(filePath, data); + + return data; } } From 907a0c5efed8c7f251cc4bd10d93fe7c7a301f1f Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 13 Mar 2025 20:46:00 +0300 Subject: [PATCH 48/57] Made BWTStream and BWTMode internal (hw3 - lzw) --- Zipper/Zipper.Tests/Streams/BWTStreamTests.cs | 2 +- Zipper/Zipper/BWTMode.cs | 2 +- Zipper/Zipper/BWTStream.cs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs b/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs index 1343f2f..689086d 100644 --- a/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs +++ b/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs @@ -1,6 +1,6 @@ namespace Zipper.Tests.Streams; -public class BWTStreamTests +internal class BWTStreamTests { public class BWTStreamProvider : IStreamProvider { diff --git a/Zipper/Zipper/BWTMode.cs b/Zipper/Zipper/BWTMode.cs index 
d54036b..7774560 100644 --- a/Zipper/Zipper/BWTMode.cs +++ b/Zipper/Zipper/BWTMode.cs @@ -3,7 +3,7 @@ namespace Zipper; /// /// Specifies whether to transform data to or reconstruct data from the underlying stream in . /// -public enum BWTMode +internal enum BWTMode { /// /// Transform data and write it to the underlying stream. diff --git a/Zipper/Zipper/BWTStream.cs b/Zipper/Zipper/BWTStream.cs index 9e0adca..34e76b1 100644 --- a/Zipper/Zipper/BWTStream.cs +++ b/Zipper/Zipper/BWTStream.cs @@ -7,7 +7,7 @@ namespace Zipper; /// /// Provides methods and properties used to transform and reconstruct data streams by using the BWT algorithm. /// -public class BWTStream : Stream +internal class BWTStream : Stream { /// /// Smallest allowed block length. From 7d8470f8a77bd0c8242557ce1354609d79f58f9b Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Fri, 14 Mar 2025 02:51:30 +0300 Subject: [PATCH 49/57] Write block size as int instead of ushort (hw3 - lzw) --- Zipper/Zipper/LZW/LZWReader.cs | 4 ++-- Zipper/Zipper/LZW/LZWWriter.cs | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Zipper/Zipper/LZW/LZWReader.cs b/Zipper/Zipper/LZW/LZWReader.cs index 331fe0c..4287b96 100644 --- a/Zipper/Zipper/LZW/LZWReader.cs +++ b/Zipper/Zipper/LZW/LZWReader.cs @@ -150,7 +150,7 @@ public void Dispose() private bool TryReadBuffer() { - int headerSize = 4; + int headerSize = 6; Span header = stackalloc byte[headerSize]; if (stream.Read(header) != headerSize) { @@ -160,7 +160,7 @@ private bool TryReadBuffer() var blockType = (BlockType)header[0]; var codeWidth = header[1]; - blockSize = BinaryPrimitives.ReadUInt16LittleEndian(header[2..4]); + blockSize = BinaryPrimitives.ReadInt32LittleEndian(header[2..6]); block = BlockPool.Rent(blockSize); if (stream.Read(block, 0, blockSize) != blockSize) diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs index e75b581..9e50b81 100644 --- 
a/Zipper/Zipper/LZW/LZWWriter.cs +++ b/Zipper/Zipper/LZW/LZWWriter.cs @@ -9,7 +9,7 @@ namespace Zipper.LZW; /// internal class LZWWriter : IDisposable { - private const int DataOffset = 4; + private const int DataOffset = 6; private const int MaxCodesCount = (320 * 1024) - 1; private static readonly ArrayPool BlockPool = ArrayPool.Create(); @@ -168,13 +168,13 @@ private void FlushInternal(BlockType type) writer.Flush(); int length = (int)memory.Position; - ushort dataLength = (ushort)(length - DataOffset); + int dataLength = length - DataOffset; if (!(dataLength == 0 && type == BlockType.Default)) { block[0] = (byte)type; block[1] = (byte)codeWidth; - BinaryPrimitives.WriteUInt16LittleEndian(block.AsSpan()[2..4], dataLength); + BinaryPrimitives.WriteInt32LittleEndian(block.AsSpan()[2..6], dataLength); stream.Write(block, 0, length); stream.Flush(); From 07240dd7a9caf4bcb2df09681e18601b29e7d5ec Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Fri, 14 Mar 2025 02:53:25 +0300 Subject: [PATCH 50/57] Changed block sizes (hw3 - lzw) --- Zipper/Zipper/BWTStream.cs | 6 +++--- Zipper/Zipper/LZW/LZWStream.cs | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Zipper/Zipper/BWTStream.cs b/Zipper/Zipper/BWTStream.cs index 34e76b1..56369bd 100644 --- a/Zipper/Zipper/BWTStream.cs +++ b/Zipper/Zipper/BWTStream.cs @@ -12,14 +12,14 @@ internal class BWTStream : Stream /// /// Smallest allowed block length. /// - public const int MinBlockSize = 64; + public const int MinBlockSize = 1024; /// /// Largest allowed block length. 
/// - public const int MaxBlockSize = 4 * 1024; + public const int MaxBlockSize = 16384; - private const int DefaultBlockSize = 512; + private const int DefaultBlockSize = (MinBlockSize + MaxBlockSize) / 2; private static readonly ArrayPool BlockPool = ArrayPool.Create(); diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs index 5783a8e..bda1288 100644 --- a/Zipper/Zipper/LZW/LZWStream.cs +++ b/Zipper/Zipper/LZW/LZWStream.cs @@ -10,14 +10,14 @@ public class LZWStream : Stream /// /// Smallest allowed block length. /// - public const int MinBlockSize = 256; + public const int MinBlockSize = 4096; /// /// Largest allowed block length. /// - public const int MaxBlockSize = 64 * 1024; + public const int MaxBlockSize = 256 * 1024; - private const int DefaultBlockSize = 1024; + private const int DefaultBlockSize = (MinBlockSize + MaxBlockSize) / 2; private readonly Stream stream; private readonly ZipperMode mode; From e21b9640db927171af6ae0b7969902ab397828cf Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Fri, 14 Mar 2025 02:56:04 +0300 Subject: [PATCH 51/57] Added ZipperStream and tests for it (hw3 - lzw) --- .../Zipper.Tests/Streams/ZipperStreamTests.cs | 31 +++ Zipper/Zipper/ZipperStream.cs | 211 ++++++++++++++++++ 2 files changed, 242 insertions(+) create mode 100644 Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs create mode 100644 Zipper/Zipper/ZipperStream.cs diff --git a/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs b/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs new file mode 100644 index 0000000..c566ad3 --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs @@ -0,0 +1,31 @@ +namespace Zipper.Tests.Streams; + +internal class ZipperStreamTests +{ + public class ZipperStreamProvider : IStreamProvider + { + public static int MinBlockSize => ZipperStream.MinBlockSize; + + public static int MaxBlockSize => ZipperStream.MaxBlockSize; + + public static ZipperMode 
WritingMode => ZipperMode.Compress; + + public static ZipperMode ReadingMode => ZipperMode.Decompress; + + public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100; + + public static ZipperStream CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) + => new(stream, blockSize, mode, leaveOpen); + + public static ZipperStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false) + => new(stream, mode, leaveOpen); + } + + public class ZipperStreamExceptionsTests : StreamExceptionsTests + { + } + + public class ZipperStreamReadWriteTests : StreamReadWriteTests + { + } +} diff --git a/Zipper/Zipper/ZipperStream.cs b/Zipper/Zipper/ZipperStream.cs new file mode 100644 index 0000000..2861641 --- /dev/null +++ b/Zipper/Zipper/ZipperStream.cs @@ -0,0 +1,211 @@ +namespace Zipper; + +using Zipper.LZW; + +/// +/// Provides methods and properties used to compress and decompress streams. +/// +public class ZipperStream : Stream +{ + /// + /// Smallest allowed block length. + /// + public const int MinBlockSize = LZWStream.MinBlockSize; + + /// + /// Largest allowed block length. + /// + public const int MaxBlockSize = LZWStream.MaxBlockSize; + + private const int DefaultBlockSize = (MinBlockSize + MaxBlockSize) / 2; + + private readonly LZWStream lzwStream; + private readonly BWTStream bwtStream; + + private readonly Stream stream; + private readonly ZipperMode mode; + private readonly bool leaveOpen; + + private bool disposed; + + /// + public ZipperStream(Stream stream, ZipperMode mode, bool leaveOpen = false) + : this(stream, DefaultBlockSize, mode, leaveOpen) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The stream to which compressed data is written or from which data to decompress is read. + /// The internal block size to use, should be between and . + /// that determines whether to compress or decompress data. 
+ /// + /// The value indicating whether should be disposed along with this instance, + /// if is . + /// + /// is not nor . + /// is out of range. + public ZipperStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) + { + ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize); + ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize); + + if (!Enum.IsDefined(mode)) + { + throw new ArgumentException($"Value was neither {ZipperMode.Compress} nor {ZipperMode.Decompress}", nameof(mode)); + } + + float relativeBlockSize = (blockSize - MinBlockSize) / (float)(MaxBlockSize - MinBlockSize); + int bwtBlockSize = (int)(BWTStream.MinBlockSize + (relativeBlockSize * (BWTStream.MaxBlockSize - BWTStream.MinBlockSize))); + + this.stream = stream; + + lzwStream = new(stream, blockSize, mode, true); + bwtStream = new(lzwStream, bwtBlockSize, mode == ZipperMode.Compress ? BWTMode.Transform : BWTMode.Reconstruct, true); + + this.mode = mode; + this.leaveOpen = leaveOpen; + disposed = false; + } + + /// + /// Gets a value indicating whether the stream supports reading. + /// + /// + public override bool CanRead => mode == ZipperMode.Decompress && stream.CanRead; + + /// + /// Gets a value indicating whether the stream supports writing. + /// + /// + public override bool CanWrite => mode == ZipperMode.Compress && stream.CanWrite; + + /// + /// Gets a value indicating whether the stream supports seeking. + /// + /// + public override bool CanSeek => false; + + /// + /// This property is not supported and always throws a . + /// + /// + public override long Length => throw new NotSupportedException(); + + /// + /// This property is not supported and always throws a . + /// + /// + public override long Position + { + get => throw new NotSupportedException(); + set => throw new NotSupportedException(); + } + + /// + /// This method is not supported and always throws a . 
+ /// + /// + public override long Seek(long offset, SeekOrigin origin) + => throw new NotSupportedException(); + + /// + /// This method is not supported and always throws a . + /// + /// + public override void SetLength(long value) + => throw new NotSupportedException(); + + /// + /// Flushes the internal buffers. + /// + /// Stream is disposed. + public override void Flush() + { + EnsureNotClosed(); + + if (mode == ZipperMode.Compress) + { + bwtStream.Flush(); + } + } + + /// + /// Reads data from , compresses it and writes it to the underlying stream. + /// + /// Buffer that contains data to be compressed. + /// How many bytes to skip before reading from . + /// How many bytes to read from . + /// Stream is set to mode. + /// Stream is disposed. + public override void Write(byte[] buffer, int offset, int count) + => Write(buffer.AsSpan(offset, count)); + + /// + /// Reads data from the underlying stream, decompresses it and writes to . + /// + /// Buffer to write decompressed data to. + /// How many bytes to skip before writing to . + /// How many bytes to write to . + /// Count of read bytes, can be less than . + /// Unexpected end of stream. + /// Invalid data stream. + /// Stream is set to mode. + /// Stream is disposed. 
+ public override int Read(byte[] buffer, int offset, int count) + => Read(buffer.AsSpan(offset, count)); + + /// + public override void Write(ReadOnlySpan buffer) + { + EnsureNotClosed(); + EnsureMode(ZipperMode.Compress); + + bwtStream.Write(buffer); + } + + /// + public override int Read(Span buffer) + { + EnsureNotClosed(); + EnsureMode(ZipperMode.Decompress); + + return bwtStream.Read(buffer); + } + + /// + protected override void Dispose(bool disposing) + { + if (disposed) + { + return; + } + + if (disposing) + { + bwtStream.Dispose(); + lzwStream.Dispose(); + + if (!leaveOpen) + { + stream.Dispose(); + } + + disposed = true; + } + } + + private void EnsureMode(ZipperMode mode) + { + if (this.mode != mode) + { + throw new InvalidOperationException(); + } + } + + private void EnsureNotClosed() + { + ObjectDisposedException.ThrowIf(disposed, this); + } +} From 2005b625ee963750ae8b5ea4de43559d67a56f32 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Fri, 14 Mar 2025 03:51:06 +0300 Subject: [PATCH 52/57] Added cli for zipper (hw3 - lzw) --- Zipper/Zipper.Cli/FileZipper.cs | 88 ++++++++++++++ Zipper/Zipper.Cli/Program.cs | 179 ++++++++++++++++++++++++++++ Zipper/Zipper.Cli/Zipper.Cli.csproj | 14 +++ Zipper/Zipper.sln | 14 +++ 4 files changed, 295 insertions(+) create mode 100644 Zipper/Zipper.Cli/FileZipper.cs create mode 100644 Zipper/Zipper.Cli/Program.cs create mode 100644 Zipper/Zipper.Cli/Zipper.Cli.csproj diff --git a/Zipper/Zipper.Cli/FileZipper.cs b/Zipper/Zipper.Cli/FileZipper.cs new file mode 100644 index 0000000..4e83d02 --- /dev/null +++ b/Zipper/Zipper.Cli/FileZipper.cs @@ -0,0 +1,88 @@ +namespace Zipper.Cli; + +using System.Buffers; + +/// +/// Provides methods and properties used to compress and decompress files. 
+/// +internal class FileZipper : IDisposable +{ + private const int BufferSize = 512 * 1024; + private static readonly ArrayPool BufferPool = ArrayPool.Create(); + + private readonly Stream readFrom; + private readonly Stream writeTo; + private readonly long inputFileSize; + private readonly byte[] buffer; + + private long bytesReadFromInput; + + /// + /// Initializes a new instance of the class. + /// + /// Mode to use. + /// File to read data from. + /// File to write compressed/decompressed data to. + public FileZipper(ZipperMode mode, string inputFilePath, string outputFilePath) + { + inputFileSize = new FileInfo(inputFilePath).Length; + + var inputFile = File.OpenRead(inputFilePath); + var outputFile = File.Create(outputFilePath); + + if (mode == ZipperMode.Compress) + { + readFrom = inputFile; + writeTo = new ZipperStream(outputFile, ZipperStream.MaxBlockSize, mode); + } + else + { + readFrom = new ZipperStream(inputFile, ZipperStream.MaxBlockSize, mode); + writeTo = outputFile; + } + + bytesReadFromInput = 0; + buffer = BufferPool.Rent(BufferSize); + + EndOfFile = false; + } + + /// + /// Gets progress as value between 0 and 1. + /// + public float Progress => (float)bytesReadFromInput / inputFileSize; + + /// + /// Gets a value indicating whether end of file was reached. + /// + public bool EndOfFile { get; private set; } + + /// + /// Compresses or decompresses part of input file. + /// + public void ReadAndWriteSingleBuffer() + { + int bytesRead = readFrom.Read(buffer, 0, BufferSize); + + if (bytesRead == 0) + { + EndOfFile = true; + return; + } + + bytesReadFromInput += bytesRead; + + writeTo.Write(buffer, 0, bytesRead); + } + + /// + /// Disposes all used files. 
+ /// + public void Dispose() + { + BufferPool.Return(buffer); + + readFrom.Dispose(); + writeTo.Dispose(); + } +} diff --git a/Zipper/Zipper.Cli/Program.cs b/Zipper/Zipper.Cli/Program.cs new file mode 100644 index 0000000..39da260 --- /dev/null +++ b/Zipper/Zipper.Cli/Program.cs @@ -0,0 +1,179 @@ +using System.Diagnostics; +using Zipper; +using Zipper.Cli; + +const string helpMessage = +""" +Zipper - console tool for compressing and decompressing files + +Usage: dotnet run -- [options] +Options: + -h -? --help | Print this help message + ------------------------------------------------ + -c --compress | Compress specified file + ------------------------------------------------ + -u --uncompress | Decompress + -d --decompress | specified file + ------------------------------------------------ + -f --force | Overwrite files without asking + +File path should be the first argument (unless --help specified) +Options can be specified in any order +Only either '--compress' or '--decompress' can be used at the same time +"""; + +args = [.. args.Select(x => x.Trim())]; + +if (args.Length == 0 || (args.Length == 1 && args[0] is "-h" or "--help" or "-?")) +{ + Console.WriteLine(helpMessage); + + return 0; +} + +string filePath = args[0]; +bool force = false; +ZipperMode? 
mode = null; + +foreach (var arg in args.Skip(1)) +{ + switch (arg) + { + case "-u" or "-d" or "--uncompress" or "--decompress": + if (mode != null) + { + Console.Error.WriteLine("Error: '--compress' or '--decompress' option can only be specified once"); + return 1; + } + + mode = ZipperMode.Decompress; + break; + + case "-c" or "--compress": + if (mode != null) + { + Console.Error.WriteLine("Error: '--compress' or '--decompress' option can only be specified once"); + return 1; + } + + mode = ZipperMode.Compress; + break; + + case "-f" or "--force": + force = true; + break; + + default: + Console.Error.WriteLine("Error: unknown argument"); + return 1; + } +} + +if (mode == null) +{ + Console.Error.WriteLine("Error: neither '--compress' nor '--decompress' were specified"); + return 1; +} + +if (!File.Exists(filePath)) +{ + Console.Error.WriteLine($"Error: file '{filePath}' does not exist"); + return 1; +} + +const string zippedExtension = ".zipped"; +string? newFilePath = null; +if (mode == ZipperMode.Compress) +{ + newFilePath = $"{filePath}{zippedExtension}"; +} +else +{ + if (!filePath.EndsWith(zippedExtension)) + { + Console.Error.WriteLine($"Error: extension of the specified file is not {zippedExtension}"); + return 1; + } + + newFilePath = filePath[..^zippedExtension.Length]; +} + +if (!force && File.Exists(newFilePath)) +{ + Console.Write($"File '{newFilePath}' already exists, overwrite? 
(y/n): "); + if (Console.ReadLine()?.Trim() != "y") + { + Console.WriteLine("Cancelled"); + return 0; + } +} + +const string hideCursorEscape = "\e[?25l"; +const string showCursorEscape = "\e[?25h"; +const string moveToLeftEscape = "\e[0G"; +const string clearLineEscape = "\e[2K"; +const string waitingSymblols = @"|/-\"; + +Console.Write(hideCursorEscape); + +using (var fileZipper = new FileZipper(mode.Value, filePath, newFilePath)) +{ + var stopwatch = Stopwatch.StartNew(); + var lastLoggedTime = stopwatch.Elapsed; + int step = 0; + while (!fileZipper.EndOfFile) + { + fileZipper.ReadAndWriteSingleBuffer(); + + if (stopwatch.Elapsed - lastLoggedTime > TimeSpan.FromMilliseconds(4)) + { + Console.Write(moveToLeftEscape); + RenderProgress(fileZipper.Progress, stopwatch.Elapsed, step, waitingSymblols); + lastLoggedTime = stopwatch.Elapsed; + } + + step += 1; + } +} + +Console.Write(clearLineEscape); +Console.Write(moveToLeftEscape); +Console.Write(showCursorEscape); + +if (mode == ZipperMode.Compress) +{ + var inputFileSize = new FileInfo(filePath).Length; + var outputFileSize = new FileInfo(newFilePath).Length; + var compressionRate = (float)inputFileSize / outputFileSize; + + Console.WriteLine($"Compression rate: {compressionRate}"); +} + +return 0; + +static void RenderProgress(float progress, TimeSpan time, int step, string stepString) +{ + Console.Write($" {stepString[step % stepString.Length]} "); + Console.Write("["); + + for (int i = 0; i <= 100; i++) + { + Console.Write(progress >= i / 100f ? 
'=' : ' '); + } + + Console.Write("]"); + Console.Write($" {progress * 100,5:0.0} %"); + + if (time.TotalMinutes < 1) + { + Console.Write($" {time.Seconds} s"); + } + else if (time.TotalHours < 1) + { + Console.Write($" {time.Minutes} m {time.Seconds:00} s"); + } + else + { + Console.Write($" {time.Hours} h {time.Minutes:00} m {time.Seconds:00} s"); + } +} diff --git a/Zipper/Zipper.Cli/Zipper.Cli.csproj b/Zipper/Zipper.Cli/Zipper.Cli.csproj new file mode 100644 index 0000000..0465f53 --- /dev/null +++ b/Zipper/Zipper.Cli/Zipper.Cli.csproj @@ -0,0 +1,14 @@ + + + + Exe + net9.0 + enable + enable + + + + + + + diff --git a/Zipper/Zipper.sln b/Zipper/Zipper.sln index 213ddd2..60e7c7e 100644 --- a/Zipper/Zipper.sln +++ b/Zipper/Zipper.sln @@ -7,6 +7,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper", "Zipper\Zipper.csp EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper.Tests", "Zipper.Tests\Zipper.Tests.csproj", "{10C8B0BF-1C7E-4169-8DBB-3B138722C444}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper.Cli", "Zipper.Cli\Zipper.Cli.csproj", "{800EFECF-6B0E-4A90-BDD9-717642A6309E}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -41,6 +43,18 @@ Global {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x64.Build.0 = Release|Any CPU {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x86.ActiveCfg = Release|Any CPU {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x86.Build.0 = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x64.ActiveCfg = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x64.Build.0 = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x86.ActiveCfg = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x86.Build.0 = Debug|Any CPU + 
{800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|Any CPU.Build.0 = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x64.ActiveCfg = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x64.Build.0 = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x86.ActiveCfg = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE From 20e49bf45e0c59036fcd2737d431fa0682cc9477 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 20 Mar 2025 16:36:33 +0300 Subject: [PATCH 53/57] Changed BWT to allow output to be longer than input (hw3 - lzw) --- Zipper/Zipper/BWT.cs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs index 099f06c..b96769a 100644 --- a/Zipper/Zipper/BWT.cs +++ b/Zipper/Zipper/BWT.cs @@ -19,7 +19,8 @@ internal static class BWT /// Index that is used to reconstruct byte sequence. public static int ForwardTransform(ReadOnlySpan input, Span output) { - Debug.Assert(input.Length == output.Length, "Length of input and output should be the same"); + ArgumentOutOfRangeException.ThrowIfLessThan(output.Length, input.Length, nameof(output)); + int length = input.Length; if (length == 0) @@ -81,7 +82,7 @@ int Compare(int x, int y) /// Span to write reconstructed byte sequence to. 
public static void InverseTransform(ReadOnlySpan input, int identityIndex, Span output) { - Debug.Assert(input.Length == output.Length, "Length of input and output should be the same"); + ArgumentOutOfRangeException.ThrowIfLessThan(output.Length, input.Length, nameof(output)); if (identityIndex == -1) { @@ -124,13 +125,13 @@ public static void InverseTransform(ReadOnlySpan input, int identityIndex, int lastIdentityIndex = identityIndex; byte lastByte = input[lastIdentityIndex]; - output[^1] = input[identityIndex]; + output[length - 1] = input[identityIndex]; for (int i = 1; i < length; i++) { lastIdentityIndex = appearances[lastIdentityIndex] + lesserBytesCounter[lastByte]; lastByte = input[lastIdentityIndex]; - output[^(i + 1)] = lastByte; + output[length - (i + 1)] = lastByte; } Pool.Return(appearances); From aad99c1089bc40397ac09032cda6aaa3cadb09a3 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 20 Mar 2025 16:37:15 +0300 Subject: [PATCH 54/57] Changed BWT.ForwardTransform to return positive index and added checks in BWT.InverseTransform (hw3 - lzw) --- Zipper/Zipper/BWT.cs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs index b96769a..740ed20 100644 --- a/Zipper/Zipper/BWT.cs +++ b/Zipper/Zipper/BWT.cs @@ -25,7 +25,7 @@ public static int ForwardTransform(ReadOnlySpan input, Span output) if (length == 0) { - return -1; + return 0; } int[] offsets = Pool.Rent(length); @@ -82,10 +82,14 @@ int Compare(int x, int y) /// Span to write reconstructed byte sequence to. 
public static void InverseTransform(ReadOnlySpan input, int identityIndex, Span output) { + ArgumentOutOfRangeException.ThrowIfNegative(identityIndex, nameof(identityIndex)); + ArgumentOutOfRangeException.ThrowIfGreaterThan(identityIndex, input.Length, nameof(identityIndex)); + ArgumentOutOfRangeException.ThrowIfLessThan(output.Length, input.Length, nameof(output)); - if (identityIndex == -1) + if (input.Length <= 1) { + input.CopyTo(output); return; } From fb950497d43b1b922b0ee1826abc87564b1b4c67 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 20 Mar 2025 16:39:26 +0300 Subject: [PATCH 55/57] Updated tests for BWT (hw3 - lzw) --- Zipper/Zipper.Tests/BWTTests.cs | 34 +++++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/Zipper/Zipper.Tests/BWTTests.cs b/Zipper/Zipper.Tests/BWTTests.cs index 62c0db8..4d74512 100644 --- a/Zipper/Zipper.Tests/BWTTests.cs +++ b/Zipper/Zipper.Tests/BWTTests.cs @@ -25,8 +25,38 @@ public void InverseTransform_ShouldBe_SameAs_Input([ValueSource(nameof(TestData) Span transformed = stackalloc byte[input.Length]; var index = BWT.ForwardTransform(input, transformed); - Span reconstructed = stackalloc byte[input.Length]; + Span reconstructed = stackalloc byte[input.Length + 16]; BWT.InverseTransform(transformed, index, reconstructed); - Assert.That(reconstructed.SequenceEqual(input), Is.True); + Assert.That(reconstructed[..input.Length].SequenceEqual(input), Is.True); + } + + [Test] + public void Transform_ShouldThrowIf_InputIsLargerThanOutput() + { + int inputLength = 16; + int outputLength = inputLength - 1; + Assert.Throws(() => BWT.ForwardTransform(stackalloc byte[inputLength], stackalloc byte[outputLength])); + } + + [Test] + public void InverseTransform_ShouldThrowIf_InputIsLargerThanOutput() + { + int inputLength = 16; + int outputLength = inputLength - 1; + Assert.Throws(() => BWT.InverseTransform(stackalloc byte[inputLength], 0, 
stackalloc byte[outputLength])); + } + + [Test] + public void InverseTransform_ShouldThrowIf_IdentityIndexIsNegative() + { + int length = 16; + Assert.Throws(() => BWT.InverseTransform(stackalloc byte[length], -1, stackalloc byte[length])); + } + + [Test] + public void InverseTransform_ShouldThrowIf_IdentityIndexIs_GreaterThanOrEqualTo_InputLength() + { + int length = 16; + Assert.Throws(() => BWT.InverseTransform(stackalloc byte[length], length + 1, stackalloc byte[length])); } } From c74bca34d22b3851a69ebb25aa23f65634fe9ff9 Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Thu, 20 Mar 2025 16:40:04 +0300 Subject: [PATCH 56/57] Made LZWStream internal (hw3 - lzw) --- Zipper/Zipper.Tests/Streams/LZWStreamTests.cs | 2 +- Zipper/Zipper/LZW/LZWStream.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs b/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs index 2d3b672..dd36279 100644 --- a/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs +++ b/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs @@ -2,7 +2,7 @@ namespace Zipper.Tests.Streams; using Zipper.LZW; -public class LZWStreamTests +internal class LZWStreamTests { public class LZWStreamProvider : IStreamProvider { diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs index bda1288..78d2a20 100644 --- a/Zipper/Zipper/LZW/LZWStream.cs +++ b/Zipper/Zipper/LZW/LZWStream.cs @@ -5,7 +5,7 @@ namespace Zipper.LZW; /// /// Provides methods and properties used to compress and decompress streams by using the LZW algorithm. /// -public class LZWStream : Stream +internal class LZWStream : Stream { /// /// Smallest allowed block length. 
From c8f6aed9a2df38b6664ae39c952f17e66758961d Mon Sep 17 00:00:00 2001 From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com> Date: Fri, 30 May 2025 16:59:41 +0300 Subject: [PATCH 57/57] Added BWT switch to ZipperStream.cs (hw3 - lzw) --- Zipper/Zipper.Cli/FileZipper.cs | 38 ++++++++++++++++++- .../Zipper.Tests/Streams/ZipperStreamTests.cs | 37 +++++++++++++++--- Zipper/Zipper/ZipperStream.cs | 32 +++++++++++----- 3 files changed, 91 insertions(+), 16 deletions(-) diff --git a/Zipper/Zipper.Cli/FileZipper.cs b/Zipper/Zipper.Cli/FileZipper.cs index 4e83d02..63f097d 100644 --- a/Zipper/Zipper.Cli/FileZipper.cs +++ b/Zipper/Zipper.Cli/FileZipper.cs @@ -1,6 +1,7 @@ namespace Zipper.Cli; using System.Buffers; +using System.Diagnostics; /// /// Provides methods and properties used to compress and decompress files. @@ -10,8 +11,13 @@ internal class FileZipper : IDisposable private const int BufferSize = 512 * 1024; private static readonly ArrayPool BufferPool = ArrayPool.Create(); + private readonly string? outputFileName; + private readonly string? outputFileNameTempA; + private readonly string? outputFileNameTempB; + private readonly Stream readFrom; private readonly Stream writeTo; + private readonly Stream? 
writeToAlt; private readonly long inputFileSize; private readonly byte[] buffer; @@ -28,15 +34,23 @@ public FileZipper(ZipperMode mode, string inputFilePath, string outputFilePath) inputFileSize = new FileInfo(inputFilePath).Length; var inputFile = File.OpenRead(inputFilePath); - var outputFile = File.Create(outputFilePath); if (mode == ZipperMode.Compress) { + outputFileName = outputFilePath; + outputFileNameTempA = Path.GetTempFileName(); + outputFileNameTempB = Path.GetTempFileName(); + + var outputFileA = File.Create(outputFileNameTempA); + var outputFileB = File.Create(outputFileNameTempB); + readFrom = inputFile; - writeTo = new ZipperStream(outputFile, ZipperStream.MaxBlockSize, mode); + writeTo = new ZipperStream(outputFileA, ZipperStream.MaxBlockSize, mode); + writeToAlt = new ZipperStream(outputFileB, ZipperStream.MaxBlockSize, mode, useBwt: true); } else { + var outputFile = File.Create(outputFilePath); readFrom = new ZipperStream(inputFile, ZipperStream.MaxBlockSize, mode); writeTo = outputFile; } @@ -73,6 +87,7 @@ public void ReadAndWriteSingleBuffer() bytesReadFromInput += bytesRead; writeTo.Write(buffer, 0, bytesRead); + writeToAlt?.Write(buffer, 0, bytesRead); } /// @@ -84,5 +99,24 @@ public void Dispose() readFrom.Dispose(); writeTo.Dispose(); + writeToAlt?.Dispose(); + + if (outputFileName != null) + { + Debug.Assert(outputFileNameTempA != null, $"{nameof(outputFileNameTempA)} is null"); + Debug.Assert(outputFileNameTempB != null, $"{nameof(outputFileNameTempB)} is null"); + + var tempLengthA = new FileInfo(outputFileNameTempA).Length; + var tempLengthB = new FileInfo(outputFileNameTempB).Length; + + if (tempLengthA < tempLengthB) + { + File.Move(outputFileNameTempA, outputFileName, true); + } + else + { + File.Move(outputFileNameTempB, outputFileName, true); + } + } } } diff --git a/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs b/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs index c566ad3..2865937 100644 --- 
a/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs +++ b/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs @@ -2,7 +2,7 @@ namespace Zipper.Tests.Streams; internal class ZipperStreamTests { - public class ZipperStreamProvider : IStreamProvider + public class ZipperStreamProviderWithoutBwt : IStreamProvider { public static int MinBlockSize => ZipperStream.MinBlockSize; @@ -15,17 +15,44 @@ public class ZipperStreamProvider : IStreamProvider public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100; public static ZipperStream CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) - => new(stream, blockSize, mode, leaveOpen); + => new(stream, blockSize, mode, leaveOpen, false); public static ZipperStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false) - => new(stream, mode, leaveOpen); + => new(stream, mode, leaveOpen, false); } - public class ZipperStreamExceptionsTests : StreamExceptionsTests + public class ZipperStreamProviderWithBwt : IStreamProvider + { + public static int MinBlockSize => ZipperStream.MinBlockSize; + + public static int MaxBlockSize => ZipperStream.MaxBlockSize; + + public static ZipperMode WritingMode => ZipperMode.Compress; + + public static ZipperMode ReadingMode => ZipperMode.Decompress; + + public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100; + + public static ZipperStream CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) + => new(stream, blockSize, mode, leaveOpen, true); + + public static ZipperStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false) + => new(stream, mode, leaveOpen, true); + } + + public class ZipperStreamWithoutBwtExceptionsTests : StreamExceptionsTests + { + } + + public class ZipperStreamWithoutBwtReadWriteTests : StreamReadWriteTests + { + } + + public class ZipperStreamWithBwtExceptionsTests : StreamExceptionsTests { } - public class ZipperStreamReadWriteTests : StreamReadWriteTests + 
public class ZipperStreamWithBwtReadWriteTests : StreamReadWriteTests { } } diff --git a/Zipper/Zipper/ZipperStream.cs b/Zipper/Zipper/ZipperStream.cs index 2861641..62eba44 100644 --- a/Zipper/Zipper/ZipperStream.cs +++ b/Zipper/Zipper/ZipperStream.cs @@ -20,7 +20,7 @@ public class ZipperStream : Stream private const int DefaultBlockSize = (MinBlockSize + MaxBlockSize) / 2; private readonly LZWStream lzwStream; - private readonly BWTStream bwtStream; + private readonly Stream outerStream; private readonly Stream stream; private readonly ZipperMode mode; @@ -28,9 +28,15 @@ public class ZipperStream : Stream private bool disposed; - /// + /// public ZipperStream(Stream stream, ZipperMode mode, bool leaveOpen = false) - : this(stream, DefaultBlockSize, mode, leaveOpen) + : this(stream, DefaultBlockSize, mode, leaveOpen, false) + { + } + + /// + public ZipperStream(Stream stream, ZipperMode mode, bool leaveOpen = false, bool useBwt = false) + : this(stream, DefaultBlockSize, mode, leaveOpen, useBwt) { } @@ -44,9 +50,10 @@ public ZipperStream(Stream stream, ZipperMode mode, bool leaveOpen = false) /// The value indicating whether should be disposed along with this instance, /// if is . /// + /// The value indicating whether to use Burrows-Wheeler transformation internally. /// is not nor . /// is out of range. - public ZipperStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) + public ZipperStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false, bool useBwt = false) { ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize); ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize); @@ -62,7 +69,14 @@ public ZipperStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpe this.stream = stream; lzwStream = new(stream, blockSize, mode, true); - bwtStream = new(lzwStream, bwtBlockSize, mode == ZipperMode.Compress ? 
BWTMode.Transform : BWTMode.Reconstruct, true); + if (useBwt) + { + outerStream = new BWTStream(lzwStream, bwtBlockSize, mode == ZipperMode.Compress ? BWTMode.Transform : BWTMode.Reconstruct, true); + } + else + { + outerStream = lzwStream; + } this.mode = mode; this.leaveOpen = leaveOpen; @@ -127,7 +141,7 @@ public override void Flush() if (mode == ZipperMode.Compress) { - bwtStream.Flush(); + outerStream.Flush(); } } @@ -162,7 +176,7 @@ public override void Write(ReadOnlySpan buffer) EnsureNotClosed(); EnsureMode(ZipperMode.Compress); - bwtStream.Write(buffer); + outerStream.Write(buffer); } /// @@ -171,7 +185,7 @@ public override int Read(Span buffer) EnsureNotClosed(); EnsureMode(ZipperMode.Decompress); - return bwtStream.Read(buffer); + return outerStream.Read(buffer); } /// @@ -184,7 +198,7 @@ protected override void Dispose(bool disposing) if (disposing) { - bwtStream.Dispose(); + outerStream.Dispose(); lzwStream.Dispose(); if (!leaveOpen)