From e1783eb7ddcf61d19834ae0350fc04f3fa20f5ae Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 2 Mar 2025 07:36:29 +0300
Subject: [PATCH 01/57] Homework 3 - LZW (wip)
Added arbitrary bit length writer/reader
---
Zipper/Zipper.sln | 48 ++++++++++++++
Zipper/Zipper/ArbitraryBitReader.cs | 67 ++++++++++++++++++++
Zipper/Zipper/ArbitraryBitWriter.cs | 97 +++++++++++++++++++++++++++++
Zipper/Zipper/Zipper.csproj | 14 +++++
4 files changed, 226 insertions(+)
create mode 100644 Zipper/Zipper.sln
create mode 100644 Zipper/Zipper/ArbitraryBitReader.cs
create mode 100644 Zipper/Zipper/ArbitraryBitWriter.cs
create mode 100644 Zipper/Zipper/Zipper.csproj
diff --git a/Zipper/Zipper.sln b/Zipper/Zipper.sln
new file mode 100644
index 0000000..213ddd2
--- /dev/null
+++ b/Zipper/Zipper.sln
@@ -0,0 +1,48 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio Version 17
+VisualStudioVersion = 17.0.31903.59
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper", "Zipper\Zipper.csproj", "{9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper.Tests", "Zipper.Tests\Zipper.Tests.csproj", "{10C8B0BF-1C7E-4169-8DBB-3B138722C444}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Any CPU = Debug|Any CPU
+ Debug|x64 = Debug|x64
+ Debug|x86 = Debug|x86
+ Release|Any CPU = Release|Any CPU
+ Release|x64 = Release|x64
+ Release|x86 = Release|x86
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x64.Build.0 = Debug|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x86.Build.0 = Debug|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|Any CPU.Build.0 = Release|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x64.ActiveCfg = Release|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x64.Build.0 = Release|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x86.ActiveCfg = Release|Any CPU
+ {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x86.Build.0 = Release|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x64.Build.0 = Debug|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x86.Build.0 = Debug|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|Any CPU.Build.0 = Release|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x64.ActiveCfg = Release|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x64.Build.0 = Release|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x86.ActiveCfg = Release|Any CPU
+ {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x86.Build.0 = Release|Any CPU
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs
new file mode 100644
index 0000000..fec8c46
--- /dev/null
+++ b/Zipper/Zipper/ArbitraryBitReader.cs
@@ -0,0 +1,67 @@
+namespace Zipper;
+
+using System.Diagnostics;
+
+///
+/// Reads integers of arbitrary width.
+///
+internal class ArbitraryBitReader
+{
+ private readonly Stream stream;
+ private readonly int width;
+ private byte buffer;
+ private int? bitsReadFromBuffer;
+
+ ///
+ /// Initializes a new instance of the class.
+ ///
+ /// Stream to read from.
+ /// Width of integers between 4 and 32 bits.
+ public ArbitraryBitReader(Stream stream, int width)
+ {
+ ArgumentOutOfRangeException.ThrowIfLessThan(width, 4, nameof(width));
+ ArgumentOutOfRangeException.ThrowIfGreaterThan(width, 32, nameof(width));
+
+ this.stream = stream;
+ this.width = width;
+ bitsReadFromBuffer = null;
+ }
+
+ ///
+ /// Reads number from underlying stream and stores it in the .
+ ///
+ /// When this method returns, contains the value that was read, if read successfully; otherwise, zero.
+ /// if was successfuly read, otherwise.
+ public bool ReadNext(out int number)
+ {
+ number = 0;
+
+ int remainingWidth = width;
+ while (remainingWidth > 0)
+ {
+ if (bitsReadFromBuffer is null or 0 or >= 8)
+ {
+ int readByte = stream.ReadByte();
+ if (readByte == -1)
+ {
+ return false;
+ }
+
+ buffer = (byte)readByte;
+ bitsReadFromBuffer = 0;
+ }
+
+ int remainingBitsToRead = 8 - bitsReadFromBuffer.Value;
+ int mask = 0xFF >> bitsReadFromBuffer.Value;
+ int toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth);
+
+ int previousRemainingWidth = remainingWidth;
+ remainingWidth -= remainingBitsToRead;
+ remainingWidth = Math.Max(0, remainingWidth);
+ bitsReadFromBuffer += previousRemainingWidth - remainingWidth;
+ number |= toWrite << remainingWidth;
+ }
+
+ return true;
+ }
+}
diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs
new file mode 100644
index 0000000..c6dde81
--- /dev/null
+++ b/Zipper/Zipper/ArbitraryBitWriter.cs
@@ -0,0 +1,97 @@
+namespace Zipper;
+
+using System.Diagnostics;
+
+///
+/// Writes integers of arbitrary width.
+///
+internal class ArbitraryBitWriter : IDisposable
+{
+ private readonly Stream stream;
+ private readonly int width;
+ private readonly byte[] buffer;
+ private int bitsWrittenInBuffer;
+ private bool disposed = false;
+
+ ///
+ /// Initializes a new instance of the class.
+ ///
+ /// Stream to write to.
+ /// Width of integers between 4 and 32 bits.
+ public ArbitraryBitWriter(Stream stream, int width)
+ {
+ ArgumentOutOfRangeException.ThrowIfLessThan(width, 4, nameof(width));
+ ArgumentOutOfRangeException.ThrowIfGreaterThan(width, 32, nameof(width));
+
+ this.stream = stream;
+ this.width = width;
+ buffer = new byte[width * 8];
+ bitsWrittenInBuffer = 0;
+ }
+
+ ///
+ /// Writes to the underlying stream.
+ ///
+ /// Number to write.
+ public void Write(int number)
+ {
+ ObjectDisposedException.ThrowIf(disposed, this);
+
+ number &= (int)(0xFFFFFFFF >> (32 - width));
+
+ int remainingWidth = width;
+ while (remainingWidth > 0)
+ {
+ int bufferOffset = bitsWrittenInBuffer / 8;
+ int bitsWrittenToCurrentByte = bitsWrittenInBuffer % 8;
+ int bitsRemainingInCurrentByte = 8 - bitsWrittenToCurrentByte;
+
+ int toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte);
+ int remainingWidthInCurrentByte = Math.Max(0, 8 - remainingWidth);
+ buffer[bufferOffset] |= (byte)(toWrite << remainingWidthInCurrentByte);
+
+ remainingWidth -= bitsRemainingInCurrentByte;
+ bitsWrittenInBuffer += bitsRemainingInCurrentByte - remainingWidthInCurrentByte;
+ }
+
+ if (bitsWrittenInBuffer == buffer.Length * 8)
+ {
+ stream.Write(buffer);
+ }
+ }
+
+ ///
+ /// Releases all resources used by the current instance of the class.
+ ///
+ public void Dispose()
+ {
+ Dispose(true);
+ }
+
+ ///
+ /// Releases the unmanaged resources used by the and optionally releases the managed resources.
+ ///
+ /// to release both managed and unmanaged resources; to release only unmanaged resources.
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposed)
+ {
+ return;
+ }
+
+ disposed = true;
+
+ if (disposing)
+ {
+ return;
+ }
+
+ if (bitsWrittenInBuffer == 0)
+ {
+ return;
+ }
+
+ int bytesWrittenInBuffer = bitsWrittenInBuffer / 8;
+ stream.Write(buffer.AsSpan()[..(bytesWrittenInBuffer + 1)]);
+ }
+}
diff --git a/Zipper/Zipper/Zipper.csproj b/Zipper/Zipper/Zipper.csproj
new file mode 100644
index 0000000..a258532
--- /dev/null
+++ b/Zipper/Zipper/Zipper.csproj
@@ -0,0 +1,14 @@
+
+
+
+ Library
+ net9.0
+ enable
+ enable
+
+
+
+
+
+
+
From 3bce49265ebf59920559e7dc421fd102f6ed40bf Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 2 Mar 2025 07:42:14 +0300
Subject: [PATCH 02/57] Added tests project (hw3 - lzw) (wip)
---
Zipper/Zipper.Tests/Zipper.Tests.csproj | 28 +++++++++++++++++++++++++
1 file changed, 28 insertions(+)
create mode 100644 Zipper/Zipper.Tests/Zipper.Tests.csproj
diff --git a/Zipper/Zipper.Tests/Zipper.Tests.csproj b/Zipper/Zipper.Tests/Zipper.Tests.csproj
new file mode 100644
index 0000000..78b1af6
--- /dev/null
+++ b/Zipper/Zipper.Tests/Zipper.Tests.csproj
@@ -0,0 +1,28 @@
+
+
+
+ net9.0
+ latest
+ enable
+ enable
+
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
From fc6d5c45b2008d1a750588227c7b741edf92c786 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 2 Mar 2025 13:25:05 +0300
Subject: [PATCH 03/57] Fixed writer and reader (hw3 - lzw) (wip)
---
Zipper/Zipper/ArbitraryBitReader.cs | 4 +---
Zipper/Zipper/ArbitraryBitWriter.cs | 25 ++++++++++++++-----------
2 files changed, 15 insertions(+), 14 deletions(-)
diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs
index fec8c46..c284edb 100644
--- a/Zipper/Zipper/ArbitraryBitReader.cs
+++ b/Zipper/Zipper/ArbitraryBitReader.cs
@@ -1,7 +1,5 @@
namespace Zipper;
-using System.Diagnostics;
-
///
/// Reads integers of arbitrary width.
///
@@ -39,7 +37,7 @@ public bool ReadNext(out int number)
int remainingWidth = width;
while (remainingWidth > 0)
{
- if (bitsReadFromBuffer is null or 0 or >= 8)
+ if (bitsReadFromBuffer is null or >= 8)
{
int readByte = stream.ReadByte();
if (readByte == -1)
diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs
index c6dde81..a4b845d 100644
--- a/Zipper/Zipper/ArbitraryBitWriter.cs
+++ b/Zipper/Zipper/ArbitraryBitWriter.cs
@@ -1,7 +1,5 @@
namespace Zipper;
-using System.Diagnostics;
-
///
/// Writes integers of arbitrary width.
///
@@ -25,7 +23,7 @@ public ArbitraryBitWriter(Stream stream, int width)
this.stream = stream;
this.width = width;
- buffer = new byte[width * 8];
+ buffer = new byte[width];
bitsWrittenInBuffer = 0;
}
@@ -47,16 +45,21 @@ public void Write(int number)
int bitsRemainingInCurrentByte = 8 - bitsWrittenToCurrentByte;
int toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte);
- int remainingWidthInCurrentByte = Math.Max(0, 8 - remainingWidth);
- buffer[bufferOffset] |= (byte)(toWrite << remainingWidthInCurrentByte);
-
+ int previousRemainingWidth = remainingWidth;
remainingWidth -= bitsRemainingInCurrentByte;
- bitsWrittenInBuffer += bitsRemainingInCurrentByte - remainingWidthInCurrentByte;
+ remainingWidth = Math.Max(0, remainingWidth);
+
+ int bitsToBeWritten = previousRemainingWidth - remainingWidth;
+ int bitsToBeLeftInCurrentByte = bitsRemainingInCurrentByte - bitsToBeWritten;
+ buffer[bufferOffset] |= (byte)(toWrite << bitsToBeLeftInCurrentByte);
+ bitsWrittenInBuffer += bitsToBeWritten;
}
- if (bitsWrittenInBuffer == buffer.Length * 8)
+ if (bitsWrittenInBuffer >= buffer.Length * 8)
{
stream.Write(buffer);
+ Array.Clear(buffer);
+ bitsWrittenInBuffer = 0;
}
}
@@ -81,7 +84,7 @@ protected virtual void Dispose(bool disposing)
disposed = true;
- if (disposing)
+ if (!disposing)
{
return;
}
@@ -91,7 +94,7 @@ protected virtual void Dispose(bool disposing)
return;
}
- int bytesWrittenInBuffer = bitsWrittenInBuffer / 8;
- stream.Write(buffer.AsSpan()[..(bytesWrittenInBuffer + 1)]);
+ int bytesWrittenInBuffer = (int)Math.Ceiling(bitsWrittenInBuffer / 8f);
+ stream.Write(buffer.AsSpan()[..bytesWrittenInBuffer]);
}
}
From a797044a159812be1e61a02042d00be7a4e1b8c4 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 2 Mar 2025 13:25:45 +0300
Subject: [PATCH 04/57] Fixed InternalsVisibleTo (hw3 - lzw) (wip)
---
Zipper/Zipper/Zipper.csproj | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Zipper/Zipper/Zipper.csproj b/Zipper/Zipper/Zipper.csproj
index a258532..036d040 100644
--- a/Zipper/Zipper/Zipper.csproj
+++ b/Zipper/Zipper/Zipper.csproj
@@ -8,7 +8,7 @@
-
+
From 686e43bb765048131f43ff1616cd58d208720f4a Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 2 Mar 2025 13:27:07 +0300
Subject: [PATCH 05/57] Added tests for writer and reader (hw3 - lzw) (wip)
---
.../ArbitraryBitReaderWriterTests.cs | 96 +++++++++++++++++++
.../Zipper.Tests/ArbitraryBitWriterTests.cs | 83 ++++++++++++++++
Zipper/Zipper.Tests/GlobalSuppressions.cs | 8 ++
3 files changed, 187 insertions(+)
create mode 100644 Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
create mode 100644 Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
create mode 100644 Zipper/Zipper.Tests/GlobalSuppressions.cs
diff --git a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
new file mode 100644
index 0000000..ae3c5a6
--- /dev/null
+++ b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
@@ -0,0 +1,96 @@
+namespace Zipper.Tests;
+
+public class ArbitraryBitReaderWriterTests
+{
+ private static readonly TestData[] TestDataSource = GenerateData();
+
+ [Test]
+ public void Reader_ShouldReadValues_WrittenBy_Writer_Correctly([ValueSource(nameof(TestDataSource))] TestData data)
+ {
+ int memorySize = (int)Math.Ceiling(data.Width * data.Numbers.Length / 8f);
+ var backingMemory = new byte[memorySize];
+
+ using (var memory = new MemoryStream(backingMemory))
+ {
+ using var writer = new ArbitraryBitWriter(memory, data.Width);
+ for (int i = 0; i < data.Numbers.Length; i++)
+ {
+ writer.Write(data.Numbers[i]);
+ }
+ }
+
+ using (var memory = new MemoryStream(backingMemory))
+ {
+ var reader = new ArbitraryBitReader(memory, data.Width);
+ for (int i = 0; i < data.Numbers.Length; i++)
+ {
+ Assert.Multiple(() =>
+ {
+ Assert.That(reader.ReadNext(out int number), Is.True);
+ Assert.That(number, Is.EqualTo(data.Numbers[i]));
+ });
+ }
+ }
+ }
+
+ [Test]
+ public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable()
+ {
+ int width = 7;
+ int numbersCount = 5;
+ int memorySize = (int)Math.Ceiling(width * numbersCount / 8f);
+ var backingMemory = new byte[memorySize];
+
+ using (var memory = new MemoryStream(backingMemory))
+ {
+ using var writer = new ArbitraryBitWriter(memory, width);
+ for (int i = 0; i < numbersCount; i++)
+ {
+ writer.Write(i * i);
+ }
+ }
+
+ using (var memory = new MemoryStream(backingMemory))
+ {
+ var reader = new ArbitraryBitReader(memory, width);
+ for (int i = 0; i < numbersCount; i++)
+ {
+ Assert.Multiple(() =>
+ {
+ Assert.That(reader.ReadNext(out int number), Is.True);
+ Assert.That(number, Is.EqualTo(i * i));
+ });
+ }
+
+ Assert.That(reader.ReadNext(out _), Is.False);
+ }
+ }
+
+ private static TestData[] GenerateData()
+ {
+ var random = new Random(872375823);
+
+ int minWidth = 4;
+ int maxWidth = 32;
+ int numbersLength = 21;
+
+ var result = new TestData[maxWidth - minWidth + 1];
+ for (int i = 0; i < result.Length; i++)
+ {
+ int width = i + minWidth;
+ int upperBound = (int)Math.Min(1ul << width, int.MaxValue);
+
+ var numbers = new int[numbersLength];
+ for (int j = 0; j < numbersLength; j++)
+ {
+ numbers[j] = random.Next(upperBound);
+ }
+
+ result[i] = new(width, numbers);
+ }
+
+ return result;
+ }
+
+ public readonly record struct TestData(int Width, int[] Numbers);
+}
diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
new file mode 100644
index 0000000..fc8e120
--- /dev/null
+++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
@@ -0,0 +1,83 @@
+namespace Zipper.Tests;
+
+public class ArbitraryBitWriterTests
+{
+ private readonly int width = 11;
+ private MemoryStream memory;
+ private ArbitraryBitWriter writer;
+
+ [SetUp]
+ public void Setup()
+ {
+ memory = new MemoryStream();
+ writer = new(memory, width);
+ }
+
+ [TearDown]
+ public void Teardown()
+ {
+ writer.Dispose();
+ memory.Dispose();
+ }
+
+ [Test]
+ public void Writer_Dispose_ShouldDoNothing_IfCalledTwice()
+ {
+ writer.Write(123);
+
+ writer.Dispose();
+ var position = memory.Position;
+
+ writer.Dispose();
+ Assert.That(memory.Position, Is.EqualTo(position));
+ }
+
+ [Test]
+ public void Writer_Dispose_ShouldDoNothing_IfBufferIsEmpty()
+ {
+ // buffer is filled on every eight Write()
+ for (int i = 0; i < 8; i++)
+ {
+ writer.Write(i * i);
+ }
+
+ var position = memory.Position;
+
+ writer.Dispose();
+ Assert.That(memory.Position, Is.EqualTo(position));
+ }
+
+ [Test]
+ public void Writer_Dispose_ShouldDoNothing_IfCalledWithFalse()
+ {
+ var writer = new TestWriter(memory, width);
+
+ for (int i = 0; i < 5; i++)
+ {
+ writer.Write(i * i);
+ }
+
+ var position = memory.Position;
+
+ writer.Dispose();
+ Assert.That(memory.Position, Is.EqualTo(position));
+ }
+
+ [Test]
+ public void Writer_Write_ShouldThrow_IfDisposed()
+ {
+ writer.Write(123);
+
+ writer.Dispose();
+
+ Assert.Throws(() => writer.Write(456));
+ }
+
+ private class TestWriter(Stream stream, int width) : ArbitraryBitWriter(stream, width)
+ {
+ protected override void Dispose(bool disposing)
+ {
+ base.Dispose(false);
+ }
+ }
+}
diff --git a/Zipper/Zipper.Tests/GlobalSuppressions.cs b/Zipper/Zipper.Tests/GlobalSuppressions.cs
new file mode 100644
index 0000000..eeeaf17
--- /dev/null
+++ b/Zipper/Zipper.Tests/GlobalSuppressions.cs
@@ -0,0 +1,8 @@
+// This file is used by Code Analysis to maintain SuppressMessage
+// attributes that are applied to this project.
+// Project-level suppressions either have no target or are given
+// a specific target and scoped to a namespace, type, member, etc.
+
+using System.Diagnostics.CodeAnalysis;
+
+[assembly: SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1600:Elements should be documented", Justification = "This is tests project")]
From 3cccd8a086da039b1a4f630042c2ca23afd59da1 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 2 Mar 2025 13:28:36 +0300
Subject: [PATCH 06/57] Added BWT (hw3 - lzw) (wip)
---
Zipper/Zipper.Tests/BWTTests.cs | 48 ++++++++++++
Zipper/Zipper/BWT.cs | 126 ++++++++++++++++++++++++++++++++
2 files changed, 174 insertions(+)
create mode 100644 Zipper/Zipper.Tests/BWTTests.cs
create mode 100644 Zipper/Zipper/BWT.cs
diff --git a/Zipper/Zipper.Tests/BWTTests.cs b/Zipper/Zipper.Tests/BWTTests.cs
new file mode 100644
index 0000000..1274afa
--- /dev/null
+++ b/Zipper/Zipper.Tests/BWTTests.cs
@@ -0,0 +1,48 @@
+namespace Zipper.Tests;
+
+public class BWTTests
+{
+ private static readonly string[] StringTestData =
+ [
+ string.Empty,
+
+ "A",
+ "BB",
+ "CCCCCC",
+ "ABACABA",
+ "ABABABABAB",
+ ];
+
+ private static readonly byte[][] TestData =
+ [
+ .. StringTestData.Select(System.Text.Encoding.UTF8.GetBytes),
+ ..GetRandomStrings()
+ ];
+
+ [Test]
+ public void InverseTransform_ShouldBe_SameAs_Input([ValueSource(nameof(TestData))] byte[] input)
+ {
+ Span transformed = stackalloc byte[input.Length];
+ var index = BWT.ForwardTransform(input, transformed);
+
+ Span reconstructed = stackalloc byte[input.Length];
+ BWT.InverseTransform(transformed, index, reconstructed);
+ Assert.That(reconstructed.SequenceEqual(input), Is.True);
+ }
+
+ private static IEnumerable GetRandomStrings()
+ {
+ int seed = 74687324;
+ var random = new Random(seed);
+
+ int steps = 16;
+ int length = 256;
+
+ for (int i = 0; i < steps; i++)
+ {
+ var buffer = new byte[length];
+ random.NextBytes(buffer);
+ yield return buffer;
+ }
+ }
+}
diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs
new file mode 100644
index 0000000..008e7dc
--- /dev/null
+++ b/Zipper/Zipper/BWT.cs
@@ -0,0 +1,126 @@
+namespace Zipper;
+
+using System.Diagnostics;
+
+///
+/// Burrows-Wheeler transform implementation.
+///
+internal static class BWT
+{
+ ///
+ /// Transforms given byte sequence using Burrows-Wheeler algorithm.
+ ///
+ /// Input byte sequence.
+ /// Span to write transformed input to.
+ /// Index that is used to reconstruct byte sequence.
+ public static int ForwardTransform(Memory input, Span output)
+ {
+ Debug.Assert(input.Length == output.Length, "Length of input and output should be the same");
+ int length = input.Length;
+
+ if (length == 0)
+ {
+ return -1;
+ }
+
+ Span offsets = stackalloc int[length];
+ for (int i = 0; i < length; i++)
+ {
+ offsets[i] = i;
+ }
+
+ int Compare(int x, int y)
+ {
+ var inputSpan = input.Span;
+ for (int i = 0; i < length; i++)
+ {
+ int compare = inputSpan[(i + x) % length] - inputSpan[(i + y) % length];
+ if (compare != 0)
+ {
+ return compare;
+ }
+ }
+
+ return 0;
+ }
+
+ offsets.Sort(Compare);
+
+ var inputSpan = input.Span;
+ int? identityPosition = null;
+ for (int i = 0; i < length; i++)
+ {
+ if (offsets[i] == 0)
+ {
+ identityPosition = i;
+ }
+
+ output[i] = inputSpan[(offsets[i] + length - 1) % length];
+ }
+
+ Debug.Assert(identityPosition.HasValue, "Identity position not found");
+
+ return identityPosition.Value;
+ }
+
+ ///
+ /// Reconstructs byte sequence transformed with Burrows-Wheeler algorithm.
+ ///
+ /// Transformed byte sequence.
+ /// Index that is used to reconstruct byte sequence.
+ /// Span to write reconstructed byte sequence to.
+ public static void InverseTransform(Span input, int identityIndex, Span output)
+ {
+ Debug.Assert(input.Length == output.Length, "Length of input and output should be the same");
+
+ if (identityIndex == -1)
+ {
+ return;
+ }
+
+ int length = input.Length;
+
+ Span appearances = stackalloc int[length];
+ Span lastAppearances = stackalloc int[256];
+ Span byteCounter = stackalloc int[256];
+
+ for (int i = 0; i < 256; i++)
+ {
+ lastAppearances[i] = -1;
+ }
+
+ for (int i = 0; i < length; i++)
+ {
+ byte currentByte = input[i];
+ byteCounter[currentByte]++;
+
+ int lastAppearance = lastAppearances[currentByte];
+ appearances[i] = lastAppearance == -1 ? 0 : appearances[lastAppearance] + 1;
+ lastAppearances[currentByte] = i;
+ }
+
+ Span lesserBytesCounter = stackalloc int[256];
+ int previousCount = 0;
+ for (int i = 0; i < 256; i++)
+ {
+ if (byteCounter[i] == 0)
+ {
+ continue;
+ }
+
+ lesserBytesCounter[i] = previousCount;
+ previousCount += byteCounter[i];
+ }
+
+ int lastIdentityIndex = identityIndex;
+ byte lastByte = input[lastIdentityIndex];
+ output[^1] = input[identityIndex];
+
+ for (int i = 1; i < length; i++)
+ {
+ lastIdentityIndex = appearances[lastIdentityIndex] + lesserBytesCounter[lastByte];
+ lastByte = input[lastIdentityIndex];
+ output[^(i + 1)] = lastByte;
+ }
+ }
+}
From 44a95c191f2a73b7ec96b0f50d16c01149e4e66b Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Mon, 3 Mar 2025 22:43:31 +0300
Subject: [PATCH 07/57] Switched writer and reader to uint (hw3 - lzw) (wip)
---
.../Zipper.Tests/ArbitraryBitReaderWriterTests.cs | 14 +++++++-------
Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs | 4 ++--
Zipper/Zipper/ArbitraryBitReader.cs | 8 ++++----
Zipper/Zipper/ArbitraryBitWriter.cs | 8 ++++----
4 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
index ae3c5a6..e45222f 100644
--- a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
+++ b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
@@ -26,7 +26,7 @@ public void Reader_ShouldReadValues_WrittenBy_Writer_Correctly([ValueSource(name
{
Assert.Multiple(() =>
{
- Assert.That(reader.ReadNext(out int number), Is.True);
+ Assert.That(reader.ReadNext(out uint number), Is.True);
Assert.That(number, Is.EqualTo(data.Numbers[i]));
});
}
@@ -46,7 +46,7 @@ public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable()
using var writer = new ArbitraryBitWriter(memory, width);
for (int i = 0; i < numbersCount; i++)
{
- writer.Write(i * i);
+ writer.Write((uint)(i * i));
}
}
@@ -57,7 +57,7 @@ public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable()
{
Assert.Multiple(() =>
{
- Assert.That(reader.ReadNext(out int number), Is.True);
+ Assert.That(reader.ReadNext(out uint number), Is.True);
Assert.That(number, Is.EqualTo(i * i));
});
}
@@ -78,12 +78,12 @@ private static TestData[] GenerateData()
for (int i = 0; i < result.Length; i++)
{
int width = i + minWidth;
- int upperBound = (int)Math.Min(1ul << width, int.MaxValue);
+ long upperBound = 1L << width;
- var numbers = new int[numbersLength];
+ var numbers = new uint[numbersLength];
for (int j = 0; j < numbersLength; j++)
{
- numbers[j] = random.Next(upperBound);
+ numbers[j] = (uint)random.NextInt64(upperBound);
}
result[i] = new(width, numbers);
@@ -92,5 +92,5 @@ private static TestData[] GenerateData()
return result;
}
- public readonly record struct TestData(int Width, int[] Numbers);
+ public readonly record struct TestData(int Width, uint[] Numbers);
}
diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
index fc8e120..915e168 100644
--- a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
+++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
@@ -38,7 +38,7 @@ public void Writer_Dispose_ShouldDoNothing_IfBufferIsEmpty()
// buffer is filled on every eight Write()
for (int i = 0; i < 8; i++)
{
- writer.Write(i * i);
+ writer.Write((uint)(i * i));
}
var position = memory.Position;
@@ -54,7 +54,7 @@ public void Writer_Dispose_ShouldDoNothing_IfCalledWithFalse()
for (int i = 0; i < 5; i++)
{
- writer.Write(i * i);
+ writer.Write((uint)(i * i));
}
var position = memory.Position;
diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs
index c284edb..9e35d95 100644
--- a/Zipper/Zipper/ArbitraryBitReader.cs
+++ b/Zipper/Zipper/ArbitraryBitReader.cs
@@ -1,7 +1,7 @@
namespace Zipper;
///
-/// Reads integers of arbitrary width.
+/// Reads unsigned integers of arbitrary width.
///
internal class ArbitraryBitReader
{
@@ -30,7 +30,7 @@ public ArbitraryBitReader(Stream stream, int width)
///
/// When this method returns, contains the value that was read, if read successfully; otherwise, zero.
/// if was successfuly read, otherwise.
- public bool ReadNext(out int number)
+ public bool ReadNext(out uint number)
{
number = 0;
@@ -50,8 +50,8 @@ public bool ReadNext(out int number)
}
int remainingBitsToRead = 8 - bitsReadFromBuffer.Value;
- int mask = 0xFF >> bitsReadFromBuffer.Value;
- int toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth);
+ uint mask = 0xFFu >> bitsReadFromBuffer.Value;
+ uint toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth);
int previousRemainingWidth = remainingWidth;
remainingWidth -= remainingBitsToRead;
diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs
index a4b845d..d287c97 100644
--- a/Zipper/Zipper/ArbitraryBitWriter.cs
+++ b/Zipper/Zipper/ArbitraryBitWriter.cs
@@ -1,7 +1,7 @@
namespace Zipper;
///
-/// Writes integers of arbitrary width.
+/// Writes unsigned integers of arbitrary width.
///
internal class ArbitraryBitWriter : IDisposable
{
@@ -31,11 +31,11 @@ public ArbitraryBitWriter(Stream stream, int width)
/// Writes to the underlying stream.
///
/// Number to write.
- public void Write(int number)
+ public void Write(uint number)
{
ObjectDisposedException.ThrowIf(disposed, this);
- number &= (int)(0xFFFFFFFF >> (32 - width));
+ number &= 0xFFFFFFFF >> (32 - width);
int remainingWidth = width;
while (remainingWidth > 0)
@@ -44,7 +44,7 @@ public void Write(int number)
int bitsWrittenToCurrentByte = bitsWrittenInBuffer % 8;
int bitsRemainingInCurrentByte = 8 - bitsWrittenToCurrentByte;
- int toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte);
+ uint toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte);
int previousRemainingWidth = remainingWidth;
remainingWidth -= bitsRemainingInCurrentByte;
remainingWidth = Math.Max(0, remainingWidth);
From f80bcc9bee5f27969bd773da9b7553a77a2f2acb Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Mon, 3 Mar 2025 23:12:41 +0300
Subject: [PATCH 08/57] Added leaveOpen option and Flush() to writer (hw3 -
lzw) (wip)
---
.../Zipper.Tests/ArbitraryBitWriterTests.cs | 62 ++++++++++++++++++-
Zipper/Zipper/ArbitraryBitWriter.cs | 46 +++++++++-----
2 files changed, 92 insertions(+), 16 deletions(-)
diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
index 915e168..f5eb8a2 100644
--- a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
+++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
@@ -10,7 +10,7 @@ public class ArbitraryBitWriterTests
public void Setup()
{
memory = new MemoryStream();
- writer = new(memory, width);
+ writer = new(memory, width, true);
}
[TearDown]
@@ -47,6 +47,56 @@ public void Writer_Dispose_ShouldDoNothing_IfBufferIsEmpty()
Assert.That(memory.Position, Is.EqualTo(position));
}
+ [Test]
+ public void Writer_Flush_ShouldDoNothing_IfBufferIsEmpty()
+ {
+ // buffer is filled on every eighth Write()
+ for (int i = 0; i < 8; i++)
+ {
+ writer.Write((uint)(i * i));
+ }
+
+ var position = memory.Position;
+
+ writer.Flush();
+ Assert.That(memory.Position, Is.EqualTo(position));
+ }
+
+ [Test]
+ public void Writer_Flush_ShouldFlushHalfFilledBuffer()
+ {
+ // buffer is filled on every eighth Write(), so write only 4 numbers
+ for (int i = 0; i < 4; i++)
+ {
+ writer.Write((uint)(i * i));
+ }
+
+ var position = memory.Position;
+
+ writer.Flush();
+ Assert.That(memory.Position, Is.Not.EqualTo(position));
+ }
+
+ [Test]
+ public void Writer_Dispose_ShouldDisposeStream_IfLeaveOpenWasInitializedWith_False()
+ {
+ var closingWriter = new ArbitraryBitWriter(memory, width, false);
+
+ closingWriter.Dispose();
+
+ // getting any property should throw if stream was disposed
+ Assert.Throws(() => _ = memory.Position);
+ }
+
+ [Test]
+ public void Writer_Dispose_ShouldNotDisposeStream_IfLeaveOpenWasInitializedWith_True()
+ {
+ writer.Dispose();
+
+ // getting any property should not throw if stream was not disposed
+ Assert.DoesNotThrow(() => _ = memory.Position);
+ }
+
[Test]
public void Writer_Dispose_ShouldDoNothing_IfCalledWithFalse()
{
@@ -73,6 +123,16 @@ public void Writer_Write_ShouldThrow_IfDisposed()
Assert.Throws(() => writer.Write(456));
}
+ [Test]
+ public void Writer_Flush_ShouldThrow_IfDisposed()
+ {
+ writer.Write(123);
+
+ writer.Dispose();
+
+ Assert.Throws(writer.Flush);
+ }
+
private class TestWriter(Stream stream, int width) : ArbitraryBitWriter(stream, width)
{
protected override void Dispose(bool disposing)
diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs
index d287c97..e6d9d5b 100644
--- a/Zipper/Zipper/ArbitraryBitWriter.cs
+++ b/Zipper/Zipper/ArbitraryBitWriter.cs
@@ -8,6 +8,7 @@ internal class ArbitraryBitWriter : IDisposable
private readonly Stream stream;
private readonly int width;
private readonly byte[] buffer;
+ private readonly bool leaveOpen;
private int bitsWrittenInBuffer;
private bool disposed = false;
@@ -16,13 +17,15 @@ internal class ArbitraryBitWriter : IDisposable
///
/// Stream to write to.
/// Width of integers between 4 and 32 bits.
- public ArbitraryBitWriter(Stream stream, int width)
+ /// to leave the open after disposing the object, otherwise.
+ public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false)
{
ArgumentOutOfRangeException.ThrowIfLessThan(width, 4, nameof(width));
ArgumentOutOfRangeException.ThrowIfGreaterThan(width, 32, nameof(width));
this.stream = stream;
this.width = width;
+ this.leaveOpen = leaveOpen;
buffer = new byte[width];
bitsWrittenInBuffer = 0;
}
@@ -57,12 +60,29 @@ public void Write(uint number)
if (bitsWrittenInBuffer >= buffer.Length * 8)
{
- stream.Write(buffer);
- Array.Clear(buffer);
- bitsWrittenInBuffer = 0;
+ Flush();
}
}
+ ///
+ /// Flushes the internal buffer.
+ ///
+ public void Flush()
+ {
+ ObjectDisposedException.ThrowIf(disposed, this);
+
+ if (bitsWrittenInBuffer == 0)
+ {
+ return;
+ }
+
+ int bytesWrittenInBuffer = (int)Math.Ceiling(bitsWrittenInBuffer / 8f);
+ stream.Write(buffer.AsSpan()[..bytesWrittenInBuffer]);
+
+ Array.Clear(buffer);
+ bitsWrittenInBuffer = 0;
+ }
+
///
/// Releases all resources used by the current instance of the class.
///
@@ -82,19 +102,15 @@ protected virtual void Dispose(bool disposing)
return;
}
- disposed = true;
-
- if (!disposing)
+ if (disposing)
{
- return;
- }
+ Flush();
+ if (!leaveOpen)
+ {
+ stream.Dispose();
+ }
- if (bitsWrittenInBuffer == 0)
- {
- return;
+ disposed = true;
}
-
- int bytesWrittenInBuffer = (int)Math.Ceiling(bitsWrittenInBuffer / 8f);
- stream.Write(buffer.AsSpan()[..bytesWrittenInBuffer]);
}
}
From e76291122720680ae34fd7806b13b8b35dd5d88e Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Tue, 4 Mar 2025 00:23:33 +0300
Subject: [PATCH 09/57] Removed unnecessary casting to Span in Writer.Flush()
(hw3 - lzw) (wip)
---
Zipper/Zipper/ArbitraryBitWriter.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs
index e6d9d5b..4789cbb 100644
--- a/Zipper/Zipper/ArbitraryBitWriter.cs
+++ b/Zipper/Zipper/ArbitraryBitWriter.cs
@@ -77,7 +77,7 @@ public void Flush()
}
int bytesWrittenInBuffer = (int)Math.Ceiling(bitsWrittenInBuffer / 8f);
- stream.Write(buffer.AsSpan()[..bytesWrittenInBuffer]);
+ stream.Write(buffer, 0, bytesWrittenInBuffer);
Array.Clear(buffer);
bitsWrittenInBuffer = 0;
From 17fe1fabfc1fa212df2dba4e4111089f5fbbc14b Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Mon, 3 Mar 2025 23:37:29 +0300
Subject: [PATCH 10/57] Replaced stackalloc with ArrayPool (hw3 - lzw) (wip)
---
Zipper/Zipper/BWT.cs | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs
index 008e7dc..499cca9 100644
--- a/Zipper/Zipper/BWT.cs
+++ b/Zipper/Zipper/BWT.cs
@@ -1,5 +1,6 @@
namespace Zipper;
+using System.Buffers;
using System.Diagnostics;
///
@@ -23,7 +24,7 @@ public static int ForwardTransform(Memory input, Span output)
return -1;
}
- Span offsets = stackalloc int[length];
+ int[] offsets = ArrayPool.Shared.Rent(length);
for (int i = 0; i < length; i++)
{
offsets[i] = i;
@@ -44,7 +45,7 @@ int Compare(int x, int y)
return 0;
}
- offsets.Sort(Compare);
+ Array.Sort(offsets, Compare);
var inputSpan = input.Span;
int? identityPosition = null;
@@ -58,6 +59,8 @@ int Compare(int x, int y)
output[i] = inputSpan[(offsets[i] + length - 1) % length];
}
+ ArrayPool.Shared.Return(offsets);
+
Debug.Assert(identityPosition.HasValue, "Identity position not found");
return identityPosition.Value;
@@ -80,7 +83,7 @@ public static void InverseTransform(Span input, int identityIndex, Span appearances = stackalloc int[length];
+ int[] appearances = ArrayPool.Shared.Rent(length);
Span lastAppearances = stackalloc int[256];
Span byteCounter = stackalloc int[256];
@@ -122,5 +125,7 @@ public static void InverseTransform(Span input, int identityIndex, Span.Shared.Return(appearances);
}
}
From 89a2749aa3f5e385c9146d389f0cc02bc35715c6 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Tue, 4 Mar 2025 00:17:50 +0300
Subject: [PATCH 11/57] Fixed sorting error in BWT.ForwardTransform() (hw3 -
lzw) (wip)
---
Zipper/Zipper/BWT.cs | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs
index 499cca9..45b5b78 100644
--- a/Zipper/Zipper/BWT.cs
+++ b/Zipper/Zipper/BWT.cs
@@ -45,7 +45,9 @@ int Compare(int x, int y)
return 0;
}
- Array.Sort(offsets, Compare);
+ var offsetsSpan = offsets.AsSpan(0, length);
+
+ offsetsSpan.Sort(Compare);
var inputSpan = input.Span;
int? identityPosition = null;
From ae7ecc6dfa64a1ce6f06f5a78758999daea3f41a Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Tue, 4 Mar 2025 00:19:18 +0300
Subject: [PATCH 12/57] Moved random byte sequence generation for tests to its
own class (hw3 - lzw) (wip)
---
Zipper/Zipper.Tests/BWTTests.cs | 18 +-----------------
Zipper/Zipper.Tests/TestUtil.cs | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+), 17 deletions(-)
create mode 100644 Zipper/Zipper.Tests/TestUtil.cs
diff --git a/Zipper/Zipper.Tests/BWTTests.cs b/Zipper/Zipper.Tests/BWTTests.cs
index 1274afa..62c0db8 100644
--- a/Zipper/Zipper.Tests/BWTTests.cs
+++ b/Zipper/Zipper.Tests/BWTTests.cs
@@ -16,7 +16,7 @@ public class BWTTests
private static readonly byte[][] TestData =
[
.. StringTestData.Select(System.Text.Encoding.UTF8.GetBytes),
- ..GetRandomStrings()
+ .. TestUtil.GetRandomStrings()
];
[Test]
@@ -29,20 +29,4 @@ public void InverseTransform_ShouldBe_SameAs_Input([ValueSource(nameof(TestData)
BWT.InverseTransform(transformed, index, reconstructed);
Assert.That(reconstructed.SequenceEqual(input), Is.True);
}
-
- private static IEnumerable GetRandomStrings()
- {
- int seed = 74687324;
- var random = new Random(seed);
-
- int steps = 16;
- int length = 256;
-
- for (int i = 0; i < steps; i++)
- {
- var buffer = new byte[length];
- random.NextBytes(buffer);
- yield return buffer;
- }
- }
}
diff --git a/Zipper/Zipper.Tests/TestUtil.cs b/Zipper/Zipper.Tests/TestUtil.cs
new file mode 100644
index 0000000..00b8ccd
--- /dev/null
+++ b/Zipper/Zipper.Tests/TestUtil.cs
@@ -0,0 +1,20 @@
+namespace Zipper.Tests;
+
+public static class TestUtil
+{
+ public static IEnumerable GetRandomStrings()
+ {
+ int seed = 74687324;
+ var random = new Random(seed);
+
+ int steps = 16;
+ int length = 256;
+
+ for (int i = 0; i < steps; i++)
+ {
+ var buffer = new byte[length];
+ random.NextBytes(buffer);
+ yield return buffer;
+ }
+ }
+}
From 87ef171e6b87a202d7fdd7779c5d1ab103d82212 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Tue, 4 Mar 2025 00:19:58 +0300
Subject: [PATCH 13/57] Added trie (hw3 - lzw) (wip)
---
Zipper/Zipper.Tests/TrieTests.cs | 93 ++++++++++++++++++
Zipper/Zipper/Trie.cs | 159 +++++++++++++++++++++++++++++++
2 files changed, 252 insertions(+)
create mode 100644 Zipper/Zipper.Tests/TrieTests.cs
create mode 100644 Zipper/Zipper/Trie.cs
diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs
new file mode 100644
index 0000000..0f4d818
--- /dev/null
+++ b/Zipper/Zipper.Tests/TrieTests.cs
@@ -0,0 +1,93 @@
+namespace Zipper.Tests;
+
+public class TrieTests
+{
+ private static readonly byte[][] TestStrings =
+ [
+ .. new string[]
+ {
+ string.Empty,
+ "A",
+ "AB",
+ "ABC",
+ "ABCD",
+ }
+ .Select(System.Text.Encoding.UTF8.GetBytes)
+ ];
+
+ private static readonly byte[][][] TestStringsSet =
+ [
+ TestStrings,
+ [.. TestUtil.GetRandomStrings()]
+ ];
+
+ private Trie trie;
+
+ [SetUp]
+ public void Setup()
+ {
+ trie = new();
+ }
+
+ [Test]
+ public void TrieAdd_And_TrieRemove_ReturnsCorrectly_SingleValue([ValueSource(nameof(TestStrings))] byte[] item)
+ {
+ Assert.That(() => trie.Add(item), Is.True);
+ Assert.That(() => trie.Add(item), Is.False);
+ Assert.That(trie.Size, Is.EqualTo(1));
+
+ Assert.That(() => trie.Remove(item), Is.True);
+ Assert.That(() => trie.Remove(item), Is.False);
+ Assert.That(trie.Size, Is.EqualTo(0));
+ }
+
+ [Test]
+ public void TrieAdd_And_TrieRemove_ReturnsCorrectly_MultipleValues([ValueSource(nameof(TestStringsSet))] byte[][] strings)
+ {
+ for (int i = 0; i < strings.Length; i++)
+ {
+ var item = strings[i];
+ Assert.That(() => trie.Add(item), Is.True);
+ Assert.That(() => trie.Add(item), Is.False);
+ Assert.That(trie.Size, Is.EqualTo(i + 1));
+ }
+
+ for (int i = strings.Length - 1; i >= 0; i--)
+ {
+ var item = strings[i];
+ Assert.That(() => trie.Remove(item), Is.True);
+ Assert.That(() => trie.Remove(item), Is.False);
+ Assert.That(trie.Size, Is.EqualTo(i));
+ }
+ }
+
+ [Test]
+ public void TrieContains_IsCorrect([ValueSource(nameof(TestStrings))] byte[] item)
+ {
+ Assert.That(() => trie.Contains(item), Is.False);
+ trie.Add(item);
+ Assert.That(() => trie.Contains(item), Is.True);
+ trie.Remove(item);
+ Assert.That(() => trie.Contains(item), Is.False);
+ }
+
+ [Test]
+ public void TrieHowManyStartsWithPrefix_IsCorrect()
+ {
+ int length = TestStrings.Length;
+ for (int i = 0; i < length; i++)
+ {
+ var item = TestStrings[i];
+ Assert.That(() => trie.Add(item), Is.True);
+ Assert.That(() => trie.HowManyStartsWithPrefix(item), Is.EqualTo(1));
+ }
+
+ for (int i = 0; i < length; i++)
+ {
+ var item = TestStrings[i];
+ Assert.That(() => trie.HowManyStartsWithPrefix(item), Is.EqualTo(length - i));
+ }
+
+ Assert.That(() => trie.HowManyStartsWithPrefix("random_prefix"u8), Is.Zero);
+ }
+}
diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs
new file mode 100644
index 0000000..8166f6a
--- /dev/null
+++ b/Zipper/Zipper/Trie.cs
@@ -0,0 +1,159 @@
+namespace Zipper;
+
+using System.Diagnostics.CodeAnalysis;
+
+///
+/// Trie data structure, also known as prefix tree.
+///
+public class Trie
+{
+ private readonly Node rootNode = new(null, 0);
+
+ ///
+ /// Gets count of all strings stored in this trie.
+ ///
+ public int Size => rootNode.TotalDescendants;
+
+ ///
+ /// Adds to this trie.
+ ///
+ /// The byte sequence to add.
+ /// if wasn't present in trie before adding it, otherwise.
+ public bool Add(ReadOnlySpan item)
+ {
+ var lastNode = rootNode;
+ foreach (var character in item)
+ {
+ lastNode = lastNode.GetOrCreateChild(character);
+ }
+
+ if (lastNode.EndOfWord)
+ {
+ return false;
+ }
+
+ lastNode.MarkAsEndOfWord();
+
+ return true;
+ }
+
+ ///
+ /// Checks if this trie contains .
+ ///
+ /// The byte sequence to seek.
+ /// if is present in trie, otherwise.
+ public bool Contains(ReadOnlySpan item)
+ => GetNode(item, out var node) && node.EndOfWord;
+
+ ///
+ /// Removes from this trie.
+ ///
+ /// The byte sequence to remove.
+ /// if was present in trie before removing it, otherwise.
+ public bool Remove(ReadOnlySpan item)
+ {
+ if (!GetNode(item, out var node) || node.EndOfWord == false)
+ {
+ return false;
+ }
+
+ node.RemoveSelf();
+
+ return true;
+ }
+
+ ///
+ /// Gets count of byte sequences stored in this trie that start with .
+ ///
+ /// Prefix to check against.
+ /// Count of byte sequences stored in this trie that start with .
+ public int HowManyStartsWithPrefix(ReadOnlySpan prefix)
+ => GetNode(prefix, out var node) ? node.TotalDescendants : 0;
+
+ private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node node)
+ {
+ node = rootNode;
+ foreach (var character in prefix)
+ {
+ if (!node.TryGetChild(character, out node))
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ private class Node
+ {
+ private readonly Dictionary children = [];
+ private readonly Node? parent;
+ private readonly byte value;
+
+ internal Node(Node? parent, byte value)
+ {
+ this.parent = parent;
+ this.value = value;
+ }
+
+ public bool EndOfWord { get; private set; }
+
+ // node itself is counted as descendant if marked as end of word
+ public int TotalDescendants { get; set; }
+
+ public Node GetOrCreateChild(byte value)
+ {
+ if (!children.TryGetValue(value, out Node? node))
+ {
+ node = new(this, value);
+ children[value] = node;
+
+ return node;
+ }
+
+ return node;
+ }
+
+ public void RemoveSelf()
+ {
+ EndOfWord = false;
+ TotalDescendants--;
+
+ if (parent == null)
+ {
+ return;
+ }
+
+ var lastNode = parent;
+ var lastValue = value;
+ while (lastNode.parent != null && lastNode.TotalDescendants == 1)
+ {
+ lastValue = lastNode.value;
+ lastNode = lastNode.parent;
+ }
+
+ lastNode.children.Remove(lastValue);
+
+ while (lastNode != null)
+ {
+ lastNode.TotalDescendants--;
+ lastNode = lastNode.parent;
+ }
+ }
+
+ public bool TryGetChild(byte value, [MaybeNullWhen(false)] out Node node) => children.TryGetValue(value, out node);
+
+ public void MarkAsEndOfWord()
+ {
+ EndOfWord = true;
+ TotalDescendants++;
+
+ var lastNode = parent;
+ while (lastNode != null)
+ {
+ lastNode.TotalDescendants++;
+ lastNode = lastNode.parent;
+ }
+ }
+ }
+}
From ae3e24f2e6c7a2f2b5b5d944c85d2757fe8c7b95 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Tue, 4 Mar 2025 01:19:30 +0300
Subject: [PATCH 14/57] Rephrased api docs for Reader.ReadNext() for
consistency (hw3 - lzw) (wip)
---
Zipper/Zipper/ArbitraryBitReader.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs
index 9e35d95..f9e4c51 100644
--- a/Zipper/Zipper/ArbitraryBitReader.cs
+++ b/Zipper/Zipper/ArbitraryBitReader.cs
@@ -28,7 +28,7 @@ public ArbitraryBitReader(Stream stream, int width)
///
/// Reads number from underlying stream and stores it in the .
///
- /// When this method returns, contains the value that was read, if read successfully; otherwise, zero.
+ /// When this method returns, contains the value that was read, if read successfully, zero otherwise.
/// if was successfuly read, otherwise.
public bool ReadNext(out uint number)
{
From aa38e5145193ad84519d17572f66307079654be5 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Tue, 4 Mar 2025 01:19:56 +0300
Subject: [PATCH 15/57] Fixed api docs for Reader.ReadNext() (hw3 - lzw) (wip)
---
Zipper/Zipper/ArbitraryBitReader.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs
index f9e4c51..5286ba3 100644
--- a/Zipper/Zipper/ArbitraryBitReader.cs
+++ b/Zipper/Zipper/ArbitraryBitReader.cs
@@ -29,7 +29,7 @@ public ArbitraryBitReader(Stream stream, int width)
/// Reads number from underlying stream and stores it in the .
///
/// When this method returns, contains the value that was read, if read successfully, zero otherwise.
- /// if was successfuly read, otherwise.
+ /// if was successfuly read, otherwise.
public bool ReadNext(out uint number)
{
number = 0;
From 80bb5179a5fb54cc186d23b4a6d2a37038184fb9 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Tue, 4 Mar 2025 01:20:34 +0300
Subject: [PATCH 16/57] Removed unnecessary features in trie (hw3 - lzw) (wip)
---
Zipper/Zipper.Tests/TrieTests.cs | 62 -------------------
Zipper/Zipper/Trie.cs | 100 ++-----------------------------
2 files changed, 6 insertions(+), 156 deletions(-)
diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs
index 0f4d818..0a2a269 100644
--- a/Zipper/Zipper.Tests/TrieTests.cs
+++ b/Zipper/Zipper.Tests/TrieTests.cs
@@ -28,66 +28,4 @@ public void Setup()
{
trie = new();
}
-
- [Test]
- public void TrieAdd_And_TrieRemove_ReturnsCorrectly_SingleValue([ValueSource(nameof(TestStrings))] byte[] item)
- {
- Assert.That(() => trie.Add(item), Is.True);
- Assert.That(() => trie.Add(item), Is.False);
- Assert.That(trie.Size, Is.EqualTo(1));
-
- Assert.That(() => trie.Remove(item), Is.True);
- Assert.That(() => trie.Remove(item), Is.False);
- Assert.That(trie.Size, Is.EqualTo(0));
- }
-
- [Test]
- public void TrieAdd_And_TrieRemove_ReturnsCorrectly_MultipleValues([ValueSource(nameof(TestStringsSet))] byte[][] strings)
- {
- for (int i = 0; i < strings.Length; i++)
- {
- var item = strings[i];
- Assert.That(() => trie.Add(item), Is.True);
- Assert.That(() => trie.Add(item), Is.False);
- Assert.That(trie.Size, Is.EqualTo(i + 1));
- }
-
- for (int i = strings.Length - 1; i >= 0; i--)
- {
- var item = strings[i];
- Assert.That(() => trie.Remove(item), Is.True);
- Assert.That(() => trie.Remove(item), Is.False);
- Assert.That(trie.Size, Is.EqualTo(i));
- }
- }
-
- [Test]
- public void TrieContains_IsCorrect([ValueSource(nameof(TestStrings))] byte[] item)
- {
- Assert.That(() => trie.Contains(item), Is.False);
- trie.Add(item);
- Assert.That(() => trie.Contains(item), Is.True);
- trie.Remove(item);
- Assert.That(() => trie.Contains(item), Is.False);
- }
-
- [Test]
- public void TrieHowManyStartsWithPrefix_IsCorrect()
- {
- int length = TestStrings.Length;
- for (int i = 0; i < length; i++)
- {
- var item = TestStrings[i];
- Assert.That(() => trie.Add(item), Is.True);
- Assert.That(() => trie.HowManyStartsWithPrefix(item), Is.EqualTo(1));
- }
-
- for (int i = 0; i < length; i++)
- {
- var item = TestStrings[i];
- Assert.That(() => trie.HowManyStartsWithPrefix(item), Is.EqualTo(length - i));
- }
-
- Assert.That(() => trie.HowManyStartsWithPrefix("random_prefix"u8), Is.Zero);
- }
}
diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs
index 8166f6a..974f806 100644
--- a/Zipper/Zipper/Trie.cs
+++ b/Zipper/Zipper/Trie.cs
@@ -7,12 +7,7 @@ namespace Zipper;
///
public class Trie
{
- private readonly Node rootNode = new(null, 0);
-
- ///
- /// Gets count of all strings stored in this trie.
- ///
- public int Size => rootNode.TotalDescendants;
+ private readonly Node rootNode = new();
///
/// Adds to this trie.
@@ -32,44 +27,11 @@ public bool Add(ReadOnlySpan item)
return false;
}
- lastNode.MarkAsEndOfWord();
-
- return true;
- }
-
- ///
- /// Checks if this trie contains .
- ///
- /// The byte sequence to seek.
- /// if is present in trie, otherwise.
- public bool Contains(ReadOnlySpan item)
- => GetNode(item, out var node) && node.EndOfWord;
-
- ///
- /// Removes from this trie.
- ///
- /// The byte sequence to remove.
- /// if was present in trie before removing it, otherwise.
- public bool Remove(ReadOnlySpan item)
- {
- if (!GetNode(item, out var node) || node.EndOfWord == false)
- {
- return false;
- }
-
- node.RemoveSelf();
+ lastNode.EndOfWord = true;
return true;
}
- ///
- /// Gets count of byte sequences stored in this trie that start with .
- ///
- /// Prefix to check against.
- /// Count of byte sequences stored in this trie that start with .
- public int HowManyStartsWithPrefix(ReadOnlySpan prefix)
- => GetNode(prefix, out var node) ? node.TotalDescendants : 0;
-
private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node node)
{
node = rootNode;
@@ -87,25 +49,14 @@ private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node
private class Node
{
private readonly Dictionary children = [];
- private readonly Node? parent;
- private readonly byte value;
- internal Node(Node? parent, byte value)
- {
- this.parent = parent;
- this.value = value;
- }
-
- public bool EndOfWord { get; private set; }
-
- // node itself is counted as descendant if marked as end of word
- public int TotalDescendants { get; set; }
+ public bool EndOfWord { get; set; }
public Node GetOrCreateChild(byte value)
{
if (!children.TryGetValue(value, out Node? node))
{
- node = new(this, value);
+ node = new();
children[value] = node;
return node;
@@ -114,46 +65,7 @@ public Node GetOrCreateChild(byte value)
return node;
}
- public void RemoveSelf()
- {
- EndOfWord = false;
- TotalDescendants--;
-
- if (parent == null)
- {
- return;
- }
-
- var lastNode = parent;
- var lastValue = value;
- while (lastNode.parent != null && lastNode.TotalDescendants == 1)
- {
- lastValue = lastNode.value;
- lastNode = lastNode.parent;
- }
-
- lastNode.children.Remove(lastValue);
-
- while (lastNode != null)
- {
- lastNode.TotalDescendants--;
- lastNode = lastNode.parent;
- }
- }
-
- public bool TryGetChild(byte value, [MaybeNullWhen(false)] out Node node) => children.TryGetValue(value, out node);
-
- public void MarkAsEndOfWord()
- {
- EndOfWord = true;
- TotalDescendants++;
-
- var lastNode = parent;
- while (lastNode != null)
- {
- lastNode.TotalDescendants++;
- lastNode = lastNode.parent;
- }
- }
+ public bool TryGetChild(byte value, [MaybeNullWhen(false)] out Node node)
+ => children.TryGetValue(value, out node);
}
}
From 5ef617b950b714eac5925827b9fba110ef09fe82 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Tue, 4 Mar 2025 01:36:18 +0300
Subject: [PATCH 17/57] Converted trie to dictionary (hw3 - lzw) (wip)
---
Zipper/Zipper.Tests/TrieTests.cs | 32 +++++++++++++++++++++++++++++++
Zipper/Zipper/Trie.cs | 33 ++++++++++++++++++++++++++------
2 files changed, 59 insertions(+), 6 deletions(-)
diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs
index 0a2a269..8344639 100644
--- a/Zipper/Zipper.Tests/TrieTests.cs
+++ b/Zipper/Zipper.Tests/TrieTests.cs
@@ -28,4 +28,36 @@ public void Setup()
{
trie = new();
}
+
+ [Test]
+ public void TrieAdd_And_TrieTryGetValue_ReturnsCorrectly_SingleValue([ValueSource(nameof(TestStrings))] byte[] bytes)
+ {
+ TestAddValue(trie, bytes);
+ }
+
+ [Test]
+ public void TrieAdd_And_TrieTryGetValue_ReturnsCorrectly_MultipleValues([ValueSource(nameof(TestStringsSet))] byte[][] strings)
+ {
+ foreach (var bytes in strings)
+ {
+ TestAddValue(trie, bytes);
+ }
+ }
+
+ private static void TestAddValue(Trie trie, byte[] bytes)
+ {
+ Assert.Multiple(() =>
+ {
+ Assert.That(trie.TryGetValue(bytes, out _), Is.False);
+
+ Assert.That(Add(trie, bytes), Is.True);
+ Assert.That(Add(trie, bytes), Is.False);
+
+ Assert.That(trie.TryGetValue(bytes, out int value), Is.True);
+ Assert.That(value, Is.EqualTo(bytes.Length));
+ });
+ }
+
+ private static bool Add(Trie trie, byte[] bytes)
+ => trie.Add(bytes, bytes.Length);
}
diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs
index 974f806..def50b4 100644
--- a/Zipper/Zipper/Trie.cs
+++ b/Zipper/Zipper/Trie.cs
@@ -3,21 +3,22 @@ namespace Zipper;
using System.Diagnostics.CodeAnalysis;
///
-/// Trie data structure, also known as prefix tree.
+/// Trie data structure, also known as prefix tree, implemented as dictionary.
///
public class Trie
{
private readonly Node rootNode = new();
///
- /// Adds to this trie.
+ /// Adds associated with to this trie.
///
- /// The byte sequence to add.
- /// if wasn't present in trie before adding it, otherwise.
- public bool Add(ReadOnlySpan item)
+ /// The byte sequence to as key.
+ /// The number to add as value.
+ /// if wasn't present in trie before adding it, otherwise.
+ public bool Add(ReadOnlySpan key, int value)
{
var lastNode = rootNode;
- foreach (var character in item)
+ foreach (var character in key)
{
lastNode = lastNode.GetOrCreateChild(character);
}
@@ -28,10 +29,28 @@ public bool Add(ReadOnlySpan item)
}
lastNode.EndOfWord = true;
+ lastNode.Value = value;
return true;
}
+ ///
+ /// Tries to get value associated with .
+ ///
+ /// The key of the value to get.
+ ///
+ /// When this method returns, contains the value associated with , if is found, zero otherwise.
+ ///
+ /// if is found, otherwise.
+ public bool TryGetValue(ReadOnlySpan key, out int value)
+ {
+ var nodeExistsAndHasValue = GetNode(key, out var node) && node.EndOfWord;
+
+ value = node?.Value ?? 0;
+
+ return nodeExistsAndHasValue;
+ }
+
private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node node)
{
node = rootNode;
@@ -52,6 +71,8 @@ private class Node
public bool EndOfWord { get; set; }
+ public int Value { get; set; }
+
public Node GetOrCreateChild(byte value)
{
if (!children.TryGetValue(value, out Node? node))
From 33afe78e68328016b30d9f4661eecc8cf5a325c2 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Tue, 4 Mar 2025 22:12:30 +0300
Subject: [PATCH 18/57] Switched trie to store uint as value instead of int
(hw3 - lzw) (wip)
---
Zipper/Zipper.Tests/TrieTests.cs | 4 ++--
Zipper/Zipper/Trie.cs | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs
index 8344639..190e047 100644
--- a/Zipper/Zipper.Tests/TrieTests.cs
+++ b/Zipper/Zipper.Tests/TrieTests.cs
@@ -53,11 +53,11 @@ private static void TestAddValue(Trie trie, byte[] bytes)
Assert.That(Add(trie, bytes), Is.True);
Assert.That(Add(trie, bytes), Is.False);
- Assert.That(trie.TryGetValue(bytes, out int value), Is.True);
+ Assert.That(trie.TryGetValue(bytes, out uint value), Is.True);
Assert.That(value, Is.EqualTo(bytes.Length));
});
}
private static bool Add(Trie trie, byte[] bytes)
- => trie.Add(bytes, bytes.Length);
+ => trie.Add(bytes, (uint)bytes.Length);
}
diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs
index def50b4..79cd6a3 100644
--- a/Zipper/Zipper/Trie.cs
+++ b/Zipper/Zipper/Trie.cs
@@ -15,7 +15,7 @@ public class Trie
/// The byte sequence to as key.
/// The number to add as value.
/// if wasn't present in trie before adding it, otherwise.
- public bool Add(ReadOnlySpan key, int value)
+ public bool Add(ReadOnlySpan key, uint value)
{
var lastNode = rootNode;
foreach (var character in key)
@@ -42,7 +42,7 @@ public bool Add(ReadOnlySpan key, int value)
/// When this method returns, contains the value associated with , if is found, zero otherwise.
///
/// if is found, otherwise.
- public bool TryGetValue(ReadOnlySpan key, out int value)
+ public bool TryGetValue(ReadOnlySpan key, out uint value)
{
var nodeExistsAndHasValue = GetNode(key, out var node) && node.EndOfWord;
@@ -71,7 +71,7 @@ private class Node
public bool EndOfWord { get; set; }
- public int Value { get; set; }
+ public uint Value { get; set; }
public Node GetOrCreateChild(byte value)
{
From d040fa1e1393ac171798b2f1648fbcdaa0d2103f Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Wed, 5 Mar 2025 02:21:58 +0300
Subject: [PATCH 19/57] Introduced constants for min and max width of numbers
in writer and reader (hw3 - lzw) (wip)
---
Zipper/Zipper/ArbitraryBitReader.cs | 12 +++++++++---
Zipper/Zipper/ArbitraryBitWriter.cs | 16 +++++++++++++---
2 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs
index 5286ba3..973ad03 100644
--- a/Zipper/Zipper/ArbitraryBitReader.cs
+++ b/Zipper/Zipper/ArbitraryBitReader.cs
@@ -5,6 +5,12 @@ namespace Zipper;
///
internal class ArbitraryBitReader
{
+ ///
+ public const int MinWidth = ArbitraryBitWriter.MinWidth;
+
+ ///
+ public const int MaxWidth = ArbitraryBitWriter.MaxWidth;
+
private readonly Stream stream;
private readonly int width;
private byte buffer;
@@ -14,11 +20,11 @@ internal class ArbitraryBitReader
/// Initializes a new instance of the class.
///
/// Stream to write to.
- /// Width of integers between 4 and 32 bits.
+ /// Width of integers between and bits.
public ArbitraryBitReader(Stream stream, int width)
{
- ArgumentOutOfRangeException.ThrowIfLessThan(width, 4, nameof(width));
- ArgumentOutOfRangeException.ThrowIfGreaterThan(width, 32, nameof(width));
+ ArgumentOutOfRangeException.ThrowIfLessThan(width, MinWidth, nameof(width));
+ ArgumentOutOfRangeException.ThrowIfGreaterThan(width, MaxWidth, nameof(width));
this.stream = stream;
this.width = width;
diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs
index 4789cbb..5e09717 100644
--- a/Zipper/Zipper/ArbitraryBitWriter.cs
+++ b/Zipper/Zipper/ArbitraryBitWriter.cs
@@ -5,6 +5,16 @@ namespace Zipper;
///
internal class ArbitraryBitWriter : IDisposable
{
+ ///
+ /// Smallest allowed width of numbers.
+ ///
+ public const int MinWidth = 4;
+
+ ///
+ /// Largest allowed width of numbers.
+ ///
+ public const int MaxWidth = 32;
+
private readonly Stream stream;
private readonly int width;
private readonly byte[] buffer;
@@ -16,12 +26,12 @@ internal class ArbitraryBitWriter : IDisposable
/// Initializes a new instance of the class.
///
/// Stream to write to.
- /// Width of integers between 4 and 32 bits.
+ /// Width of integers between and bits.
/// to leave the open after disposing the object, otherwise.
public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false)
{
- ArgumentOutOfRangeException.ThrowIfLessThan(width, 4, nameof(width));
- ArgumentOutOfRangeException.ThrowIfGreaterThan(width, 32, nameof(width));
+ ArgumentOutOfRangeException.ThrowIfLessThan(width, MinWidth, nameof(width));
+ ArgumentOutOfRangeException.ThrowIfGreaterThan(width, MaxWidth, nameof(width));
this.stream = stream;
this.width = width;
From ccf6ce246f523dbf382e8a89e29b96e875c304bb Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Wed, 5 Mar 2025 02:40:02 +0300
Subject: [PATCH 20/57] Added checks for stream read-/writeability to writer
and reader (hw3 - lzw) (wip)
---
Zipper/Zipper/ArbitraryBitReader.cs | 5 +++++
Zipper/Zipper/ArbitraryBitWriter.cs | 5 +++++
2 files changed, 10 insertions(+)
diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs
index 973ad03..2308421 100644
--- a/Zipper/Zipper/ArbitraryBitReader.cs
+++ b/Zipper/Zipper/ArbitraryBitReader.cs
@@ -26,6 +26,11 @@ public ArbitraryBitReader(Stream stream, int width)
ArgumentOutOfRangeException.ThrowIfLessThan(width, MinWidth, nameof(width));
ArgumentOutOfRangeException.ThrowIfGreaterThan(width, MaxWidth, nameof(width));
+ if (!stream.CanRead)
+ {
+ throw new ArgumentException("Stream does not support reading", nameof(stream));
+ }
+
this.stream = stream;
this.width = width;
bitsReadFromBuffer = null;
diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs
index 5e09717..4c4edfe 100644
--- a/Zipper/Zipper/ArbitraryBitWriter.cs
+++ b/Zipper/Zipper/ArbitraryBitWriter.cs
@@ -33,6 +33,11 @@ public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false)
ArgumentOutOfRangeException.ThrowIfLessThan(width, MinWidth, nameof(width));
ArgumentOutOfRangeException.ThrowIfGreaterThan(width, MaxWidth, nameof(width));
+ if (!stream.CanWrite)
+ {
+ throw new ArgumentException("Stream does not support writing", nameof(stream));
+ }
+
this.stream = stream;
this.width = width;
this.leaveOpen = leaveOpen;
From 3b73ae867f28425e0748fadd6475276e0f0b1b5a Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Wed, 5 Mar 2025 02:50:42 +0300
Subject: [PATCH 21/57] Added more tests for writer and reader (hw3 - lzw)
(wip)
---
.../ArbitraryBitReaderWriterTests.cs | 26 +++++++++++++++++++
1 file changed, 26 insertions(+)
diff --git a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
index e45222f..e67f8a1 100644
--- a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
+++ b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
@@ -4,6 +4,24 @@ public class ArbitraryBitReaderWriterTests
{
private static readonly TestData[] TestDataSource = GenerateData();
+ [Test]
+ public void ReaderAndWriter_ShouldThrowIf_InitializedWith_Width_InDisallowedRange()
+ {
+ Assert.Throws(() => new ArbitraryBitReader(Stream.Null, ArbitraryBitReader.MinWidth - 1));
+ Assert.Throws(() => new ArbitraryBitReader(Stream.Null, ArbitraryBitReader.MaxWidth + 1));
+
+ Assert.Throws(() => new ArbitraryBitWriter(Stream.Null, ArbitraryBitWriter.MinWidth - 1));
+ Assert.Throws(() => new ArbitraryBitWriter(Stream.Null, ArbitraryBitWriter.MaxWidth + 1));
+ }
+
+ [Test]
+ public void Reader_ShouldThrowIf_StreamCanNotRead()
+ => Assert.Throws(() => new ArbitraryBitReader(new TestStream(), ArbitraryBitReader.MinWidth));
+
+ [Test]
+ public void Writer_ShouldThrowIf_StreamCanNotWrite()
+ => Assert.Throws(() => new ArbitraryBitWriter(new TestStream(), ArbitraryBitWriter.MinWidth));
+
[Test]
public void Reader_ShouldReadValues_WrittenBy_Writer_Correctly([ValueSource(nameof(TestDataSource))] TestData data)
{
@@ -93,4 +111,12 @@ private static TestData[] GenerateData()
}
public readonly record struct TestData(int Width, uint[] Numbers);
+
+ // use MemoryStream, because implementing all Stream's abstract membrers leads to bad code coverage
+ private class TestStream : MemoryStream
+ {
+ public override bool CanRead => false;
+
+ public override bool CanWrite => false;
+ }
}
From fc3f02d2a24423c130314d07f791166e2f041820 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Wed, 5 Mar 2025 03:05:32 +0300
Subject: [PATCH 22/57] Use ArrayPool in writer (hw3 - lzw) (wip)
---
Zipper/Zipper/ArbitraryBitWriter.cs | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs
index 4c4edfe..eba0fcb 100644
--- a/Zipper/Zipper/ArbitraryBitWriter.cs
+++ b/Zipper/Zipper/ArbitraryBitWriter.cs
@@ -1,5 +1,7 @@
namespace Zipper;
+using System.Buffers;
+
///
/// Writes unsigned integers of arbitrary width.
///
@@ -15,6 +17,8 @@ internal class ArbitraryBitWriter : IDisposable
///
public const int MaxWidth = 32;
+ private static readonly ArrayPool BufferPool = ArrayPool.Create();
+
private readonly Stream stream;
private readonly int width;
private readonly byte[] buffer;
@@ -41,7 +45,7 @@ public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false)
this.stream = stream;
this.width = width;
this.leaveOpen = leaveOpen;
- buffer = new byte[width];
+ buffer = BufferPool.Rent(width);
bitsWrittenInBuffer = 0;
}
@@ -73,7 +77,7 @@ public void Write(uint number)
bitsWrittenInBuffer += bitsToBeWritten;
}
- if (bitsWrittenInBuffer >= buffer.Length * 8)
+ if (bitsWrittenInBuffer >= width * 8)
{
Flush();
}
@@ -120,6 +124,8 @@ protected virtual void Dispose(bool disposing)
if (disposing)
{
Flush();
+ BufferPool.Return(buffer);
+
if (!leaveOpen)
{
stream.Dispose();
From 0c4c56f8ba8496c4f7f564f9b5b2d1351be29229 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Wed, 5 Mar 2025 03:07:32 +0300
Subject: [PATCH 23/57] Use local ArrayPool instead of shared in BWT (hw3 -
lzw) (wip)
---
Zipper/Zipper/BWT.cs | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs
index 45b5b78..eabe9ce 100644
--- a/Zipper/Zipper/BWT.cs
+++ b/Zipper/Zipper/BWT.cs
@@ -8,6 +8,8 @@ namespace Zipper;
///
internal static class BWT
{
+ private static readonly ArrayPool Pool = ArrayPool.Create();
+
///
/// Transforms given byte sequence using Burrows-Wheeler algorithm.
///
@@ -24,7 +26,7 @@ public static int ForwardTransform(Memory input, Span output)
return -1;
}
- int[] offsets = ArrayPool.Shared.Rent(length);
+ int[] offsets = Pool.Rent(length);
for (int i = 0; i < length; i++)
{
offsets[i] = i;
@@ -61,7 +63,7 @@ int Compare(int x, int y)
output[i] = inputSpan[(offsets[i] + length - 1) % length];
}
- ArrayPool.Shared.Return(offsets);
+ Pool.Return(offsets);
Debug.Assert(identityPosition.HasValue, "Identity position not found");
@@ -85,7 +87,7 @@ public static void InverseTransform(Span input, int identityIndex, Span.Shared.Rent(length);
+ int[] appearances = Pool.Rent(length);
Span lastAppearances = stackalloc int[256];
Span byteCounter = stackalloc int[256];
@@ -128,6 +130,6 @@ public static void InverseTransform(Span input, int identityIndex, Span.Shared.Return(appearances);
+ Pool.Return(appearances);
}
}
From 1ce4bec36bf3d7f041ec557165e770f6e76f603c Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 6 Mar 2025 01:13:29 +0300
Subject: [PATCH 24/57] Switched trie to use generic type as value (hw3 - lzw)
(wip)
---
Zipper/Zipper.Tests/TrieTests.cs | 10 +++++-----
Zipper/Zipper/Trie.cs | 23 ++++++++++++++---------
2 files changed, 19 insertions(+), 14 deletions(-)
diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs
index 190e047..ee12c05 100644
--- a/Zipper/Zipper.Tests/TrieTests.cs
+++ b/Zipper/Zipper.Tests/TrieTests.cs
@@ -21,7 +21,7 @@ public class TrieTests
[.. TestUtil.GetRandomStrings()]
];
- private Trie trie;
+ private Trie trie;
[SetUp]
public void Setup()
@@ -44,7 +44,7 @@ public void TrieAdd_And_TrieTryGetValue_ReturnsCorrectly_MultipleValues([ValueSo
}
}
- private static void TestAddValue(Trie trie, byte[] bytes)
+ private static void TestAddValue(Trie trie, byte[] bytes)
{
Assert.Multiple(() =>
{
@@ -53,11 +53,11 @@ private static void TestAddValue(Trie trie, byte[] bytes)
Assert.That(Add(trie, bytes), Is.True);
Assert.That(Add(trie, bytes), Is.False);
- Assert.That(trie.TryGetValue(bytes, out uint value), Is.True);
+ Assert.That(trie.TryGetValue(bytes, out int value), Is.True);
Assert.That(value, Is.EqualTo(bytes.Length));
});
}
- private static bool Add(Trie trie, byte[] bytes)
- => trie.Add(bytes, (uint)bytes.Length);
+ private static bool Add(Trie trie, byte[] bytes)
+ => trie.Add(bytes, bytes.Length);
}
diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs
index 79cd6a3..4202ea8 100644
--- a/Zipper/Zipper/Trie.cs
+++ b/Zipper/Zipper/Trie.cs
@@ -5,7 +5,9 @@ namespace Zipper;
///
/// Trie data structure, also known as prefix tree, implemented as dictionary.
///
-public class Trie
+/// Type of values.
+public class Trie
+ where T : struct
{
private readonly Node rootNode = new();
@@ -15,7 +17,7 @@ public class Trie
/// The byte sequence to as key.
/// The number to add as value.
/// if wasn't present in trie before adding it, otherwise.
- public bool Add(ReadOnlySpan key, uint value)
+ public bool Add(ReadOnlySpan key, T value)
{
var lastNode = rootNode;
foreach (var character in key)
@@ -39,16 +41,19 @@ public bool Add(ReadOnlySpan key, uint value)
///
/// The key of the value to get.
///
- /// When this method returns, contains the value associated with , if is found, zero otherwise.
+ /// When this method returns, contains the value associated with , if is found, otherwise.
///
/// if is found, otherwise.
- public bool TryGetValue(ReadOnlySpan key, out uint value)
+ public bool TryGetValue(ReadOnlySpan key, out T value)
{
- var nodeExistsAndHasValue = GetNode(key, out var node) && node.EndOfWord;
-
- value = node?.Value ?? 0;
+ value = default;
+ if (GetNode(key, out var node) && node.EndOfWord)
+ {
+ value = node.Value;
+ return true;
+ }
- return nodeExistsAndHasValue;
+ return false;
}
private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node node)
@@ -71,7 +76,7 @@ private class Node
public bool EndOfWord { get; set; }
- public uint Value { get; set; }
+ public T Value { get; set; }
public Node GetOrCreateChild(byte value)
{
From 8cb223b36beeca490cdcff3772ff73fd2bc9e7d9 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 6 Mar 2025 01:23:00 +0300
Subject: [PATCH 25/57] Switched Trie to be internal instead of public (hw3 -
lzw) (wip)
---
Zipper/Zipper/Trie.cs | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs
index 4202ea8..e46ba1c 100644
--- a/Zipper/Zipper/Trie.cs
+++ b/Zipper/Zipper/Trie.cs
@@ -6,7 +6,7 @@ namespace Zipper;
/// Trie data structure, also known as prefix tree, implemented as dictionary.
///
/// Type of values.
-public class Trie
+internal class Trie
where T : struct
{
private readonly Node rootNode = new();
From 20a8e95ae3bebbd41979b911bb4a8a42d30e64e7 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 9 Mar 2025 15:11:30 +0300
Subject: [PATCH 26/57] Removed Dispose(bool) from writer (hw3 - lzw) (wip)
---
.../Zipper.Tests/ArbitraryBitWriterTests.cs | 24 -----------------
Zipper/Zipper/ArbitraryBitWriter.cs | 26 +++++--------------
2 files changed, 7 insertions(+), 43 deletions(-)
diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
index f5eb8a2..05daf30 100644
--- a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
+++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
@@ -97,22 +97,6 @@ public void Writer_Dispose_ShouldNotDisposeStream_IfLeaveOpenWasInitializedWith_
Assert.DoesNotThrow(() => _ = memory.Position);
}
- [Test]
- public void Writer_Dispose_ShouldDoNothing_IfCalledWithFalse()
- {
- var writer = new TestWriter(memory, width);
-
- for (int i = 0; i < 5; i++)
- {
- writer.Write((uint)(i * i));
- }
-
- var position = memory.Position;
-
- writer.Dispose();
- Assert.That(memory.Position, Is.EqualTo(position));
- }
-
[Test]
public void Writer_Write_ShouldThrow_IfDisposed()
{
@@ -132,12 +116,4 @@ public void Writer_Flush_ShouldThrow_IfDisposed()
Assert.Throws(writer.Flush);
}
-
- private class TestWriter(Stream stream, int width) : ArbitraryBitWriter(stream, width)
- {
- protected override void Dispose(bool disposing)
- {
- base.Dispose(false);
- }
- }
}
diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs
index eba0fcb..72ad46f 100644
--- a/Zipper/Zipper/ArbitraryBitWriter.cs
+++ b/Zipper/Zipper/ArbitraryBitWriter.cs
@@ -106,32 +106,20 @@ public void Flush()
/// Releases all resources used by the current instance of the class.
///
public void Dispose()
- {
- Dispose(true);
- }
-
- ///
- /// Releases the unmanaged resources used by the and optionally releases the managed resources.
- ///
- /// to release both managed and unmanaged resources; to release only unmanaged resources.
- protected virtual void Dispose(bool disposing)
{
if (disposed)
{
return;
}
- if (disposing)
- {
- Flush();
- BufferPool.Return(buffer);
-
- if (!leaveOpen)
- {
- stream.Dispose();
- }
+ Flush();
+ BufferPool.Return(buffer);
- disposed = true;
+ if (!leaveOpen)
+ {
+ stream.Dispose();
}
+
+ disposed = true;
}
}
From ce7a127dc08082f1f9fe618ed4bc859335dad5d2 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 9 Mar 2025 15:49:39 +0300
Subject: [PATCH 27/57] Changed trie to use char-by-char mode (hw3 - lzw) (wip)
---
Zipper/Zipper.Tests/TrieTests.cs | 98 +++++++++++++++++++++++----
Zipper/Zipper/Trie.cs | 113 ++++++++++++++-----------------
2 files changed, 135 insertions(+), 76 deletions(-)
diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs
index ee12c05..cf7b353 100644
--- a/Zipper/Zipper.Tests/TrieTests.cs
+++ b/Zipper/Zipper.Tests/TrieTests.cs
@@ -21,6 +21,9 @@ public class TrieTests
[.. TestUtil.GetRandomStrings()]
];
+ private readonly byte testKey = 157;
+ private readonly int testValue = 252354;
+
private Trie trie;
[SetUp]
@@ -30,34 +33,99 @@ public void Setup()
}
[Test]
- public void TrieAdd_And_TrieTryGetValue_ReturnsCorrectly_SingleValue([ValueSource(nameof(TestStrings))] byte[] bytes)
+ public void AddChild_ShouldReturnTrue_IfChildDidNotExists()
{
- TestAddValue(trie, bytes);
+ Assert.Multiple(() =>
+ {
+ Assert.That(trie.HasChild(testKey), Is.False);
+ Assert.That(trie.AddChild(testKey, testValue), Is.True);
+ });
}
[Test]
- public void TrieAdd_And_TrieTryGetValue_ReturnsCorrectly_MultipleValues([ValueSource(nameof(TestStringsSet))] byte[][] strings)
+ public void HasChild_ShouldReturnTrue_IfAddedChild()
{
- foreach (var bytes in strings)
- {
- TestAddValue(trie, bytes);
- }
+ trie.AddChild(testKey, testValue);
+ Assert.That(trie.HasChild(testKey), Is.True);
}
- private static void TestAddValue(Trie trie, byte[] bytes)
+ [Test]
+ public void HasChild_ShouldReturnFalse_IfChildDoesNotExist()
+ {
+ Assert.That(trie.HasChild(testKey), Is.False);
+ }
+
+ [Test]
+ public void AddChild_ShouldReturnFalse_IfChildExisted()
+ {
+ trie.AddChild(testKey, testValue);
+ Assert.That(trie.AddChild(testKey, testValue), Is.False);
+ }
+
+ [Test]
+ public void Add_ShouldNotMove()
{
Assert.Multiple(() =>
{
- Assert.That(trie.TryGetValue(bytes, out _), Is.False);
+ Assert.That(trie.AtRoot, Is.True);
+ Assert.That(trie.AddChild(testKey, testValue), Is.True);
+ Assert.That(trie.AtRoot, Is.True);
+ });
+ }
+
+ [Test]
+ public void MoveForward_ShouldReturnTrue_IfMovingToAddedChild()
+ {
+ trie.AddChild(testKey, testValue);
+ Assert.That(trie.MoveForward(testKey), Is.True);
+ }
- Assert.That(Add(trie, bytes), Is.True);
- Assert.That(Add(trie, bytes), Is.False);
+ [Test]
+ public void MoveForward_ShouldReturnFalse_IfChildDoesNotExist()
+ {
+ Assert.That(trie.MoveForward(testKey), Is.False);
+ }
- Assert.That(trie.TryGetValue(bytes, out int value), Is.True);
- Assert.That(value, Is.EqualTo(bytes.Length));
+ [Test]
+ public void MoveForward_ShouldMove()
+ {
+ trie.AddChild(testKey, testValue);
+ Assert.Multiple(() =>
+ {
+ Assert.That(trie.AtRoot, Is.True);
+ Assert.That(trie.MoveForward(testKey), Is.True);
+ Assert.That(trie.AtRoot, Is.False);
});
}
- private static bool Add(Trie trie, byte[] bytes)
- => trie.Add(bytes, bytes.Length);
+ [Test]
+ public void Reset_ShouldReset_IfMoved()
+ {
+ Assert.That(trie.AtRoot, Is.True);
+
+ trie.AddChild(testKey, testValue);
+ trie.MoveForward(testKey);
+ Assert.That(trie.AtRoot, Is.False);
+
+ trie.Reset();
+ Assert.That(trie.AtRoot, Is.True);
+ }
+
+ [Test]
+ public void AddChild_ShouldAdd_Once()
+ {
+ int valueA = 3463235;
+ int valueB = 73334536;
+
+ trie.AddChild(testKey, valueA);
+ Assert.That(trie.AddChild(testKey, valueB), Is.False);
+ }
+
+ [Test]
+ public void CurrentValue_ShouldReturnAddedValue()
+ {
+ trie.AddChild(testKey, testValue);
+ trie.MoveForward(testKey);
+ Assert.That(trie.CurrentValue, Is.EqualTo(testValue));
+ }
}
diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs
index e46ba1c..9145553 100644
--- a/Zipper/Zipper/Trie.cs
+++ b/Zipper/Zipper/Trie.cs
@@ -1,97 +1,88 @@
namespace Zipper;
-using System.Diagnostics.CodeAnalysis;
-
///
-/// Trie data structure, also known as prefix tree, implemented as dictionary.
+/// Trie data structure, also known as prefix tree, that can be traversed through.
///
/// Type of values.
internal class Trie
where T : struct
{
- private readonly Node rootNode = new();
+ private readonly Node rootNode = new(default);
+ private Node lastNode;
///
- /// Adds associated with to this trie.
+ /// Initializes a new instance of the class.
///
- /// The byte sequence to as key.
- /// The number to add as value.
- /// if wasn't present in trie before adding it, otherwise.
- public bool Add(ReadOnlySpan key, T value)
+ public Trie()
{
- var lastNode = rootNode;
- foreach (var character in key)
- {
- lastNode = lastNode.GetOrCreateChild(character);
- }
+ lastNode = rootNode;
+ }
- if (lastNode.EndOfWord)
+ ///
+ /// Gets value stored at current position in trie.
+ ///
+ public T CurrentValue => lastNode.Value;
+
+ ///
+ /// Gets a value indicating whether current position is root.
+ ///
+ public bool AtRoot => lastNode == rootNode;
+
+ ///
+ /// Resets position to root.
+ ///
+ public void Reset()
+ {
+ lastNode = rootNode;
+ }
+
+ ///
+ /// Adds child at current position with specified and .
+ ///
+ /// Key of child to add.
+ /// Value of child to add.
+ /// if child with specified key did not exist at current position, otherwise.
+ public bool AddChild(byte key, T value)
+ {
+ if (HasChild(key))
{
return false;
}
- lastNode.EndOfWord = true;
- lastNode.Value = value;
+ var node = new Node(value);
+ lastNode.Children[key] = node;
return true;
}
///
- /// Tries to get value associated with .
+ /// Moves forward if is found, otherwise doesn't move.
///
- /// The key of the value to get.
- ///
- /// When this method returns, contains the value associated with , if is found, otherwise.
- ///
- /// if is found, otherwise.
- public bool TryGetValue(ReadOnlySpan key, out T value)
+ /// Key to search for.
+ /// if moved forward, otherwise.
+ public bool MoveForward(byte key)
{
- value = default;
- if (GetNode(key, out var node) && node.EndOfWord)
+ if (lastNode.Children.TryGetValue(key, out var existingNode))
{
- value = node.Value;
+ lastNode = existingNode;
return true;
}
return false;
}
- private bool GetNode(ReadOnlySpan prefix, [MaybeNullWhen(false)] out Node node)
- {
- node = rootNode;
- foreach (var character in prefix)
- {
- if (!node.TryGetChild(character, out node))
- {
- return false;
- }
- }
-
- return true;
- }
+ ///
+ /// Checks if child with specified exists at current position.
+ ///
+ /// Key to search for.
+    /// <returns><see langword="true"/> if child with specified key exists at current position, <see langword="false"/> otherwise.</returns>
+ public bool HasChild(byte key)
+ => lastNode.Children.ContainsKey(key);
- private class Node
+ private class Node(T value)
{
- private readonly Dictionary children = [];
-
- public bool EndOfWord { get; set; }
-
- public T Value { get; set; }
-
- public Node GetOrCreateChild(byte value)
- {
- if (!children.TryGetValue(value, out Node? node))
- {
- node = new();
- children[value] = node;
-
- return node;
- }
-
- return node;
- }
+ public Dictionary Children { get; } = [];
- public bool TryGetChild(byte value, [MaybeNullWhen(false)] out Node node)
- => children.TryGetValue(value, out node);
+ public T Value { get; } = value;
}
}
From 0e34d48e6c69ad6a95225849cec56560338ac61a Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 9 Mar 2025 15:52:20 +0300
Subject: [PATCH 28/57] Removed unnecessary test data from trie tests (hw3 -
lzw) (wip)
---
Zipper/Zipper.Tests/TrieTests.cs | 19 -------------------
1 file changed, 19 deletions(-)
diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs
index cf7b353..ea98c39 100644
--- a/Zipper/Zipper.Tests/TrieTests.cs
+++ b/Zipper/Zipper.Tests/TrieTests.cs
@@ -2,25 +2,6 @@ namespace Zipper.Tests;
public class TrieTests
{
- private static readonly byte[][] TestStrings =
- [
- .. new string[]
- {
- string.Empty,
- "A",
- "AB",
- "ABC",
- "ABCD",
- }
- .Select(System.Text.Encoding.UTF8.GetBytes)
- ];
-
- private static readonly byte[][][] TestStringsSet =
- [
- TestStrings,
- [.. TestUtil.GetRandomStrings()]
- ];
-
private readonly byte testKey = 157;
private readonly int testValue = 252354;
From 73918405aa2d7927cf7aa75cebc323dd757f83ec Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 9 Mar 2025 16:03:47 +0300
Subject: [PATCH 29/57] Switched writer and reader back to int (hw3 - lzw)
(wip)
---
Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs | 12 ++++++------
Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs | 6 +++---
Zipper/Zipper/ArbitraryBitReader.cs | 8 ++++----
Zipper/Zipper/ArbitraryBitWriter.cs | 8 ++++----
4 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
index e67f8a1..05e8732 100644
--- a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
+++ b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs
@@ -44,7 +44,7 @@ public void Reader_ShouldReadValues_WrittenBy_Writer_Correctly([ValueSource(name
{
Assert.Multiple(() =>
{
- Assert.That(reader.ReadNext(out uint number), Is.True);
+ Assert.That(reader.ReadNext(out int number), Is.True);
Assert.That(number, Is.EqualTo(data.Numbers[i]));
});
}
@@ -64,7 +64,7 @@ public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable()
using var writer = new ArbitraryBitWriter(memory, width);
for (int i = 0; i < numbersCount; i++)
{
- writer.Write((uint)(i * i));
+ writer.Write(i * i);
}
}
@@ -75,7 +75,7 @@ public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable()
{
Assert.Multiple(() =>
{
- Assert.That(reader.ReadNext(out uint number), Is.True);
+ Assert.That(reader.ReadNext(out int number), Is.True);
Assert.That(number, Is.EqualTo(i * i));
});
}
@@ -98,10 +98,10 @@ private static TestData[] GenerateData()
int width = i + minWidth;
long upperBound = 1L << width;
- var numbers = new uint[numbersLength];
+ var numbers = new int[numbersLength];
for (int j = 0; j < numbersLength; j++)
{
- numbers[j] = (uint)random.NextInt64(upperBound);
+ numbers[j] = (int)random.NextInt64(upperBound);
}
result[i] = new(width, numbers);
@@ -110,7 +110,7 @@ private static TestData[] GenerateData()
return result;
}
- public readonly record struct TestData(int Width, uint[] Numbers);
+ public readonly record struct TestData(int Width, int[] Numbers);
// use MemoryStream, because implementing all Stream's abstract membrers leads to bad code coverage
private class TestStream : MemoryStream
diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
index 05daf30..6a44a5e 100644
--- a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
+++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs
@@ -38,7 +38,7 @@ public void Writer_Dispose_ShouldDoNothing_IfBufferIsEmpty()
// buffer is filled on every eight Write()
for (int i = 0; i < 8; i++)
{
- writer.Write((uint)(i * i));
+ writer.Write(i * i);
}
var position = memory.Position;
@@ -53,7 +53,7 @@ public void Writer_Flush_ShouldDoNothing_IfBufferIsEmpty()
// buffer is filled on every eight Write()
for (int i = 0; i < 8; i++)
{
- writer.Write((uint)(i * i));
+ writer.Write(i * i);
}
var position = memory.Position;
@@ -68,7 +68,7 @@ public void Writer_Flush_ShouldFlushHalfFilledBuffer()
// buffer is filled on every eight Write(), so write only 4 numbers
for (int i = 0; i < 4; i++)
{
- writer.Write((uint)(i * i));
+ writer.Write(i * i);
}
var position = memory.Position;
diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs
index 2308421..3b5857a 100644
--- a/Zipper/Zipper/ArbitraryBitReader.cs
+++ b/Zipper/Zipper/ArbitraryBitReader.cs
@@ -1,7 +1,7 @@
namespace Zipper;
///
-/// Reads unsigned integers of arbitrary width.
+/// Reads integers of arbitrary width.
///
internal class ArbitraryBitReader
{
@@ -41,7 +41,7 @@ public ArbitraryBitReader(Stream stream, int width)
///
/// When this method returns, contains the value that was read, if read successfully, zero otherwise.
/// if was successfuly read, otherwise.
- public bool ReadNext(out uint number)
+ public bool ReadNext(out int number)
{
number = 0;
@@ -61,8 +61,8 @@ public bool ReadNext(out uint number)
}
int remainingBitsToRead = 8 - bitsReadFromBuffer.Value;
- uint mask = 0xFFu >> bitsReadFromBuffer.Value;
- uint toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth);
+ int mask = 0xFF >> bitsReadFromBuffer.Value;
+ int toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth);
int previousRemainingWidth = remainingWidth;
remainingWidth -= remainingBitsToRead;
diff --git a/Zipper/Zipper/ArbitraryBitWriter.cs b/Zipper/Zipper/ArbitraryBitWriter.cs
index 72ad46f..eda32c2 100644
--- a/Zipper/Zipper/ArbitraryBitWriter.cs
+++ b/Zipper/Zipper/ArbitraryBitWriter.cs
@@ -3,7 +3,7 @@ namespace Zipper;
using System.Buffers;
///
-/// Writes unsigned integers of arbitrary width.
+/// Writes integers of arbitrary width.
///
internal class ArbitraryBitWriter : IDisposable
{
@@ -53,11 +53,11 @@ public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false)
/// Writes to the underlying stream.
///
/// Number to write.
- public void Write(uint number)
+ public void Write(int number)
{
ObjectDisposedException.ThrowIf(disposed, this);
- number &= 0xFFFFFFFF >> (32 - width);
+ number &= (int)(0xFFFFFFFF >> (32 - width));
int remainingWidth = width;
while (remainingWidth > 0)
@@ -66,7 +66,7 @@ public void Write(uint number)
int bitsWrittenToCurrentByte = bitsWrittenInBuffer % 8;
int bitsRemainingInCurrentByte = 8 - bitsWrittenToCurrentByte;
- uint toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte);
+ int toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte);
int previousRemainingWidth = remainingWidth;
remainingWidth -= bitsRemainingInCurrentByte;
remainingWidth = Math.Max(0, remainingWidth);
From cfc13a102feacd4a872712b0f04f2b9f8c7feaad Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 9 Mar 2025 16:31:12 +0300
Subject: [PATCH 30/57] LZW implementation (hw3 - lzw)
---
Zipper/Zipper/LZW/BlockType.cs | 22 ++++
Zipper/Zipper/LZW/LZWReader.cs | 178 ++++++++++++++++++++++++++++
Zipper/Zipper/LZW/LZWStream.cs | 206 +++++++++++++++++++++++++++++++++
Zipper/Zipper/LZW/LZWWriter.cs | 193 ++++++++++++++++++++++++++++++
Zipper/Zipper/ZipperMode.cs | 17 +++
5 files changed, 616 insertions(+)
create mode 100644 Zipper/Zipper/LZW/BlockType.cs
create mode 100644 Zipper/Zipper/LZW/LZWReader.cs
create mode 100644 Zipper/Zipper/LZW/LZWStream.cs
create mode 100644 Zipper/Zipper/LZW/LZWWriter.cs
create mode 100644 Zipper/Zipper/ZipperMode.cs
diff --git a/Zipper/Zipper/LZW/BlockType.cs b/Zipper/Zipper/LZW/BlockType.cs
new file mode 100644
index 0000000..ab4b2ce
--- /dev/null
+++ b/Zipper/Zipper/LZW/BlockType.cs
@@ -0,0 +1,22 @@
+namespace Zipper.LZW;
+
+/// <summary>
+/// Block type used to mark blocks in <see cref="LZWStream"/>.
+/// </summary>
+internal enum BlockType : byte
+{
+ /// <summary>
+ /// Treat block as usual.
+ /// </summary>
+ Default = 0,
+
+ /// <summary>
+ /// All blocks after this one should not expand code table.
+ /// </summary>
+ FixCodeTableSize = 1,
+
+ /// <summary>
+ /// This block marks the end of the stream and is the last one to be read.
+ /// </summary>
+ EndOfStream = 2,
+}
diff --git a/Zipper/Zipper/LZW/LZWReader.cs b/Zipper/Zipper/LZW/LZWReader.cs
new file mode 100644
index 0000000..90d0d42
--- /dev/null
+++ b/Zipper/Zipper/LZW/LZWReader.cs
@@ -0,0 +1,194 @@
+namespace Zipper.LZW;
+
+using System.Buffers;
+using System.Buffers.Binary;
+using System.Diagnostics;
+
+/// <summary>
+/// Internal class used to read compressed data from stream.
+/// </summary>
+internal class LZWReader : IDisposable
+{
+    private static readonly ArrayPool<byte> BlockPool = ArrayPool<byte>.Create();
+
+    private readonly Stream stream;
+    private readonly Dictionary<int, byte[]> storedCodes;
+    private MemoryStream? memory;
+    private byte[]? block;
+    private int blockSize;
+    private bool endOfStreamReached;
+
+    private byte[]? word;
+    private int wordPosition;
+
+    private ArbitraryBitReader? reader;
+    private int lastWordCode;
+    private int maxCodesCount;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="LZWReader"/> class.
+    /// </summary>
+    /// <param name="stream">Stream to read from.</param>
+    public LZWReader(Stream stream)
+    {
+        this.stream = stream;
+
+        storedCodes = [];
+        for (int i = 0; i < 256; i++)
+        {
+            storedCodes[i] = [(byte)i];
+        }
+
+        lastWordCode = 256;
+        maxCodesCount = int.MaxValue;
+    }
+
+    /// <summary>
+    /// Reads data from underlying stream, decompresses it and writes to <paramref name="buffer"/>.
+    /// </summary>
+    /// <param name="buffer">Buffer to write decompressed data to.</param>
+    /// <returns>Count of read bytes.</returns>
+    /// <exception cref="EndOfStreamException">Unexpected end of stream.</exception>
+    /// <exception cref="InvalidDataException">Invalid data stream.</exception>
+    public int Read(Span<byte> buffer)
+    {
+        int bufferPosition = 0;
+
+        while (bufferPosition < buffer.Length)
+        {
+            // write leftover word from previous (iteration) or (Read() call)
+            if (word != null)
+            {
+                int wordLength = Math.Min(buffer.Length - bufferPosition, word.Length - wordPosition);
+                word.AsSpan().Slice(wordPosition, wordLength).CopyTo(buffer[bufferPosition..]);
+
+                bufferPosition += wordLength;
+                wordPosition += wordLength;
+                if (bufferPosition >= buffer.Length)
+                {
+                    break;
+                }
+
+                word = null;
+            }
+
+            if (block == null && !TryReadBuffer())
+            {
+                // underlying stream is exhausted: report the bytes already decoded instead of dropping them
+                break;
+            }
+
+            Debug.Assert(block != null, "Block is null");
+            Debug.Assert(memory != null, "Memory is null");
+            Debug.Assert(reader != null, "Reader is null");
+
+            if (!reader.ReadNext(out int code))
+            {
+                if (endOfStreamReached)
+                {
+                    break;
+                }
+
+                throw new EndOfStreamException();
+            }
+
+            if (!storedCodes.TryGetValue(code, out var readWord))
+            {
+                throw new InvalidDataException();
+            }
+
+            word = readWord;
+            wordPosition = 0;
+
+            if (lastWordCode <= maxCodesCount)
+            {
+                if (storedCodes.TryGetValue(lastWordCode, out var incompleteWord))
+                {
+                    incompleteWord[^1] = word[0];
+                    lastWordCode++;
+                }
+
+                var newWord = new byte[word.Length + 1];
+                word.CopyTo(newWord, 0);
+                storedCodes[lastWordCode] = newWord;
+            }
+
+            if (memory.Position >= blockSize)
+            {
+                BlockPool.Return(block);
+                block = null;
+            }
+        }
+
+        return bufferPosition;
+    }
+
+    /// <summary>
+    /// Disposes internal buffers.
+    /// </summary>
+    public void Dispose()
+    {
+        if (block != null)
+        {
+            BlockPool.Return(block);
+
+            // forget the array so that a repeated Dispose() does not return it to the pool twice
+            block = null;
+        }
+    }
+
+    private bool TryReadBuffer()
+    {
+        int headerSize = 4;
+        Span<byte> header = stackalloc byte[headerSize];
+
+        // Stream.Read may return fewer bytes than requested, so keep reading until the header is complete
+        if (stream.ReadAtLeast(header, headerSize, throwOnEndOfStream: false) != headerSize)
+        {
+            return false;
+        }
+
+        var blockType = (BlockType)header[0];
+        var codeWidth = header[1];
+
+        blockSize = BinaryPrimitives.ReadUInt16LittleEndian(header[2..4]);
+        block = BlockPool.Rent(blockSize);
+
+        if (stream.ReadAtLeast(block.AsSpan(0, blockSize), blockSize, throwOnEndOfStream: false) != blockSize)
+        {
+            throw new EndOfStreamException();
+        }
+
+        switch (blockType)
+        {
+            case BlockType.Default:
+                break;
+
+            case BlockType.FixCodeTableSize:
+                // payload is the code table size limit, stored as 32-bit little-endian integer
+                if (blockSize < sizeof(int))
+                {
+                    throw new InvalidDataException();
+                }
+
+                maxCodesCount = BinaryPrimitives.ReadInt32LittleEndian(block);
+
+                // this block carries no codes: hand it back before the next block is rented
+                BlockPool.Return(block);
+                block = null;
+                return TryReadBuffer();
+
+            case BlockType.EndOfStream:
+                endOfStreamReached = true;
+                break;
+
+            default:
+                throw new InvalidDataException();
+        }
+
+        memory = new(block);
+        reader = new(memory, codeWidth);
+
+        return true;
+    }
+}
diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs
new file mode 100644
index 0000000..44c566e
--- /dev/null
+++ b/Zipper/Zipper/LZW/LZWStream.cs
@@ -0,0 +1,213 @@
+namespace Zipper.LZW;
+
+using System.Diagnostics;
+
+/// <summary>
+/// Provides methods and properties used to compress and decompress streams by using the LZW algorithm.
+/// </summary>
+public class LZWStream : Stream
+{
+    /// <summary>
+    /// Smallest allowed Block length.
+    /// </summary>
+    public const int MinBlockSize = 256;
+
+    /// <summary>
+    /// Largest allowed Block length.
+    /// </summary>
+    public const int MaxBlockSize = 64 * 1024;
+
+    private const int DefaultBlockSize = 1024;
+
+    private readonly Stream stream;
+    private readonly ZipperMode mode;
+    private readonly bool leaveOpen;
+
+    private readonly LZWWriter? writer;
+    private readonly LZWReader? reader;
+
+    private bool disposed;
+
+    /// <inheritdoc cref="LZWStream(Stream, int, ZipperMode, bool)"/>
+    public LZWStream(Stream stream, ZipperMode mode = ZipperMode.Compress, bool leaveOpen = false)
+        : this(stream, DefaultBlockSize, mode, leaveOpen)
+    {
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="LZWStream"/> class.
+    /// </summary>
+    /// <param name="stream">The stream to which compressed data is written or from which data to uncompress is read.</param>
+    /// <param name="blockSize">The internal block size to use, should be between <see cref="MinBlockSize"/> and <see cref="MaxBlockSize"/>.</param>
+    /// <param name="mode"><see cref="ZipperMode"/> that determines whether to compress or uncompress data.</param>
+    /// <param name="leaveOpen">
+    /// <see langword="true"/> to leave <paramref name="stream"/> open after this instance is disposed,
+    /// <see langword="false"/> to dispose it along with this instance.
+    /// </param>
+    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <see langword="null"/>.</exception>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="blockSize"/> is outside of the allowed range.</exception>
+    /// <exception cref="ArgumentException"><paramref name="mode"/> is not a defined <see cref="ZipperMode"/> value.</exception>
+    public LZWStream(Stream stream, int blockSize, ZipperMode mode = ZipperMode.Compress, bool leaveOpen = false)
+    {
+        ArgumentNullException.ThrowIfNull(stream);
+        ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize);
+        ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize);
+
+        if (!Enum.IsDefined(mode))
+        {
+            throw new ArgumentException($"Value was neither {ZipperMode.Compress} nor {ZipperMode.Decompress}", nameof(mode));
+        }
+
+        if (mode == ZipperMode.Compress)
+        {
+            writer = new(stream, blockSize);
+        }
+        else
+        {
+            reader = new(stream);
+        }
+
+        this.stream = stream;
+        this.mode = mode;
+        this.leaveOpen = leaveOpen;
+        disposed = false;
+    }
+
+    /// <summary>
+    /// Gets a value indicating whether the stream supports reading.
+    /// </summary>
+    /// <inheritdoc/>
+    public override bool CanRead => mode == ZipperMode.Decompress && stream.CanRead;
+
+    /// <summary>
+    /// Gets a value indicating whether the stream supports writing.
+    /// </summary>
+    /// <inheritdoc/>
+    public override bool CanWrite => mode == ZipperMode.Compress && stream.CanWrite;
+
+    /// <summary>
+    /// Gets a value indicating whether the stream supports seeking.
+    /// </summary>
+    /// <inheritdoc/>
+    public override bool CanSeek => false;
+
+    /// <summary>
+    /// This property is not supported and always throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    /// <inheritdoc/>
+    public override long Length => throw new NotSupportedException();
+
+    /// <summary>
+    /// This property is not supported and always throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    /// <inheritdoc/>
+    public override long Position
+    {
+        get => throw new NotSupportedException();
+        set => throw new NotSupportedException();
+    }
+
+    /// <summary>
+    /// This method is not supported and always throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    /// <inheritdoc/>
+    public override long Seek(long offset, SeekOrigin origin)
+        => throw new NotSupportedException();
+
+    /// <summary>
+    /// This method is not supported and always throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    /// <inheritdoc/>
+    public override void SetLength(long value)
+        => throw new NotSupportedException();
+
+    /// <summary>
+    /// Flushes the internal buffers.
+    /// </summary>
+    /// <inheritdoc/>
+    public override void Flush()
+    {
+        EnsureNotClosed();
+        if (mode == ZipperMode.Compress)
+        {
+            Debug.Assert(writer != null, "Writer is null");
+            writer.Flush();
+        }
+    }
+
+    /// <inheritdoc/>
+    public override int Read(byte[] buffer, int offset, int count)
+        => Read(buffer.AsSpan(offset, count));
+
+    /// <inheritdoc/>
+    public override void Write(byte[] buffer, int offset, int count)
+        => Write(buffer.AsSpan(offset, count));
+
+    /// <inheritdoc/>
+    public override void Write(ReadOnlySpan<byte> buffer)
+    {
+        EnsureNotClosed();
+        EnsureMode(ZipperMode.Compress);
+
+        Debug.Assert(writer != null, "Writer is null");
+
+        writer.Write(buffer);
+    }
+
+    /// <inheritdoc/>
+    public override int Read(Span<byte> buffer)
+    {
+        EnsureNotClosed();
+        EnsureMode(ZipperMode.Decompress);
+
+        Debug.Assert(reader != null, "Reader is null");
+
+        return reader.Read(buffer);
+    }
+
+    /// <inheritdoc/>
+    protected override void Dispose(bool disposing)
+    {
+        if (disposed)
+        {
+            return;
+        }
+
+        if (disposing)
+        {
+            if (mode == ZipperMode.Compress)
+            {
+                Debug.Assert(writer != null, "Writer is null");
+                writer.Dispose();
+            }
+            else
+            {
+                Debug.Assert(reader != null, "Reader is null");
+                reader.Dispose();
+            }
+
+            if (!leaveOpen)
+            {
+                stream.Dispose();
+            }
+
+            disposed = true;
+        }
+
+        // let Stream run its own cleanup as well, as the dispose pattern requires
+        base.Dispose(disposing);
+    }
+
+    private void EnsureMode(ZipperMode mode)
+    {
+        if (this.mode != mode)
+        {
+            throw new InvalidOperationException();
+        }
+    }
+
+    private void EnsureNotClosed()
+    {
+        ObjectDisposedException.ThrowIf(disposed, this);
+    }
+}
diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs
new file mode 100644
index 0000000..61d8e27
--- /dev/null
+++ b/Zipper/Zipper/LZW/LZWWriter.cs
@@ -0,0 +1,193 @@
+namespace Zipper.LZW;
+
+using System.Buffers;
+using System.Buffers.Binary;
+using System.Diagnostics;
+
+/// <summary>
+/// Internal class used to write compressed data to stream.
+/// </summary>
+internal class LZWWriter : IDisposable
+{
+ private const int DataOffset = 4;
+ private const int MaxCodesCount = (320 * 1024) - 1;
+
+ private static readonly ArrayPool<byte> BlockPool = ArrayPool<byte>.Create();
+
+ private readonly Stream stream;
+ private readonly int blockSize;
+ private readonly byte[] block;
+ private readonly MemoryStream memory;
+ private readonly Trie trie;
+
+ private ArbitraryBitWriter writer;
+ private int bitsWrittenInBlock;
+ private bool disableCodeTableExpansion;
+
+ private int blockIndex = 0;
+ private int bytesRead = 0;
+
+ private int codeWidth;
+ private int codesCount;
+
+ /// <summary>
+ /// Initializes a new instance of the <see cref="LZWWriter"/> class.
+ /// </summary>
+ /// <param name="stream">Stream to write to.</param>
+ /// <param name="blockSize">The internal block size to use.</param>
+ public LZWWriter(Stream stream, int blockSize)
+ {
+ ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, LZWStream.MinBlockSize);
+ ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, LZWStream.MaxBlockSize);
+
+ this.stream = stream;
+ this.blockSize = blockSize;
+
+ block = BlockPool.Rent(blockSize);
+ memory = new(block);
+ memory.Seek(DataOffset, SeekOrigin.Begin);
+ bitsWrittenInBlock = DataOffset * 8;
+
+ codeWidth = 8;
+ codesCount = 1 << codeWidth;
+
+ trie = new();
+ for (int i = 0; i < codesCount; i++)
+ {
+ trie.AddChild((byte)i, i);
+ }
+
+ writer = new(memory, codeWidth, true);
+ }
+
+ /// <summary>
+ /// Compresses data in <paramref name="buffer"/> and writes it to underlying stream.
+ /// </summary>
+ /// <param name="buffer">Buffer to read data from.</param>
+ public void Write(ReadOnlySpan<byte> buffer)
+ {
+ for (int i = 0; i < buffer.Length; i++)
+ {
+ bytesRead++;
+ byte value = buffer[i];
+ if (trie.AtRoot)
+ {
+ trie.MoveForward(value);
+ continue;
+ }
+
+ if (!trie.HasChild(value))
+ {
+ if (!disableCodeTableExpansion)
+ {
+ trie.AddChild(value, codesCount);
+ }
+
+ bool bufferOverflow = bitsWrittenInBlock + codeWidth > blockSize * 8;
+
+ bool shouldIncrementCodeWidth = false;
+ bool shouldDisableCodeTableExpansion = false;
+ if (!disableCodeTableExpansion)
+ {
+ codesCount++;
+ shouldDisableCodeTableExpansion = codesCount >= MaxCodesCount;
+ shouldIncrementCodeWidth = codesCount >= 1 << codeWidth;
+ }
+
+ if (shouldDisableCodeTableExpansion)
+ {
+ disableCodeTableExpansion = true;
+ FlushInternal(BlockType.Default);
+ FlushInternal(BlockType.FixCodeTableSize);
+ }
+ else if (bufferOverflow || shouldIncrementCodeWidth)
+ {
+ FlushInternal(BlockType.Default);
+ }
+
+ if (shouldIncrementCodeWidth)
+ {
+ codeWidth++;
+
+ writer.Dispose();
+ writer = new(memory, codeWidth, true);
+ }
+
+ writer.Write(trie.CurrentValue);
+ bitsWrittenInBlock += codeWidth;
+
+ trie.Reset();
+ }
+
+ trie.MoveForward(value);
+ }
+ }
+
+ /// <summary>
+ /// Writes all pending data to the underlying stream.
+ /// </summary>
+ public void Flush()
+ {
+ FlushInternal(BlockType.Default);
+
+ writer.Write(trie.CurrentValue);
+ bitsWrittenInBlock += codeWidth;
+
+ trie.Reset();
+
+ FlushInternal(BlockType.Default);
+ }
+
+ /// <summary>
+ /// Writes all pending data to the underlying stream and disposes internal buffers.
+ /// </summary>
+ public void Dispose()
+ {
+ Flush();
+ FlushInternal(BlockType.EndOfStream);
+
+ memory.Dispose();
+ BlockPool.Return(block);
+ }
+
+ private void FlushInternal(BlockType type)
+ {
+ Debug.Assert(codeWidth <= 32, "Code width too large");
+ Debug.Assert(Enum.IsDefined(type), $"Unknown {nameof(BlockType)} parameter");
+
+ if (type == BlockType.FixCodeTableSize)
+ {
+ var binWriter = new BinaryWriter(memory);
+ binWriter.Write(MaxCodesCount);
+ binWriter.Flush();
+
+ Console.WriteLine($"disable code table expansion @ block #{blockIndex}");
+ }
+
+ writer.Flush();
+ int length = (int)memory.Position;
+ ushort dataLength = (ushort)(length - DataOffset);
+
+ if (!(dataLength == 0 && type == BlockType.Default))
+ {
+ block[0] = (byte)type;
+ block[1] = (byte)codeWidth;
+ BinaryPrimitives.WriteUInt16LittleEndian(block.AsSpan()[2..4], dataLength);
+
+ stream.Write(block, 0, length);
+ stream.Flush();
+
+ if (blockIndex < 20 || (blockIndex % 1024) == 0 || type == BlockType.FixCodeTableSize)
+ {
+ Console.WriteLine($"wrote block #{blockIndex}, read: {bytesRead / 1024.0:0.00} K, block size: {dataLength}, codeWidth: {codeWidth}, used codes: {codesCount}");
+ }
+
+ blockIndex++;
+ }
+
+ Array.Clear(block);
+
+ memory.Seek(DataOffset, SeekOrigin.Begin);
+ bitsWrittenInBlock = DataOffset * 8;
+ }
+}
diff --git a/Zipper/Zipper/ZipperMode.cs b/Zipper/Zipper/ZipperMode.cs
new file mode 100644
index 0000000..9df19ad
--- /dev/null
+++ b/Zipper/Zipper/ZipperMode.cs
@@ -0,0 +1,17 @@
+namespace Zipper;
+
+/// <summary>
+/// Specifies whether to compress data to or decompress data from the underlying stream.
+/// </summary>
+public enum ZipperMode
+{
+ /// <summary>
+ /// Compress data to the underlying stream.
+ /// </summary>
+ Compress,
+
+ /// <summary>
+ /// Decompress data from the underlying stream.
+ /// </summary>
+ Decompress,
+}
From 5ae229c9bf41b899fbf61e6f49c0b8dac95a426b Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 9 Mar 2025 16:55:21 +0300
Subject: [PATCH 31/57] Switch input parameter in BWT.ForwardTransform to Span
(hw3 - lzw)
---
Zipper/Zipper/BWT.cs | 13 ++++++++-----
1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs
index eabe9ce..607fb75 100644
--- a/Zipper/Zipper/BWT.cs
+++ b/Zipper/Zipper/BWT.cs
@@ -9,6 +9,7 @@ namespace Zipper;
internal static class BWT
{
private static readonly ArrayPool Pool = ArrayPool.Create();
+ private static readonly ArrayPool InputPool = ArrayPool.Create();
///
/// Transforms given byte sequence using Burrows-Wheeler algorithm.
@@ -16,7 +17,7 @@ internal static class BWT
/// Input byte sequence.
/// Span to write transofrmed input to.
/// Index that is used to reconstruct byte sequence.
- public static int ForwardTransform(Memory input, Span output)
+ public static int ForwardTransform(Span input, Span output)
{
Debug.Assert(input.Length == output.Length, "Length of input and output should be the same");
int length = input.Length;
@@ -32,12 +33,14 @@ public static int ForwardTransform(Memory input, Span output)
offsets[i] = i;
}
+ var inputCopy = InputPool.Rent(length);
+ input.CopyTo(inputCopy);
+
int Compare(int x, int y)
{
- var inputSpan = input.Span;
for (int i = 0; i < length; i++)
{
- int compare = inputSpan[(i + x) % length] - inputSpan[(i + y) % length];
+ int compare = inputCopy[(i + x) % length] - inputCopy[(i + y) % length];
if (compare != 0)
{
return compare;
@@ -51,7 +54,6 @@ int Compare(int x, int y)
offsetsSpan.Sort(Compare);
- var inputSpan = input.Span;
int? identityPosition = null;
for (int i = 0; i < length; i++)
{
@@ -60,10 +62,11 @@ int Compare(int x, int y)
identityPosition = i;
}
- output[i] = inputSpan[(offsets[i] + length - 1) % length];
+ output[i] = inputCopy[(offsets[i] + length - 1) % length];
}
Pool.Return(offsets);
+ InputPool.Return(inputCopy);
Debug.Assert(identityPosition.HasValue, "Identity position not found");
From 3ec0ab3fe3c2816cccff3704558c90028ebfeec4 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 9 Mar 2025 16:56:43 +0300
Subject: [PATCH 32/57] Switch input parameter in forward and inverse transform
in BWT to be readonly (hw3 - lzw)
---
Zipper/Zipper/BWT.cs | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs
index 607fb75..099f06c 100644
--- a/Zipper/Zipper/BWT.cs
+++ b/Zipper/Zipper/BWT.cs
@@ -17,7 +17,7 @@ internal static class BWT
/// Input byte sequence.
/// Span to write transofrmed input to.
/// Index that is used to reconstruct byte sequence.
- public static int ForwardTransform(Span input, Span output)
+ public static int ForwardTransform(ReadOnlySpan input, Span output)
{
Debug.Assert(input.Length == output.Length, "Length of input and output should be the same");
int length = input.Length;
@@ -79,7 +79,7 @@ int Compare(int x, int y)
/// Transformed byte sequence.
/// Index that is used to reconstruct byte sequence.
/// Span to write reconstructed byte sequence to.
- public static void InverseTransform(Span input, int identityIndex, Span output)
+ public static void InverseTransform(ReadOnlySpan input, int identityIndex, Span output)
{
Debug.Assert(input.Length == output.Length, "Length of input and output should be the same");
From 032cc484f5065308d8e949faa1644783ce32c530 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 9 Mar 2025 18:39:23 +0300
Subject: [PATCH 33/57] Disable debug logging in LZWWriter (hw3 - lzw)
---
Zipper/Zipper/LZW/LZWWriter.cs | 13 -------------
1 file changed, 13 deletions(-)
diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs
index 61d8e27..59a7877 100644
--- a/Zipper/Zipper/LZW/LZWWriter.cs
+++ b/Zipper/Zipper/LZW/LZWWriter.cs
@@ -24,9 +24,6 @@ internal class LZWWriter : IDisposable
private int bitsWrittenInBlock;
private bool disableCodeTableExpansion;
- private int blockIndex = 0;
- private int bytesRead = 0;
-
private int codeWidth;
private int codesCount;
@@ -68,7 +65,6 @@ public void Write(ReadOnlySpan buffer)
{
for (int i = 0; i < buffer.Length; i++)
{
- bytesRead++;
byte value = buffer[i];
if (trie.AtRoot)
{
@@ -160,8 +156,6 @@ private void FlushInternal(BlockType type)
var binWriter = new BinaryWriter(memory);
binWriter.Write(MaxCodesCount);
binWriter.Flush();
-
- Console.WriteLine($"disable code table expansion @ block #{blockIndex}");
}
writer.Flush();
@@ -176,13 +170,6 @@ private void FlushInternal(BlockType type)
stream.Write(block, 0, length);
stream.Flush();
-
- if (blockIndex < 20 || (blockIndex % 1024) == 0 || type == BlockType.FixCodeTableSize)
- {
- Console.WriteLine($"wrote block #{blockIndex}, read: {bytesRead / 1024.0:0.00} K, block size: {dataLength}, codeWidth: {codeWidth}, used codes: {codesCount}");
- }
-
- blockIndex++;
}
Array.Clear(block);
From ca5ed633a6e66e39ddaf5cc6d130921b2f901c94 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Sun, 9 Mar 2025 19:30:32 +0300
Subject: [PATCH 34/57] Use non-nullable int in bit writer for good code
coverage (hw3 - lzw)
---
Zipper/Zipper/ArbitraryBitReader.cs | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/Zipper/Zipper/ArbitraryBitReader.cs b/Zipper/Zipper/ArbitraryBitReader.cs
index 3b5857a..3f53ad6 100644
--- a/Zipper/Zipper/ArbitraryBitReader.cs
+++ b/Zipper/Zipper/ArbitraryBitReader.cs
@@ -14,7 +14,7 @@ internal class ArbitraryBitReader
private readonly Stream stream;
private readonly int width;
private byte buffer;
- private int? bitsReadFromBuffer;
+ private int bitsReadFromBuffer;
///
/// Initializes a new instance of the class.
@@ -33,7 +33,7 @@ public ArbitraryBitReader(Stream stream, int width)
this.stream = stream;
this.width = width;
- bitsReadFromBuffer = null;
+ bitsReadFromBuffer = int.MaxValue;
}
///
@@ -48,7 +48,7 @@ public bool ReadNext(out int number)
int remainingWidth = width;
while (remainingWidth > 0)
{
- if (bitsReadFromBuffer is null or >= 8)
+ if (bitsReadFromBuffer >= 8)
{
int readByte = stream.ReadByte();
if (readByte == -1)
@@ -60,8 +60,8 @@ public bool ReadNext(out int number)
bitsReadFromBuffer = 0;
}
- int remainingBitsToRead = 8 - bitsReadFromBuffer.Value;
- int mask = 0xFF >> bitsReadFromBuffer.Value;
+ int remainingBitsToRead = 8 - bitsReadFromBuffer;
+ int mask = 0xFF >> bitsReadFromBuffer;
int toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth);
int previousRemainingWidth = remainingWidth;
From 8d99900d508c54cdfea7ac6a5634df748c1393a7 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Mon, 10 Mar 2025 15:29:59 +0300
Subject: [PATCH 35/57] Updated api docs in LZWStream (hw3 - lzw)
---
Zipper/Zipper/LZW/LZWStream.cs | 29 ++++++++++++++++++++++++-----
1 file changed, 24 insertions(+), 5 deletions(-)
diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs
index 44c566e..d0933c4 100644
--- a/Zipper/Zipper/LZW/LZWStream.cs
+++ b/Zipper/Zipper/LZW/LZWStream.cs
@@ -44,6 +44,8 @@ public LZWStream(Stream stream, ZipperMode mode = ZipperMode.Compress, bool leav
/// The value indicating whether should be disposed along with this instance,
/// if is .
///
+ /// is not nor .
+ /// is out of range.
public LZWStream(Stream stream, int blockSize, ZipperMode mode = ZipperMode.Compress, bool leaveOpen = false)
{
ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize);
@@ -120,7 +122,7 @@ public override void SetLength(long value)
///
/// Flushes the internal buffers.
///
- ///
+ /// Stream is disposed.
public override void Flush()
{
EnsureNotClosed();
@@ -131,15 +133,32 @@ public override void Flush()
}
}
- ///
+ ///
+ /// Reads data from underlying stream, decompresses it and writes to .
+ ///
+ /// Buffer to write decompressed data to.
+ /// How many bytes to skip before reading from .
+ /// How many bytes to read from .
+ /// Count of read bytes.
+ /// Unexpected end of stream.
+ /// Invalid data stream.
+ /// Stream is set to mode.
+ /// Stream is disposed.
public override int Read(byte[] buffer, int offset, int count)
=> Read(buffer.AsSpan(offset, count));
- ///
+ ///
+ /// Reads data from underlying stream, decompresses it and writes to .
+ ///
+ /// Buffer to write decompressed data to.
+ /// How many bytes to skip before reading from .
+ /// How many bytes to read from .
+ /// Stream is set to mode.
+ /// Stream is disposed.
public override void Write(byte[] buffer, int offset, int count)
=> Write(buffer.AsSpan(offset, count));
- ///
+ ///
public override void Write(ReadOnlySpan buffer)
{
EnsureNotClosed();
@@ -150,7 +169,7 @@ public override void Write(ReadOnlySpan buffer)
writer.Write(buffer);
}
- ///
+ ///
public override int Read(Span buffer)
{
EnsureNotClosed();
From 4de2514be88809c721774247a76b8d902a8eca39 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Mon, 10 Mar 2025 15:32:07 +0300
Subject: [PATCH 36/57] Swapped Read() and Write() in LZWStream for consistency
(hw3 - lzw)
---
Zipper/Zipper/LZW/LZWStream.cs | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs
index d0933c4..9323110 100644
--- a/Zipper/Zipper/LZW/LZWStream.cs
+++ b/Zipper/Zipper/LZW/LZWStream.cs
@@ -139,13 +139,10 @@ public override void Flush()
/// Buffer to write decompressed data to.
/// How many bytes to skip before reading from .
/// How many bytes to read from .
- /// Count of read bytes.
- /// Unexpected end of stream.
- /// Invalid data stream.
- /// Stream is set to mode.
+ /// Stream is set to mode.
/// Stream is disposed.
- public override int Read(byte[] buffer, int offset, int count)
- => Read(buffer.AsSpan(offset, count));
+ public override void Write(byte[] buffer, int offset, int count)
+ => Write(buffer.AsSpan(offset, count));
///
/// Reads data from underlying stream, decompresses it and writes to .
@@ -153,10 +150,13 @@ public override int Read(byte[] buffer, int offset, int count)
/// Buffer to write decompressed data to.
/// How many bytes to skip before reading from .
/// How many bytes to read from .
- /// Stream is set to mode.
+ /// Count of read bytes.
+ /// Unexpected end of stream.
+ /// Invalid data stream.
+ /// Stream is set to mode.
/// Stream is disposed.
- public override void Write(byte[] buffer, int offset, int count)
- => Write(buffer.AsSpan(offset, count));
+ public override int Read(byte[] buffer, int offset, int count)
+ => Read(buffer.AsSpan(offset, count));
///
public override void Write(ReadOnlySpan buffer)
From 2a0d6ec2f9067fd29f8c21fd88cdc30b87a4e3d7 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Mon, 10 Mar 2025 15:36:55 +0300
Subject: [PATCH 37/57] Made ZipperMode required parameter in LZWStream
constructor (hw3 - lzw)
---
Zipper/Zipper/LZW/LZWStream.cs | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs
index 9323110..98d0759 100644
--- a/Zipper/Zipper/LZW/LZWStream.cs
+++ b/Zipper/Zipper/LZW/LZWStream.cs
@@ -29,7 +29,7 @@ public class LZWStream : Stream
private bool disposed;
///
- public LZWStream(Stream stream, ZipperMode mode = ZipperMode.Compress, bool leaveOpen = false)
+ public LZWStream(Stream stream, ZipperMode mode, bool leaveOpen = false)
: this(stream, DefaultBlockSize, mode, leaveOpen)
{
}
@@ -46,7 +46,7 @@ public LZWStream(Stream stream, ZipperMode mode = ZipperMode.Compress, bool leav
///
/// is not nor .
/// is out of range.
- public LZWStream(Stream stream, int blockSize, ZipperMode mode = ZipperMode.Compress, bool leaveOpen = false)
+ public LZWStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false)
{
ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize);
ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize);
From 027102c9eabbb1dddaf38e20dd97670aa3cf87ad Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Mon, 10 Mar 2025 15:41:50 +0300
Subject: [PATCH 38/57] Updated 'uncompress' -> 'decompress' in api docs for
consistency (hw3 - lzw)
---
Zipper/Zipper/LZW/LZWStream.cs | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs
index 98d0759..631d0ee 100644
--- a/Zipper/Zipper/LZW/LZWStream.cs
+++ b/Zipper/Zipper/LZW/LZWStream.cs
@@ -37,9 +37,9 @@ public LZWStream(Stream stream, ZipperMode mode, bool leaveOpen = false)
///
/// Initializes a new instance of the class.
///
- /// The stream to which compressed data is written or from which data to uncompress is read.
+ /// The stream to which compressed data is written or from which data to decompress is read.
/// The internal block size to use, should be between and .
- /// that determines whether to compress or uncompress data.
+ /// that determines whether to compress or decompress data.
///
/// The value indicating whether should be disposed along with this instance,
/// if is .
From 6fb71ead89bbc192328d03f3c9827f70747c4182 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Mon, 10 Mar 2025 16:03:27 +0300
Subject: [PATCH 39/57] Added checks for stream read-/writeability to LZW
reader and writer (hw3 - lzw)
---
Zipper/Zipper/LZW/LZWReader.cs | 5 +++++
Zipper/Zipper/LZW/LZWWriter.cs | 5 +++++
2 files changed, 10 insertions(+)
diff --git a/Zipper/Zipper/LZW/LZWReader.cs b/Zipper/Zipper/LZW/LZWReader.cs
index 90d0d42..27a35d8 100644
--- a/Zipper/Zipper/LZW/LZWReader.cs
+++ b/Zipper/Zipper/LZW/LZWReader.cs
@@ -31,6 +31,11 @@ internal class LZWReader : IDisposable
/// Stream to read from.
public LZWReader(Stream stream)
{
+ if (!stream.CanRead)
+ {
+ throw new ArgumentException("Stream does not support reading", nameof(stream));
+ }
+
this.stream = stream;
storedCodes = [];
diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs
index 59a7877..bc83168 100644
--- a/Zipper/Zipper/LZW/LZWWriter.cs
+++ b/Zipper/Zipper/LZW/LZWWriter.cs
@@ -37,6 +37,11 @@ public LZWWriter(Stream stream, int blockSize)
ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, LZWStream.MinBlockSize);
ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, LZWStream.MaxBlockSize);
+ if (!stream.CanWrite)
+ {
+ throw new ArgumentException("Stream does not support writing", nameof(stream));
+ }
+
this.stream = stream;
this.blockSize = blockSize;
From 53ded80e7900c5de6c5203ba2e516ec2366196ad Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Wed, 12 Mar 2025 01:52:55 +0300
Subject: [PATCH 40/57] Added BlockType.Flush and changed LZWStream.Flush
behavior (hw3 - lzw)
---
Zipper/Zipper/LZW/BlockType.cs | 5 +++++
Zipper/Zipper/LZW/LZWReader.cs | 21 ++++++++++++++++++---
Zipper/Zipper/LZW/LZWWriter.cs | 11 +++++++----
3 files changed, 30 insertions(+), 7 deletions(-)
diff --git a/Zipper/Zipper/LZW/BlockType.cs b/Zipper/Zipper/LZW/BlockType.cs
index ab4b2ce..42e7683 100644
--- a/Zipper/Zipper/LZW/BlockType.cs
+++ b/Zipper/Zipper/LZW/BlockType.cs
@@ -19,4 +19,9 @@ internal enum BlockType : byte
/// This block was written after and is the last one to be read.
///
EndOfStream = 2,
+
+ ///
+ /// This block was written after .
+ ///
+ Flush = 3,
}
diff --git a/Zipper/Zipper/LZW/LZWReader.cs b/Zipper/Zipper/LZW/LZWReader.cs
index 27a35d8..331fe0c 100644
--- a/Zipper/Zipper/LZW/LZWReader.cs
+++ b/Zipper/Zipper/LZW/LZWReader.cs
@@ -17,6 +17,7 @@ internal class LZWReader : IDisposable
private byte[]? block;
private int blockSize;
private bool endOfStreamReached;
+ private bool flushed;
private byte[]? word;
private int wordPosition;
@@ -93,6 +94,12 @@ public int Read(Span buffer)
break;
}
+ if (blockSize == 0)
+ {
+ block = null;
+ continue;
+ }
+
throw new EndOfStreamException();
}
@@ -112,9 +119,12 @@ public int Read(Span buffer)
lastWordCode++;
}
- var newWord = new byte[word.Length + 1];
- word.CopyTo(newWord, 0);
- storedCodes[lastWordCode] = newWord;
+ if (!flushed)
+ {
+ var newWord = new byte[word.Length + 1];
+ word.CopyTo(newWord, 0);
+ storedCodes[lastWordCode] = newWord;
+ }
}
if (memory.Position >= blockSize)
@@ -161,6 +171,7 @@ private bool TryReadBuffer()
switch (blockType)
{
case BlockType.Default:
+ flushed = false;
break;
case BlockType.FixCodeTableSize:
@@ -171,6 +182,10 @@ private bool TryReadBuffer()
endOfStreamReached = true;
break;
+ case BlockType.Flush:
+ flushed = true;
+ break;
+
default:
throw new InvalidDataException();
}
diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs
index bc83168..e75b581 100644
--- a/Zipper/Zipper/LZW/LZWWriter.cs
+++ b/Zipper/Zipper/LZW/LZWWriter.cs
@@ -131,12 +131,15 @@ public void Flush()
{
FlushInternal(BlockType.Default);
- writer.Write(trie.CurrentValue);
- bitsWrittenInBlock += codeWidth;
+ if (!trie.AtRoot)
+ {
+ writer.Write(trie.CurrentValue);
+ bitsWrittenInBlock += codeWidth;
- trie.Reset();
+ trie.Reset();
- FlushInternal(BlockType.Default);
+ FlushInternal(BlockType.Flush);
+ }
}
///
From 16267da17f1dfbe9bf7d3c39d79d2f536b4e7460 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Wed, 12 Mar 2025 01:53:38 +0300
Subject: [PATCH 41/57] Added tests for LZWStream (hw3 - lzw)
---
.../LZW/LZWStreamExceptionsTests.cs | 152 ++++++++++++++++++
.../LZW/LZWStreamReadWriteTests.cs | 78 +++++++++
Zipper/Zipper.Tests/LZW/LZWTestsSource.cs | 22 +++
3 files changed, 252 insertions(+)
create mode 100644 Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs
create mode 100644 Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs
create mode 100644 Zipper/Zipper.Tests/LZW/LZWTestsSource.cs
diff --git a/Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs b/Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs
new file mode 100644
index 0000000..02ad3d5
--- /dev/null
+++ b/Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs
@@ -0,0 +1,152 @@
+namespace Zipper.Tests.LZW;
+
+using Zipper.LZW;
+
+public class LZWStreamExceptionsTests
+{
+ private LZWStream compressor;
+ private LZWStream decompressor;
+
+ [SetUp]
+ public void Setup()
+ {
+ compressor = new LZWStream(Stream.Null, ZipperMode.Compress);
+ decompressor = new LZWStream(Stream.Null, ZipperMode.Decompress);
+ }
+
+ [TearDown]
+ public void Teardown()
+ {
+ compressor.Dispose();
+ decompressor.Dispose();
+ }
+
+ [Test]
+ public void Constructor_ShouldThrowIf_BlockSize_IsIncorrect()
+ {
+ Assert.Throws(() => new LZWStream(Stream.Null, LZWStream.MinBlockSize - 1, ZipperMode.Compress));
+ Assert.Throws(() => new LZWStream(Stream.Null, LZWStream.MaxBlockSize + 1, ZipperMode.Compress));
+ }
+
+ [Test]
+ public void Constructor_ShouldThrowIf_Mode_IsNotDefined()
+ {
+ Assert.Throws(() => new LZWStream(Stream.Null, ZipperMode.Compress + 10));
+ Assert.Throws(() => new LZWStream(Stream.Null, ZipperMode.Decompress + 100));
+ }
+
+ [Test]
+ public void Constructor_ShouldThrowIf_Mode_IsCompress_And_Stream_CanNotWrite()
+ {
+ Assert.Throws(() => new LZWStream(new UnwriteableStream(), ZipperMode.Compress));
+ }
+
+ [Test]
+ public void Constructor_ShouldThrowIf_Mode_IsDecompress_And_Stream_CanNotRead()
+ {
+ Assert.Throws(() => new LZWStream(new UnreadableStream(), ZipperMode.Decompress));
+ }
+
+ [Test]
+ public void UnsupportedPropertiesAndMethods_ShouldThrow()
+ {
+ Assert.Throws(() => _ = compressor.Length);
+
+ Assert.Throws(() => _ = compressor.Position);
+ Assert.Throws(() => compressor.Position = 0);
+
+ Assert.Throws(() => compressor.Seek(0, SeekOrigin.Begin));
+ Assert.Throws(() => compressor.SetLength(0));
+ }
+
+ [Test]
+ public void CanRead_ShouldReturnFalse_AndCanWrite_ShouldReturnTrue_IfModeIs_Compress()
+ {
+ Assert.Multiple(() =>
+ {
+ Assert.That(compressor.CanRead, Is.False);
+ Assert.That(compressor.CanWrite, Is.True);
+ });
+ }
+
+ [Test]
+ public void CanRead_ShouldReturnTrue_And_CanWrite_ShouldReturnFalse_IfModeIs_Decompress()
+ {
+ Assert.Multiple(() =>
+ {
+ Assert.That(decompressor.CanRead, Is.True);
+ Assert.That(decompressor.CanWrite, Is.False);
+ });
+ }
+
+ [Test]
+ public void CanSeek_ShouldReturnFalse()
+ {
+ Assert.Multiple(() =>
+ {
+ Assert.That(compressor.CanSeek, Is.False);
+ Assert.That(decompressor.CanSeek, Is.False);
+ });
+ }
+
+ [Test]
+ public void Write_ShouldThrow_IfModeIs_Decompress()
+ {
+ Assert.Throws(() => decompressor.Write([]));
+ Assert.Throws(() => decompressor.Write([], 0, 0));
+ }
+
+ [Test]
+ public void Read_ShouldThrow_IfModeIs_Compress()
+ {
+ Assert.Throws(() => _ = compressor.Read([]));
+ Assert.Throws(() => _ = compressor.Read([], 0, 0));
+ }
+
+ [Test]
+ public void AllMethodsShouldThrow_IfDisposed()
+ {
+ compressor.Dispose();
+ decompressor.Dispose();
+
+ Assert.Throws(() => compressor.Write([]));
+ Assert.Throws(() => compressor.Write([], 0, 0));
+
+ Assert.Throws(() => _ = compressor.Read([]));
+ Assert.Throws(() => _ = compressor.Read([], 0, 0));
+
+ Assert.Throws(compressor.Flush);
+ }
+
+ [Test]
+ public void ReadAndWrite_ShouldThrow_IfArgumentsAreIncorrect()
+ {
+ int length = 8;
+ var buffer = new byte[length];
+
+ Test(buffer, -1, length);
+ Test(buffer, length, length);
+
+ Test(buffer, 0, -1);
+ Test(buffer, 0, length + 1);
+
+ Test(buffer, length / 2, length);
+ Test(buffer, 0, -1);
+
+ void Test(byte[] buffer, int offset, int count)
+ {
+ Assert.Throws(() => compressor.Write(buffer, offset, count));
+ Assert.Throws(() => _ = decompressor.Read(buffer, offset, count));
+ }
+ }
+
+ private class UnwriteableStream : MemoryStream
+ {
+ public override bool CanWrite => false;
+ }
+
+ private class UnreadableStream : MemoryStream
+ {
+ public override bool CanRead => false;
+ }
+}
diff --git a/Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs b/Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs
new file mode 100644
index 0000000..9472400
--- /dev/null
+++ b/Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs
@@ -0,0 +1,78 @@
+namespace Zipper.Tests.LZW;
+
+using System.Text;
+using Zipper.LZW;
+
+public class LZWStreamReadWriteTests
+{
+ private static readonly int[] BufferSizes = [1, 2, 3, 7, 14, 19, 31, 63, 127, 255, 1023];
+
+ private MemoryStream stream;
+
+ [SetUp]
+ public void Setup()
+ {
+ stream = new();
+ }
+
+ [Test]
+ public void Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(BufferSizes))] int readBufferSize)
+ {
+ var testData = LZWTestsSource.Data;
+
+ using (var compressor = new LZWStream(stream, ZipperMode.Compress, true))
+ {
+ compressor.Write(testData);
+ }
+
+ DecompressData_And_AssertThat_ItIsCorrect(testData, readBufferSize);
+ }
+
+ [Test]
+ public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes))] int readWriteBufferSize)
+ {
+ var testData = LZWTestsSource.Data.AsSpan();
+
+ using (var compressor = new LZWStream(stream, ZipperMode.Compress, true))
+ {
+ for (int offset = 0; offset < testData.Length; offset += readWriteBufferSize)
+ {
+ var dataSlice = testData.Slice(offset, Math.Min(readWriteBufferSize, testData.Length - offset));
+ compressor.Write(dataSlice);
+ compressor.Flush();
+ }
+ }
+
+ DecompressData_And_AssertThat_ItIsCorrect(testData, readWriteBufferSize);
+ }
+
+ private void DecompressData_And_AssertThat_ItIsCorrect(ReadOnlySpan<byte> testData, int readBufferSize)
+ {
+ stream.Seek(0, SeekOrigin.Begin);
+
+ using var decompressor = new LZWStream(stream, ZipperMode.Decompress, true);
+ int offset = 0;
+ Span<byte> buffer = stackalloc byte[readBufferSize];
+
+ while (true)
+ {
+ int bytesRead = decompressor.Read(buffer);
+ Assert.That(bytesRead, Is.LessThanOrEqualTo(readBufferSize));
+
+ if (bytesRead == 0)
+ {
+ // End of stream: every byte of the original data must have been produced, not just a matching prefix.
+ Assert.That(offset, Is.EqualTo(testData.Length));
+ break;
+ }
+
+ Assert.That(offset + bytesRead, Is.LessThanOrEqualTo(testData.Length));
+
+ var slicedData = testData.Slice(offset, bytesRead);
+ var slicedBuffer = buffer[..bytesRead];
+ Assert.That(slicedBuffer.SequenceEqual(slicedData), Is.True);
+
+ offset += bytesRead;
+ }
+ }
+}
diff --git a/Zipper/Zipper.Tests/LZW/LZWTestsSource.cs b/Zipper/Zipper.Tests/LZW/LZWTestsSource.cs
new file mode 100644
index 0000000..7efa90a
--- /dev/null
+++ b/Zipper/Zipper.Tests/LZW/LZWTestsSource.cs
@@ -0,0 +1,22 @@
+namespace Zipper.Tests.LZW;
+
+[SetUpFixture]
+public class LZWTestsSource
+{
+ // Fixed seed: the corpus is identical on every run and needs no network access.
+ private const int Seed = 20250312;
+
+ public static byte[] Data { get; private set; }
+
+ [OneTimeSetUp]
+ public static void DataSetup()
+ {
+ // Mostly a four-letter alphabet (long repeated phrases) with occasional arbitrary bytes (full byte range).
+ var random = new Random(Seed);
+ Data = new byte[128 * 1024];
+ for (int i = 0; i < Data.Length; i++)
+ {
+ Data[i] = (byte)(random.Next(8) == 0 ? random.Next(256) : 'a' + random.Next(4));
+ }
+ }
+}
From e235467c91df6e5d76ae2a4e31929263903d1d75 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 13 Mar 2025 01:20:11 +0300
Subject: [PATCH 42/57] Fixed api docs in LZWStream (hw3 - lzw)
---
Zipper/Zipper/LZW/LZWStream.cs | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs
index 631d0ee..5783a8e 100644
--- a/Zipper/Zipper/LZW/LZWStream.cs
+++ b/Zipper/Zipper/LZW/LZWStream.cs
@@ -8,12 +8,12 @@ namespace Zipper.LZW;
public class LZWStream : Stream
{
///
- /// Smallest allowed Block length.
+ /// Smallest allowed block length.
///
public const int MinBlockSize = 256;
///
- /// Largest allowed Block length.
+ /// Largest allowed block length.
///
public const int MaxBlockSize = 64 * 1024;
@@ -78,13 +78,13 @@ public LZWStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen =
public override bool CanRead => mode == ZipperMode.Decompress && stream.CanRead;
///
- /// Gets a value indicating whether the stream supports reading.
+ /// Gets a value indicating whether the stream supports writing.
///
///
public override bool CanWrite => mode == ZipperMode.Compress && stream.CanWrite;
///
- /// Gets a value indicating whether the stream supports reading.
+ /// Gets a value indicating whether the stream supports seeking.
///
///
public override bool CanSeek => false;
@@ -134,9 +134,9 @@ public override void Flush()
}
///
- /// Reads data from underlying stream, decompresses it and writes to .
+ /// Reads data from , compresses it and writes it to the underlying stream.
///
- /// Buffer to write decompressed data to.
+ /// Buffer that contains data to be compressed.
/// How many bytes to skip before reading from .
/// How many bytes to read from .
/// Stream is set to mode.
@@ -145,12 +145,12 @@ public override void Write(byte[] buffer, int offset, int count)
=> Write(buffer.AsSpan(offset, count));
///
- /// Reads data from underlying stream, decompresses it and writes to .
+ /// Reads data from the underlying stream, decompresses it and writes to .
///
/// Buffer to write decompressed data to.
- /// How many bytes to skip before reading from .
- /// How many bytes to read from .
- /// Count of read bytes.
+ /// How many bytes to skip before writing to .
+ /// How many bytes to write to .
+ /// Count of read bytes, can be less than .
/// Unexpected end of stream.
/// Invalid data stream.
/// Stream is set to mode.
From ef78f30aebb7a67ae8987a82c5276081b5165594 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 13 Mar 2025 17:48:01 +0300
Subject: [PATCH 43/57] Added BWTStream and BWTMode (hw3 - lzw)
---
Zipper/Zipper/BWTMode.cs | 17 ++
Zipper/Zipper/BWTStream.cs | 323 +++++++++++++++++++++++++++++++++++++
2 files changed, 340 insertions(+)
create mode 100644 Zipper/Zipper/BWTMode.cs
create mode 100644 Zipper/Zipper/BWTStream.cs
diff --git a/Zipper/Zipper/BWTMode.cs b/Zipper/Zipper/BWTMode.cs
new file mode 100644
index 0000000..d54036b
--- /dev/null
+++ b/Zipper/Zipper/BWTMode.cs
@@ -0,0 +1,17 @@
+namespace Zipper;
+
+///
+/// Specifies whether to transform data to or reconstruct data from the underlying stream in .
+///
+public enum BWTMode
+{
+ ///
+ /// Transform data and write it to the underlying stream.
+ ///
+ Transform,
+
+ ///
+ /// Read data from the underlying stream and reconstruct it.
+ ///
+ Reconstruct,
+}
diff --git a/Zipper/Zipper/BWTStream.cs b/Zipper/Zipper/BWTStream.cs
new file mode 100644
index 0000000..be8a652
--- /dev/null
+++ b/Zipper/Zipper/BWTStream.cs
@@ -0,0 +1,323 @@
+namespace Zipper;
+
+using System.Buffers;
+using System.Buffers.Binary;
+using System.Diagnostics;
+
+///
+/// Provides methods and properties used to transform and reconstruct data streams by using the BWT algorithm.
+///
+public class BWTStream : Stream
+{
+ ///
+ /// Smallest allowed block length.
+ ///
+ public const int MinBlockSize = 64;
+
+ ///
+ /// Largest allowed block length.
+ ///
+ public const int MaxBlockSize = 4 * 1024;
+
+ private const int DefaultBlockSize = 512;
+
+ private static readonly ArrayPool BlockPool = ArrayPool.Create();
+
+ private readonly Stream stream;
+ private readonly BWTMode mode;
+ private readonly bool leaveOpen;
+
+ private byte[]? block;
+ private int blockPosition;
+ private int blockSize;
+
+ private bool disposed;
+
+ /// <summary>Initializes a new instance of the <see cref="BWTStream"/> class that uses the default block size.</summary>
+ public BWTStream(Stream stream, BWTMode mode, bool leaveOpen = false)
+ : this(stream, DefaultBlockSize, mode, leaveOpen)
+ {
+ }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="BWTStream"/> class.
+    /// </summary>
+    /// <param name="stream">The stream to which transformed data is written or from which data to reconstruct is read.</param>
+    /// <param name="blockSize">The internal block size to use, should be between <see cref="MinBlockSize"/> and <see cref="MaxBlockSize"/>.</param>
+    /// <param name="mode"><see cref="BWTMode"/> that determines whether to transform or reconstruct data.</param>
+    /// <param name="leaveOpen"><see langword="true"/> to leave <paramref name="stream"/> open when this instance is disposed; otherwise it is disposed as well.</param>
+    /// <remarks>In <see cref="BWTMode.Reconstruct"/> mode <paramref name="blockSize"/> is only range-checked; block lengths are read from the stream.</remarks>
+    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <see langword="null"/>.</exception>
+    /// <exception cref="ArgumentException"><paramref name="mode"/> is not <see cref="BWTMode.Transform"/> nor <see cref="BWTMode.Reconstruct"/>.</exception>
+    /// <exception cref="ArgumentException"><paramref name="stream"/> cannot be written to (transform) or read from (reconstruct).</exception>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="blockSize"/> is out of range.</exception>
+    public BWTStream(Stream stream, int blockSize, BWTMode mode, bool leaveOpen = false)
+    {
+        ArgumentNullException.ThrowIfNull(stream);
+        ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize);
+        ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize);
+
+        if (!Enum.IsDefined(mode))
+        {
+            throw new ArgumentException($"Value was neither {BWTMode.Transform} nor {BWTMode.Reconstruct}", nameof(mode));
+        }
+
+        if (mode == BWTMode.Transform)
+        {
+            if (!stream.CanWrite)
+            {
+                throw new ArgumentException("Stream does not support writing", nameof(stream));
+            }
+
+            this.blockSize = blockSize;
+        }
+        else
+        {
+            // The blockSize field is left unset here: ReadBlock assigns it from every block header it reads.
+            if (!stream.CanRead)
+            {
+                throw new ArgumentException("Stream does not support reading", nameof(stream));
+            }
+        }
+
+        this.stream = stream;
+        this.mode = mode;
+        this.leaveOpen = leaveOpen;
+    }
+
+ /// <summary>
+ /// Gets a value indicating whether the stream supports reading.
+ /// </summary>
+ /// <value><see langword="true"/> only in <see cref="BWTMode.Reconstruct"/> mode and while the underlying stream can be read.</value>
+ public override bool CanRead => mode == BWTMode.Reconstruct && stream.CanRead;
+
+ /// <summary>
+ /// Gets a value indicating whether the stream supports writing.
+ /// </summary>
+ /// <value><see langword="true"/> only in <see cref="BWTMode.Transform"/> mode and while the underlying stream can be written.</value>
+ public override bool CanWrite => mode == BWTMode.Transform && stream.CanWrite;
+
+ /// <summary>
+ /// Gets a value indicating whether the stream supports seeking.
+ /// </summary>
+ /// <value>Always <see langword="false"/>.</value>
+ public override bool CanSeek => false;
+
+ /// <summary>
+ /// This property is not supported and always throws a <see cref="NotSupportedException"/>.
+ /// </summary>
+ /// <exception cref="NotSupportedException">Always thrown.</exception>
+ public override long Length => throw new NotSupportedException();
+
+ /// <summary>
+ /// This property is not supported and always throws a <see cref="NotSupportedException"/>.
+ /// </summary>
+ /// <exception cref="NotSupportedException">Always thrown, by both the getter and the setter.</exception>
+ public override long Position
+ {
+ get => throw new NotSupportedException();
+ set => throw new NotSupportedException();
+ }
+
+ /// <summary>
+ /// This method is not supported and always throws a <see cref="NotSupportedException"/>.
+ /// </summary>
+ /// <exception cref="NotSupportedException">Always thrown.</exception>
+ public override long Seek(long offset, SeekOrigin origin)
+ => throw new NotSupportedException();
+
+ /// <summary>
+ /// This method is not supported and always throws a <see cref="NotSupportedException"/>.
+ /// </summary>
+ /// <exception cref="NotSupportedException">Always thrown.</exception>
+ public override void SetLength(long value)
+ => throw new NotSupportedException();
+
+ /// <summary>
+ /// In transform mode, writes out the partially filled block (if any) and flushes the underlying stream; does nothing in reconstruct mode.
+ /// </summary>
+ /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+ public override void Flush()
+ {
+ EnsureNotClosed();
+ if (mode == BWTMode.Transform)
+ {
+ if (block != null)
+ {
+ WriteBlock();
+ }
+
+ stream.Flush();
+ }
+ }
+
+ /// <summary>
+ /// Reads data from <paramref name="buffer"/>, transforms it and writes it to the underlying stream.
+ /// </summary>
+ /// <param name="buffer">Buffer that contains data to be transformed.</param>
+ /// <param name="offset">How many bytes to skip before reading from <paramref name="buffer"/>.</param>
+ /// <param name="count">How many bytes to read from <paramref name="buffer"/>.</param>
+ /// <exception cref="InvalidOperationException">Stream is set to <see cref="BWTMode.Reconstruct"/> mode.</exception>
+ /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+ public override void Write(byte[] buffer, int offset, int count)
+ => Write(buffer.AsSpan(offset, count));
+
+ /// <summary>
+ /// Reads data from the underlying stream, reconstructs it and writes to <paramref name="buffer"/>.
+ /// </summary>
+ /// <param name="buffer">Buffer to write reconstructed data to.</param>
+ /// <param name="offset">How many bytes to skip before writing to <paramref name="buffer"/>.</param>
+ /// <param name="count">How many bytes to write to <paramref name="buffer"/>.</param>
+ /// <returns>Count of read bytes, can be less than <paramref name="count"/>.</returns>
+ /// <exception cref="EndOfStreamException">Unexpected end of stream.</exception>
+ /// <exception cref="InvalidDataException">Invalid data stream.</exception>
+ /// <exception cref="InvalidOperationException">Stream is set to <see cref="BWTMode.Transform"/> mode.</exception>
+ /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+ public override int Read(byte[] buffer, int offset, int count)
+ => Read(buffer.AsSpan(offset, count));
+
+    /// <summary>Buffers <paramref name="buffer"/> into blocks; each block that becomes full is transformed and written to the underlying stream.</summary>
+    public override void Write(ReadOnlySpan<byte> buffer)
+    {
+        EnsureNotClosed();
+        EnsureMode(BWTMode.Transform);
+
+        // Consume the input front to back; WriteBlock emits a full block and resets the field to null.
+        while (!buffer.IsEmpty)
+        {
+            if (block == null)
+            {
+                block = BlockPool.Rent(blockSize);
+                blockPosition = 0;
+            }
+
+            int copyLength = Math.Min(blockSize - blockPosition, buffer.Length);
+            buffer[..copyLength].CopyTo(block.AsSpan(blockPosition, copyLength));
+
+            buffer = buffer[copyLength..];
+            blockPosition += copyLength;
+
+            if (blockPosition >= blockSize)
+            {
+                WriteBlock();
+            }
+        }
+    }
+
+    /// <summary>Copies reconstructed bytes into <paramref name="buffer"/>, reading further blocks from the underlying stream as needed; returns the number of bytes copied.</summary>
+    public override int Read(Span<byte> buffer)
+    {
+        EnsureNotClosed();
+        EnsureMode(BWTMode.Reconstruct);
+        int bufferPosition = 0;
+        while (bufferPosition < buffer.Length)
+        {
+            if (block == null && !ReadBlock())
+            {
+                break;
+            }
+
+            int copyLength = Math.Min(blockSize - blockPosition, buffer.Length - bufferPosition);
+            block.AsSpan(blockPosition, copyLength).CopyTo(buffer.Slice(bufferPosition, copyLength));
+            bufferPosition += copyLength;
+            blockPosition += copyLength;
+
+            if (blockPosition >= blockSize)
+            {
+                // The block is fully consumed (non-null here: either it already was, or ReadBlock just rented it); return it so the pool can reuse it.
+                BlockPool.Return(block!);
+                block = null;
+            }
+        }
+
+        return bufferPosition;
+    }
+
+ ///
+ protected override void Dispose(bool disposing)
+ {
+ if (disposed)
+ {
+ return;
+ }
+
+ if (disposing)
+ {
+ Flush();
+
+ if (block != null)
+ {
+ BlockPool.Return(block);
+ }
+
+ if (!leaveOpen)
+ {
+ stream.Dispose();
+ }
+
+ disposed = true;
+ }
+ }
+
+    // Transforms the first blockPosition bytes of the current block, writes them to the underlying
+    // stream behind an 8-byte header, then releases the block so that the next Write rents a fresh one.
+    private void WriteBlock()
+    {
+        Debug.Assert(block != null, "Block is null");
+        var transformBuffer = BlockPool.Rent(blockPosition);
+        int identityIndex = BWT.ForwardTransform(block.AsSpan(0, blockPosition), transformBuffer.AsSpan(0, blockPosition));
+
+        // Header layout: payload length, then the index returned by the forward transform (little-endian Int32 each).
+        Span<byte> header = stackalloc byte[8];
+        BinaryPrimitives.WriteInt32LittleEndian(header[..4], blockPosition);
+        BinaryPrimitives.WriteInt32LittleEndian(header[4..], identityIndex);
+        stream.Write(header);
+        stream.Write(transformBuffer, 0, blockPosition);
+
+        BlockPool.Return(transformBuffer);
+        BlockPool.Return(block);
+        block = null;
+    }
+
+ private bool ReadBlock()
+ {
+ Debug.Assert(block == null, "Block was not returned before reading");
+
+ Span header = stackalloc byte[8];
+ if (stream.Read(header) != 8)
+ {
+ return false;
+ }
+
+ blockSize = BinaryPrimitives.ReadInt32LittleEndian(header[0..4]);
+ var identityIndex = BinaryPrimitives.ReadInt32LittleEndian(header[4..8]);
+
+ var transformedData = BlockPool.Rent(blockSize);
+ if (stream.Read(transformedData, 0, blockSize) != blockSize)
+ {
+ BlockPool.Return(transformedData);
+
+ return false;
+ }
+
+ block = BlockPool.Rent(blockSize);
+ BWT.InverseTransform(transformedData.AsSpan()[0..blockSize], identityIndex, block.AsSpan()[0..blockSize]);
+ BlockPool.Return(transformedData);
+ blockPosition = 0;
+
+ return true;
+ }
+
+    private void EnsureMode(BWTMode mode)
+    {
+        if (this.mode != mode)
+        {
+            throw new InvalidOperationException($"Operation requires {mode} mode, but the stream was created in {this.mode} mode");
+        }
+    }
+
+    // Guard called by Flush, Read and Write: once Dispose has set the disposed flag,
+    // any further use of the stream fails with ObjectDisposedException.
+    private void EnsureNotClosed()
+        => ObjectDisposedException.ThrowIf(disposed, this);
+}
From 42b16d8d8dc628a71d0967d6466c9352d5ad1435 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 13 Mar 2025 17:59:25 +0300
Subject: [PATCH 44/57] Overhauled LZWStream tests (hw3 - lzw)
---
.../Zipper.Tests/Streams/IStreamProvider.cs | 20 +++++++++++
Zipper/Zipper.Tests/Streams/LZWStreamTests.cs | 33 +++++++++++++++++++
.../StreamExceptionsTests.cs} | 28 ++++++++--------
.../StreamReadWriteTests.cs} | 28 ++++++++++------
.../StreamTestsSource.cs} | 4 +--
5 files changed, 87 insertions(+), 26 deletions(-)
create mode 100644 Zipper/Zipper.Tests/Streams/IStreamProvider.cs
create mode 100644 Zipper/Zipper.Tests/Streams/LZWStreamTests.cs
rename Zipper/Zipper.Tests/{LZW/LZWStreamExceptionsTests.cs => Streams/StreamExceptionsTests.cs} (76%)
rename Zipper/Zipper.Tests/{LZW/LZWStreamReadWriteTests.cs => Streams/StreamReadWriteTests.cs} (68%)
rename Zipper/Zipper.Tests/{LZW/LZWTestsSource.cs => Streams/StreamTestsSource.cs} (91%)
diff --git a/Zipper/Zipper.Tests/Streams/IStreamProvider.cs b/Zipper/Zipper.Tests/Streams/IStreamProvider.cs
new file mode 100644
index 0000000..2d45fc0
--- /dev/null
+++ b/Zipper/Zipper.Tests/Streams/IStreamProvider.cs
@@ -0,0 +1,20 @@
+namespace Zipper.Tests.Streams;
+
+public interface IStreamProvider<TStream, TMode>
+    where TStream : Stream
+    where TMode : Enum
+{
+    public static abstract int MinBlockSize { get; }
+
+    public static abstract int MaxBlockSize { get; }
+
+    public static abstract TMode WritingMode { get; }
+
+    public static abstract TMode ReadingMode { get; }
+
+    public static abstract TMode UndefinedMode { get; }
+
+    public static abstract TStream CreateStream(Stream stream, int blockSize, TMode mode, bool leaveOpen = false);
+
+    public static abstract TStream CreateStream(Stream stream, TMode mode, bool leaveOpen = false);
+}
diff --git a/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs b/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs
new file mode 100644
index 0000000..2d3b672
--- /dev/null
+++ b/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs
@@ -0,0 +1,33 @@
+namespace Zipper.Tests.Streams;
+
+using Zipper.LZW;
+
+public class LZWStreamTests
+{
+    public class LZWStreamProvider : IStreamProvider<LZWStream, ZipperMode>
+    {
+        public static int MinBlockSize => LZWStream.MinBlockSize;
+
+        public static int MaxBlockSize => LZWStream.MaxBlockSize;
+
+        public static ZipperMode WritingMode => ZipperMode.Compress;
+
+        public static ZipperMode ReadingMode => ZipperMode.Decompress;
+
+        public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100;
+
+        public static LZWStream CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false)
+            => new(stream, blockSize, mode, leaveOpen);
+
+        public static LZWStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false)
+            => new(stream, mode, leaveOpen);
+    }
+
+    public class LZWStreamExceptionsTests : StreamExceptionsTests<LZWStream, ZipperMode, LZWStreamProvider>
+    {
+    }
+
+    public class LZWStreamReadWriteTests : StreamReadWriteTests<LZWStream, ZipperMode, LZWStreamProvider>
+    {
+    }
+}
diff --git a/Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs b/Zipper/Zipper.Tests/Streams/StreamExceptionsTests.cs
similarity index 76%
rename from Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs
rename to Zipper/Zipper.Tests/Streams/StreamExceptionsTests.cs
index 02ad3d5..a67b809 100644
--- a/Zipper/Zipper.Tests/LZW/LZWStreamExceptionsTests.cs
+++ b/Zipper/Zipper.Tests/Streams/StreamExceptionsTests.cs
@@ -1,17 +1,18 @@
-namespace Zipper.Tests.LZW;
+namespace Zipper.Tests.Streams;
-using Zipper.LZW;
-
-public class LZWStreamExceptionsTests
+public abstract class StreamExceptionsTests
+ where TStream : Stream
+ where TMode : Enum
+ where TProvider : IStreamProvider
{
- private LZWStream compressor;
- private LZWStream decompressor;
+ private TStream compressor;
+ private TStream decompressor;
[SetUp]
public void Setup()
{
- compressor = new LZWStream(Stream.Null, ZipperMode.Compress);
- decompressor = new LZWStream(Stream.Null, ZipperMode.Decompress);
+ compressor = TProvider.CreateStream(Stream.Null, TProvider.MinBlockSize, TProvider.WritingMode);
+ decompressor = TProvider.CreateStream(Stream.Null, TProvider.MinBlockSize, TProvider.ReadingMode);
}
[TearDown]
@@ -24,27 +25,26 @@ public void Teardown()
[Test]
public void Constructor_ShouldThrowIf_BlockSize_IsIncorrect()
{
- Assert.Throws(() => new LZWStream(Stream.Null, LZWStream.MinBlockSize - 1, ZipperMode.Compress));
- Assert.Throws(() => new LZWStream(Stream.Null, LZWStream.MaxBlockSize + 1, ZipperMode.Compress));
+ Assert.Throws(() => TProvider.CreateStream(Stream.Null, TProvider.MinBlockSize - 1, TProvider.WritingMode));
+ Assert.Throws(() => TProvider.CreateStream(Stream.Null, TProvider.MaxBlockSize + 1, TProvider.WritingMode));
}
[Test]
public void Constructor_ShouldThrowIf_Mode_IsNotDefined()
{
- Assert.Throws(() => new LZWStream(Stream.Null, ZipperMode.Compress + 10));
- Assert.Throws(() => new LZWStream(Stream.Null, ZipperMode.Decompress + 100));
+ Assert.Throws(() => TProvider.CreateStream(Stream.Null, TProvider.UndefinedMode));
}
[Test]
public void Constructor_ShouldThrowIf_Mode_IsCompress_And_Stream_CanNotWrite()
{
- Assert.Throws(() => new LZWStream(new UnwriteableStream(), ZipperMode.Compress));
+ Assert.Throws(() => TProvider.CreateStream(new UnwriteableStream(), TProvider.WritingMode));
}
[Test]
public void Constructor_ShouldThrowIf_Mode_IsDecompress_And_Stream_CanNotRead()
{
- Assert.Throws(() => new LZWStream(new UnreadableStream(), ZipperMode.Decompress));
+ Assert.Throws(() => TProvider.CreateStream(new UnreadableStream(), TProvider.ReadingMode));
}
[Test]
diff --git a/Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs b/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs
similarity index 68%
rename from Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs
rename to Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs
index 9472400..c25ca13 100644
--- a/Zipper/Zipper.Tests/LZW/LZWStreamReadWriteTests.cs
+++ b/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs
@@ -1,9 +1,9 @@
-namespace Zipper.Tests.LZW;
+namespace Zipper.Tests.Streams;
-using System.Text;
-using Zipper.LZW;
-
-public class LZWStreamReadWriteTests
+public abstract class StreamReadWriteTests
+ where TStream : Stream
+ where TMode : Enum
+ where TProvider : IStreamProvider
{
private static readonly int[] BufferSizes = [1, 2, 3, 7, 14, 19, 31, 63, 127, 255, 1023];
@@ -18,9 +18,9 @@ public void Setup()
[Test]
public void Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(BufferSizes))] int readBufferSize)
{
- var testData = LZWTestsSource.Data;
+ var testData = GetData(readBufferSize);
- using (var compressor = new LZWStream(stream, ZipperMode.Compress, true))
+ using (var compressor = TProvider.CreateStream(stream, TProvider.WritingMode, true))
{
compressor.Write(testData);
}
@@ -31,9 +31,9 @@ public void Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(Bu
[Test]
public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes))] int readWriteBufferSize)
{
- var testData = LZWTestsSource.Data.AsSpan();
+ var testData = GetData(readWriteBufferSize);
- using (var compressor = new LZWStream(stream, ZipperMode.Compress, true))
+ using (var compressor = TProvider.CreateStream(stream, TProvider.WritingMode, true))
{
for (int offset = 0; offset < testData.Length; offset += readWriteBufferSize)
{
@@ -46,11 +46,19 @@ public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes))
DecompressData_And_AssertThat_ItIsCorrect(testData, readWriteBufferSize);
}
+ private static ReadOnlySpan GetData(int bufferSize)
+ {
+ var data = StreamTestsSource.Data.AsSpan();
+ var relativeLength = Math.Clamp(bufferSize / (float)BufferSizes[^1], 0, 1);
+
+ return data[..(int)Math.Ceiling(data.Length * relativeLength)];
+ }
+
private void DecompressData_And_AssertThat_ItIsCorrect(ReadOnlySpan testData, int readBufferSize)
{
stream.Seek(0, SeekOrigin.Begin);
- using var decompressor = new LZWStream(stream, ZipperMode.Decompress, true);
+ using var decompressor = TProvider.CreateStream(stream, TProvider.ReadingMode, true);
int offset = 0;
Span buffer = stackalloc byte[readBufferSize];
diff --git a/Zipper/Zipper.Tests/LZW/LZWTestsSource.cs b/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs
similarity index 91%
rename from Zipper/Zipper.Tests/LZW/LZWTestsSource.cs
rename to Zipper/Zipper.Tests/Streams/StreamTestsSource.cs
index 7efa90a..32b0013 100644
--- a/Zipper/Zipper.Tests/LZW/LZWTestsSource.cs
+++ b/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs
@@ -1,7 +1,7 @@
-namespace Zipper.Tests.LZW;
+namespace Zipper.Tests.Streams;
[SetUpFixture]
-public class LZWTestsSource
+public class StreamTestsSource
{
// "https://en.wikipedia.org/wiki/Lempel–Ziv–Welch";
private const string DataUrl = "https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch";
From f0975eec14c7d73107546e720e6e1908a228d72a Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 13 Mar 2025 17:59:56 +0300
Subject: [PATCH 45/57] Added tests for BWTStream (hw3 - lzw)
---
Zipper/Zipper.Tests/Streams/BWTStreamTests.cs | 31 +++++++++++++++++++
1 file changed, 31 insertions(+)
create mode 100644 Zipper/Zipper.Tests/Streams/BWTStreamTests.cs
diff --git a/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs b/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs
new file mode 100644
index 0000000..1343f2f
--- /dev/null
+++ b/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs
@@ -0,0 +1,31 @@
+namespace Zipper.Tests.Streams;
+
+public class BWTStreamTests
+{
+ public class BWTStreamProvider : IStreamProvider
+ {
+ public static int MinBlockSize => BWTStream.MinBlockSize;
+
+ public static int MaxBlockSize => BWTStream.MaxBlockSize;
+
+ public static BWTMode WritingMode => BWTMode.Transform;
+
+ public static BWTMode ReadingMode => BWTMode.Reconstruct;
+
+ public static BWTMode UndefinedMode => BWTMode.Reconstruct + 100;
+
+ public static BWTStream CreateStream(Stream stream, int blockSize, BWTMode mode, bool leaveOpen = false)
+ => new(stream, blockSize, mode, leaveOpen);
+
+ public static BWTStream CreateStream(Stream stream, BWTMode mode, bool leaveOpen = false)
+ => new(stream, mode, leaveOpen);
+ }
+
+ public class BWTStreamExceptionsTests : StreamExceptionsTests
+ {
+ }
+
+ public class BWTStreamReadWriteTests : StreamReadWriteTests
+ {
+ }
+}
From 0b63462f2fb95af3dcfd0fcde9d7ed19caa56c87 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 13 Mar 2025 20:18:07 +0300
Subject: [PATCH 46/57] Added some checks in BWTStream (hw3 - lzw)
---
Zipper/Zipper/BWTStream.cs | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/Zipper/Zipper/BWTStream.cs b/Zipper/Zipper/BWTStream.cs
index be8a652..9e0adca 100644
--- a/Zipper/Zipper/BWTStream.cs
+++ b/Zipper/Zipper/BWTStream.cs
@@ -245,10 +245,7 @@ protected override void Dispose(bool disposing)
{
Flush();
- if (block != null)
- {
- BlockPool.Return(block);
- }
+ Debug.Assert(block == null, "Block was not returned");
if (!leaveOpen)
{
@@ -292,12 +289,17 @@ private bool ReadBlock()
blockSize = BinaryPrimitives.ReadInt32LittleEndian(header[0..4]);
var identityIndex = BinaryPrimitives.ReadInt32LittleEndian(header[4..8]);
+ if (identityIndex < 0 || identityIndex >= blockSize)
+ {
+ throw new InvalidDataException();
+ }
+
var transformedData = BlockPool.Rent(blockSize);
if (stream.Read(transformedData, 0, blockSize) != blockSize)
{
BlockPool.Return(transformedData);
- return false;
+ throw new EndOfStreamException();
}
block = BlockPool.Rent(blockSize);
From 2e449db1ac248623e21a16142a910642f86df74f Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 13 Mar 2025 20:19:16 +0300
Subject: [PATCH 47/57] Added image data as test source (hw3 - lzw)
---
.../Streams/StreamReadWriteTests.cs | 7 ++-
.../Zipper.Tests/Streams/StreamTestsSource.cs | 46 ++++++++++++++++---
2 files changed, 43 insertions(+), 10 deletions(-)
diff --git a/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs b/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs
index c25ca13..4927f45 100644
--- a/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs
+++ b/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs
@@ -18,7 +18,7 @@ public void Setup()
[Test]
public void Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(BufferSizes))] int readBufferSize)
{
- var testData = GetData(readBufferSize);
+ var testData = GetData(StreamTestsSource.ImageData, readBufferSize);
using (var compressor = TProvider.CreateStream(stream, TProvider.WritingMode, true))
{
@@ -31,7 +31,7 @@ public void Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(Bu
[Test]
public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes))] int readWriteBufferSize)
{
- var testData = GetData(readWriteBufferSize);
+ var testData = GetData(StreamTestsSource.TextData, readWriteBufferSize);
using (var compressor = TProvider.CreateStream(stream, TProvider.WritingMode, true))
{
@@ -46,9 +46,8 @@ public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes))
DecompressData_And_AssertThat_ItIsCorrect(testData, readWriteBufferSize);
}
- private static ReadOnlySpan GetData(int bufferSize)
+ private static ReadOnlySpan GetData(ReadOnlySpan data, int bufferSize)
{
- var data = StreamTestsSource.Data.AsSpan();
var relativeLength = Math.Clamp(bufferSize / (float)BufferSizes[^1], 0, 1);
return data[..(int)Math.Ceiling(data.Length * relativeLength)];
diff --git a/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs b/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs
index 32b0013..e970322 100644
--- a/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs
+++ b/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs
@@ -1,22 +1,56 @@
namespace Zipper.Tests.Streams;
+using System.Diagnostics.CodeAnalysis;
+
[SetUpFixture]
+[ExcludeFromCodeCoverage]
public class StreamTestsSource
{
- // "https://en.wikipedia.org/wiki/Lempel–Ziv–Welch";
- private const string DataUrl = "https://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch";
+ // "https://filesamples.com/samples/image/bmp/sample_640×426.bmp";
+ private const string ImageDataUrl = "https://filesamples.com/samples/image/bmp/sample_640%C3%97426.bmp";
+
+ // "https://neerc.ifmo.ru/wiki/index.php?title=Алгоритм_LZW";
+ private const string TextDataUrl = "https://neerc.ifmo.ru/wiki/index.php?title=%D0%90%D0%BB%D0%B3%D0%BE%D1%80%D0%B8%D1%82%D0%BC_LZW";
- public static byte[] Data { get; private set; }
+ public static byte[] TextData { get; private set; }
+
+ public static byte[] ImageData { get; private set; }
[OneTimeSetUp]
public static void DataSetup()
{
+ TextData = DownloadOrReuse(TextDataUrl);
+ ImageData = DownloadOrReuse(ImageDataUrl);
+ }
+
+ private static byte[] DownloadOrReuse(string url)
+ {
+ string testFilesDirectory = "test_files";
+ if (!Directory.Exists(testFilesDirectory))
+ {
+ Directory.CreateDirectory(testFilesDirectory);
+ }
+
+ var filename = $"test_file_{string.Concat(url.Select(c => char.IsAsciiLetterOrDigit(c) ? c : '_'))}";
+ var filePath = Path.Combine(testFilesDirectory, filename);
+
+ if (File.Exists(filePath))
+ {
+ return File.ReadAllBytes(filePath);
+ }
+
using var client = new HttpClient();
- var response = client.Send(new HttpRequestMessage(HttpMethod.Get, DataUrl));
+ using var memory = new MemoryStream();
+
+ var response = client.Send(new HttpRequestMessage(HttpMethod.Get, url));
response.EnsureSuccessStatusCode();
+
using var content = response.Content.ReadAsStream();
- using var memory = new MemoryStream();
content.CopyTo(memory);
- Data = memory.ToArray();
+
+ var data = memory.ToArray();
+ File.WriteAllBytes(filePath, data);
+
+ return data;
}
}
From 907a0c5efed8c7f251cc4bd10d93fe7c7a301f1f Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 13 Mar 2025 20:46:00 +0300
Subject: [PATCH 48/57] Made BWTStream and BWTMode internal (hw3 - lzw)
---
Zipper/Zipper.Tests/Streams/BWTStreamTests.cs | 2 +-
Zipper/Zipper/BWTMode.cs | 2 +-
Zipper/Zipper/BWTStream.cs | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs b/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs
index 1343f2f..689086d 100644
--- a/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs
+++ b/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs
@@ -1,6 +1,6 @@
namespace Zipper.Tests.Streams;
-public class BWTStreamTests
+internal class BWTStreamTests
{
public class BWTStreamProvider : IStreamProvider
{
diff --git a/Zipper/Zipper/BWTMode.cs b/Zipper/Zipper/BWTMode.cs
index d54036b..7774560 100644
--- a/Zipper/Zipper/BWTMode.cs
+++ b/Zipper/Zipper/BWTMode.cs
@@ -3,7 +3,7 @@ namespace Zipper;
///
/// Specifies whether to transform data to or reconstruct data from the underlying stream in .
///
-public enum BWTMode
+internal enum BWTMode
{
///
/// Transform data and write it to the underlying stream.
diff --git a/Zipper/Zipper/BWTStream.cs b/Zipper/Zipper/BWTStream.cs
index 9e0adca..34e76b1 100644
--- a/Zipper/Zipper/BWTStream.cs
+++ b/Zipper/Zipper/BWTStream.cs
@@ -7,7 +7,7 @@ namespace Zipper;
///
/// Provides methods and properties used to transform and reconstruct data streams by using the BWT algorithm.
///
-public class BWTStream : Stream
+internal class BWTStream : Stream
{
///
/// Smallest allowed block length.
From 7d8470f8a77bd0c8242557ce1354609d79f58f9b Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Fri, 14 Mar 2025 02:51:30 +0300
Subject: [PATCH 49/57] Write block size as int instead of ushort (hw3 - lzw)
---
Zipper/Zipper/LZW/LZWReader.cs | 4 ++--
Zipper/Zipper/LZW/LZWWriter.cs | 6 +++---
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/Zipper/Zipper/LZW/LZWReader.cs b/Zipper/Zipper/LZW/LZWReader.cs
index 331fe0c..4287b96 100644
--- a/Zipper/Zipper/LZW/LZWReader.cs
+++ b/Zipper/Zipper/LZW/LZWReader.cs
@@ -150,7 +150,7 @@ public void Dispose()
private bool TryReadBuffer()
{
- int headerSize = 4;
+ int headerSize = 6;
Span header = stackalloc byte[headerSize];
if (stream.Read(header) != headerSize)
{
@@ -160,7 +160,7 @@ private bool TryReadBuffer()
var blockType = (BlockType)header[0];
var codeWidth = header[1];
- blockSize = BinaryPrimitives.ReadUInt16LittleEndian(header[2..4]);
+ blockSize = BinaryPrimitives.ReadInt32LittleEndian(header[2..6]);
block = BlockPool.Rent(blockSize);
if (stream.Read(block, 0, blockSize) != blockSize)
diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs
index e75b581..9e50b81 100644
--- a/Zipper/Zipper/LZW/LZWWriter.cs
+++ b/Zipper/Zipper/LZW/LZWWriter.cs
@@ -9,7 +9,7 @@ namespace Zipper.LZW;
///
internal class LZWWriter : IDisposable
{
- private const int DataOffset = 4;
+ private const int DataOffset = 6;
private const int MaxCodesCount = (320 * 1024) - 1;
private static readonly ArrayPool BlockPool = ArrayPool.Create();
@@ -168,13 +168,13 @@ private void FlushInternal(BlockType type)
writer.Flush();
int length = (int)memory.Position;
- ushort dataLength = (ushort)(length - DataOffset);
+ int dataLength = length - DataOffset;
if (!(dataLength == 0 && type == BlockType.Default))
{
block[0] = (byte)type;
block[1] = (byte)codeWidth;
- BinaryPrimitives.WriteUInt16LittleEndian(block.AsSpan()[2..4], dataLength);
+ BinaryPrimitives.WriteInt32LittleEndian(block.AsSpan()[2..6], dataLength);
stream.Write(block, 0, length);
stream.Flush();
From 07240dd7a9caf4bcb2df09681e18601b29e7d5ec Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Fri, 14 Mar 2025 02:53:25 +0300
Subject: [PATCH 50/57] Changed block sizes (hw3 - lzw)
---
Zipper/Zipper/BWTStream.cs | 6 +++---
Zipper/Zipper/LZW/LZWStream.cs | 6 +++---
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/Zipper/Zipper/BWTStream.cs b/Zipper/Zipper/BWTStream.cs
index 34e76b1..56369bd 100644
--- a/Zipper/Zipper/BWTStream.cs
+++ b/Zipper/Zipper/BWTStream.cs
@@ -12,14 +12,14 @@ internal class BWTStream : Stream
///
/// Smallest allowed block length.
///
- public const int MinBlockSize = 64;
+ public const int MinBlockSize = 1024;
///
/// Largest allowed block length.
///
- public const int MaxBlockSize = 4 * 1024;
+ public const int MaxBlockSize = 16384;
- private const int DefaultBlockSize = 512;
+ private const int DefaultBlockSize = (MinBlockSize + MaxBlockSize) / 2;
private static readonly ArrayPool BlockPool = ArrayPool.Create();
diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs
index 5783a8e..bda1288 100644
--- a/Zipper/Zipper/LZW/LZWStream.cs
+++ b/Zipper/Zipper/LZW/LZWStream.cs
@@ -10,14 +10,14 @@ public class LZWStream : Stream
///
/// Smallest allowed block length.
///
- public const int MinBlockSize = 256;
+ public const int MinBlockSize = 4096;
///
/// Largest allowed block length.
///
- public const int MaxBlockSize = 64 * 1024;
+ public const int MaxBlockSize = 256 * 1024;
- private const int DefaultBlockSize = 1024;
+ private const int DefaultBlockSize = (MinBlockSize + MaxBlockSize) / 2;
private readonly Stream stream;
private readonly ZipperMode mode;
From e21b9640db927171af6ae0b7969902ab397828cf Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Fri, 14 Mar 2025 02:56:04 +0300
Subject: [PATCH 51/57] Added ZipperStream and tests for it (hw3 - lzw)
---
.../Zipper.Tests/Streams/ZipperStreamTests.cs | 31 +++
Zipper/Zipper/ZipperStream.cs | 211 ++++++++++++++++++
2 files changed, 242 insertions(+)
create mode 100644 Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs
create mode 100644 Zipper/Zipper/ZipperStream.cs
diff --git a/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs b/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs
new file mode 100644
index 0000000..c566ad3
--- /dev/null
+++ b/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs
@@ -0,0 +1,31 @@
+namespace Zipper.Tests.Streams;
+
+internal class ZipperStreamTests
+{
+    public class ZipperStreamProvider : IStreamProvider<ZipperStream, ZipperMode>
+    {
+        public static int MinBlockSize => ZipperStream.MinBlockSize;
+
+        public static int MaxBlockSize => ZipperStream.MaxBlockSize;
+
+        public static ZipperMode WritingMode => ZipperMode.Compress;
+
+        public static ZipperMode ReadingMode => ZipperMode.Decompress;
+
+        public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100;
+
+        public static ZipperStream CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false)
+            => new(stream, blockSize, mode, leaveOpen);
+
+        public static ZipperStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false)
+            => new(stream, mode, leaveOpen);
+    }
+
+    public class ZipperStreamExceptionsTests : StreamExceptionsTests<ZipperStream, ZipperMode, ZipperStreamProvider>
+    {
+    }
+
+    public class ZipperStreamReadWriteTests : StreamReadWriteTests<ZipperStream, ZipperMode, ZipperStreamProvider>
+    {
+    }
+}
diff --git a/Zipper/Zipper/ZipperStream.cs b/Zipper/Zipper/ZipperStream.cs
new file mode 100644
index 0000000..2861641
--- /dev/null
+++ b/Zipper/Zipper/ZipperStream.cs
@@ -0,0 +1,211 @@
+namespace Zipper;
+
+using Zipper.LZW;
+
+/// <summary>
+/// Provides methods and properties used to compress and decompress streams.
+/// </summary>
+public class ZipperStream : Stream
+{
+    /// <summary>
+    /// Smallest allowed block length.
+    /// </summary>
+    public const int MinBlockSize = LZWStream.MinBlockSize;
+
+    /// <summary>
+    /// Largest allowed block length.
+    /// </summary>
+    public const int MaxBlockSize = LZWStream.MaxBlockSize;
+
+    private const int DefaultBlockSize = (MinBlockSize + MaxBlockSize) / 2;
+
+    private readonly LZWStream lzwStream;
+    private readonly BWTStream bwtStream;
+
+    private readonly Stream stream;
+    private readonly ZipperMode mode;
+    private readonly bool leaveOpen;
+
+    private bool disposed;
+
+    /// <inheritdoc cref="ZipperStream(Stream, int, ZipperMode, bool)"/>
+    public ZipperStream(Stream stream, ZipperMode mode, bool leaveOpen = false)
+        : this(stream, DefaultBlockSize, mode, leaveOpen)
+    {
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="ZipperStream"/> class.
+    /// </summary>
+    /// <param name="stream">The stream to which compressed data is written or from which data to decompress is read.</param>
+    /// <param name="blockSize">The internal block size to use, should be between <see cref="MinBlockSize"/> and <see cref="MaxBlockSize"/>.</param>
+    /// <param name="mode"><see cref="ZipperMode"/> that determines whether to compress or decompress data.</param>
+    /// <param name="leaveOpen">
+    /// When <see langword="true"/>, <paramref name="stream"/> is left open after this instance is disposed;
+    /// otherwise it is disposed along with this instance.
+    /// </param>
+    /// <exception cref="ArgumentException"><paramref name="mode"/> is not <see cref="ZipperMode.Compress"/> nor <see cref="ZipperMode.Decompress"/>.</exception>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="blockSize"/> is out of range.</exception>
+    public ZipperStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false)
+    {
+        ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize);
+        ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize);
+
+        if (!Enum.IsDefined(mode))
+        {
+            throw new ArgumentException($"Value was neither {ZipperMode.Compress} nor {ZipperMode.Decompress}", nameof(mode));
+        }
+
+        // Map the position of blockSize inside [MinBlockSize, MaxBlockSize]
+        // onto the corresponding position inside the BWT block size range.
+        float relativeBlockSize = (blockSize - MinBlockSize) / (float)(MaxBlockSize - MinBlockSize);
+        int bwtBlockSize = (int)(BWTStream.MinBlockSize + (relativeBlockSize * (BWTStream.MaxBlockSize - BWTStream.MinBlockSize)));
+
+        this.stream = stream;
+
+        // The BWT layer wraps the LZW layer, which wraps the caller's stream.
+        // NOTE(review): the trailing 'true' is presumably leaveOpen; Dispose below disposes every layer explicitly.
+        lzwStream = new(stream, blockSize, mode, true);
+        bwtStream = new(lzwStream, bwtBlockSize, mode == ZipperMode.Compress ? BWTMode.Transform : BWTMode.Reconstruct, true);
+
+        this.mode = mode;
+        this.leaveOpen = leaveOpen;
+        disposed = false;
+    }
+
+    /// <summary>
+    /// Gets a value indicating whether the stream supports reading.
+    /// </summary>
+    /// <value>
+    /// <see langword="true"/> when this instance is in <see cref="ZipperMode.Decompress"/> mode
+    /// and the underlying stream can be read.
+    /// </value>
+    public override bool CanRead => mode == ZipperMode.Decompress && stream.CanRead;
+
+    /// <summary>
+    /// Gets a value indicating whether the stream supports writing.
+    /// </summary>
+    /// <value>
+    /// <see langword="true"/> when this instance is in <see cref="ZipperMode.Compress"/> mode
+    /// and the underlying stream can be written.
+    /// </value>
+    public override bool CanWrite => mode == ZipperMode.Compress && stream.CanWrite;
+
+    /// <summary>
+    /// Gets a value indicating whether the stream supports seeking.
+    /// </summary>
+    /// <value>Always <see langword="false"/>.</value>
+    public override bool CanSeek => false;
+
+    /// <summary>
+    /// This property is not supported and always throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    /// <exception cref="NotSupportedException">Always thrown.</exception>
+    public override long Length => throw new NotSupportedException();
+
+    /// <summary>
+    /// This property is not supported and always throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    /// <exception cref="NotSupportedException">Always thrown.</exception>
+    public override long Position
+    {
+        get => throw new NotSupportedException();
+        set => throw new NotSupportedException();
+    }
+
+    /// <summary>
+    /// This method is not supported and always throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    /// <exception cref="NotSupportedException">Always thrown.</exception>
+    public override long Seek(long offset, SeekOrigin origin)
+        => throw new NotSupportedException();
+
+    /// <summary>
+    /// This method is not supported and always throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    /// <exception cref="NotSupportedException">Always thrown.</exception>
+    public override void SetLength(long value)
+        => throw new NotSupportedException();
+
+    /// <summary>
+    /// Flushes the internal buffers.
+    /// </summary>
+    /// <remarks>Does nothing when this instance is in <see cref="ZipperMode.Decompress"/> mode.</remarks>
+    /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+    public override void Flush()
+    {
+        EnsureNotClosed();
+
+        if (mode == ZipperMode.Compress)
+        {
+            bwtStream.Flush();
+        }
+    }
+
+    /// <summary>
+    /// Reads data from <paramref name="buffer"/>, compresses it and writes it to the underlying stream.
+    /// </summary>
+    /// <param name="buffer">Buffer that contains data to be compressed.</param>
+    /// <param name="offset">How many bytes to skip before reading from <paramref name="buffer"/>.</param>
+    /// <param name="count">How many bytes to read from <paramref name="buffer"/>.</param>
+    /// <exception cref="InvalidOperationException">Stream is set to <see cref="ZipperMode.Decompress"/> mode.</exception>
+    /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+    public override void Write(byte[] buffer, int offset, int count)
+        => Write(buffer.AsSpan(offset, count));
+
+    /// <summary>
+    /// Reads data from the underlying stream, decompresses it and writes to <paramref name="buffer"/>.
+    /// </summary>
+    /// <param name="buffer">Buffer to write decompressed data to.</param>
+    /// <param name="offset">How many bytes to skip before writing to <paramref name="buffer"/>.</param>
+    /// <param name="count">How many bytes to write to <paramref name="buffer"/>.</param>
+    /// <returns>Count of read bytes, can be less than <paramref name="count"/>.</returns>
+    /// <remarks>
+    /// The inner streams may additionally report an unexpected end of stream or an invalid data stream.
+    /// NOTE(review): confirm the concrete exception types they throw.
+    /// </remarks>
+    /// <exception cref="InvalidOperationException">Stream is set to <see cref="ZipperMode.Compress"/> mode.</exception>
+    /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+    public override int Read(byte[] buffer, int offset, int count)
+        => Read(buffer.AsSpan(offset, count));
+
+    /// <inheritdoc cref="Write(byte[], int, int)"/>
+    public override void Write(ReadOnlySpan<byte> buffer)
+    {
+        EnsureNotClosed();
+        EnsureMode(ZipperMode.Compress);
+
+        bwtStream.Write(buffer);
+    }
+
+    /// <inheritdoc cref="Read(byte[], int, int)"/>
+    public override int Read(Span<byte> buffer)
+    {
+        EnsureNotClosed();
+        EnsureMode(ZipperMode.Decompress);
+
+        return bwtStream.Read(buffer);
+    }
+
+    /// <inheritdoc/>
+    protected override void Dispose(bool disposing)
+    {
+        if (disposed)
+        {
+            return;
+        }
+
+        if (disposing)
+        {
+            // Outermost layer first, so each layer is disposed before the one it writes into
+            bwtStream.Dispose();
+            lzwStream.Dispose();
+
+            if (!leaveOpen)
+            {
+                stream.Dispose();
+            }
+
+            disposed = true;
+        }
+    }
+
+    // Throws when the stream was created for the opposite operation.
+    private void EnsureMode(ZipperMode mode)
+    {
+        if (this.mode != mode)
+        {
+            throw new InvalidOperationException();
+        }
+    }
+
+    // Throws when this instance has already been disposed.
+    private void EnsureNotClosed()
+    {
+        ObjectDisposedException.ThrowIf(disposed, this);
+    }
+}
From 2005b625ee963750ae8b5ea4de43559d67a56f32 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Fri, 14 Mar 2025 03:51:06 +0300
Subject: [PATCH 52/57] Added cli for zipper (hw3 - lzw)
---
Zipper/Zipper.Cli/FileZipper.cs | 88 ++++++++++++++
Zipper/Zipper.Cli/Program.cs | 179 ++++++++++++++++++++++++++++
Zipper/Zipper.Cli/Zipper.Cli.csproj | 14 +++
Zipper/Zipper.sln | 14 +++
4 files changed, 295 insertions(+)
create mode 100644 Zipper/Zipper.Cli/FileZipper.cs
create mode 100644 Zipper/Zipper.Cli/Program.cs
create mode 100644 Zipper/Zipper.Cli/Zipper.Cli.csproj
diff --git a/Zipper/Zipper.Cli/FileZipper.cs b/Zipper/Zipper.Cli/FileZipper.cs
new file mode 100644
index 0000000..4e83d02
--- /dev/null
+++ b/Zipper/Zipper.Cli/FileZipper.cs
@@ -0,0 +1,88 @@
+namespace Zipper.Cli;
+
+using System.Buffers;
+
+/// <summary>
+/// Provides methods and properties used to compress and decompress files.
+/// </summary>
+internal class FileZipper : IDisposable
+{
+    private const int BufferSize = 512 * 1024;
+    private static readonly ArrayPool<byte> BufferPool = ArrayPool<byte>.Create();
+
+    private readonly Stream readFrom;
+    private readonly Stream writeTo;
+    private readonly long inputFileSize;
+    private readonly byte[] buffer;
+
+    private long bytesReadFromInput;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="FileZipper"/> class.
+    /// </summary>
+    /// <param name="mode">Mode to use.</param>
+    /// <param name="inputFilePath">File to read data from.</param>
+    /// <param name="outputFilePath">File to write compressed/decompressed data to.</param>
+    public FileZipper(ZipperMode mode, string inputFilePath, string outputFilePath)
+    {
+        inputFileSize = new FileInfo(inputFilePath).Length;
+
+        var inputFile = File.OpenRead(inputFilePath);
+        var outputFile = File.Create(outputFilePath);
+
+        // The ZipperStream always wraps the compressed side: the output when compressing,
+        // the input when decompressing.
+        if (mode == ZipperMode.Compress)
+        {
+            readFrom = inputFile;
+            writeTo = new ZipperStream(outputFile, ZipperStream.MaxBlockSize, mode);
+        }
+        else
+        {
+            readFrom = new ZipperStream(inputFile, ZipperStream.MaxBlockSize, mode);
+            writeTo = outputFile;
+        }
+
+        bytesReadFromInput = 0;
+        buffer = BufferPool.Rent(BufferSize);
+
+        EndOfFile = false;
+    }
+
+    /// <summary>
+    /// Gets progress as value between 0 and 1.
+    /// </summary>
+    /// <remarks>
+    /// An empty input file reports 1. The value is clamped to the documented range:
+    /// when decompressing, the counter holds decompressed bytes while the denominator is the
+    /// compressed file size, so the raw ratio can exceed 1.
+    /// NOTE(review): an exact decompression progress would need the position of the input file instead.
+    /// </remarks>
+    public float Progress => inputFileSize == 0
+        ? 1f
+        : Math.Clamp((float)bytesReadFromInput / inputFileSize, 0f, 1f);
+
+    /// <summary>
+    /// Gets a value indicating whether end of file was reached.
+    /// </summary>
+    public bool EndOfFile { get; private set; }
+
+    /// <summary>
+    /// Compresses or decompresses part of input file.
+    /// </summary>
+    public void ReadAndWriteSingleBuffer()
+    {
+        int bytesRead = readFrom.Read(buffer, 0, BufferSize);
+
+        // A zero-length read marks the end of the input
+        if (bytesRead == 0)
+        {
+            EndOfFile = true;
+            return;
+        }
+
+        bytesReadFromInput += bytesRead;
+
+        writeTo.Write(buffer, 0, bytesRead);
+    }
+
+    /// <summary>
+    /// Disposes all used files.
+    /// </summary>
+    public void Dispose()
+    {
+        BufferPool.Return(buffer);
+
+        readFrom.Dispose();
+        writeTo.Dispose();
+    }
+}
diff --git a/Zipper/Zipper.Cli/Program.cs b/Zipper/Zipper.Cli/Program.cs
new file mode 100644
index 0000000..39da260
--- /dev/null
+++ b/Zipper/Zipper.Cli/Program.cs
@@ -0,0 +1,179 @@
+using System.Diagnostics;
+using Zipper;
+using Zipper.Cli;
+
+// Entry point of the Zipper command line tool: parses the arguments, resolves the output
+// path, runs the compression/decompression loop with a progress bar and reports the result.
+// Exit code 0 means success or user cancellation, 1 means an argument or file error.
+const string helpMessage =
+"""
+Zipper - console tool for compressing and decompressing files
+
+Usage: dotnet run -- <file> [options]
+Options:
+ -h -? --help | Print this help message
+ ------------------------------------------------
+ -c --compress | Compress specified file
+ ------------------------------------------------
+ -u --uncompress | Decompress
+ -d --decompress | specified file
+ ------------------------------------------------
+ -f --force | Overwrite files without asking
+
+File path should be the first argument (unless --help specified)
+Options can be specified in any order
+Only either '--compress' or '--decompress' can be used at the same time
+""";
+
+args = [.. args.Select(x => x.Trim())];
+
+// No arguments at all, or a lone help switch: print the help text and exit successfully
+if (args.Length == 0 || (args.Length == 1 && args[0] is "-h" or "--help" or "-?"))
+{
+    Console.WriteLine(helpMessage);
+
+    return 0;
+}
+
+// The first argument is always the file path; everything after it is an option
+string filePath = args[0];
+bool force = false;
+ZipperMode? mode = null;
+
+foreach (var arg in args.Skip(1))
+{
+    switch (arg)
+    {
+        case "-u" or "-d" or "--uncompress" or "--decompress":
+            if (mode != null)
+            {
+                Console.Error.WriteLine("Error: '--compress' or '--decompress' option can only be specified once");
+                return 1;
+            }
+
+            mode = ZipperMode.Decompress;
+            break;
+
+        case "-c" or "--compress":
+            if (mode != null)
+            {
+                Console.Error.WriteLine("Error: '--compress' or '--decompress' option can only be specified once");
+                return 1;
+            }
+
+            mode = ZipperMode.Compress;
+            break;
+
+        case "-f" or "--force":
+            force = true;
+            break;
+
+        default:
+            Console.Error.WriteLine("Error: unknown argument");
+            return 1;
+    }
+}
+
+if (mode == null)
+{
+    Console.Error.WriteLine("Error: neither '--compress' nor '--decompress' were specified");
+    return 1;
+}
+
+if (!File.Exists(filePath))
+{
+    Console.Error.WriteLine($"Error: file '{filePath}' does not exist");
+    return 1;
+}
+
+// Compression appends the extension, decompression requires it and strips it
+const string zippedExtension = ".zipped";
+string? newFilePath = null;
+if (mode == ZipperMode.Compress)
+{
+    newFilePath = $"{filePath}{zippedExtension}";
+}
+else
+{
+    // Ordinal comparison: a file extension is not linguistic text
+    if (!filePath.EndsWith(zippedExtension, StringComparison.Ordinal))
+    {
+        Console.Error.WriteLine($"Error: extension of the specified file is not {zippedExtension}");
+        return 1;
+    }
+
+    newFilePath = filePath[..^zippedExtension.Length];
+}
+
+if (!force && File.Exists(newFilePath))
+{
+    Console.Write($"File '{newFilePath}' already exists, overwrite? (y/n): ");
+    if (Console.ReadLine()?.Trim() != "y")
+    {
+        Console.WriteLine("Cancelled");
+        return 0;
+    }
+}
+
+const string hideCursorEscape = "\e[?25l";
+const string showCursorEscape = "\e[?25h";
+const string moveToLeftEscape = "\e[0G";
+const string clearLineEscape = "\e[2K";
+const string waitingSymbols = @"|/-\";
+
+Console.Write(hideCursorEscape);
+
+try
+{
+    using (var fileZipper = new FileZipper(mode.Value, filePath, newFilePath))
+    {
+        var stopwatch = Stopwatch.StartNew();
+        var lastLoggedTime = stopwatch.Elapsed;
+        int step = 0;
+        while (!fileZipper.EndOfFile)
+        {
+            fileZipper.ReadAndWriteSingleBuffer();
+
+            // Redraw the progress line at most once every 4 ms
+            if (stopwatch.Elapsed - lastLoggedTime > TimeSpan.FromMilliseconds(4))
+            {
+                Console.Write(moveToLeftEscape);
+                RenderProgress(fileZipper.Progress, stopwatch.Elapsed, step, waitingSymbols);
+                lastLoggedTime = stopwatch.Elapsed;
+            }
+
+            step += 1;
+        }
+    }
+}
+finally
+{
+    // Restore the terminal even when compression/decompression throws,
+    // otherwise the cursor would stay hidden after the program exits
+    Console.Write(clearLineEscape);
+    Console.Write(moveToLeftEscape);
+    Console.Write(showCursorEscape);
+}
+
+if (mode == ZipperMode.Compress)
+{
+    var inputFileSize = new FileInfo(filePath).Length;
+    var outputFileSize = new FileInfo(newFilePath).Length;
+    var compressionRate = (float)inputFileSize / outputFileSize;
+
+    Console.WriteLine($"Compression rate: {compressionRate}");
+}
+
+return 0;
+
+// Writes one progress line to the console: a spinner character picked by step, a bar with one
+// cell per whole percent (0 to 100 inclusive), the percentage and the elapsed time.
+// The caller is responsible for moving the cursor back to the start of the line.
+static void RenderProgress(float progress, TimeSpan time, int step, string stepString)
+{
+    const int cellCount = 101;
+
+    char spinner = stepString[step % stepString.Length];
+
+    // A cell is filled once progress has reached that cell's percentage
+    var bar = new char[cellCount];
+    for (int cell = 0; cell < cellCount; cell++)
+    {
+        bar[cell] = progress >= cell / 100f ? '=' : ' ';
+    }
+
+    // Only show the time units that are actually needed
+    string elapsed = time switch
+    {
+        { TotalMinutes: < 1 } => $" {time.Seconds} s",
+        { TotalHours: < 1 } => $" {time.Minutes} m {time.Seconds:00} s",
+        _ => $" {time.Hours} h {time.Minutes:00} m {time.Seconds:00} s",
+    };
+
+    Console.Write($" {spinner} [{new string(bar)}] {progress * 100,5:0.0} %{elapsed}");
+}
diff --git a/Zipper/Zipper.Cli/Zipper.Cli.csproj b/Zipper/Zipper.Cli/Zipper.Cli.csproj
new file mode 100644
index 0000000..0465f53
--- /dev/null
+++ b/Zipper/Zipper.Cli/Zipper.Cli.csproj
@@ -0,0 +1,14 @@
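+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net9.0</TargetFramework>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <ProjectReference Include="..\Zipper\Zipper.csproj" />
+  </ItemGroup>
+
+</Project>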
diff --git a/Zipper/Zipper.sln b/Zipper/Zipper.sln
index 213ddd2..60e7c7e 100644
--- a/Zipper/Zipper.sln
+++ b/Zipper/Zipper.sln
@@ -7,6 +7,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper", "Zipper\Zipper.csp
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper.Tests", "Zipper.Tests\Zipper.Tests.csproj", "{10C8B0BF-1C7E-4169-8DBB-3B138722C444}"
EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper.Cli", "Zipper.Cli\Zipper.Cli.csproj", "{800EFECF-6B0E-4A90-BDD9-717642A6309E}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -41,6 +43,18 @@ Global
{10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x64.Build.0 = Release|Any CPU
{10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x86.ActiveCfg = Release|Any CPU
{10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x86.Build.0 = Release|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x64.ActiveCfg = Debug|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x64.Build.0 = Debug|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x86.ActiveCfg = Debug|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x86.Build.0 = Debug|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|Any CPU.Build.0 = Release|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x64.ActiveCfg = Release|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x64.Build.0 = Release|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x86.ActiveCfg = Release|Any CPU
+ {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
From 20e49bf45e0c59036fcd2737d431fa0682cc9477 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 20 Mar 2025 16:36:33 +0300
Subject: [PATCH 53/57] Changed BWT to allow output to be longer than input
(hw3 - lzw)
---
Zipper/Zipper/BWT.cs | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs
index 099f06c..b96769a 100644
--- a/Zipper/Zipper/BWT.cs
+++ b/Zipper/Zipper/BWT.cs
@@ -19,7 +19,8 @@ internal static class BWT
/// Index that is used to reconstruct byte sequence.
public static int ForwardTransform(ReadOnlySpan input, Span output)
{
- Debug.Assert(input.Length == output.Length, "Length of input and output should be the same");
+ ArgumentOutOfRangeException.ThrowIfLessThan(output.Length, input.Length, nameof(output));
+
int length = input.Length;
if (length == 0)
@@ -81,7 +82,7 @@ int Compare(int x, int y)
/// Span to write reconstructed byte sequence to.
public static void InverseTransform(ReadOnlySpan input, int identityIndex, Span output)
{
- Debug.Assert(input.Length == output.Length, "Length of input and output should be the same");
+ ArgumentOutOfRangeException.ThrowIfLessThan(output.Length, input.Length, nameof(output));
if (identityIndex == -1)
{
@@ -124,13 +125,13 @@ public static void InverseTransform(ReadOnlySpan input, int identityIndex,
int lastIdentityIndex = identityIndex;
byte lastByte = input[lastIdentityIndex];
- output[^1] = input[identityIndex];
+ output[length - 1] = input[identityIndex];
for (int i = 1; i < length; i++)
{
lastIdentityIndex = appearances[lastIdentityIndex] + lesserBytesCounter[lastByte];
lastByte = input[lastIdentityIndex];
- output[^(i + 1)] = lastByte;
+ output[length - (i + 1)] = lastByte;
}
Pool.Return(appearances);
From aad99c1089bc40397ac09032cda6aaa3cadb09a3 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 20 Mar 2025 16:37:15 +0300
Subject: [PATCH 54/57] Changed BWT.ForwardTransform to return positive index
and added checks in BWT.InverseTransform (hw3 - lzw)
---
Zipper/Zipper/BWT.cs | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/Zipper/Zipper/BWT.cs b/Zipper/Zipper/BWT.cs
index b96769a..740ed20 100644
--- a/Zipper/Zipper/BWT.cs
+++ b/Zipper/Zipper/BWT.cs
@@ -25,7 +25,7 @@ public static int ForwardTransform(ReadOnlySpan input, Span output)
if (length == 0)
{
- return -1;
+ return 0;
}
int[] offsets = Pool.Rent(length);
@@ -82,10 +82,14 @@ int Compare(int x, int y)
/// Span to write reconstructed byte sequence to.
public static void InverseTransform(ReadOnlySpan input, int identityIndex, Span output)
{
+ ArgumentOutOfRangeException.ThrowIfNegative(identityIndex, nameof(identityIndex));
+ ArgumentOutOfRangeException.ThrowIfGreaterThan(identityIndex, input.Length, nameof(identityIndex));
+
ArgumentOutOfRangeException.ThrowIfLessThan(output.Length, input.Length, nameof(output));
- if (identityIndex == -1)
+ if (input.Length <= 1)
{
+ input.CopyTo(output);
return;
}
From fb950497d43b1b922b0ee1826abc87564b1b4c67 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 20 Mar 2025 16:39:26 +0300
Subject: [PATCH 55/57] Updated tests for BWT (hw3 - lzw)
---
Zipper/Zipper.Tests/BWTTests.cs | 34 +++++++++++++++++++++++++++++++--
1 file changed, 32 insertions(+), 2 deletions(-)
diff --git a/Zipper/Zipper.Tests/BWTTests.cs b/Zipper/Zipper.Tests/BWTTests.cs
index 62c0db8..4d74512 100644
--- a/Zipper/Zipper.Tests/BWTTests.cs
+++ b/Zipper/Zipper.Tests/BWTTests.cs
@@ -25,8 +25,38 @@ public void InverseTransform_ShouldBe_SameAs_Input([ValueSource(nameof(TestData)
Span transformed = stackalloc byte[input.Length];
var index = BWT.ForwardTransform(input, transformed);
- Span reconstructed = stackalloc byte[input.Length];
+ Span reconstructed = stackalloc byte[input.Length + 16];
BWT.InverseTransform(transformed, index, reconstructed);
- Assert.That(reconstructed.SequenceEqual(input), Is.True);
+ Assert.That(reconstructed[..input.Length].SequenceEqual(input), Is.True);
+ }
+
+ // ForwardTransform must reject an output span that is shorter than the input.
+ [Test]
+ public void Transform_ShouldThrowIf_InputIsLargerThanOutput()
+ {
+ int inputLength = 16;
+ int outputLength = inputLength - 1;
+ Assert.Throws<ArgumentOutOfRangeException>(() => BWT.ForwardTransform(stackalloc byte[inputLength], stackalloc byte[outputLength]));
+ }
+
+ // InverseTransform must reject an output span that is shorter than the input.
+ [Test]
+ public void InverseTransform_ShouldThrowIf_InputIsLargerThanOutput()
+ {
+ int inputLength = 16;
+ int outputLength = inputLength - 1;
+ Assert.Throws<ArgumentOutOfRangeException>(() => BWT.InverseTransform(stackalloc byte[inputLength], 0, stackalloc byte[outputLength]));
+ }
+
+ // InverseTransform must reject a negative identity index.
+ [Test]
+ public void InverseTransform_ShouldThrowIf_IdentityIndexIsNegative()
+ {
+ int length = 16;
+ Assert.Throws<ArgumentOutOfRangeException>(() => BWT.InverseTransform(stackalloc byte[length], -1, stackalloc byte[length]));
+ }
+
+ // InverseTransform must reject an identity index that lies beyond the input.
+ // NOTE(review): the name says "greater than or equal to", but only length + 1 is probed here, and the
+ // guard in BWT.InverseTransform (ThrowIfGreaterThan) accepts identityIndex == input.Length - confirm
+ // whether that boundary value should be rejected as well.
+ [Test]
+ public void InverseTransform_ShouldThrowIf_IdentityIndexIs_GreaterThanOrEqualTo_InputLength()
+ {
+ int length = 16;
+ Assert.Throws<ArgumentOutOfRangeException>(() => BWT.InverseTransform(stackalloc byte[length], length + 1, stackalloc byte[length]));
+ }
}
From c74bca34d22b3851a69ebb25aa23f65634fe9ff9 Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Thu, 20 Mar 2025 16:40:04 +0300
Subject: [PATCH 56/57] Made LZWStream internal (hw3 - lzw)
---
Zipper/Zipper.Tests/Streams/LZWStreamTests.cs | 2 +-
Zipper/Zipper/LZW/LZWStream.cs | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs b/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs
index 2d3b672..dd36279 100644
--- a/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs
+++ b/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs
@@ -2,7 +2,7 @@ namespace Zipper.Tests.Streams;
using Zipper.LZW;
-public class LZWStreamTests
+internal class LZWStreamTests
{
public class LZWStreamProvider : IStreamProvider
{
diff --git a/Zipper/Zipper/LZW/LZWStream.cs b/Zipper/Zipper/LZW/LZWStream.cs
index bda1288..78d2a20 100644
--- a/Zipper/Zipper/LZW/LZWStream.cs
+++ b/Zipper/Zipper/LZW/LZWStream.cs
@@ -5,7 +5,7 @@ namespace Zipper.LZW;
///
/// Provides methods and properties used to compress and decompress streams by using the LZW algorithm.
///
-public class LZWStream : Stream
+internal class LZWStream : Stream
{
///
/// Smallest allowed block length.
From c8f6aed9a2df38b6664ae39c952f17e66758961d Mon Sep 17 00:00:00 2001
From: ilya-krivtsov <180809461+ilya-krivtsov@users.noreply.github.com>
Date: Fri, 30 May 2025 16:59:41 +0300
Subject: [PATCH 57/57] Added BWT switch to ZipperStream.cs (hw3 - lzw)
---
Zipper/Zipper.Cli/FileZipper.cs | 38 ++++++++++++++++++-
.../Zipper.Tests/Streams/ZipperStreamTests.cs | 37 +++++++++++++++---
Zipper/Zipper/ZipperStream.cs | 32 +++++++++++-----
3 files changed, 91 insertions(+), 16 deletions(-)
diff --git a/Zipper/Zipper.Cli/FileZipper.cs b/Zipper/Zipper.Cli/FileZipper.cs
index 4e83d02..63f097d 100644
--- a/Zipper/Zipper.Cli/FileZipper.cs
+++ b/Zipper/Zipper.Cli/FileZipper.cs
@@ -1,6 +1,7 @@
namespace Zipper.Cli;
using System.Buffers;
+using System.Diagnostics;
///
/// Provides methods and properties used to compress and decompress files.
@@ -10,8 +11,13 @@ internal class FileZipper : IDisposable
private const int BufferSize = 512 * 1024;
private static readonly ArrayPool BufferPool = ArrayPool.Create();
+ private readonly string? outputFileName;
+ private readonly string? outputFileNameTempA;
+ private readonly string? outputFileNameTempB;
+
private readonly Stream readFrom;
private readonly Stream writeTo;
+ private readonly Stream? writeToAlt;
private readonly long inputFileSize;
private readonly byte[] buffer;
@@ -28,15 +34,23 @@ public FileZipper(ZipperMode mode, string inputFilePath, string outputFilePath)
inputFileSize = new FileInfo(inputFilePath).Length;
var inputFile = File.OpenRead(inputFilePath);
- var outputFile = File.Create(outputFilePath);
if (mode == ZipperMode.Compress)
{
+ outputFileName = outputFilePath;
+ outputFileNameTempA = Path.GetTempFileName();
+ outputFileNameTempB = Path.GetTempFileName();
+
+ var outputFileA = File.Create(outputFileNameTempA);
+ var outputFileB = File.Create(outputFileNameTempB);
+
readFrom = inputFile;
- writeTo = new ZipperStream(outputFile, ZipperStream.MaxBlockSize, mode);
+ writeTo = new ZipperStream(outputFileA, ZipperStream.MaxBlockSize, mode);
+ writeToAlt = new ZipperStream(outputFileB, ZipperStream.MaxBlockSize, mode, useBwt: true);
}
else
{
+ var outputFile = File.Create(outputFilePath);
readFrom = new ZipperStream(inputFile, ZipperStream.MaxBlockSize, mode);
writeTo = outputFile;
}
@@ -73,6 +87,7 @@ public void ReadAndWriteSingleBuffer()
bytesReadFromInput += bytesRead;
writeTo.Write(buffer, 0, bytesRead);
+ writeToAlt?.Write(buffer, 0, bytesRead);
}
///
@@ -84,5 +99,24 @@ public void Dispose()
readFrom.Dispose();
writeTo.Dispose();
+ writeToAlt?.Dispose();
+
+ // Compress mode only: both candidate outputs were written to temporary files;
+ // keep the smaller one as the final output and remove the other.
+ // NOTE(review): nothing visible here records which variant was kept, while the decompress path
+ // builds its ZipperStream without useBwt - confirm that a kept BWT variant can still be decoded.
+ if (outputFileName != null)
+ {
+ Debug.Assert(outputFileNameTempA != null, $"{nameof(outputFileNameTempA)} is null");
+ Debug.Assert(outputFileNameTempB != null, $"{nameof(outputFileNameTempB)} is null");
+
+ var tempLengthA = new FileInfo(outputFileNameTempA).Length;
+ var tempLengthB = new FileInfo(outputFileNameTempB).Length;
+
+ // On equal sizes the second (BWT) candidate is kept, as before
+ var (keptPath, discardedPath) = tempLengthA < tempLengthB
+ ? (outputFileNameTempA, outputFileNameTempB)
+ : (outputFileNameTempB, outputFileNameTempA);
+
+ File.Move(keptPath, outputFileName, true);
+
+ // The losing candidate would otherwise stay behind in the temp directory
+ File.Delete(discardedPath);
+ }
}
}
diff --git a/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs b/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs
index c566ad3..2865937 100644
--- a/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs
+++ b/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs
@@ -2,7 +2,7 @@ namespace Zipper.Tests.Streams;
internal class ZipperStreamTests
{
- public class ZipperStreamProvider : IStreamProvider
+ public class ZipperStreamProviderWithoutBwt : IStreamProvider
{
public static int MinBlockSize => ZipperStream.MinBlockSize;
@@ -15,17 +15,44 @@ public class ZipperStreamProvider : IStreamProvider
public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100;
public static ZipperStream CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false)
- => new(stream, blockSize, mode, leaveOpen);
+ => new(stream, blockSize, mode, leaveOpen, false);
public static ZipperStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false)
- => new(stream, mode, leaveOpen);
+ => new(stream, mode, leaveOpen, false);
}
- public class ZipperStreamExceptionsTests : StreamExceptionsTests
+ public class ZipperStreamProviderWithBwt : IStreamProvider
+ {
+ public static int MinBlockSize => ZipperStream.MinBlockSize;
+
+ public static int MaxBlockSize => ZipperStream.MaxBlockSize;
+
+ public static ZipperMode WritingMode => ZipperMode.Compress;
+
+ public static ZipperMode ReadingMode => ZipperMode.Decompress;
+
+ public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100;
+
+ public static ZipperStream CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false)
+ => new(stream, blockSize, mode, leaveOpen, true);
+
+ public static ZipperStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false)
+ => new(stream, mode, leaveOpen, true);
+ }
+
+ public class ZipperStreamWithoutBwtExceptionsTests : StreamExceptionsTests
+ {
+ }
+
+ public class ZipperStreamWithoutBwtReadWriteTests : StreamReadWriteTests
+ {
+ }
+
+ public class ZipperStreamWithBwtExceptionsTests : StreamExceptionsTests
{
}
- public class ZipperStreamReadWriteTests : StreamReadWriteTests
+ public class ZipperStreamWithBwtReadWriteTests : StreamReadWriteTests
{
}
}
diff --git a/Zipper/Zipper/ZipperStream.cs b/Zipper/Zipper/ZipperStream.cs
index 2861641..62eba44 100644
--- a/Zipper/Zipper/ZipperStream.cs
+++ b/Zipper/Zipper/ZipperStream.cs
@@ -20,7 +20,7 @@ public class ZipperStream : Stream
private const int DefaultBlockSize = (MinBlockSize + MaxBlockSize) / 2;
private readonly LZWStream lzwStream;
- private readonly BWTStream bwtStream;
+ private readonly Stream outerStream;
private readonly Stream stream;
private readonly ZipperMode mode;
@@ -28,9 +28,15 @@ public class ZipperStream : Stream
private bool disposed;
- ///
+ ///
public ZipperStream(Stream stream, ZipperMode mode, bool leaveOpen = false)
- : this(stream, DefaultBlockSize, mode, leaveOpen)
+ : this(stream, DefaultBlockSize, mode, leaveOpen, false)
+ {
+ }
+
+ ///
+ public ZipperStream(Stream stream, ZipperMode mode, bool leaveOpen = false, bool useBwt = false)
+ : this(stream, DefaultBlockSize, mode, leaveOpen, useBwt)
{
}
@@ -44,9 +50,10 @@ public ZipperStream(Stream stream, ZipperMode mode, bool leaveOpen = false)
/// The value indicating whether should be disposed along with this instance,
/// if is .
///
+ /// The value indicating whether to use Burrows-Wheeler transformation internally.
/// is not nor .
/// is out of range.
- public ZipperStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false)
+ public ZipperStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false, bool useBwt = false)
{
ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize);
ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize);
@@ -62,7 +69,14 @@ public ZipperStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpe
this.stream = stream;
lzwStream = new(stream, blockSize, mode, true);
- bwtStream = new(lzwStream, bwtBlockSize, mode == ZipperMode.Compress ? BWTMode.Transform : BWTMode.Reconstruct, true);
+ if (useBwt)
+ {
+ outerStream = new BWTStream(lzwStream, bwtBlockSize, mode == ZipperMode.Compress ? BWTMode.Transform : BWTMode.Reconstruct, true);
+ }
+ else
+ {
+ outerStream = lzwStream;
+ }
this.mode = mode;
this.leaveOpen = leaveOpen;
@@ -127,7 +141,7 @@ public override void Flush()
if (mode == ZipperMode.Compress)
{
- bwtStream.Flush();
+ outerStream.Flush();
}
}
@@ -162,7 +176,7 @@ public override void Write(ReadOnlySpan buffer)
EnsureNotClosed();
EnsureMode(ZipperMode.Compress);
- bwtStream.Write(buffer);
+ outerStream.Write(buffer);
}
///
@@ -171,7 +185,7 @@ public override int Read(Span buffer)
EnsureNotClosed();
EnsureMode(ZipperMode.Decompress);
- return bwtStream.Read(buffer);
+ return outerStream.Read(buffer);
}
///
@@ -184,7 +198,7 @@ protected override void Dispose(bool disposing)
if (disposing)
{
- bwtStream.Dispose();
+ outerStream.Dispose();
lzwStream.Dispose();
if (!leaveOpen)