diff --git a/Zipper/Zipper.Cli/FileZipper.cs b/Zipper/Zipper.Cli/FileZipper.cs new file mode 100644 index 0000000..63f097d --- /dev/null +++ b/Zipper/Zipper.Cli/FileZipper.cs @@ -0,0 +1,125 @@ +namespace Zipper.Cli; + +using System.Buffers; +using System.Diagnostics; + +/// <summary> +/// Provides methods and properties used to compress and decompress files. +/// </summary> +internal class FileZipper : IDisposable +{ + private const int BufferSize = 512 * 1024; + private static readonly ArrayPool<byte> BufferPool = ArrayPool<byte>.Create(); + + private readonly string? outputFileName; + private readonly string? outputFileNameTempA; + private readonly string? outputFileNameTempB; + + private readonly Stream readFrom; + private readonly Stream writeTo; + private readonly Stream? writeToAlt; + private readonly long inputFileSize; + private readonly byte[] buffer; + + private long bytesReadFromInput; + + /// <summary> + /// Initializes a new instance of the <see cref="FileZipper"/> class. + /// </summary> + /// <param name="mode">Mode to use.</param> + /// <param name="inputFilePath">File to read data from.</param> + /// <param name="outputFilePath">File to write compressed/decompressed data to.</param> 
+ public FileZipper(ZipperMode mode, string inputFilePath, string outputFilePath) + { + inputFileSize = new FileInfo(inputFilePath).Length; + + var inputFile = File.OpenRead(inputFilePath); + + if (mode == ZipperMode.Compress) + { + outputFileName = outputFilePath; + outputFileNameTempA = Path.GetTempFileName(); + outputFileNameTempB = Path.GetTempFileName(); + + var outputFileA = File.Create(outputFileNameTempA); + var outputFileB = File.Create(outputFileNameTempB); + + readFrom = inputFile; + writeTo = new ZipperStream(outputFileA, ZipperStream.MaxBlockSize, mode); + writeToAlt = new ZipperStream(outputFileB, ZipperStream.MaxBlockSize, mode, useBwt: true); + } + else + { + var outputFile = File.Create(outputFilePath); + readFrom = new ZipperStream(inputFile, ZipperStream.MaxBlockSize, mode); + writeTo = outputFile; + } + + bytesReadFromInput = 0; + buffer = BufferPool.Rent(BufferSize); + + EndOfFile = false; + } + + /// <summary> + /// Gets progress as value between 0 and 1. + /// </summary> + public float Progress => inputFileSize == 0 ? 1f : (float)bytesReadFromInput / inputFileSize; + + /// <summary> + /// Gets a value indicating whether end of file was reached. + /// </summary> + public bool EndOfFile { get; private set; } + + /// <summary> + /// Compresses or decompresses part of input file. + /// </summary> + public void ReadAndWriteSingleBuffer() + { + int bytesRead = readFrom.Read(buffer, 0, BufferSize); + + if (bytesRead == 0) + { + EndOfFile = true; + return; + } + + bytesReadFromInput += bytesRead; + + writeTo.Write(buffer, 0, bytesRead); + writeToAlt?.Write(buffer, 0, bytesRead); + } + + /// <summary> + /// Disposes all used files. 
+ /// </summary> + public void Dispose() + { + BufferPool.Return(buffer); + + readFrom.Dispose(); + writeTo.Dispose(); + writeToAlt?.Dispose(); + + if (outputFileName != null) + { + Debug.Assert(outputFileNameTempA != null, $"{nameof(outputFileNameTempA)} is null"); + Debug.Assert(outputFileNameTempB != null, $"{nameof(outputFileNameTempB)} is null"); + + var tempLengthA = new FileInfo(outputFileNameTempA).Length; + var tempLengthB = new FileInfo(outputFileNameTempB).Length; + + // keep the smaller candidate and delete the other one, so no temp file is left behind + if (tempLengthA < tempLengthB) + { + File.Move(outputFileNameTempA, outputFileName, true); + File.Delete(outputFileNameTempB); + } + else + { + File.Move(outputFileNameTempB, outputFileName, true); + File.Delete(outputFileNameTempA); + } + } + } +} diff --git a/Zipper/Zipper.Cli/Program.cs b/Zipper/Zipper.Cli/Program.cs new file mode 100644 index 0000000..39da260 --- /dev/null +++ b/Zipper/Zipper.Cli/Program.cs @@ -0,0 +1,179 @@ +using System.Diagnostics; +using Zipper; +using Zipper.Cli; + +const string helpMessage = +""" +Zipper - console tool for compressing and decompressing files + +Usage: dotnet run -- [options] +Options: + -h -? --help | Print this help message + ------------------------------------------------ + -c --compress | Compress specified file + ------------------------------------------------ + -u --uncompress | Decompress + -d --decompress | specified file + ------------------------------------------------ + -f --force | Overwrite files without asking + +File path should be the first argument (unless --help specified) +Options can be specified in any order +Only either '--compress' or '--decompress' can be used at the same time +"""; + +args = [.. args.Select(x => x.Trim())]; + +if (args.Length == 0 || (args.Length == 1 && args[0] is "-h" or "--help" or "-?")) +{ + Console.WriteLine(helpMessage); + + return 0; +} + +string filePath = args[0]; +bool force = false; +ZipperMode? 
mode = null; + +foreach (var arg in args.Skip(1)) +{ + switch (arg) + { + case "-u" or "-d" or "--uncompress" or "--decompress": + if (mode != null) + { + Console.Error.WriteLine("Error: '--compress' or '--decompress' option can only be specified once"); + return 1; + } + + mode = ZipperMode.Decompress; + break; + + case "-c" or "--compress": + if (mode != null) + { + Console.Error.WriteLine("Error: '--compress' or '--decompress' option can only be specified once"); + return 1; + } + + mode = ZipperMode.Compress; + break; + + case "-f" or "--force": + force = true; + break; + + default: + Console.Error.WriteLine("Error: unknown argument"); + return 1; + } +} + +if (mode == null) +{ + Console.Error.WriteLine("Error: neither '--compress' nor '--decompress' were specified"); + return 1; +} + +if (!File.Exists(filePath)) +{ + Console.Error.WriteLine($"Error: file '{filePath}' does not exist"); + return 1; +} + +const string zippedExtension = ".zipped"; +string? newFilePath = null; +if (mode == ZipperMode.Compress) +{ + newFilePath = $"{filePath}{zippedExtension}"; +} +else +{ + if (!filePath.EndsWith(zippedExtension)) + { + Console.Error.WriteLine($"Error: extension of the specified file is not {zippedExtension}"); + return 1; + } + + newFilePath = filePath[..^zippedExtension.Length]; +} + +if (!force && File.Exists(newFilePath)) +{ + Console.Write($"File '{newFilePath}' already exists, overwrite? 
(y/n): "); + if (Console.ReadLine()?.Trim() != "y") + { + Console.WriteLine("Cancelled"); + return 0; + } +} + +const string hideCursorEscape = "\e[?25l"; +const string showCursorEscape = "\e[?25h"; +const string moveToLeftEscape = "\e[0G"; +const string clearLineEscape = "\e[2K"; +const string waitingSymblols = @"|/-\"; + +Console.Write(hideCursorEscape); + +using (var fileZipper = new FileZipper(mode.Value, filePath, newFilePath)) +{ + var stopwatch = Stopwatch.StartNew(); + var lastLoggedTime = stopwatch.Elapsed; + int step = 0; + while (!fileZipper.EndOfFile) + { + fileZipper.ReadAndWriteSingleBuffer(); + + if (stopwatch.Elapsed - lastLoggedTime > TimeSpan.FromMilliseconds(4)) + { + Console.Write(moveToLeftEscape); + RenderProgress(fileZipper.Progress, stopwatch.Elapsed, step, waitingSymblols); + lastLoggedTime = stopwatch.Elapsed; + } + + step += 1; + } +} + +Console.Write(clearLineEscape); +Console.Write(moveToLeftEscape); +Console.Write(showCursorEscape); + +if (mode == ZipperMode.Compress) +{ + var inputFileSize = new FileInfo(filePath).Length; + var outputFileSize = new FileInfo(newFilePath).Length; + var compressionRate = (float)inputFileSize / outputFileSize; + + Console.WriteLine($"Compression rate: {compressionRate}"); +} + +return 0; + +static void RenderProgress(float progress, TimeSpan time, int step, string stepString) +{ + Console.Write($" {stepString[step % stepString.Length]} "); + Console.Write("["); + + for (int i = 0; i <= 100; i++) + { + Console.Write(progress >= i / 100f ? 
'=' : ' '); + } + + Console.Write("]"); + Console.Write($" {progress * 100,5:0.0} %"); + + if (time.TotalMinutes < 1) + { + Console.Write($" {time.Seconds} s"); + } + else if (time.TotalHours < 1) + { + Console.Write($" {time.Minutes} m {time.Seconds:00} s"); + } + else + { + Console.Write($" {time.Hours} h {time.Minutes:00} m {time.Seconds:00} s"); + } +} diff --git a/Zipper/Zipper.Cli/Zipper.Cli.csproj b/Zipper/Zipper.Cli/Zipper.Cli.csproj new file mode 100644 index 0000000..0465f53 --- /dev/null +++ b/Zipper/Zipper.Cli/Zipper.Cli.csproj @@ -0,0 +1,14 @@ + + + + Exe + net9.0 + enable + enable + + + + + + + diff --git a/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs new file mode 100644 index 0000000..05e8732 --- /dev/null +++ b/Zipper/Zipper.Tests/ArbitraryBitReaderWriterTests.cs @@ -0,0 +1,122 @@ +namespace Zipper.Tests; + +public class ArbitraryBitReaderWriterTests +{ + private static readonly TestData[] TestDataSource = GenerateData(); + + [Test] + public void ReaderAndWriter_ShouldThrowIf_InitializedWith_Width_InDisallowedRange() + { + Assert.Throws(() => new ArbitraryBitReader(Stream.Null, ArbitraryBitReader.MinWidth - 1)); + Assert.Throws(() => new ArbitraryBitReader(Stream.Null, ArbitraryBitReader.MaxWidth + 1)); + + Assert.Throws(() => new ArbitraryBitWriter(Stream.Null, ArbitraryBitWriter.MinWidth - 1)); + Assert.Throws(() => new ArbitraryBitWriter(Stream.Null, ArbitraryBitWriter.MaxWidth + 1)); + } + + [Test] + public void Reader_ShouldThrowIf_StreamCanNotRead() + => Assert.Throws(() => new ArbitraryBitReader(new TestStream(), ArbitraryBitReader.MinWidth)); + + [Test] + public void Writer_ShouldThrowIf_StreamCanNotWrite() + => Assert.Throws(() => new ArbitraryBitWriter(new TestStream(), ArbitraryBitWriter.MinWidth)); + + [Test] + public void Reader_ShouldReadValues_WrittenBy_Writer_Correctly([ValueSource(nameof(TestDataSource))] TestData data) + { + int memorySize = 
(int)Math.Ceiling(data.Width * data.Numbers.Length / 8f); + var backingMemory = new byte[memorySize]; + + using (var memory = new MemoryStream(backingMemory)) + { + using var writer = new ArbitraryBitWriter(memory, data.Width); + for (int i = 0; i < data.Numbers.Length; i++) + { + writer.Write(data.Numbers[i]); + } + } + + using (var memory = new MemoryStream(backingMemory)) + { + var reader = new ArbitraryBitReader(memory, data.Width); + for (int i = 0; i < data.Numbers.Length; i++) + { + Assert.Multiple(() => + { + Assert.That(reader.ReadNext(out int number), Is.True); + Assert.That(number, Is.EqualTo(data.Numbers[i])); + }); + } + } + } + + [Test] + public void Reader_ReadNext_ShouldReturnFalse_WhenNoDataAvailable() + { + int width = 7; + int numbersCount = 5; + int memorySize = (int)Math.Ceiling(width * numbersCount / 8f); + var backingMemory = new byte[memorySize]; + + using (var memory = new MemoryStream(backingMemory)) + { + using var writer = new ArbitraryBitWriter(memory, width); + for (int i = 0; i < numbersCount; i++) + { + writer.Write(i * i); + } + } + + using (var memory = new MemoryStream(backingMemory)) + { + var reader = new ArbitraryBitReader(memory, width); + for (int i = 0; i < numbersCount; i++) + { + Assert.Multiple(() => + { + Assert.That(reader.ReadNext(out int number), Is.True); + Assert.That(number, Is.EqualTo(i * i)); + }); + } + + Assert.That(reader.ReadNext(out _), Is.False); + } + } + + private static TestData[] GenerateData() + { + var random = new Random(872375823); + + int minWidth = 4; + int maxWidth = 32; + int numbersLength = 21; + + var result = new TestData[maxWidth - minWidth + 1]; + for (int i = 0; i < result.Length; i++) + { + int width = i + minWidth; + long upperBound = 1L << width; + + var numbers = new int[numbersLength]; + for (int j = 0; j < numbersLength; j++) + { + numbers[j] = (int)random.NextInt64(upperBound); + } + + result[i] = new(width, numbers); + } + + return result; + } + + public readonly record struct 
TestData(int Width, int[] Numbers); + + // use MemoryStream, because implementing all Stream's abstract members leads to bad code coverage + private class TestStream : MemoryStream + { + public override bool CanRead => false; + + public override bool CanWrite => false; + } +} diff --git a/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs new file mode 100644 index 0000000..6a44a5e --- /dev/null +++ b/Zipper/Zipper.Tests/ArbitraryBitWriterTests.cs @@ -0,0 +1,119 @@ +namespace Zipper.Tests; + +public class ArbitraryBitWriterTests +{ + private readonly int width = 11; + private MemoryStream memory; + private ArbitraryBitWriter writer; + + [SetUp] + public void Setup() + { + memory = new MemoryStream(); + writer = new(memory, width, true); + } + + [TearDown] + public void Teardown() + { + writer.Dispose(); + memory.Dispose(); + } + + [Test] + public void Writer_Dispose_ShouldDoNothing_IfCalledTwice() + { + writer.Write(123); + + writer.Dispose(); + var position = memory.Position; + + writer.Dispose(); + Assert.That(memory.Position, Is.EqualTo(position)); + } + + [Test] + public void Writer_Dispose_ShouldDoNothing_IfBufferIsEmpty() + { + // buffer is filled on every eighth Write() + for (int i = 0; i < 8; i++) + { + writer.Write(i * i); + } + + var position = memory.Position; + + writer.Dispose(); + Assert.That(memory.Position, Is.EqualTo(position)); + } + + [Test] + public void Writer_Flush_ShouldDoNothing_IfBufferIsEmpty() + { + // buffer is filled on every eighth Write() + for (int i = 0; i < 8; i++) + { + writer.Write(i * i); + } + + var position = memory.Position; + + writer.Flush(); + Assert.That(memory.Position, Is.EqualTo(position)); + } + + [Test] + public void Writer_Flush_ShouldFlushHalfFilledBuffer() + { + // buffer is filled on every eighth Write(), so write only 4 numbers + for (int i = 0; i < 4; i++) + { + writer.Write(i * i); + } + + var position = memory.Position; + + writer.Flush(); + 
Assert.That(memory.Position, Is.Not.EqualTo(position)); + } + + [Test] + public void Writer_Dispose_ShouldDisposeStream_IfLeaveOpenWasInitializedWith_False() + { + var closingWriter = new ArbitraryBitWriter(memory, width, false); + + closingWriter.Dispose(); + + // getting any property should throw if stream was disposed + Assert.Throws(() => _ = memory.Position); + } + + [Test] + public void Writer_Dispose_ShouldNotDisposeStream_IfLeaveOpenWasInitializedWith_True() + { + writer.Dispose(); + + // getting any property should not throw if stream was not disposed + Assert.DoesNotThrow(() => _ = memory.Position); + } + + [Test] + public void Writer_Write_ShouldThrow_IfDisposed() + { + writer.Write(123); + + writer.Dispose(); + + Assert.Throws(() => writer.Write(456)); + } + + [Test] + public void Writer_Flush_ShouldThrow_IfDisposed() + { + writer.Write(123); + + writer.Dispose(); + + Assert.Throws(writer.Flush); + } +} diff --git a/Zipper/Zipper.Tests/BWTTests.cs b/Zipper/Zipper.Tests/BWTTests.cs new file mode 100644 index 0000000..4d74512 --- /dev/null +++ b/Zipper/Zipper.Tests/BWTTests.cs @@ -0,0 +1,62 @@ +namespace Zipper.Tests; + +public class BWTTests +{ + private static readonly string[] StringTestData = + [ + string.Empty, + + "A", + "BB", + "CCCCCC", + "ABACABA", + "ABABABABAB", + ]; + + private static readonly byte[][] TestData = + [ + .. StringTestData.Select(System.Text.Encoding.UTF8.GetBytes), + .. 
TestUtil.GetRandomStrings() + ]; + + [Test] + public void InverseTransform_ShouldBe_SameAs_Input([ValueSource(nameof(TestData))] byte[] input) + { + Span transformed = stackalloc byte[input.Length]; + var index = BWT.ForwardTransform(input, transformed); + + Span reconstructed = stackalloc byte[input.Length + 16]; + BWT.InverseTransform(transformed, index, reconstructed); + Assert.That(reconstructed[..input.Length].SequenceEqual(input), Is.True); + } + + [Test] + public void Transform_ShouldThrowIf_InputIsLargerThanOutput() + { + int inputLength = 16; + int outputLength = inputLength - 1; + Assert.Throws(() => BWT.ForwardTransform(stackalloc byte[inputLength], stackalloc byte[outputLength])); + } + + [Test] + public void InverseTransform_ShouldThrowIf_InputIsLargerThanOutput() + { + int inputLength = 16; + int outputLength = inputLength - 1; + Assert.Throws(() => BWT.InverseTransform(stackalloc byte[inputLength], 0, stackalloc byte[outputLength])); + } + + [Test] + public void InverseTransform_ShouldThrowIf_IdentityIndexIsNegative() + { + int length = 16; + Assert.Throws(() => BWT.InverseTransform(stackalloc byte[length], -1, stackalloc byte[length])); + } + + [Test] + public void InverseTransform_ShouldThrowIf_IdentityIndexIs_GreaterThanOrEqualTo_InputLength() + { + int length = 16; + Assert.Throws(() => BWT.InverseTransform(stackalloc byte[length], length + 1, stackalloc byte[length])); + } +} diff --git a/Zipper/Zipper.Tests/GlobalSuppressions.cs b/Zipper/Zipper.Tests/GlobalSuppressions.cs new file mode 100644 index 0000000..eeeaf17 --- /dev/null +++ b/Zipper/Zipper.Tests/GlobalSuppressions.cs @@ -0,0 +1,8 @@ +// This file is used by Code Analysis to maintain SuppressMessage +// attributes that are applied to this project. +// Project-level suppressions either have no target or are given +// a specific target and scoped to a namespace, type, member, etc. 
+ +using System.Diagnostics.CodeAnalysis; + +[assembly: SuppressMessage("StyleCop.CSharp.DocumentationRules", "SA1600:Elements should be documented", Justification = "This is tests project")] diff --git a/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs b/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs new file mode 100644 index 0000000..689086d --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/BWTStreamTests.cs @@ -0,0 +1,31 @@ +namespace Zipper.Tests.Streams; + +internal class BWTStreamTests +{ + public class BWTStreamProvider : IStreamProvider + { + public static int MinBlockSize => BWTStream.MinBlockSize; + + public static int MaxBlockSize => BWTStream.MaxBlockSize; + + public static BWTMode WritingMode => BWTMode.Transform; + + public static BWTMode ReadingMode => BWTMode.Reconstruct; + + public static BWTMode UndefinedMode => BWTMode.Reconstruct + 100; + + public static BWTStream CreateStream(Stream stream, int blockSize, BWTMode mode, bool leaveOpen = false) + => new(stream, blockSize, mode, leaveOpen); + + public static BWTStream CreateStream(Stream stream, BWTMode mode, bool leaveOpen = false) + => new(stream, mode, leaveOpen); + } + + public class BWTStreamExceptionsTests : StreamExceptionsTests + { + } + + public class BWTStreamReadWriteTests : StreamReadWriteTests + { + } +} diff --git a/Zipper/Zipper.Tests/Streams/IStreamProvider.cs b/Zipper/Zipper.Tests/Streams/IStreamProvider.cs new file mode 100644 index 0000000..2d45fc0 --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/IStreamProvider.cs @@ -0,0 +1,20 @@ +namespace Zipper.Tests.Streams; + +public interface IStreamProvider + where TStream : Stream + where TMode : Enum +{ + public static abstract int MinBlockSize { get; } + + public static abstract int MaxBlockSize { get; } + + public static abstract TMode WritingMode { get; } + + public static abstract TMode ReadingMode { get; } + + public static abstract TMode UndefinedMode { get; } + + public static abstract TStream CreateStream(Stream stream, int 
blockSize, TMode mode, bool leaveOpen = false); + + public static abstract TStream CreateStream(Stream stream, TMode mode, bool leaveOpen = false); +} diff --git a/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs b/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs new file mode 100644 index 0000000..dd36279 --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/LZWStreamTests.cs @@ -0,0 +1,33 @@ +namespace Zipper.Tests.Streams; + +using Zipper.LZW; + +internal class LZWStreamTests +{ + public class LZWStreamProvider : IStreamProvider + { + public static int MinBlockSize => LZWStream.MinBlockSize; + + public static int MaxBlockSize => LZWStream.MaxBlockSize; + + public static ZipperMode WritingMode => ZipperMode.Compress; + + public static ZipperMode ReadingMode => ZipperMode.Decompress; + + public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100; + + public static LZWStream CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) + => new(stream, blockSize, mode, leaveOpen); + + public static LZWStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false) + => new(stream, mode, leaveOpen); + } + + public class LZWStreamExceptionsTests : StreamExceptionsTests + { + } + + public class LZWStreamReadWriteTests : StreamReadWriteTests + { + } +} diff --git a/Zipper/Zipper.Tests/Streams/StreamExceptionsTests.cs b/Zipper/Zipper.Tests/Streams/StreamExceptionsTests.cs new file mode 100644 index 0000000..a67b809 --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/StreamExceptionsTests.cs @@ -0,0 +1,152 @@ +namespace Zipper.Tests.Streams; + +public abstract class StreamExceptionsTests + where TStream : Stream + where TMode : Enum + where TProvider : IStreamProvider +{ + private TStream compressor; + private TStream decompressor; + + [SetUp] + public void Setup() + { + compressor = TProvider.CreateStream(Stream.Null, TProvider.MinBlockSize, TProvider.WritingMode); + decompressor = TProvider.CreateStream(Stream.Null, TProvider.MinBlockSize, 
TProvider.ReadingMode); + } + + [TearDown] + public void Teardown() + { + compressor.Dispose(); + decompressor.Dispose(); + } + + [Test] + public void Constructor_ShouldThrowIf_BlockSize_IsIncorrect() + { + Assert.Throws(() => TProvider.CreateStream(Stream.Null, TProvider.MinBlockSize - 1, TProvider.WritingMode)); + Assert.Throws(() => TProvider.CreateStream(Stream.Null, TProvider.MaxBlockSize + 1, TProvider.WritingMode)); + } + + [Test] + public void Constructor_ShouldThrowIf_Mode_IsNotDefined() + { + Assert.Throws(() => TProvider.CreateStream(Stream.Null, TProvider.UndefinedMode)); + } + + [Test] + public void Constructor_ShouldThrowIf_Mode_IsCompress_And_Stream_CanNotWrite() + { + Assert.Throws(() => TProvider.CreateStream(new UnwriteableStream(), TProvider.WritingMode)); + } + + [Test] + public void Constructor_ShouldThrowIf_Mode_IsDecompress_And_Stream_CanNotRead() + { + Assert.Throws(() => TProvider.CreateStream(new UnreadableStream(), TProvider.ReadingMode)); + } + + [Test] + public void UnsupportedPropertiesAndMethods_ShouldThrow() + { + Assert.Throws(() => _ = compressor.Length); + + Assert.Throws(() => _ = compressor.Position); + Assert.Throws(() => compressor.Position = 0); + + Assert.Throws(() => compressor.Seek(0, SeekOrigin.Begin)); + Assert.Throws(() => compressor.SetLength(0)); + } + + [Test] + public void CanRead_ShouldReturnFalse_AndCanWrite_ShouldReturnTrue_IfModeIs_Compress() + { + Assert.Multiple(() => + { + Assert.That(compressor.CanRead, Is.False); + Assert.That(compressor.CanWrite, Is.True); + }); + } + + [Test] + public void CanRead_ShouldReturnTrue_And_CanWrite_ShouldReturnFalse_IfModeIs_Decompress() + { + Assert.Multiple(() => + { + Assert.That(decompressor.CanRead, Is.True); + Assert.That(decompressor.CanWrite, Is.False); + }); + } + + [Test] + public void CanSeek_ShouldReturnFalse() + { + Assert.Multiple(() => + { + Assert.That(compressor.CanSeek, Is.False); + Assert.That(decompressor.CanSeek, Is.False); + }); + } + + [Test] + public 
void Write_ShouldThrow_IfModeIs_Decompress() + { + Assert.Throws(() => decompressor.Write([])); + Assert.Throws(() => decompressor.Write([], 0, 0)); + } + + [Test] + public void Read_ShouldThrow_IfModeIs_Compress() + { + Assert.Throws(() => _ = compressor.Read([])); + Assert.Throws(() => _ = compressor.Read([], 0, 0)); + } + + [Test] + public void AllMethodsShouldThrow_IfDisposed() + { + compressor.Dispose(); + decompressor.Dispose(); + + Assert.Throws(() => compressor.Write([])); + Assert.Throws(() => compressor.Write([], 0, 0)); + + Assert.Throws(() => _ = compressor.Read([])); + Assert.Throws(() => _ = compressor.Read([], 0, 0)); + + Assert.Throws(compressor.Flush); + } + + [Test] + public void ReadAndWrite_ShouldThrow_IfArgumentsAreIncorrect() + { + int length = 8; + var buffer = new byte[length]; + + Test(buffer, -1, length); + Test(buffer, length, length); + + Test(buffer, 0, -1); + Test(buffer, 0, length + 1); + + Test(buffer, length / 2, length); + Test(buffer, 0, -1); + + void Test(byte[] buffer, int offset, int count) + { + Assert.Throws(() => compressor.Write(buffer, offset, count)); + Assert.Throws(() => _ = decompressor.Read(buffer, offset, count)); + } + } + + private class UnwriteableStream : MemoryStream + { + public override bool CanWrite => false; + } + + private class UnreadableStream : MemoryStream + { + public override bool CanRead => false; + } +} diff --git a/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs b/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs new file mode 100644 index 0000000..4927f45 --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/StreamReadWriteTests.cs @@ -0,0 +1,85 @@ +namespace Zipper.Tests.Streams; + +public abstract class StreamReadWriteTests + where TStream : Stream + where TMode : Enum + where TProvider : IStreamProvider +{ + private static readonly int[] BufferSizes = [1, 2, 3, 7, 14, 19, 31, 63, 127, 255, 1023]; + + private MemoryStream stream; + + [SetUp] + public void Setup() + { + stream = new(); + } + + 
[Test] + public void Read_ShouldReadData_WrittenBy_Write_Correctly([ValueSource(nameof(BufferSizes))] int readBufferSize) + { + var testData = GetData(StreamTestsSource.ImageData, readBufferSize); + + using (var compressor = TProvider.CreateStream(stream, TProvider.WritingMode, true)) + { + compressor.Write(testData); + } + + DecompressData_And_AssertThat_ItIsCorrect(testData, readBufferSize); + } + + [Test] + public void Flush_ShouldNotAffect_DataToBeRead([ValueSource(nameof(BufferSizes))] int readWriteBufferSize) + { + var testData = GetData(StreamTestsSource.TextData, readWriteBufferSize); + + using (var compressor = TProvider.CreateStream(stream, TProvider.WritingMode, true)) + { + for (int offset = 0; offset < testData.Length; offset += readWriteBufferSize) + { + var dataSlice = testData.Slice(offset, Math.Min(readWriteBufferSize, testData.Length - offset)); + compressor.Write(dataSlice); + compressor.Flush(); + } + } + + DecompressData_And_AssertThat_ItIsCorrect(testData, readWriteBufferSize); + } + + private static ReadOnlySpan GetData(ReadOnlySpan data, int bufferSize) + { + var relativeLength = Math.Clamp(bufferSize / (float)BufferSizes[^1], 0, 1); + + return data[..(int)Math.Ceiling(data.Length * relativeLength)]; + } + + private void DecompressData_And_AssertThat_ItIsCorrect(ReadOnlySpan testData, int readBufferSize) + { + stream.Seek(0, SeekOrigin.Begin); + + using var decompressor = TProvider.CreateStream(stream, TProvider.ReadingMode, true); + int offset = 0; + Span buffer = stackalloc byte[readBufferSize]; + + while (true) + { + int bytesRead = decompressor.Read(buffer); + + Assert.That(bytesRead, Is.LessThanOrEqualTo(readBufferSize)); + + if (bytesRead == 0) + { + break; + } + + Assert.That(offset + bytesRead, Is.LessThanOrEqualTo(testData.Length)); + + var slicedData = testData.Slice(offset, bytesRead); + var slicedBuffer = buffer[..bytesRead]; + + Assert.That(slicedBuffer.SequenceEqual(slicedData), Is.True); + + offset += bytesRead; + } + } +} 
diff --git a/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs b/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs new file mode 100644 index 0000000..e970322 --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/StreamTestsSource.cs @@ -0,0 +1,56 @@ +namespace Zipper.Tests.Streams; + +using System.Diagnostics.CodeAnalysis; + +[SetUpFixture] +[ExcludeFromCodeCoverage] +public class StreamTestsSource +{ + // "https://filesamples.com/samples/image/bmp/sample_640×426.bmp"; + private const string ImageDataUrl = "https://filesamples.com/samples/image/bmp/sample_640%C3%97426.bmp"; + + // "https://neerc.ifmo.ru/wiki/index.php?title=Алгоритм_LZW"; + private const string TextDataUrl = "https://neerc.ifmo.ru/wiki/index.php?title=%D0%90%D0%BB%D0%B3%D0%BE%D1%80%D0%B8%D1%82%D0%BC_LZW"; + + public static byte[] TextData { get; private set; } + + public static byte[] ImageData { get; private set; } + + [OneTimeSetUp] + public static void DataSetup() + { + TextData = DownloadOrReuse(TextDataUrl); + ImageData = DownloadOrReuse(ImageDataUrl); + } + + private static byte[] DownloadOrReuse(string url) + { + string testFilesDirectory = "test_files"; + if (!Directory.Exists(testFilesDirectory)) + { + Directory.CreateDirectory(testFilesDirectory); + } + + var filename = $"test_file_{string.Concat(url.Select(c => char.IsAsciiLetterOrDigit(c) ? 
c : '_'))}"; + var filePath = Path.Combine(testFilesDirectory, filename); + + if (File.Exists(filePath)) + { + return File.ReadAllBytes(filePath); + } + + using var client = new HttpClient(); + using var memory = new MemoryStream(); + + var response = client.Send(new HttpRequestMessage(HttpMethod.Get, url)); + response.EnsureSuccessStatusCode(); + + using var content = response.Content.ReadAsStream(); + content.CopyTo(memory); + + var data = memory.ToArray(); + File.WriteAllBytes(filePath, data); + + return data; + } +} diff --git a/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs b/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs new file mode 100644 index 0000000..2865937 --- /dev/null +++ b/Zipper/Zipper.Tests/Streams/ZipperStreamTests.cs @@ -0,0 +1,58 @@ +namespace Zipper.Tests.Streams; + +internal class ZipperStreamTests +{ + public class ZipperStreamProviderWithoutBwt : IStreamProvider + { + public static int MinBlockSize => ZipperStream.MinBlockSize; + + public static int MaxBlockSize => ZipperStream.MaxBlockSize; + + public static ZipperMode WritingMode => ZipperMode.Compress; + + public static ZipperMode ReadingMode => ZipperMode.Decompress; + + public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100; + + public static ZipperStream CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) + => new(stream, blockSize, mode, leaveOpen, false); + + public static ZipperStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false) + => new(stream, mode, leaveOpen, false); + } + + public class ZipperStreamProviderWithBwt : IStreamProvider + { + public static int MinBlockSize => ZipperStream.MinBlockSize; + + public static int MaxBlockSize => ZipperStream.MaxBlockSize; + + public static ZipperMode WritingMode => ZipperMode.Compress; + + public static ZipperMode ReadingMode => ZipperMode.Decompress; + + public static ZipperMode UndefinedMode => ZipperMode.Decompress + 100; + + public static ZipperStream 
CreateStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) + => new(stream, blockSize, mode, leaveOpen, true); + + public static ZipperStream CreateStream(Stream stream, ZipperMode mode, bool leaveOpen = false) + => new(stream, mode, leaveOpen, true); + } + + public class ZipperStreamWithoutBwtExceptionsTests : StreamExceptionsTests + { + } + + public class ZipperStreamWithoutBwtReadWriteTests : StreamReadWriteTests + { + } + + public class ZipperStreamWithBwtExceptionsTests : StreamExceptionsTests + { + } + + public class ZipperStreamWithBwtReadWriteTests : StreamReadWriteTests + { + } +} diff --git a/Zipper/Zipper.Tests/TestUtil.cs b/Zipper/Zipper.Tests/TestUtil.cs new file mode 100644 index 0000000..00b8ccd --- /dev/null +++ b/Zipper/Zipper.Tests/TestUtil.cs @@ -0,0 +1,20 @@ +namespace Zipper.Tests; + +public static class TestUtil +{ + public static IEnumerable GetRandomStrings() + { + int seed = 74687324; + var random = new Random(seed); + + int steps = 16; + int length = 256; + + for (int i = 0; i < steps; i++) + { + var buffer = new byte[length]; + random.NextBytes(buffer); + yield return buffer; + } + } +} diff --git a/Zipper/Zipper.Tests/TrieTests.cs b/Zipper/Zipper.Tests/TrieTests.cs new file mode 100644 index 0000000..ea98c39 --- /dev/null +++ b/Zipper/Zipper.Tests/TrieTests.cs @@ -0,0 +1,112 @@ +namespace Zipper.Tests; + +public class TrieTests +{ + private readonly byte testKey = 157; + private readonly int testValue = 252354; + + private Trie trie; + + [SetUp] + public void Setup() + { + trie = new(); + } + + [Test] + public void AddChild_ShouldReturnTrue_IfChildDidNotExists() + { + Assert.Multiple(() => + { + Assert.That(trie.HasChild(testKey), Is.False); + Assert.That(trie.AddChild(testKey, testValue), Is.True); + }); + } + + [Test] + public void HasChild_ShouldReturnTrue_IfAddedChild() + { + trie.AddChild(testKey, testValue); + Assert.That(trie.HasChild(testKey), Is.True); + } + + [Test] + public void 
HasChild_ShouldReturnFalse_IfChildDoesNotExist() + { + Assert.That(trie.HasChild(testKey), Is.False); + } + + [Test] + public void AddChild_ShouldReturnFalse_IfChildExisted() + { + trie.AddChild(testKey, testValue); + Assert.That(trie.AddChild(testKey, testValue), Is.False); + } + + [Test] + public void Add_ShouldNotMove() + { + Assert.Multiple(() => + { + Assert.That(trie.AtRoot, Is.True); + Assert.That(trie.AddChild(testKey, testValue), Is.True); + Assert.That(trie.AtRoot, Is.True); + }); + } + + [Test] + public void MoveForward_ShouldReturnTrue_IfMovingToAddedChild() + { + trie.AddChild(testKey, testValue); + Assert.That(trie.MoveForward(testKey), Is.True); + } + + [Test] + public void MoveForward_ShouldReturnFalse_IfChildDoesNotExist() + { + Assert.That(trie.MoveForward(testKey), Is.False); + } + + [Test] + public void MoveForward_ShouldMove() + { + trie.AddChild(testKey, testValue); + Assert.Multiple(() => + { + Assert.That(trie.AtRoot, Is.True); + Assert.That(trie.MoveForward(testKey), Is.True); + Assert.That(trie.AtRoot, Is.False); + }); + } + + [Test] + public void Reset_ShouldReset_IfMoved() + { + Assert.That(trie.AtRoot, Is.True); + + trie.AddChild(testKey, testValue); + trie.MoveForward(testKey); + Assert.That(trie.AtRoot, Is.False); + + trie.Reset(); + Assert.That(trie.AtRoot, Is.True); + } + + [Test] + public void AddChild_ShouldAdd_Once() + { + int valueA = 3463235; + int valueB = 73334536; + + trie.AddChild(testKey, valueA); + Assert.That(trie.AddChild(testKey, valueB), Is.False); + } + + [Test] + public void CurrentValue_ShouldReturnAddedValue() + { + trie.AddChild(testKey, testValue); + trie.MoveForward(testKey); + Assert.That(trie.CurrentValue, Is.EqualTo(testValue)); + } +} diff --git a/Zipper/Zipper.Tests/Zipper.Tests.csproj b/Zipper/Zipper.Tests/Zipper.Tests.csproj new file mode 100644 index 0000000..78b1af6 --- /dev/null +++ b/Zipper/Zipper.Tests/Zipper.Tests.csproj @@ -0,0 +1,28 @@ + + + + net9.0 + latest + enable + enable + + true + + + + + 
+ + + + + + + + + + + + + + diff --git a/Zipper/Zipper.sln b/Zipper/Zipper.sln new file mode 100644 index 0000000..60e7c7e --- /dev/null +++ b/Zipper/Zipper.sln @@ -0,0 +1,62 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.0.31903.59 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper", "Zipper\Zipper.csproj", "{9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper.Tests", "Zipper.Tests\Zipper.Tests.csproj", "{10C8B0BF-1C7E-4169-8DBB-3B138722C444}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Zipper.Cli", "Zipper.Cli\Zipper.Cli.csproj", "{800EFECF-6B0E-4A90-BDD9-717642A6309E}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|Any CPU.Build.0 = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x64.ActiveCfg = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x64.Build.0 = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x86.ActiveCfg = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Debug|x86.Build.0 = Debug|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|Any CPU.ActiveCfg = Release|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|Any CPU.Build.0 = Release|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x64.ActiveCfg = Release|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x64.Build.0 = Release|Any CPU + {9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x86.ActiveCfg = Release|Any CPU + 
{9E9FE984-8520-41D1-A74D-6C0DF2AE79D5}.Release|x86.Build.0 = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|Any CPU.Build.0 = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x64.ActiveCfg = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x64.Build.0 = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x86.ActiveCfg = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Debug|x86.Build.0 = Debug|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|Any CPU.ActiveCfg = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|Any CPU.Build.0 = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x64.ActiveCfg = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x64.Build.0 = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x86.ActiveCfg = Release|Any CPU + {10C8B0BF-1C7E-4169-8DBB-3B138722C444}.Release|x86.Build.0 = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|Any CPU.Build.0 = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x64.ActiveCfg = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x64.Build.0 = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x86.ActiveCfg = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Debug|x86.Build.0 = Debug|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|Any CPU.ActiveCfg = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|Any CPU.Build.0 = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x64.ActiveCfg = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x64.Build.0 = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x86.ActiveCfg = Release|Any CPU + {800EFECF-6B0E-4A90-BDD9-717642A6309E}.Release|x86.Build.0 = Release|Any CPU + EndGlobalSection 
/// <summary>
/// Reads integers of arbitrary width.
/// </summary>
internal class ArbitraryBitReader
{
    /// <inheritdoc cref="ArbitraryBitWriter.MinWidth"/>
    public const int MinWidth = ArbitraryBitWriter.MinWidth;

    /// <inheritdoc cref="ArbitraryBitWriter.MaxWidth"/>
    public const int MaxWidth = ArbitraryBitWriter.MaxWidth;

    private readonly Stream stream;
    private readonly int width;

    // Most recently fetched byte and the count of its bits (taken from the
    // most significant end) that were already handed out.
    private byte buffer;
    private int bitsReadFromBuffer;

    /// <summary>
    /// Initializes a new instance of the <see cref="ArbitraryBitReader"/> class.
    /// </summary>
    /// <param name="stream">Stream to read from.</param>
    /// <param name="width">Width of integers, between <see cref="MinWidth"/> and <see cref="MaxWidth"/> bits.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="width"/> is out of range.</exception>
    /// <exception cref="ArgumentException"><paramref name="stream"/> does not support reading.</exception>
    public ArbitraryBitReader(Stream stream, int width)
    {
        ArgumentOutOfRangeException.ThrowIfLessThan(width, MinWidth, nameof(width));
        ArgumentOutOfRangeException.ThrowIfGreaterThan(width, MaxWidth, nameof(width));

        if (!stream.CanRead)
        {
            throw new ArgumentException("Stream does not support reading", nameof(stream));
        }

        this.stream = stream;
        this.width = width;

        // Any value >= 8 makes the first ReadNext call fetch a byte from the stream.
        bitsReadFromBuffer = int.MaxValue;
    }

    /// <summary>
    /// Reads the next <c>width</c>-bit number from the underlying stream, most significant bits first.
    /// </summary>
    /// <param name="number">
    /// When this method returns, contains the value that was read if the read succeeded, zero otherwise.
    /// </param>
    /// <returns>
    /// <see langword="true"/> if a whole number was read;
    /// <see langword="false"/> if the stream ended before all of its bits were available.
    /// </returns>
    public bool ReadNext(out int number)
    {
        number = 0;

        int remainingWidth = width;
        while (remainingWidth > 0)
        {
            if (bitsReadFromBuffer >= 8)
            {
                int readByte = stream.ReadByte();
                if (readByte == -1)
                {
                    // Drop bits gathered from earlier bytes so a failed read
                    // reports zero, as documented.
                    number = 0;
                    return false;
                }

                buffer = (byte)readByte;
                bitsReadFromBuffer = 0;
            }

            int remainingBitsToRead = 8 - bitsReadFromBuffer;

            // Keep only the unread low bits of the byte, then drop the bits
            // that belong to the next number (if this one ends inside the byte).
            int mask = 0xFF >> bitsReadFromBuffer;
            int toWrite = (buffer & mask) >> Math.Max(0, remainingBitsToRead - remainingWidth);

            int previousRemainingWidth = remainingWidth;
            remainingWidth = Math.Max(0, remainingWidth - remainingBitsToRead);
            bitsReadFromBuffer += previousRemainingWidth - remainingWidth;
            number |= toWrite << remainingWidth;
        }

        return true;
    }
}
+ public ArbitraryBitWriter(Stream stream, int width, bool leaveOpen = false) + { + ArgumentOutOfRangeException.ThrowIfLessThan(width, MinWidth, nameof(width)); + ArgumentOutOfRangeException.ThrowIfGreaterThan(width, MaxWidth, nameof(width)); + + if (!stream.CanWrite) + { + throw new ArgumentException("Stream does not support writing", nameof(stream)); + } + + this.stream = stream; + this.width = width; + this.leaveOpen = leaveOpen; + buffer = BufferPool.Rent(width); + bitsWrittenInBuffer = 0; + } + + /// + /// Writes to the underlying stream. + /// + /// Number to write. + public void Write(int number) + { + ObjectDisposedException.ThrowIf(disposed, this); + + number &= (int)(0xFFFFFFFF >> (32 - width)); + + int remainingWidth = width; + while (remainingWidth > 0) + { + int bufferOffset = bitsWrittenInBuffer / 8; + int bitsWrittenToCurrentByte = bitsWrittenInBuffer % 8; + int bitsRemainingInCurrentByte = 8 - bitsWrittenToCurrentByte; + + int toWrite = number >> Math.Max(0, remainingWidth - bitsRemainingInCurrentByte); + int previousRemainingWidth = remainingWidth; + remainingWidth -= bitsRemainingInCurrentByte; + remainingWidth = Math.Max(0, remainingWidth); + + int bitsToBeWritten = previousRemainingWidth - remainingWidth; + int bitsToBeLeftInCurrentByte = bitsRemainingInCurrentByte - bitsToBeWritten; + buffer[bufferOffset] |= (byte)(toWrite << bitsToBeLeftInCurrentByte); + bitsWrittenInBuffer += bitsToBeWritten; + } + + if (bitsWrittenInBuffer >= width * 8) + { + Flush(); + } + } + + /// + /// Flushes the internal buffer. + /// + public void Flush() + { + ObjectDisposedException.ThrowIf(disposed, this); + + if (bitsWrittenInBuffer == 0) + { + return; + } + + int bytesWrittenInBuffer = (int)Math.Ceiling(bitsWrittenInBuffer / 8f); + stream.Write(buffer, 0, bytesWrittenInBuffer); + + Array.Clear(buffer); + bitsWrittenInBuffer = 0; + } + + /// + /// Releases all resources used by the current instance of the class. 
/// <summary>
/// Burrows-Wheeler transform implementation.
/// </summary>
internal static class BWT
{
    private static readonly ArrayPool<int> Pool = ArrayPool<int>.Create();
    private static readonly ArrayPool<byte> InputPool = ArrayPool<byte>.Create();

    /// <summary>
    /// Transforms given byte sequence using Burrows-Wheeler algorithm.
    /// </summary>
    /// <param name="input">Input byte sequence.</param>
    /// <param name="output">Span to write transformed input to; must be at least as long as <paramref name="input"/>.</param>
    /// <returns>Index that is used to reconstruct byte sequence (0 for empty input).</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="output"/> is shorter than <paramref name="input"/>.</exception>
    public static int ForwardTransform(ReadOnlySpan<byte> input, Span<byte> output)
    {
        ArgumentOutOfRangeException.ThrowIfLessThan(output.Length, input.Length, nameof(output));

        int length = input.Length;

        if (length == 0)
        {
            return 0;
        }

        int[] offsets = Pool.Rent(length);

        // The comparison callback cannot capture a span, so work on an array copy.
        byte[] inputCopy = InputPool.Rent(length);
        try
        {
            for (int i = 0; i < length; i++)
            {
                offsets[i] = i;
            }

            input.CopyTo(inputCopy);

            // Lexicographic comparison of the rotations starting at x and y.
            int Compare(int x, int y)
            {
                for (int i = 0; i < length; i++)
                {
                    int compare = inputCopy[(i + x) % length] - inputCopy[(i + y) % length];
                    if (compare != 0)
                    {
                        return compare;
                    }
                }

                return 0;
            }

            offsets.AsSpan(0, length).Sort(Compare);

            int? identityPosition = null;
            for (int i = 0; i < length; i++)
            {
                if (offsets[i] == 0)
                {
                    identityPosition = i;
                }

                // Last byte of the rotation that starts at offsets[i].
                output[i] = inputCopy[(offsets[i] + length - 1) % length];
            }

            Debug.Assert(identityPosition.HasValue, "Identity position not found");

            return identityPosition.Value;
        }
        finally
        {
            Pool.Return(offsets);
            InputPool.Return(inputCopy);
        }
    }

    /// <summary>
    /// Reconstructs byte sequence transformed with Burrows-Wheeler algorithm.
    /// </summary>
    /// <param name="input">Transformed byte sequence.</param>
    /// <param name="identityIndex">Index returned by <see cref="ForwardTransform"/> for this sequence.</param>
    /// <param name="output">Span to write reconstructed byte sequence to; must be at least as long as <paramref name="input"/>.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="identityIndex"/> is negative or does not point inside a non-empty <paramref name="input"/>,
    /// or <paramref name="output"/> is shorter than <paramref name="input"/>.
    /// </exception>
    public static void InverseTransform(ReadOnlySpan<byte> input, int identityIndex, Span<byte> output)
    {
        ArgumentOutOfRangeException.ThrowIfNegative(identityIndex, nameof(identityIndex));

        // ForwardTransform reports index 0 for empty input, so that pair stays
        // valid; for anything else the index must address an element.
        if (input.Length > 0)
        {
            ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(identityIndex, input.Length, nameof(identityIndex));
        }
        else
        {
            ArgumentOutOfRangeException.ThrowIfGreaterThan(identityIndex, input.Length, nameof(identityIndex));
        }

        ArgumentOutOfRangeException.ThrowIfLessThan(output.Length, input.Length, nameof(output));

        if (input.Length <= 1)
        {
            input.CopyTo(output);
            return;
        }

        int length = input.Length;

        int[] appearances = Pool.Rent(length);
        try
        {
            Span<int> lastAppearances = stackalloc int[256];
            Span<int> byteCounter = stackalloc int[256];
            lastAppearances.Fill(-1);

            // appearances[i] = how many times input[i] occurred before position i.
            for (int i = 0; i < length; i++)
            {
                byte currentByte = input[i];
                byteCounter[currentByte]++;

                int lastAppearance = lastAppearances[currentByte];
                appearances[i] = lastAppearance == -1 ? 0 : appearances[lastAppearance] + 1;
                lastAppearances[currentByte] = i;
            }

            // lesserBytesCounter[b] = number of bytes in input that are smaller than b.
            Span<int> lesserBytesCounter = stackalloc int[256];
            int previousCount = 0;
            for (int i = 0; i < 256; i++)
            {
                if (byteCounter[i] == 0)
                {
                    continue;
                }

                lesserBytesCounter[i] = previousCount;
                previousCount += byteCounter[i];
            }

            // Walk the sequence backwards, filling the output from its end.
            int lastIdentityIndex = identityIndex;
            byte lastByte = input[lastIdentityIndex];
            output[length - 1] = lastByte;

            for (int i = 1; i < length; i++)
            {
                lastIdentityIndex = appearances[lastIdentityIndex] + lesserBytesCounter[lastByte];
                lastByte = input[lastIdentityIndex];
                output[length - (i + 1)] = lastByte;
            }
        }
        finally
        {
            Pool.Return(appearances);
        }
    }
}
+ /// + public const int MaxBlockSize = 16384; + + private const int DefaultBlockSize = (MinBlockSize + MaxBlockSize) / 2; + + private static readonly ArrayPool BlockPool = ArrayPool.Create(); + + private readonly Stream stream; + private readonly BWTMode mode; + private readonly bool leaveOpen; + + private byte[]? block; + private int blockPosition; + private int blockSize; + + private bool disposed; + + /// + public BWTStream(Stream stream, BWTMode mode, bool leaveOpen = false) + : this(stream, DefaultBlockSize, mode, leaveOpen) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The stream to which transformed data is written or from which data to reconstruct is read. + /// The internal block size to use, should be between and . + /// that determines whether to transform or reconstruct data. + /// + /// The value indicating whether should be disposed along with this instance, + /// if is . + /// + /// is not nor . + /// is out of range. + public BWTStream(Stream stream, int blockSize, BWTMode mode, bool leaveOpen = false) + { + ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize); + ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize); + + if (!Enum.IsDefined(mode)) + { + throw new ArgumentException($"Value was neither {BWTMode.Transform} nor {BWTMode.Reconstruct}", nameof(mode)); + } + + if (mode == BWTMode.Transform) + { + if (!stream.CanWrite) + { + throw new ArgumentException("Stream does not support writing", nameof(stream)); + } + + this.blockSize = blockSize; + } + else + { + if (!stream.CanRead) + { + throw new ArgumentException("Stream does not support reading", nameof(stream)); + } + } + + this.stream = stream; + this.mode = mode; + this.leaveOpen = leaveOpen; + blockPosition = 0; + disposed = false; + } + + /// + /// Gets a value indicating whether the stream supports reading. 
+ /// + /// + public override bool CanRead => mode == BWTMode.Reconstruct && stream.CanRead; + + /// + /// Gets a value indicating whether the stream supports writing. + /// + /// + public override bool CanWrite => mode == BWTMode.Transform && stream.CanWrite; + + /// + /// Gets a value indicating whether the stream supports seeking. + /// + /// + public override bool CanSeek => false; + + /// + /// This property is not supported and always throws a . + /// + /// + public override long Length => throw new NotSupportedException(); + + /// + /// This property is not supported and always throws a . + /// + /// + public override long Position + { + get => throw new NotSupportedException(); + set => throw new NotSupportedException(); + } + + /// + /// This method is not supported and always throws a . + /// + /// + public override long Seek(long offset, SeekOrigin origin) + => throw new NotSupportedException(); + + /// + /// This method is not supported and always throws a . + /// + /// + public override void SetLength(long value) + => throw new NotSupportedException(); + + /// + /// Flushes the internal buffers. + /// + /// Stream is disposed. + public override void Flush() + { + EnsureNotClosed(); + if (mode == BWTMode.Transform) + { + if (block != null) + { + WriteBlock(); + } + + stream.Flush(); + } + } + + /// + /// Reads data from , transforms it and writes it to the underlying stream. + /// + /// Buffer that contains data to be transformed. + /// How many bytes to skip before reading from . + /// How many bytes to read from . + /// Stream is set to mode. + /// Stream is disposed. + public override void Write(byte[] buffer, int offset, int count) + => Write(buffer.AsSpan(offset, count)); + + /// + /// Reads data from the underlying stream, reconstructs it and writes to . + /// + /// Buffer to write reconstructed data to. + /// How many bytes to skip before writing to . + /// How many bytes to write to . + /// Count of read bytes, can be less than . 
+ /// Unexpected end of stream. + /// Invalid data stream. + /// Stream is set to mode. + /// Stream is disposed. + public override int Read(byte[] buffer, int offset, int count) + => Read(buffer.AsSpan(offset, count)); + + /// + public override void Write(ReadOnlySpan buffer) + { + EnsureNotClosed(); + EnsureMode(BWTMode.Transform); + + int bufferPosition = 0; + while (bufferPosition < buffer.Length) + { + if (block == null) + { + block = BlockPool.Rent(blockSize); + blockPosition = 0; + } + + int copyLength = Math.Min(blockSize - blockPosition, buffer.Length - bufferPosition); + buffer.Slice(bufferPosition, copyLength).CopyTo(block.AsSpan().Slice(blockPosition, copyLength)); + + bufferPosition += copyLength; + blockPosition += copyLength; + + if (blockPosition >= blockSize) + { + WriteBlock(); + } + } + } + + /// + public override int Read(Span buffer) + { + EnsureNotClosed(); + EnsureMode(BWTMode.Reconstruct); + + int bufferPosition = 0; + while (bufferPosition < buffer.Length) + { + if (block == null && !ReadBlock()) + { + break; + } + + int copyLength = Math.Min(blockSize - blockPosition, buffer.Length - bufferPosition); + block.AsSpan().Slice(blockPosition, copyLength).CopyTo(buffer.Slice(bufferPosition, copyLength)); + + bufferPosition += copyLength; + blockPosition += copyLength; + + if (blockPosition >= blockSize) + { + block = null; + } + } + + return bufferPosition; + } + + /// + protected override void Dispose(bool disposing) + { + if (disposed) + { + return; + } + + if (disposing) + { + Flush(); + + Debug.Assert(block == null, "Block was not returned"); + + if (!leaveOpen) + { + stream.Dispose(); + } + + disposed = true; + } + } + + private void WriteBlock() + { + Debug.Assert(block != null, "Block is null"); + + var transformBuffer = BlockPool.Rent(blockPosition); + int identityIndex = BWT.ForwardTransform(block.AsSpan()[0..blockPosition], transformBuffer.AsSpan()[0..blockPosition]); + + Span header = stackalloc byte[8]; + 
BinaryPrimitives.WriteInt32LittleEndian(header[0..4], blockPosition); + BinaryPrimitives.WriteInt32LittleEndian(header[4..8], identityIndex); + + stream.Write(header); + stream.Write(transformBuffer, 0, blockPosition); + + BlockPool.Return(transformBuffer); + BlockPool.Return(block); + + block = null; + } + + private bool ReadBlock() + { + Debug.Assert(block == null, "Block was not returned before reading"); + + Span header = stackalloc byte[8]; + if (stream.Read(header) != 8) + { + return false; + } + + blockSize = BinaryPrimitives.ReadInt32LittleEndian(header[0..4]); + var identityIndex = BinaryPrimitives.ReadInt32LittleEndian(header[4..8]); + + if (identityIndex < 0 || identityIndex >= blockSize) + { + throw new InvalidDataException(); + } + + var transformedData = BlockPool.Rent(blockSize); + if (stream.Read(transformedData, 0, blockSize) != blockSize) + { + BlockPool.Return(transformedData); + + throw new EndOfStreamException(); + } + + block = BlockPool.Rent(blockSize); + BWT.InverseTransform(transformedData.AsSpan()[0..blockSize], identityIndex, block.AsSpan()[0..blockSize]); + BlockPool.Return(transformedData); + blockPosition = 0; + + return true; + } + + private void EnsureMode(BWTMode mode) + { + if (this.mode != mode) + { + throw new InvalidOperationException(); + } + } + + private void EnsureNotClosed() + { + ObjectDisposedException.ThrowIf(disposed, this); + } +} diff --git a/Zipper/Zipper/LZW/BlockType.cs b/Zipper/Zipper/LZW/BlockType.cs new file mode 100644 index 0000000..42e7683 --- /dev/null +++ b/Zipper/Zipper/LZW/BlockType.cs @@ -0,0 +1,27 @@ +namespace Zipper.LZW; + +/// +/// Block type used to mark blocks in . +/// +internal enum BlockType : byte +{ + /// + /// Treat block as usual. + /// + Default = 0, + + /// + /// All blocks after this one should not expand code table. + /// + FixCodeTableSize = 1, + + /// + /// This block was written after and is the last one to be read. + /// + EndOfStream = 2, + + /// + /// This block was written after . 
/// <summary>
/// Internal class used to read compressed data from stream.
/// </summary>
internal class LZWReader : IDisposable
{
    private static readonly ArrayPool<byte> BlockPool = ArrayPool<byte>.Create();

    private readonly Stream stream;
    private readonly Dictionary<int, byte[]> storedCodes;
    private MemoryStream? memory;
    private byte[]? block;
    private int blockSize;
    private bool endOfStreamReached;
    private bool flushed;

    // Word that is currently being copied to the caller and how much of it was copied.
    private byte[]? word;
    private int wordPosition;

    private ArbitraryBitReader? reader;
    private int lastWordCode;
    private int maxCodesCount;

    /// <summary>
    /// Initializes a new instance of the <see cref="LZWReader"/> class.
    /// </summary>
    /// <param name="stream">Stream to read from.</param>
    /// <exception cref="ArgumentException"><paramref name="stream"/> does not support reading.</exception>
    public LZWReader(Stream stream)
    {
        if (!stream.CanRead)
        {
            throw new ArgumentException("Stream does not support reading", nameof(stream));
        }

        this.stream = stream;

        // Codes 0..255 stand for the single bytes themselves.
        storedCodes = [];
        for (int i = 0; i < 256; i++)
        {
            storedCodes[i] = [(byte)i];
        }

        lastWordCode = 256;
        maxCodesCount = int.MaxValue;
    }

    /// <summary>
    /// Reads data from underlying stream, decompresses it and writes to <paramref name="buffer"/>.
    /// </summary>
    /// <param name="buffer">Buffer to write decompressed data to.</param>
    /// <returns>Count of read bytes.</returns>
    /// <exception cref="EndOfStreamException">Unexpected end of stream.</exception>
    /// <exception cref="InvalidDataException">Invalid data stream.</exception>
    public int Read(Span<byte> buffer)
    {
        int bufferPosition = 0;

        while (bufferPosition < buffer.Length)
        {
            // write leftover word from previous (iteration) or (Read() call)
            if (word != null)
            {
                int wordLength = Math.Min(buffer.Length - bufferPosition, word.Length - wordPosition);
                word.AsSpan().Slice(wordPosition, wordLength).CopyTo(buffer[bufferPosition..]);

                bufferPosition += wordLength;
                wordPosition += wordLength;
                if (bufferPosition >= buffer.Length)
                {
                    break;
                }

                word = null;
            }

            if (block == null && !TryReadBuffer())
            {
                // No further block: report what was already copied instead of dropping it.
                break;
            }

            Debug.Assert(block != null, "Block is null");
            Debug.Assert(memory != null, "Memory is null");
            Debug.Assert(reader != null, "Reader is null");

            if (!reader.ReadNext(out int code))
            {
                if (endOfStreamReached)
                {
                    break;
                }

                if (blockSize == 0)
                {
                    BlockPool.Return(block);
                    block = null;
                    continue;
                }

                throw new EndOfStreamException();
            }

            if (!storedCodes.TryGetValue(code, out var readWord))
            {
                throw new InvalidDataException();
            }

            word = readWord;
            wordPosition = 0;

            if (lastWordCode <= maxCodesCount)
            {
                // The entry registered on the previous step misses its last byte,
                // which is the first byte of the word that has just been read.
                if (storedCodes.TryGetValue(lastWordCode, out var incompleteWord))
                {
                    incompleteWord[^1] = word[0];
                    lastWordCode++;
                }

                if (!flushed)
                {
                    // Register "current word + one yet unknown byte" under the next code.
                    var newWord = new byte[word.Length + 1];
                    word.CopyTo(newWord, 0);
                    storedCodes[lastWordCode] = newWord;
                }
            }

            if (memory.Position >= blockSize)
            {
                BlockPool.Return(block);
                block = null;
            }
        }

        return bufferPosition;
    }

    /// <summary>
    /// Disposes internal buffers.
    /// </summary>
    public void Dispose()
    {
        if (block != null)
        {
            BlockPool.Return(block);

            // Prevents the same array from being returned twice on repeated calls.
            block = null;
        }
    }

    // Reads the next data block (6-byte header: type, code width, little-endian int32 payload
    // length; followed by the payload) and prepares a bit reader over it. FixCodeTableSize
    // blocks are consumed here and never surfaced. Returns false when no complete header is available.
    private bool TryReadBuffer()
    {
        const int HeaderSize = 6;
        Span<byte> header = stackalloc byte[HeaderSize];

        while (true)
        {
            // Stream.Read may return fewer bytes than requested before the end of
            // the stream, so read until the header is complete or data runs out.
            if (stream.ReadAtLeast(header, HeaderSize, throwOnEndOfStream: false) != HeaderSize)
            {
                return false;
            }

            var blockType = (BlockType)header[0];
            int codeWidth = header[1];

            int size = BinaryPrimitives.ReadInt32LittleEndian(header[2..6]);
            if (size < 0)
            {
                throw new InvalidDataException();
            }

            blockSize = size;
            block = BlockPool.Rent(blockSize);

            if (stream.ReadAtLeast(block.AsSpan(0, blockSize), blockSize, throwOnEndOfStream: false) != blockSize)
            {
                throw new EndOfStreamException();
            }

            switch (blockType)
            {
                case BlockType.Default:
                    flushed = false;
                    break;

                case BlockType.FixCodeTableSize:
                    if (blockSize < sizeof(int))
                    {
                        throw new InvalidDataException();
                    }

                    maxCodesCount = BinaryPrimitives.ReadInt32LittleEndian(block);

                    // This block carries no codes; release it and move on to the next one.
                    BlockPool.Return(block);
                    block = null;
                    continue;

                case BlockType.EndOfStream:
                    endOfStreamReached = true;
                    break;

                case BlockType.Flush:
                    flushed = true;
                    break;

                default:
                    throw new InvalidDataException();
            }

            if (codeWidth < ArbitraryBitReader.MinWidth || codeWidth > ArbitraryBitReader.MaxWidth)
            {
                throw new InvalidDataException();
            }

            // The rented array can be longer than the payload; expose only the payload.
            memory = new MemoryStream(block, 0, blockSize, writable: false);
            reader = new ArbitraryBitReader(memory, codeWidth);

            return true;
        }
    }
}
reader; + + private bool disposed; + + /// + public LZWStream(Stream stream, ZipperMode mode, bool leaveOpen = false) + : this(stream, DefaultBlockSize, mode, leaveOpen) + { + } + + /// + /// Initializes a new instance of the class. + /// + /// The stream to which compressed data is written or from which data to decompress is read. + /// The internal block size to use, should be between and . + /// that determines whether to compress or decompress data. + /// + /// The value indicating whether should be disposed along with this instance, + /// if is . + /// + /// is not nor . + /// is out of range. + public LZWStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false) + { + ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize); + ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize); + + if (!Enum.IsDefined(mode)) + { + throw new ArgumentException($"Value was neither {ZipperMode.Compress} nor {ZipperMode.Decompress}", nameof(mode)); + } + + if (mode == ZipperMode.Compress) + { + writer = new(stream, blockSize); + } + else + { + reader = new(stream); + } + + this.stream = stream; + this.mode = mode; + this.leaveOpen = leaveOpen; + disposed = false; + } + + /// + /// Gets a value indicating whether the stream supports reading. + /// + /// + public override bool CanRead => mode == ZipperMode.Decompress && stream.CanRead; + + /// + /// Gets a value indicating whether the stream supports writing. + /// + /// + public override bool CanWrite => mode == ZipperMode.Compress && stream.CanWrite; + + /// + /// Gets a value indicating whether the stream supports seeking. + /// + /// + public override bool CanSeek => false; + + /// + /// This property is not supported and always throws a . + /// + /// + public override long Length => throw new NotSupportedException(); + + /// + /// This property is not supported and always throws a . 
+ /// + /// + public override long Position + { + get => throw new NotSupportedException(); + set => throw new NotSupportedException(); + } + + /// + /// This method is not supported and always throws a . + /// + /// + public override long Seek(long offset, SeekOrigin origin) + => throw new NotSupportedException(); + + /// + /// This method is not supported and always throws a . + /// + /// + public override void SetLength(long value) + => throw new NotSupportedException(); + + /// + /// Flushes the internal buffers. + /// + /// Stream is disposed. + public override void Flush() + { + EnsureNotClosed(); + if (mode == ZipperMode.Compress) + { + Debug.Assert(writer != null, "Writer is null"); + writer.Flush(); + } + } + + /// + /// Reads data from , compresses it and writes it to the underlying stream. + /// + /// Buffer that contains data to be compressed. + /// How many bytes to skip before reading from . + /// How many bytes to read from . + /// Stream is set to mode. + /// Stream is disposed. + public override void Write(byte[] buffer, int offset, int count) + => Write(buffer.AsSpan(offset, count)); + + /// + /// Reads data from the underlying stream, decompresses it and writes to . + /// + /// Buffer to write decompressed data to. + /// How many bytes to skip before writing to . + /// How many bytes to write to . + /// Count of read bytes, can be less than . + /// Unexpected end of stream. + /// Invalid data stream. + /// Stream is set to mode. + /// Stream is disposed. 
+ public override int Read(byte[] buffer, int offset, int count) + => Read(buffer.AsSpan(offset, count)); + + /// + public override void Write(ReadOnlySpan buffer) + { + EnsureNotClosed(); + EnsureMode(ZipperMode.Compress); + + Debug.Assert(writer != null, "Writer is null"); + + writer.Write(buffer); + } + + /// + public override int Read(Span buffer) + { + EnsureNotClosed(); + EnsureMode(ZipperMode.Decompress); + + Debug.Assert(reader != null, "Reader is null"); + + return reader.Read(buffer); + } + + /// + protected override void Dispose(bool disposing) + { + if (disposed) + { + return; + } + + if (disposing) + { + if (mode == ZipperMode.Compress) + { + Debug.Assert(writer != null, "Writer is null"); + writer.Dispose(); + } + else + { + Debug.Assert(reader != null, "Reader is null"); + reader.Dispose(); + } + + if (!leaveOpen) + { + stream.Dispose(); + } + + disposed = true; + } + } + + private void EnsureMode(ZipperMode mode) + { + if (this.mode != mode) + { + throw new InvalidOperationException(); + } + } + + private void EnsureNotClosed() + { + ObjectDisposedException.ThrowIf(disposed, this); + } +} diff --git a/Zipper/Zipper/LZW/LZWWriter.cs b/Zipper/Zipper/LZW/LZWWriter.cs new file mode 100644 index 0000000..9e50b81 --- /dev/null +++ b/Zipper/Zipper/LZW/LZWWriter.cs @@ -0,0 +1,188 @@ +namespace Zipper.LZW; + +using System.Buffers; +using System.Buffers.Binary; +using System.Diagnostics; + +/// +/// Internal class used to write compressed data to stream. 
+/// +internal class LZWWriter : IDisposable +{ + private const int DataOffset = 6; + private const int MaxCodesCount = (320 * 1024) - 1; + + private static readonly ArrayPool BlockPool = ArrayPool.Create(); + + private readonly Stream stream; + private readonly int blockSize; + private readonly byte[] block; + private readonly MemoryStream memory; + private readonly Trie trie; + + private ArbitraryBitWriter writer; + private int bitsWrittenInBlock; + private bool disableCodeTableExpansion; + + private int codeWidth; + private int codesCount; + + /// + /// Initializes a new instance of the class. + /// + /// Stream to write to. + /// The internal block size to use. + public LZWWriter(Stream stream, int blockSize) + { + ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, LZWStream.MinBlockSize); + ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, LZWStream.MaxBlockSize); + + if (!stream.CanWrite) + { + throw new ArgumentException("Stream does not support writing", nameof(stream)); + } + + this.stream = stream; + this.blockSize = blockSize; + + block = BlockPool.Rent(blockSize); + memory = new(block); + memory.Seek(DataOffset, SeekOrigin.Begin); + bitsWrittenInBlock = DataOffset * 8; + + codeWidth = 8; + codesCount = 1 << codeWidth; + + trie = new(); + for (int i = 0; i < codesCount; i++) + { + trie.AddChild((byte)i, i); + } + + writer = new(memory, codeWidth, true); + } + + /// + /// Compresses data in and writes it to underlying stream. + /// + /// Buffer to read data from. 
+    public void Write(ReadOnlySpan<byte> buffer)
+    {
+        foreach (byte value in buffer)
+        {
+            // Keep extending the current sequence while it is empty (at root)
+            // or already has a code for the sequence extended by this byte.
+            if (trie.AtRoot || trie.HasChild(value))
+            {
+                trie.MoveForward(value);
+                continue;
+            }
+
+            // Must be evaluated with the code width in effect before a possible increment below.
+            bool bufferOverflow = bitsWrittenInBlock + codeWidth > blockSize * 8;
+
+            bool shouldIncrementCodeWidth = false;
+            bool shouldDisableCodeTableExpansion = false;
+            if (!disableCodeTableExpansion)
+            {
+                // Assign the next free code to the extended sequence.
+                trie.AddChild(value, codesCount);
+                codesCount++;
+
+                shouldDisableCodeTableExpansion = codesCount >= MaxCodesCount;
+                shouldIncrementCodeWidth = codesCount >= (1 << codeWidth);
+            }
+
+            if (shouldDisableCodeTableExpansion)
+            {
+                disableCodeTableExpansion = true;
+                FlushInternal(BlockType.Default);
+                FlushInternal(BlockType.FixCodeTableSize);
+            }
+            else if (bufferOverflow || shouldIncrementCodeWidth)
+            {
+                FlushInternal(BlockType.Default);
+            }
+
+            if (shouldIncrementCodeWidth)
+            {
+                codeWidth++;
+
+                // The bit writer is bound to a fixed width, so it is recreated for the new one.
+                writer.Dispose();
+                writer = new(memory, codeWidth, true);
+            }
+
+            // Emit the code of the longest known sequence, then restart from the current byte.
+            writer.Write(trie.CurrentValue);
+            bitsWrittenInBlock += codeWidth;
+
+            trie.Reset();
+            trie.MoveForward(value);
+        }
+    }
+
+    /// <summary>
+    /// Writes all pending data to the underlying stream.
+    /// </summary>
+    public void Flush()
+    {
+        FlushInternal(BlockType.Default);
+
+        if (!trie.AtRoot)
+        {
+            // A partially matched sequence is pending: emit its code in a dedicated flush block.
+            writer.Write(trie.CurrentValue);
+            bitsWrittenInBlock += codeWidth;
+
+            trie.Reset();
+
+            FlushInternal(BlockType.Flush);
+        }
+    }
+
+    ///
+    /// Writes all pending data to the underlying stream and disposes internal buffers.
+    ///
+    public void Dispose()
+    {
+        // A MemoryStream reports CanWrite == false once it has been disposed, which makes
+        // repeated calls a no-op (no second end-of-stream block, no double return to the pool).
+        if (!memory.CanWrite)
+        {
+            return;
+        }
+
+        try
+        {
+            Flush();
+            FlushInternal(BlockType.EndOfStream);
+        }
+        finally
+        {
+            memory.Dispose();
+            BlockPool.Return(block);
+        }
+    }
+
+    /// <summary>
+    /// Writes the block assembled so far (header followed by data) to the underlying stream
+    /// and starts a new empty block. Empty <see cref="BlockType.Default"/> blocks are not written.
+    /// </summary>
+    /// <param name="type">Type stored in the block header.</param>
+    private void FlushInternal(BlockType type)
+    {
+        Debug.Assert(codeWidth <= 32, "Code width too large");
+        Debug.Assert(Enum.IsDefined(type), $"Unknown {nameof(BlockType)} parameter");
+
+        if (type == BlockType.FixCodeTableSize)
+        {
+            // Payload of this block is the table size limit as a 4-byte little-endian integer.
+            Span<byte> encodedLimit = stackalloc byte[sizeof(int)];
+            BinaryPrimitives.WriteInt32LittleEndian(encodedLimit, MaxCodesCount);
+            memory.Write(encodedLimit);
+        }
+
+        writer.Flush();
+        int length = (int)memory.Position;
+        int dataLength = length - DataOffset;
+
+        if (!(dataLength == 0 && type == BlockType.Default))
+        {
+            // Header: block type, current code width, payload length.
+            block[0] = (byte)type;
+            block[1] = (byte)codeWidth;
+            BinaryPrimitives.WriteInt32LittleEndian(block.AsSpan(2, 4), dataLength);
+
+            stream.Write(block, 0, length);
+            stream.Flush();
+        }
+
+        Array.Clear(block);
+
+        memory.Seek(DataOffset, SeekOrigin.Begin);
+        bitsWrittenInBlock = DataOffset * 8;
+    }
+}
diff --git a/Zipper/Zipper/Trie.cs b/Zipper/Zipper/Trie.cs
new file mode 100644
index 0000000..9145553
--- /dev/null
+++ b/Zipper/Zipper/Trie.cs
@@ -0,0 +1,88 @@
+namespace Zipper;
+
+/// <summary>
+/// Trie data structure, also known as prefix tree, that can be traversed through.
+/// </summary>
+/// <typeparam name="T">Type of values.</typeparam>
+internal class Trie<T>
+    where T : struct
+{
+    private readonly Node rootNode = new(default);
+    private Node lastNode;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="Trie{T}"/> class positioned at the root.
+    /// </summary>
+    public Trie()
+    {
+        lastNode = rootNode;
+    }
+
+    /// <summary>
+    /// Gets value stored at current position in trie.
+    /// </summary>
+    public T CurrentValue => lastNode.Value;
+
+    /// <summary>
+    /// Gets a value indicating whether current position is root.
+    /// </summary>
+    public bool AtRoot => lastNode == rootNode;
+
+    /// <summary>
+    /// Resets position to root.
+    /// </summary>
+    public void Reset()
+    {
+        lastNode = rootNode;
+    }
+
+    /// <summary>
+    /// Adds child at current position with specified <paramref name="key"/> and <paramref name="value"/>.
+    /// </summary>
+    /// <param name="key">Key of child to add.</param>
+    /// <param name="value">Value of child to add.</param>
+    /// <returns><see langword="true"/> if child with specified key did not exist at current position, <see langword="false"/> otherwise.</returns>
+    public bool AddChild(byte key, T value)
+    {
+        // TryAdd checks and inserts with a single dictionary lookup
+        // and leaves an existing child untouched.
+        return lastNode.Children.TryAdd(key, new Node(value));
+    }
+
+    /// <summary>
+    /// Moves forward if <paramref name="key"/> is found, otherwise doesn't move.
+    /// </summary>
+    /// <param name="key">Key to search for.</param>
+    /// <returns><see langword="true"/> if moved forward, <see langword="false"/> otherwise.</returns>
+    public bool MoveForward(byte key)
+    {
+        if (!lastNode.Children.TryGetValue(key, out var existingNode))
+        {
+            return false;
+        }
+
+        lastNode = existingNode;
+        return true;
+    }
+
+    /// <summary>
+    /// Checks if child with specified <paramref name="key"/> exists at current position.
+    /// </summary>
+    /// <param name="key">Key to search for.</param>
+    /// <returns><see langword="true"/> if child with specified key exists at current position, <see langword="false"/> otherwise.</returns>
+    public bool HasChild(byte key)
+        => lastNode.Children.ContainsKey(key);
+
+    /// <summary>
+    /// Single trie node: a value plus its children indexed by byte key.
+    /// </summary>
+    private class Node(T value)
+    {
+        public Dictionary<byte, Node> Children { get; } = [];
+
+        public T Value { get; } = value;
+    }
+}
diff --git a/Zipper/Zipper/Zipper.csproj b/Zipper/Zipper/Zipper.csproj
new file mode 100644
index 0000000..036d040
--- /dev/null
+++ b/Zipper/Zipper/Zipper.csproj
@@ -0,0 +1,14 @@
+
+
+
+    Library
+    net9.0
+    enable
+    enable
+
+
+
+
+
+
+
diff --git a/Zipper/Zipper/ZipperMode.cs b/Zipper/Zipper/ZipperMode.cs
new file mode 100644
index 0000000..9df19ad
--- /dev/null
+++ b/Zipper/Zipper/ZipperMode.cs
@@ -0,0 +1,17 @@
+namespace Zipper;
+
+/// <summary>
+/// Specifies whether to compress data to or decompress data from the underlying stream.
+/// </summary>
+public enum ZipperMode
+{
+    /// <summary>
+    /// Compress data to the underlying stream.
+    /// </summary>
+    Compress,
+
+    /// <summary>
+    /// Decompress data from the underlying stream.
+    /// </summary>
+    Decompress,
+}
diff --git a/Zipper/Zipper/ZipperStream.cs b/Zipper/Zipper/ZipperStream.cs
new file mode 100644
index 0000000..62eba44
--- /dev/null
+++ b/Zipper/Zipper/ZipperStream.cs
@@ -0,0 +1,225 @@
+namespace Zipper;
+
+using Zipper.LZW;
+
+///
+/// Provides methods and properties used to compress and decompress streams.
+///
+public class ZipperStream : Stream
+{
+    /// <summary>
+    /// Smallest allowed block length.
+    /// </summary>
+    public const int MinBlockSize = LZWStream.MinBlockSize;
+
+    /// <summary>
+    /// Largest allowed block length.
+    /// </summary>
+    public const int MaxBlockSize = LZWStream.MaxBlockSize;
+
+    // Block size used by the constructors that do not take one: midpoint of the allowed range.
+    private const int DefaultBlockSize = (MinBlockSize + MaxBlockSize) / 2;
+
+    private readonly LZWStream lzwStream;
+
+    // Stream that Read/Write/Flush are forwarded to: either lzwStream itself
+    // or a BWT stream layered on top of it.
+    private readonly Stream outerStream;
+
+    private readonly Stream stream;
+    private readonly ZipperMode mode;
+    private readonly bool leaveOpen;
+
+    private bool disposed;
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="ZipperStream"/> class with the default
+    /// block size and without Burrows-Wheeler transformation.
+    /// </summary>
+    /// <param name="stream">The stream to which compressed data is written or from which data to decompress is read.</param>
+    /// <param name="mode"><see cref="ZipperMode"/> that determines whether to compress or decompress data.</param>
+    /// <param name="leaveOpen">
+    /// <see langword="true"/> to leave <paramref name="stream"/> open after this instance is disposed.
+    /// </param>
+    /// <remarks>
+    /// <paramref name="leaveOpen"/> is deliberately not optional here: with a default on both
+    /// this overload and the four-parameter one, a call passing only
+    /// <paramref name="stream"/> and <paramref name="mode"/> could not be resolved.
+    /// Such calls now bind to the other overload, which behaves identically for them.
+    /// </remarks>
+    public ZipperStream(Stream stream, ZipperMode mode, bool leaveOpen)
+        : this(stream, DefaultBlockSize, mode, leaveOpen, false)
+    {
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="ZipperStream"/> class with the default block size.
+    /// </summary>
+    /// <param name="stream">The stream to which compressed data is written or from which data to decompress is read.</param>
+    /// <param name="mode"><see cref="ZipperMode"/> that determines whether to compress or decompress data.</param>
+    /// <param name="leaveOpen">
+    /// <see langword="true"/> to leave <paramref name="stream"/> open after this instance is disposed.
+    /// </param>
+    /// <param name="useBwt">The value indicating whether to use Burrows-Wheeler transformation internally.</param>
+    public ZipperStream(Stream stream, ZipperMode mode, bool leaveOpen = false, bool useBwt = false)
+        : this(stream, DefaultBlockSize, mode, leaveOpen, useBwt)
+    {
+    }
+
+    /// <summary>
+    /// Initializes a new instance of the <see cref="ZipperStream"/> class.
+    /// </summary>
+    /// <param name="stream">The stream to which compressed data is written or from which data to decompress is read.</param>
+    /// <param name="blockSize">The internal block size to use, should be between <see cref="MinBlockSize"/> and <see cref="MaxBlockSize"/>.</param>
+    /// <param name="mode"><see cref="ZipperMode"/> that determines whether to compress or decompress data.</param>
+    /// <param name="leaveOpen">
+    /// <see langword="true"/> to leave <paramref name="stream"/> open after this instance is disposed,
+    /// <see langword="false"/> to dispose it along with this instance.
+    /// </param>
+    /// <param name="useBwt">The value indicating whether to use Burrows-Wheeler transformation internally.</param>
+    /// <exception cref="ArgumentException"><paramref name="mode"/> is not <see cref="ZipperMode.Compress"/> nor <see cref="ZipperMode.Decompress"/>.</exception>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="blockSize"/> is out of range.</exception>
+    public ZipperStream(Stream stream, int blockSize, ZipperMode mode, bool leaveOpen = false, bool useBwt = false)
+    {
+        ArgumentOutOfRangeException.ThrowIfLessThan(blockSize, MinBlockSize);
+        ArgumentOutOfRangeException.ThrowIfGreaterThan(blockSize, MaxBlockSize);
+
+        if (!Enum.IsDefined(mode))
+        {
+            throw new ArgumentException($"Value was neither {ZipperMode.Compress} nor {ZipperMode.Decompress}", nameof(mode));
+        }
+
+        // Map the requested block size onto the BWT range, keeping its relative position
+        // between the minimum and the maximum.
+        float relativeBlockSize = (blockSize - MinBlockSize) / (float)(MaxBlockSize - MinBlockSize);
+        int bwtBlockSize = (int)(BWTStream.MinBlockSize + (relativeBlockSize * (BWTStream.MaxBlockSize - BWTStream.MinBlockSize)));
+
+        this.stream = stream;
+
+        // Inner streams never own the stream they wrap; ownership is handled in Dispose.
+        lzwStream = new(stream, blockSize, mode, true);
+        if (useBwt)
+        {
+            outerStream = new BWTStream(lzwStream, bwtBlockSize, mode == ZipperMode.Compress ? BWTMode.Transform : BWTMode.Reconstruct, true);
+        }
+        else
+        {
+            outerStream = lzwStream;
+        }
+
+        this.mode = mode;
+        this.leaveOpen = leaveOpen;
+        disposed = false;
+    }
+
+    /// <summary>
+    /// Gets a value indicating whether the stream supports reading.
+    /// </summary>
+    /// <value>
+    /// <see langword="true"/> when this instance is not disposed, is in
+    /// <see cref="ZipperMode.Decompress"/> mode and the underlying stream is readable.
+    /// </value>
+    public override bool CanRead => !disposed && mode == ZipperMode.Decompress && stream.CanRead;
+
+    /// <summary>
+    /// Gets a value indicating whether the stream supports writing.
+    /// </summary>
+    /// <value>
+    /// <see langword="true"/> when this instance is not disposed, is in
+    /// <see cref="ZipperMode.Compress"/> mode and the underlying stream is writable.
+    /// </value>
+    public override bool CanWrite => !disposed && mode == ZipperMode.Compress && stream.CanWrite;
+
+    /// <summary>
+    /// Gets a value indicating whether the stream supports seeking.
+    /// </summary>
+    /// <value>Always <see langword="false"/>.</value>
+    public override bool CanSeek => false;
+
+    /// <summary>
+    /// This property is not supported and always throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    public override long Length => throw new NotSupportedException();
+
+    /// <summary>
+    /// This property is not supported and always throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    public override long Position
+    {
+        get => throw new NotSupportedException();
+        set => throw new NotSupportedException();
+    }
+
+    ///
+    /// This method is not supported and always throws a .
+    ///
+    ///
+    public override long Seek(long offset, SeekOrigin origin)
+    {
+        throw new NotSupportedException();
+    }
+
+    /// <summary>
+    /// Not supported; every call throws a <see cref="NotSupportedException"/>.
+    /// </summary>
+    /// <param name="value">Ignored.</param>
+    public override void SetLength(long value)
+    {
+        throw new NotSupportedException();
+    }
+
+    /// <summary>
+    /// Flushes the internal buffers. Does nothing beyond the disposed check
+    /// unless the stream is in <see cref="ZipperMode.Compress"/> mode.
+    /// </summary>
+    /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+    public override void Flush()
+    {
+        EnsureNotClosed();
+
+        if (mode != ZipperMode.Compress)
+        {
+            return;
+        }
+
+        outerStream.Flush();
+    }
+
+    /// <summary>
+    /// Compresses a section of <paramref name="buffer"/> and writes the result to the underlying stream.
+    /// </summary>
+    /// <param name="buffer">Buffer that contains data to be compressed.</param>
+    /// <param name="offset">How many bytes to skip before reading from <paramref name="buffer"/>.</param>
+    /// <param name="count">How many bytes to read from <paramref name="buffer"/>.</param>
+    /// <exception cref="InvalidOperationException">Stream is set to <see cref="ZipperMode.Decompress"/> mode.</exception>
+    /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+    public override void Write(byte[] buffer, int offset, int count)
+    {
+        ReadOnlySpan<byte> section = buffer.AsSpan(offset, count);
+        Write(section);
+    }
+
+    /// <summary>
+    /// Reads data from the underlying stream, decompresses it and writes to <paramref name="buffer"/>.
+    /// </summary>
+    /// <param name="buffer">Buffer to write decompressed data to.</param>
+    /// <param name="offset">How many bytes to skip before writing to <paramref name="buffer"/>.</param>
+    /// <param name="count">How many bytes to write to <paramref name="buffer"/>.</param>
+    /// <returns>Count of read bytes, can be less than <paramref name="count"/>.</returns>
+    /// <remarks>
+    /// May also fail on an unexpected end of stream or on an invalid data stream;
+    /// those errors originate in the inner streams.
+    /// </remarks>
+    /// <exception cref="InvalidOperationException">Stream is set to <see cref="ZipperMode.Compress"/> mode.</exception>
+    /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+    public override int Read(byte[] buffer, int offset, int count)
+        => Read(buffer.AsSpan(offset, count));
+
+    /// <summary>
+    /// Compresses <paramref name="buffer"/> and writes the result to the underlying stream.
+    /// </summary>
+    /// <param name="buffer">Data to compress.</param>
+    /// <exception cref="InvalidOperationException">Stream is set to <see cref="ZipperMode.Decompress"/> mode.</exception>
+    /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+    public override void Write(ReadOnlySpan<byte> buffer)
+    {
+        EnsureNotClosed();
+        EnsureMode(ZipperMode.Compress);
+
+        outerStream.Write(buffer);
+    }
+
+    /// <summary>
+    /// Reads data from the underlying stream, decompresses it and writes to <paramref name="buffer"/>.
+    /// </summary>
+    /// <param name="buffer">Destination for decompressed data.</param>
+    /// <returns>Count of read bytes as reported by the inner stream.</returns>
+    /// <exception cref="InvalidOperationException">Stream is set to <see cref="ZipperMode.Compress"/> mode.</exception>
+    /// <exception cref="ObjectDisposedException">Stream is disposed.</exception>
+    public override int Read(Span<byte> buffer)
+    {
+        EnsureNotClosed();
+        EnsureMode(ZipperMode.Decompress);
+
+        return outerStream.Read(buffer);
+    }
+
+    /// <summary>
+    /// Disposes the inner streams and, unless <c>leaveOpen</c> was requested,
+    /// the underlying stream. Subsequent calls do nothing.
+    /// </summary>
+    /// <param name="disposing">
+    /// <see langword="true"/> when called from <see cref="IDisposable.Dispose"/>.
+    /// </param>
+    protected override void Dispose(bool disposing)
+    {
+        if (disposed)
+        {
+            return;
+        }
+
+        if (disposing)
+        {
+            try
+            {
+                // Both inner streams were created with leaveOpen, so each is disposed explicitly;
+                // without BWT they are the same object and it is simply disposed twice.
+                outerStream.Dispose();
+                lzwStream.Dispose();
+            }
+            finally
+            {
+                // Release the underlying stream even when disposing an inner stream throws.
+                if (!leaveOpen)
+                {
+                    stream.Dispose();
+                }
+
+                disposed = true;
+            }
+        }
+
+        base.Dispose(disposing);
+    }
+
+    /// <summary>
+    /// Throws when the stream was not opened in the requested <paramref name="mode"/>.
+    /// </summary>
+    private void EnsureMode(ZipperMode mode)
+    {
+        if (this.mode != mode)
+        {
+            throw new InvalidOperationException();
+        }
+    }
+
+    /// <summary>
+    /// Throws <see cref="ObjectDisposedException"/> when the stream is already disposed.
+    /// </summary>
+    private void EnsureNotClosed()
+    {
+        ObjectDisposedException.ThrowIf(disposed, this);
+    }
+}