diff --git a/ChecksumCalculator/ChecksumCalculator.sln b/ChecksumCalculator/ChecksumCalculator.sln new file mode 100644 index 0000000..ad86db7 --- /dev/null +++ b/ChecksumCalculator/ChecksumCalculator.sln @@ -0,0 +1,22 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChecksumCalculator", "ChecksumCalculator\ChecksumCalculator.csproj", "{E5B72DED-8E4D-4570-B3F1-A94FB83B7EE1}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChecksumCalculatorTests", "ChecksumCalculatorTests\ChecksumCalculatorTests.csproj", "{3B376E7C-E6A1-4A70-B5D4-79C68AB4A39B}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {E5B72DED-8E4D-4570-B3F1-A94FB83B7EE1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E5B72DED-8E4D-4570-B3F1-A94FB83B7EE1}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E5B72DED-8E4D-4570-B3F1-A94FB83B7EE1}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E5B72DED-8E4D-4570-B3F1-A94FB83B7EE1}.Release|Any CPU.Build.0 = Release|Any CPU + {3B376E7C-E6A1-4A70-B5D4-79C68AB4A39B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {3B376E7C-E6A1-4A70-B5D4-79C68AB4A39B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {3B376E7C-E6A1-4A70-B5D4-79C68AB4A39B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {3B376E7C-E6A1-4A70-B5D4-79C68AB4A39B}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection +EndGlobal diff --git a/ChecksumCalculator/ChecksumCalculator/ChecksumCalculator.csproj b/ChecksumCalculator/ChecksumCalculator/ChecksumCalculator.csproj new file mode 100644 index 0000000..806b616 --- /dev/null +++ b/ChecksumCalculator/ChecksumCalculator/ChecksumCalculator.csproj @@ -0,0 +1,21 @@ + + + + Exe + net9.0 + enable + enable + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + + diff --git 
a/ChecksumCalculator/ChecksumCalculator/ParallelChecksumCalc.cs b/ChecksumCalculator/ChecksumCalculator/ParallelChecksumCalc.cs
new file mode 100644
index 0000000..5fd3177
--- /dev/null
+++ b/ChecksumCalculator/ChecksumCalculator/ParallelChecksumCalc.cs
@@ -0,0 +1,144 @@
+//
+// Copyright (c) khusainovilas. All rights reserved.
+//
+
+namespace ChecksumCalculator;
+
+using System.Security.Cryptography;
+using System.Text;
+
+/// <summary>
+/// Calculates a deterministic MD5 checksum of a directory in multithreaded mode.
+/// </summary>
+/// <remarks>
+/// A file hashes to MD5(UTF-8 file name + file content). A directory hashes to
+/// MD5(UTF-8 directory name + hashes of its entries sorted by name, ordinal comparison).
+/// When <c>Path.GetFileName</c> returns an empty name for a directory (for example a path
+/// that ends with a separator), the whole path string is hashed in place of the name.
+/// </remarks>
+public class ParallelChecksumCalc
+{
+    // Size of the chunks in which file content is streamed into the hash.
+    private const int ReadBufferSize = 81920;
+
+    private static readonly int MaxDegreeOfParallelism = Environment.ProcessorCount;
+
+    // Limits how many files are open and being hashed at once; shared by all instances.
+    private static readonly SemaphoreSlim FileReadSemaphore = new(MaxDegreeOfParallelism, MaxDegreeOfParallelism);
+
+    /// <summary>
+    /// Asynchronously calculates a directory checksum using parallelism.
+    /// </summary>
+    /// <param name="directoryPath">Full or relative path to the directory.</param>
+    /// <param name="cancellationToken">Operation cancellation token.</param>
+    /// <returns>A <see cref="Task{TResult}"/> whose result is the raw MD5 hash bytes.</returns>
+    /// <exception cref="ArgumentException"><paramref name="directoryPath"/> is empty or white space.</exception>
+    /// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
+    public async Task<byte[]> ComputeChecksumAsync(string directoryPath, CancellationToken cancellationToken = default)
+    {
+        ArgumentException.ThrowIfNullOrWhiteSpace(directoryPath);
+
+        var fullPath = Path.GetFullPath(directoryPath);
+
+        if (!Directory.Exists(fullPath))
+        {
+            throw new DirectoryNotFoundException($"Directory not found: {fullPath}");
+        }
+
+        return await this.ComputeDirectoryHashAsync(fullPath, cancellationToken)
+            .ConfigureAwait(false);
+    }
+
+    /// <summary>
+    /// Asynchronously calculates the checksum and returns it as a Base64 string.
+    /// </summary>
+    /// <param name="directoryPath">Full or relative path to the directory.</param>
+    /// <param name="cancellationToken">Operation cancellation token.</param>
+    /// <returns>A <see cref="Task{TResult}"/> whose result is the Base64-encoded MD5 hash.</returns>
+    public async Task<string> ComputeChecksumBase64Async(string directoryPath, CancellationToken cancellationToken = default)
+    {
+        var hash = await this.ComputeChecksumAsync(directoryPath, cancellationToken).ConfigureAwait(false);
+
+        return Convert.ToBase64String(hash);
+    }
+
+    /// <summary>
+    /// Hashes a directory: its name followed by the hashes of its entries in ordinal name order.
+    /// </summary>
+    private async Task<byte[]> ComputeDirectoryHashAsync(string directoryPath, CancellationToken cancellationToken)
+    {
+        cancellationToken.ThrowIfCancellationRequested();
+
+        var directoryName = Path.GetFileName(directoryPath);
+        if (string.IsNullOrEmpty(directoryName))
+        {
+            directoryName = directoryPath;
+        }
+
+        var entries = Directory.GetFileSystemEntries(directoryPath)
+            .OrderBy(Path.GetFileName, StringComparer.Ordinal)
+            .ToArray();
+
+        // Start every child before awaiting any of them so that their work can overlap.
+        var childHashTasks = new List<Task<byte[]>>(entries.Length);
+        foreach (var entryPath in entries)
+        {
+            childHashTasks.Add(
+                Directory.Exists(entryPath)
+                    ? this.ComputeDirectoryHashAsync(entryPath, cancellationToken)
+                    : this.ComputeFileHashAsync(entryPath, cancellationToken));
+        }
+
+        // Task.WhenAll returns results in task order, so the sorted entry order is kept.
+        var childHashes = await Task.WhenAll(childHashTasks).ConfigureAwait(false);
+
+        // Incremental hashing gives the same digest as hashing the concatenated buffer.
+        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
+        hasher.AppendData(Encoding.UTF8.GetBytes(directoryName));
+        foreach (var childHash in childHashes)
+        {
+            hasher.AppendData(childHash);
+        }
+
+        return hasher.GetHashAndReset();
+    }
+
+    /// <summary>
+    /// Hashes a file: its name followed by its content, streamed in fixed-size chunks.
+    /// </summary>
+    private async Task<byte[]> ComputeFileHashAsync(string filePath, CancellationToken cancellationToken)
+    {
+        var nameBytes = Encoding.UTF8.GetBytes(Path.GetFileName(filePath));
+
+        await FileReadSemaphore.WaitAsync(cancellationToken).ConfigureAwait(false);
+
+        try
+        {
+            await using var fileStream = new FileStream(
+                filePath,
+                FileMode.Open,
+                FileAccess.Read,
+                FileShare.Read,
+                ReadBufferSize,
+                FileOptions.Asynchronous | FileOptions.SequentialScan);
+
+            using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
+            hasher.AppendData(nameBytes);
+
+            // Stream the content in chunks: memory use stays bounded and files larger
+            // than 2 GiB (which do not fit in a MemoryStream) are supported.
+            var buffer = new byte[ReadBufferSize];
+            int bytesRead;
+            while ((bytesRead = await fileStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
+            {
+                hasher.AppendData(buffer, 0, bytesRead);
+            }
+
+            return hasher.GetHashAndReset();
+        }
+        finally
+        {
+            FileReadSemaphore.Release();
+        }
+    }
+}
\ No newline at end of file
diff --git a/ChecksumCalculator/ChecksumCalculator/Program.cs b/ChecksumCalculator/ChecksumCalculator/Program.cs
new file mode 100644
index 0000000..dc0d649
--- /dev/null
+++ b/ChecksumCalculator/ChecksumCalculator/Program.cs
@@ -0,0 +1,41 @@
+//
+// Copyright (c) khusainovilas. All rights reserved.
+//
+
+using System.Diagnostics;
+using ChecksumCalculator;
+
+if (args.Length == 0)
+{
+    Console.Error.WriteLine("Usage: ChecksumCalculator <directory>");
+    return 1;
+}
+
+var directoryPath = args[0];
+
+if (!Directory.Exists(directoryPath))
+{
+    Console.Error.WriteLine($"Error: directory not found: {directoryPath}");
+    return 1;
+}
+
+var sequential = new SequentialChecksumCalc();
+var parallel = new ParallelChecksumCalc();
+
+Console.WriteLine($"Calculating checksum for: {Path.GetFullPath(directoryPath)}");
+Console.WriteLine();
+
+var sw = Stopwatch.StartNew();
+var hashSeq = await sequential.ComputeChecksumBase64Async(directoryPath);
+var timeSeq = sw.Elapsed.TotalSeconds;
+
+sw.Restart();
+var hashPar = await parallel.ComputeChecksumBase64Async(directoryPath);
+var timePar = sw.Elapsed.TotalSeconds;
+
+Console.WriteLine($"Sequential → {hashSeq} ({timeSeq:F3}s)");
+Console.WriteLine($"Parallel → {hashPar} ({timePar:F3}s)");
+Console.WriteLine();
+
+Console.WriteLine(hashSeq == hashPar ? "Success: Hashes are identical" : "Failure: Hashes differ — something went wrong!");
+return hashSeq == hashPar ? 0 : 1;
\ No newline at end of file
diff --git a/ChecksumCalculator/ChecksumCalculator/SequentialChecksumCalc.cs b/ChecksumCalculator/ChecksumCalculator/SequentialChecksumCalc.cs
new file mode 100644
index 0000000..a44f16a
--- /dev/null
+++ b/ChecksumCalculator/ChecksumCalculator/SequentialChecksumCalc.cs
@@ -0,0 +1,127 @@
+//
+// Copyright (c) khusainovilas. All rights reserved.
+//
+
+namespace ChecksumCalculator;
+
+using System.Security.Cryptography;
+using System.Text;
+
+/// <summary>
+/// Calculates a MD5 checksum of a directory in sequential mode.
+/// </summary>
+/// <remarks>
+/// A file hashes to MD5(UTF-8 file name + file content). A directory hashes to
+/// MD5(UTF-8 directory name + hashes of its entries sorted by name, ordinal comparison).
+/// When <c>Path.GetFileName</c> returns an empty name for a directory (for example a path
+/// that ends with a separator), the whole path string is hashed in place of the name.
+/// </remarks>
+public class SequentialChecksumCalc
+{
+    // Size of the chunks in which file content is streamed into the hash.
+    private const int ReadBufferSize = 81920;
+
+    /// <summary>
+    /// Asynchronously calculates a deterministic checksum of a directory, one entry at a time.
+    /// </summary>
+    /// <param name="directoryPath">
+    /// Full or relative path to the directory for which the hash sum should be calculated.
+    /// </param>
+    /// <param name="cancellationToken">
+    /// Operation cancellation token. Allows you to abort a lengthy calculation.
+    /// </param>
+    /// <returns>A <see cref="Task{TResult}"/> whose result is the raw MD5 hash bytes.</returns>
+    /// <exception cref="ArgumentException"><paramref name="directoryPath"/> is empty or white space.</exception>
+    /// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
+    public async Task<byte[]> ComputeChecksumAsync(string directoryPath, CancellationToken cancellationToken = default)
+    {
+        ArgumentException.ThrowIfNullOrWhiteSpace(directoryPath);
+
+        var fullPath = Path.GetFullPath(directoryPath);
+
+        if (!Directory.Exists(fullPath))
+        {
+            throw new DirectoryNotFoundException($"Directory not found: {fullPath}");
+        }
+
+        return await this.ComputeDirectoryHashAsync(fullPath, cancellationToken).ConfigureAwait(false);
+    }
+
+    /// <summary>
+    /// Asynchronously calculates the checksum and returns it as a Base64 string.
+    /// </summary>
+    /// <param name="directoryPath">
+    /// Full or relative path to the directory.
+    /// </param>
+    /// <param name="cancellationToken">
+    /// Operation cancellation token.
+    /// </param>
+    /// <returns>A <see cref="Task{TResult}"/> whose result is the Base64-encoded MD5 hash.</returns>
+    public async Task<string> ComputeChecksumBase64Async(string directoryPath, CancellationToken cancellationToken = default)
+    {
+        var hash = await this.ComputeChecksumAsync(directoryPath, cancellationToken).ConfigureAwait(false);
+
+        return Convert.ToBase64String(hash);
+    }
+
+    /// <summary>
+    /// Hashes a directory: its name followed by the hashes of its entries in ordinal name order.
+    /// </summary>
+    private async Task<byte[]> ComputeDirectoryHashAsync(string directoryPath, CancellationToken cancellationToken)
+    {
+        var name = Path.GetFileName(directoryPath);
+        if (string.IsNullOrEmpty(name))
+        {
+            name = directoryPath;
+        }
+
+        var entries = Directory.GetFileSystemEntries(directoryPath)
+            .OrderBy(Path.GetFileName, StringComparer.Ordinal)
+            .ToArray();
+
+        // Incremental hashing gives the same digest as hashing the concatenated buffer.
+        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
+        hasher.AppendData(Encoding.UTF8.GetBytes(name));
+
+        foreach (var entry in entries)
+        {
+            cancellationToken.ThrowIfCancellationRequested();
+
+            var childHash = Directory.Exists(entry)
+                ? await this.ComputeDirectoryHashAsync(entry, cancellationToken).ConfigureAwait(false)
+                : await this.ComputeFileHashAsync(entry, cancellationToken).ConfigureAwait(false);
+
+            hasher.AppendData(childHash);
+        }
+
+        return hasher.GetHashAndReset();
+    }
+
+    /// <summary>
+    /// Hashes a file: its name followed by its content, streamed in fixed-size chunks.
+    /// </summary>
+    private async Task<byte[]> ComputeFileHashAsync(string filePath, CancellationToken cancellationToken)
+    {
+        await using var stream = new FileStream(
+            filePath,
+            FileMode.Open,
+            FileAccess.Read,
+            FileShare.Read,
+            ReadBufferSize,
+            FileOptions.Asynchronous | FileOptions.SequentialScan);
+
+        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
+        hasher.AppendData(Encoding.UTF8.GetBytes(Path.GetFileName(filePath)));
+
+        // Stream the content in chunks: memory use stays bounded and files larger
+        // than 2 GiB (which do not fit in a MemoryStream) are supported.
+        var buffer = new byte[ReadBufferSize];
+        int bytesRead;
+        while ((bytesRead = await stream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false)) > 0)
+        {
+            hasher.AppendData(buffer, 0, bytesRead);
+        }
+
+        return hasher.GetHashAndReset();
+    }
+}
\ No newline at end of file
diff --git a/ChecksumCalculator/ChecksumCalculator/stylecop.json b/ChecksumCalculator/ChecksumCalculator/stylecop.json
new file mode 100644
index 0000000..76c8e76
--- /dev/null
+++ b/ChecksumCalculator/ChecksumCalculator/stylecop.json
@@ -0,0 +1,9 @@
+{
+  "$schema": "https://raw.githubusercontent.com/DotNetAnalyzers/StyleCopAnalyzers/master/StyleCop.Analyzers/StyleCop.Analyzers/Settings/stylecop.schema.json",
+  "settings": {
+    "documentationRules": {
+      "companyName": "khusainovilas",
+      "copyrightText": "Copyright (c) {companyName}. All rights reserved."
+    }
+  }
+}
\ No newline at end of file
diff --git a/ChecksumCalculator/ChecksumCalculatorTests/ChecksumCalculatorTests.cs b/ChecksumCalculator/ChecksumCalculatorTests/ChecksumCalculatorTests.cs
new file mode 100644
index 0000000..4c864d3
--- /dev/null
+++ b/ChecksumCalculator/ChecksumCalculatorTests/ChecksumCalculatorTests.cs
@@ -0,0 +1,152 @@
+//
+// Copyright (c) khusainovilas. All rights reserved.
+//
+
+namespace ChecksumCalculatorTests;
+
+using ChecksumCalculator;
+
+/// <summary>
+/// Checksum Calculator implementation tests.
+/// </summary>
+public class ChecksumCalculatorTests
+{
+    private SequentialChecksumCalc sequential = null!;
+    private ParallelChecksumCalc parallel = null!;
+
+    /// <summary>
+    /// Creates fresh calculator instances before every test.
+    /// </summary>
+    [SetUp]
+    public void Setup()
+    {
+        this.sequential = new SequentialChecksumCalc();
+        this.parallel = new ParallelChecksumCalc();
+    }
+
+    /// <summary>
+    /// Checks that the serial and parallel calculators return the same hash
+    /// for a complex directory structure with files, subfolders, and different names.
+    /// </summary>
+    /// <returns>A <see cref="Task"/> representing the asynchronous test.</returns>
+    [Test]
+    public async Task SequentialChecksumCalc_And_ParallelChecksumCalc_WithComplexDirectoryStructure_ReturnSameHash()
+    {
+        var testDirectory = CreateComplexTestDirectory();
+
+        try
+        {
+            var sequentialHash = await this.sequential.ComputeChecksumBase64Async(testDirectory);
+            var parallelHash = await this.parallel.ComputeChecksumBase64Async(testDirectory);
+
+            Assert.That(parallelHash, Is.EqualTo(sequentialHash));
+        }
+        finally
+        {
+            Directory.Delete(testDirectory, recursive: true);
+        }
+    }
+
+    /// <summary>
+    /// Checks that files with the same content but different names produce different hashes.
+    /// </summary>
+    /// <returns>A <see cref="Task"/> representing the asynchronous test.</returns>
+    [Test]
+    public async Task SequentialChecksumCalc_WithSameFileContentButDifferentNames_ReturnsDifferentHashes()
+    {
+        // Arrange
+        // Both hashed directories are named "data": the directory name is part of the hash,
+        // so equal names leave the file name as the only difference between the two trees.
+        var parent1 = CreateTempDirectory("same_content_diff_name_1_");
+        var parent2 = CreateTempDirectory("same_content_diff_name_2_");
+
+        try
+        {
+            var dir1 = Directory.CreateDirectory(Path.Combine(parent1, "data")).FullName;
+            var dir2 = Directory.CreateDirectory(Path.Combine(parent2, "data")).FullName;
+
+            const string content = "identical content";
+            await File.WriteAllTextAsync(Path.Combine(dir1, "first.txt"), content);
+            await File.WriteAllTextAsync(Path.Combine(dir2, "second.txt"), content);
+
+            // Act
+            var hash1 = await this.sequential.ComputeChecksumBase64Async(dir1);
+            var hash2 = await this.sequential.ComputeChecksumBase64Async(dir2);
+
+            // Assert
+            Assert.That(hash2, Is.Not.EqualTo(hash1));
+        }
+        finally
+        {
+            Directory.Delete(parent1, recursive: true);
+            Directory.Delete(parent2, recursive: true);
+        }
+    }
+
+    /// <summary>
+    /// Checks that the order in which files are created in the file system does not affect the resulting hash.
+    /// </summary>
+    /// <returns>A <see cref="Task"/> representing the asynchronous test.</returns>
+    [Test]
+    public async Task SequentialChecksumCalc_FileOrderDoesNotMatter_ReturnsSameHash()
+    {
+        var dirPath = CreateTempDirectory("file_order_test_");
+
+        try
+        {
+            await File.WriteAllTextAsync(Path.Combine(dirPath, "c.txt"), "third");
+            await File.WriteAllTextAsync(Path.Combine(dirPath, "a.txt"), "first");
+            await File.WriteAllTextAsync(Path.Combine(dirPath, "b.txt"), "second");
+
+            var hash1 = await this.sequential.ComputeChecksumBase64Async(dirPath);
+
+            Directory.Delete(dirPath, recursive: true);
+            Directory.CreateDirectory(dirPath);
+
+            await File.WriteAllTextAsync(Path.Combine(dirPath, "a.txt"), "first");
+            await File.WriteAllTextAsync(Path.Combine(dirPath, "b.txt"), "second");
+            await File.WriteAllTextAsync(Path.Combine(dirPath, "c.txt"), "third");
+
+            var hash2 = await this.sequential.ComputeChecksumBase64Async(dirPath);
+
+            Assert.That(hash2, Is.EqualTo(hash1));
+        }
+        finally
+        {
+            // The directory may be missing if the test failed between Delete and CreateDirectory.
+            if (Directory.Exists(dirPath))
+            {
+                Directory.Delete(dirPath, recursive: true);
+            }
+        }
+    }
+
+    /// <summary>
+    /// Helper method: creates an empty, uniquely named directory under the system temp folder.
+    /// </summary>
+    private static string CreateTempDirectory(string prefix)
+    {
+        var path = Path.Combine(Path.GetTempPath(), prefix + Guid.NewGuid().ToString("N"));
+        Directory.CreateDirectory(path);
+
+        return path;
+    }
+
+    /// <summary>
+    /// Helper method: creates a complex test directory and file structure.
+    /// </summary>
+    private static string CreateComplexTestDirectory()
+    {
+        var root = CreateTempDirectory("complex_checksum_test_");
+
+        Directory.CreateDirectory(Path.Combine(root, "subfolder"));
+        Directory.CreateDirectory(Path.Combine(root, "кириллица"));
+
+        File.WriteAllText(Path.Combine(root, "hello.txt"), "Hello!");
+        File.WriteAllBytes(Path.Combine(root, "empty.bin"), Array.Empty<byte>());
+        File.WriteAllText(Path.Combine(root, "subfolder", "deep.txt"), "deep content");
+        File.WriteAllText(Path.Combine(root, "кириллица", "заказ.txt"), "Молоко");
+
+        return root;
+    }
+}
\ No newline at end of file
diff --git a/ChecksumCalculator/ChecksumCalculatorTests/ChecksumCalculatorTests.csproj b/ChecksumCalculator/ChecksumCalculatorTests/ChecksumCalculatorTests.csproj
new file mode 100644
index 0000000..f4358e2
--- /dev/null
+++ b/ChecksumCalculator/ChecksumCalculatorTests/ChecksumCalculatorTests.csproj
@@ -0,0 +1,39 @@
+
+
+
+    net9.0
+    latest
+    enable
+    enable
+    false
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+      all
+      runtime; build; native; contentfiles; analyzers; buildtransitive
+
+
+
+
+
+
+
+
+
diff --git a/ChecksumCalculator/ChecksumCalculatorTests/stylecop.json b/ChecksumCalculator/ChecksumCalculatorTests/stylecop.json
new file mode 100644
index 0000000..76c8e76
--- /dev/null
+++ b/ChecksumCalculator/ChecksumCalculatorTests/stylecop.json
@@ -0,0 +1,9 @@
+{
+  "$schema": "https://raw.githubusercontent.com/DotNetAnalyzers/StyleCopAnalyzers/master/StyleCop.Analyzers/StyleCop.Analyzers/Settings/stylecop.schema.json",
+  "settings": {
+    "documentationRules": {
+      "companyName": "khusainovilas",
+      "copyrightText": "Copyright (c) {companyName}. All rights reserved."
+    }
+  }
+}
\ No newline at end of file