-
Notifications
You must be signed in to change notification settings - Fork 0
TEST1 Checksum Calculator #6
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| | ||
| Microsoft Visual Studio Solution File, Format Version 12.00 | ||
| Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChecksumCalculator", "ChecksumCalculator\ChecksumCalculator.csproj", "{E5B72DED-8E4D-4570-B3F1-A94FB83B7EE1}" | ||
| EndProject | ||
| Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChecksumCalculatorTests", "ChecksumCalculatorTests\ChecksumCalculatorTests.csproj", "{3B376E7C-E6A1-4A70-B5D4-79C68AB4A39B}" | ||
| EndProject | ||
| Global | ||
| GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
| Debug|Any CPU = Debug|Any CPU | ||
| Release|Any CPU = Release|Any CPU | ||
| EndGlobalSection | ||
| GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
| {E5B72DED-8E4D-4570-B3F1-A94FB83B7EE1}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| {E5B72DED-8E4D-4570-B3F1-A94FB83B7EE1}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| {E5B72DED-8E4D-4570-B3F1-A94FB83B7EE1}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| {E5B72DED-8E4D-4570-B3F1-A94FB83B7EE1}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| {3B376E7C-E6A1-4A70-B5D4-79C68AB4A39B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| {3B376E7C-E6A1-4A70-B5D4-79C68AB4A39B}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| {3B376E7C-E6A1-4A70-B5D4-79C68AB4A39B}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| {3B376E7C-E6A1-4A70-B5D4-79C68AB4A39B}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| EndGlobalSection | ||
| EndGlobal |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| <Project Sdk="Microsoft.NET.Sdk"> | ||
|
|
||
| <PropertyGroup> | ||
| <OutputType>Exe</OutputType> | ||
| <TargetFramework>net9.0</TargetFramework> | ||
| <ImplicitUsings>enable</ImplicitUsings> | ||
| <Nullable>enable</Nullable> | ||
| </PropertyGroup> | ||
|
|
||
| <ItemGroup> | ||
| <PackageReference Include="StyleCop.Analyzers" Version="1.2.0-beta.556"> | ||
| <PrivateAssets>all</PrivateAssets> | ||
| <IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets> | ||
| </PackageReference> | ||
| </ItemGroup> | ||
|
|
||
| <ItemGroup> | ||
| <AdditionalFiles Include="stylecop.json" /> | ||
| </ItemGroup> | ||
|
|
||
| </Project> |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,124 @@ | ||
| // <copyright file="ParallelChecksumCalc.cs" company="khusainovilas"> | ||
| // Copyright (c) khusainovilas. All rights reserved. | ||
| // </copyright> | ||
|
|
||
| namespace ChecksumCalculator; | ||
|
|
||
| using System.Security.Cryptography; | ||
| using System.Text; | ||
|
|
||
/// <summary>
/// Calculates a deterministic MD5 checksum of a directory in multithreaded mode.
/// </summary>
/// <remarks>
/// The checksum of a file is <c>MD5(UTF-8 file name + file contents)</c>. The checksum of a
/// directory is <c>MD5(UTF-8 directory name + checksums of its entries)</c>, where the entries
/// are taken in ordinal order of their names.
/// </remarks>
public class ParallelChecksumCalc
{
    /// <summary>
    /// Size of the chunk used when streaming file contents into the hash.
    /// </summary>
    private const int ReadBufferSize = 81920;

    private static readonly int MaxDegreeOfParallelism = Environment.ProcessorCount;

    // Bounds how many files are open and being read at the same time.
    private static readonly SemaphoreSlim FileReadSemaphore = new(MaxDegreeOfParallelism, MaxDegreeOfParallelism);

    /// <summary>
    /// Asynchronously calculates a directory checksum using parallelism.
    /// </summary>
    /// <param name="directoryPath">Full or relative path to the directory.</param>
    /// <param name="cancellationToken">Operation cancellation token.</param>
    /// <returns>A task whose result is the MD5 checksum of the directory.</returns>
    /// <exception cref="ArgumentException"><paramref name="directoryPath"/> is null, empty or whitespace.</exception>
    /// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
    public async Task<byte[]> ComputeChecksumAsync(string directoryPath, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(directoryPath);

        var fullPath = Path.GetFullPath(directoryPath);

        if (!Directory.Exists(fullPath))
        {
            throw new DirectoryNotFoundException($"Directory not found: {fullPath}");
        }

        return await ComputeDirectoryHashAsync(fullPath, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Asynchronously calculates the checksum and returns it as a Base64 string.
    /// </summary>
    /// <param name="directoryPath">Full or relative path to the directory.</param>
    /// <param name="cancellationToken">Operation cancellation token.</param>
    /// <returns>A task whose result is the Base64 encoding of the MD5 checksum.</returns>
    public async Task<string> ComputeChecksumBase64Async(string directoryPath, CancellationToken cancellationToken = default)
    {
        var hash = await this.ComputeChecksumAsync(directoryPath, cancellationToken).ConfigureAwait(false);

        return Convert.ToBase64String(hash);
    }

    /// <summary>
    /// Hashes a directory: its name followed by the hashes of all its entries,
    /// which are computed concurrently.
    /// </summary>
    private static async Task<byte[]> ComputeDirectoryHashAsync(string directoryPath, CancellationToken cancellationToken)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // A path such as a drive root has no file name; fall back to the path itself.
        var directoryName = Path.GetFileName(directoryPath);
        if (string.IsNullOrEmpty(directoryName))
        {
            directoryName = directoryPath;
        }

        // Ordinal ordering by name keeps the result independent of enumeration order.
        var entries = Directory.GetFileSystemEntries(directoryPath)
            .OrderBy(Path.GetFileName, StringComparer.Ordinal)
            .ToArray();

        var childHashTasks = new List<Task<byte[]>>(entries.Length);
        foreach (var entryPath in entries)
        {
            childHashTasks.Add(
                Directory.Exists(entryPath)
                    ? ComputeDirectoryHashAsync(entryPath, cancellationToken)
                    : ComputeFileHashAsync(entryPath, cancellationToken));
        }

        // Task.WhenAll returns results in the order of the supplied tasks,
        // so the sorted entry order is preserved.
        var childHashes = await Task.WhenAll(childHashTasks).ConfigureAwait(false);

        // Feeding the parts incrementally yields the same digest as hashing their
        // concatenation, without building a combined buffer or keeping an MD5 object around.
        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
        hasher.AppendData(Encoding.UTF8.GetBytes(directoryName));
        foreach (var childHash in childHashes)
        {
            hasher.AppendData(childHash);
        }

        return hasher.GetHashAndReset();
    }

    /// <summary>
    /// Hashes a single file: its name followed by its contents.
    /// </summary>
    private static async Task<byte[]> ComputeFileHashAsync(string filePath, CancellationToken cancellationToken)
    {
        var nameBytes = Encoding.UTF8.GetBytes(Path.GetFileName(filePath));

        await FileReadSemaphore.WaitAsync(cancellationToken).ConfigureAwait(false);

        try
        {
            await using var fileStream = new FileStream(
                filePath,
                FileMode.Open,
                FileAccess.Read,
                FileShare.Read,
                bufferSize: 4096,
                FileOptions.Asynchronous | FileOptions.SequentialScan);

            using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
            hasher.AppendData(nameBytes);

            // Stream the file in fixed-size chunks instead of loading it into memory,
            // so memory use does not grow with file size and files over 2 GiB work.
            var buffer = new byte[ReadBufferSize];
            int bytesRead;
            while ((bytesRead = await fileStream.ReadAsync(buffer.AsMemory(), cancellationToken).ConfigureAwait(false)) > 0)
            {
                hasher.AppendData(buffer, 0, bytesRead);
            }

            return hasher.GetHashAndReset();
        }
        finally
        {
            FileReadSemaphore.Release();
        }
    }
}
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| // <copyright file="Program.cs" company="khusainovilas"> | ||
| // Copyright (c) khusainovilas. All rights reserved. | ||
| // </copyright> | ||
|
|
||
| using System.Diagnostics; | ||
| using ChecksumCalculator; | ||
|
|
||
// Entry point: computes the checksum of the directory given as the first argument with both
// the sequential and the parallel calculator, prints both results with timings, and reports
// whether they agree. Exit code is 0 on success and 1 on a usage error, a missing directory,
// or a hash mismatch.
if (args.Length == 0)
{
    Console.Error.WriteLine("Usage: ChecksumCalculator <directory>");
    return 1;
}

var directoryPath = args[0];

if (!Directory.Exists(directoryPath))
{
    Console.Error.WriteLine($"Error: directory not found: {directoryPath}");
    return 1;
}

var sequential = new SequentialChecksumCalc();
var parallel = new ParallelChecksumCalc();

Console.WriteLine($"Calculating checksum for: {Path.GetFullPath(directoryPath)}");
Console.WriteLine();

var sw = Stopwatch.StartNew();
var hashSeq = await sequential.ComputeChecksumBase64Async(directoryPath);
sw.Stop();
var timeSeq = sw.Elapsed.TotalSeconds;

// Restart resets the elapsed time, so the parallel run is measured on its own.
sw.Restart();
var hashPar = await parallel.ComputeChecksumBase64Async(directoryPath);
sw.Stop();
var timePar = sw.Elapsed.TotalSeconds;

Console.WriteLine($"Sequential → {hashSeq} ({timeSeq:F3}s)");
Console.WriteLine($"Parallel → {hashPar} ({timePar:F3}s)");
Console.WriteLine();

var hashesMatch = hashSeq == hashPar;
Console.WriteLine(hashesMatch ? "Success: Hashes are identical" : "Failure: Hashes differ — something went wrong!");
return hashesMatch ? 0 : 1;
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,109 @@ | ||
| // <copyright file="SequentialChecksumCalc.cs" company="khusainovilas"> | ||
| // Copyright (c) khusainovilas. All rights reserved. | ||
| // </copyright> | ||
|
|
||
| namespace ChecksumCalculator; | ||
|
|
||
| using System.Security.Cryptography; | ||
| using System.Text; | ||
|
|
||
/// <summary>
/// Calculates a MD5 checksum of a directory in sequential mode.
/// </summary>
/// <remarks>
/// The checksum of a file is <c>MD5(UTF-8 file name + file contents)</c>. The checksum of a
/// directory is <c>MD5(UTF-8 directory name + checksums of its entries)</c>, where the entries
/// are taken in ordinal order of their names.
/// </remarks>
public class SequentialChecksumCalc
{
    /// <summary>
    /// Size of the chunk used when streaming file contents into the hash.
    /// </summary>
    private const int ReadBufferSize = 81920;

    /// <summary>
    /// Asynchronously calculates a deterministic checksum of a directory, one entry at a time.
    /// </summary>
    /// <param name="directoryPath">
    /// Full or relative path to the directory for which the hash sum should be calculated.
    /// </param>
    /// <param name="cancellationToken">
    /// Operation cancellation token. Allows you to abort a lengthy calculation.
    /// </param>
    /// <returns>A task whose result is the MD5 checksum of the directory.</returns>
    /// <exception cref="ArgumentException"><paramref name="directoryPath"/> is null, empty or whitespace.</exception>
    /// <exception cref="DirectoryNotFoundException">The directory does not exist.</exception>
    public async Task<byte[]> ComputeChecksumAsync(string directoryPath, CancellationToken cancellationToken = default)
    {
        ArgumentException.ThrowIfNullOrWhiteSpace(directoryPath);

        var fullPath = Path.GetFullPath(directoryPath);

        if (!Directory.Exists(fullPath))
        {
            throw new DirectoryNotFoundException($"Directory not found: {fullPath}");
        }

        return await ComputeDirectoryHashAsync(fullPath, cancellationToken).ConfigureAwait(false);
    }

    /// <summary>
    /// Asynchronously calculates the checksum and returns it as a Base64 string.
    /// </summary>
    /// <param name="directoryPath">
    /// Full or relative path to the directory.
    /// </param>
    /// <param name="cancellationToken">
    /// Operation cancellation token.
    /// </param>
    /// <returns>A task whose result is the Base64 encoding of the MD5 checksum.</returns>
    public async Task<string> ComputeChecksumBase64Async(string directoryPath, CancellationToken cancellationToken = default)
    {
        var hash = await this.ComputeChecksumAsync(directoryPath, cancellationToken).ConfigureAwait(false);

        return Convert.ToBase64String(hash);
    }

    /// <summary>
    /// Hashes a directory: its name followed by the hashes of its entries, computed one by one.
    /// </summary>
    private static async Task<byte[]> ComputeDirectoryHashAsync(string directoryPath, CancellationToken cancellationToken)
    {
        // A path such as a drive root has no file name; fall back to the path itself.
        var name = Path.GetFileName(directoryPath);
        if (string.IsNullOrEmpty(name))
        {
            name = directoryPath;
        }

        // Ordinal ordering by name keeps the result independent of enumeration order.
        var entries = Directory.GetFileSystemEntries(directoryPath)
            .OrderBy(Path.GetFileName, StringComparer.Ordinal)
            .ToArray();

        // A hasher created per call replaces the former shared MD5 field, which was never
        // disposed. Feeding the parts incrementally yields the same digest as hashing
        // their concatenation.
        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
        hasher.AppendData(Encoding.UTF8.GetBytes(name));

        foreach (var entry in entries)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var childHash = Directory.Exists(entry)
                ? await ComputeDirectoryHashAsync(entry, cancellationToken).ConfigureAwait(false)
                : await ComputeFileHashAsync(entry, cancellationToken).ConfigureAwait(false);

            hasher.AppendData(childHash);
        }

        return hasher.GetHashAndReset();
    }

    /// <summary>
    /// Hashes a single file: its name followed by its contents.
    /// </summary>
    private static async Task<byte[]> ComputeFileHashAsync(string filePath, CancellationToken cancellationToken)
    {
        var nameBytes = Encoding.UTF8.GetBytes(Path.GetFileName(filePath));

        await using var stream = new FileStream(
            filePath,
            FileMode.Open,
            FileAccess.Read,
            FileShare.Read,
            bufferSize: 4096,
            FileOptions.Asynchronous | FileOptions.SequentialScan);

        using var hasher = IncrementalHash.CreateHash(HashAlgorithmName.MD5);
        hasher.AppendData(nameBytes);

        // Stream the file in fixed-size chunks instead of loading it into memory,
        // so memory use does not grow with file size and files over 2 GiB work.
        var buffer = new byte[ReadBufferSize];
        int bytesRead;
        while ((bytesRead = await stream.ReadAsync(buffer.AsMemory(), cancellationToken).ConfigureAwait(false)) > 0)
        {
            hasher.AppendData(buffer, 0, bytesRead);
        }

        return hasher.GetHashAndReset();
    }
}
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Тут довольно много чего можно унифицировать с параллельной версией (в т.ч. по смыслу — хеш от одного файла всё равно считается последовательно всегда), так что можно было применить немного архитектурной магии (паттерны "Стратегия", "Шаблонный метод" и т.п.) и сократить размер кода в полтора раза. Архитектурную магию мы ещё не проходили, так что это не ошибка, но можно было хоть попытаться :) |
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| { | ||
| "$schema": "https://raw.githubusercontent.com/DotNetAnalyzers/StyleCopAnalyzers/master/StyleCop.Analyzers/StyleCop.Analyzers/Settings/stylecop.schema.json", | ||
| "settings": { | ||
| "documentationRules": { | ||
| "companyName": "khusainovilas", | ||
| "copyrightText": "Copyright (c) {companyName}. All rights reserved." | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Я бы такими низкоуровневыми вещами не занимался, а оставил бы это штатному пулу потоков. Это не ошибка, но в целом пустая трата усилий.