Skip to content

Commit f4d84a8

Browse files
authored
Pauldorsch/bugfix invalid pipreport files (#1205)
* ignore pregenerated pipreports that don't cover the correct set of dependencies * add validation to the pre-generated pipreport to prevent underdetection for overridden reports * dispose of telemetry object * move re-used code to a common utility method
1 parent 13744ee commit f4d84a8

File tree

12 files changed

+8700
-131
lines changed

12 files changed

+8700
-131
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
namespace Microsoft.ComponentDetection.Common.Telemetry.Records;
2+
3+
public class PipReportTypeTelemetryRecord : BaseDetectionTelemetryRecord
4+
{
5+
public override string RecordName => "PipReportType";
6+
7+
public bool PreGenerated { get; set; }
8+
9+
public string FilePath { get; set; }
10+
11+
public int PackageCount { get; set; }
12+
}

src/Microsoft.ComponentDetection.Detectors/pip/PipComponentDetector.cs

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,12 +70,9 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
7070
try
7171
{
7272
var initialPackages = await this.pythonCommandService.ParseFileAsync(file.Location, pythonExePath);
73-
var listedPackage = initialPackages.Where(tuple => tuple.PackageString != null)
74-
.Select(tuple => tuple.PackageString)
75-
.Where(x => !string.IsNullOrWhiteSpace(x))
76-
.Select(x => new PipDependencySpecification(x))
77-
.Where(x => !x.PackageIsUnsafe())
78-
.Where(x => x.PackageConditionsMet(this.pythonResolver.GetPythonEnvironmentVariables()))
73+
var listedPackage = SharedPipUtilities.ParsedPackagesToPipDependencies(
74+
initialPackages,
75+
this.pythonResolver.GetPythonEnvironmentVariables())
7976
.ToList();
8077

8178
var roots = await this.pythonResolver.ResolveRootsAsync(singleFileComponentRecorder, listedPackage);

src/Microsoft.ComponentDetection.Detectors/pip/PipReportComponentDetector.cs

Lines changed: 62 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
22

33
using System;
44
using System.Collections.Generic;
5+
using System.Collections.Immutable;
56
using System.Diagnostics;
67
using System.IO;
78
using System.Linq;
@@ -158,6 +159,10 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
158159
}
159160

160161
var stopwatch = Stopwatch.StartNew();
162+
using var pipReportTypeRecord = new PipReportTypeTelemetryRecord
163+
{
164+
FilePath = file.Location,
165+
};
161166

162167
// Search for a pre-generated pip report file in the same directory as the file being scanned.
163168
var fileParentDirectory = Path.GetDirectoryName(file.Location);
@@ -190,12 +195,27 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
190195
this.Logger.LogInformation("PipReport: Using pre-generated pip report '{ReportFile}' for package file '{File}'.", existingReport.FullName, file.Location);
191196
var reportOutput = await this.fileUtilityService.ReadAllTextAsync(existingReport);
192197
var report = JsonConvert.DeserializeObject<PipInstallationReport>(reportOutput);
193-
reports.Add(report);
198+
199+
if (await this.IsValidPreGeneratedReportAsync(report, pythonExePath, file.Location))
200+
{
201+
reports.Add(report);
202+
}
203+
else
204+
{
205+
this.Logger.LogInformation(
206+
"PipReport: Pre-generated pip report '{ReportFile}' is invalid. Did not contain all requested components in package file '{File}'.",
207+
existingReport.FullName,
208+
file.Location);
209+
}
194210
}
195211
}
196-
else
212+
213+
var foundPreGeneratedReport = reports.Any();
214+
pipReportTypeRecord.PreGenerated = foundPreGeneratedReport;
215+
if (!foundPreGeneratedReport)
197216
{
198217
this.Logger.LogInformation("PipReport: Generating pip installation report for {File}", file.Location);
218+
pipReportTypeRecord.PreGenerated = false;
199219

200220
// create linked cancellation token that will cancel if the file level timeout is reached, or if the parent token is cancelled.
201221
// default to only using parent token if the env var is not set or is invalid
@@ -240,12 +260,6 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
240260
return;
241261
}
242262

243-
this.Logger.LogInformation(
244-
"PipReport: Pip installation report for {File} completed in {TotalSeconds} seconds with {PkgCount} detected packages.",
245-
file.Location,
246-
stopwatch.ElapsedMilliseconds / 1000.0,
247-
report.InstallItems?.Length ?? 0);
248-
249263
// Now that all installed packages are known, we can build a graph of the dependencies.
250264
if (report.InstallItems is not null)
251265
{
@@ -254,6 +268,14 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
254268
}
255269
}
256270

271+
var packageCount = singleFileComponentRecorder.GetDetectedComponents()?.Keys?.ToImmutableHashSet().Count ?? 0;
272+
pipReportTypeRecord.PackageCount = packageCount;
273+
this.Logger.LogInformation(
274+
"PipReport: Pip installation report for {File} completed in {TotalSeconds} seconds with {PkgCount} detected packages.",
275+
file.Location,
276+
stopwatch.ElapsedMilliseconds / 1000.0,
277+
packageCount);
278+
257279
stopwatch.Stop();
258280
}
259281
catch (Exception e)
@@ -421,12 +443,9 @@ private async Task RegisterExplicitComponentsInFileAsync(
421443
return;
422444
}
423445

424-
var listedPackage = initialPackages.Where(tuple => tuple.PackageString != null)
425-
.Select(tuple => tuple.PackageString)
426-
.Where(x => !string.IsNullOrWhiteSpace(x))
427-
.Select(x => new PipDependencySpecification(x))
428-
.Where(x => !x.PackageIsUnsafe())
429-
.Where(x => x.PackageConditionsMet(this.pythonResolver.GetPythonEnvironmentVariables()))
446+
var listedPackage = SharedPipUtilities.ParsedPackagesToPipDependencies(
447+
initialPackages,
448+
this.pythonResolver.GetPythonEnvironmentVariables())
430449
.ToList();
431450

432451
listedPackage.Select(x => (x.Name, Version: x.GetHighestExplicitPackageVersion()))
@@ -442,6 +461,35 @@ private async Task RegisterExplicitComponentsInFileAsync(
442461
.ForEach(gitComponent => recorder.RegisterUsage(gitComponent, isExplicitReferencedDependency: true));
443462
}
444463

464+
/// <summary>
465+
/// Confirms that the detected report at least contains all of the packages directly requested
466+
/// in the pip file. This prevents invalid reports from being used to create the dependency graph.
467+
/// </summary>
468+
private async Task<bool> IsValidPreGeneratedReportAsync(PipInstallationReport report, string pythonExePath, string filePath)
469+
{
470+
try
471+
{
472+
var initialPackages = await this.pythonCommandService.ParseFileAsync(filePath, pythonExePath);
473+
var listedPackage = SharedPipUtilities.ParsedPackagesToPipDependencies(
474+
initialPackages,
475+
this.pythonResolver.GetPythonEnvironmentVariables())
476+
.Select(x => x.Name)
477+
.ToImmutableSortedSet();
478+
479+
var reportRequestedPackages = report.InstallItems
480+
.Where(package => package.Requested)
481+
.Select(package => package.Metadata.Name)
482+
.ToImmutableSortedSet();
483+
484+
return listedPackage.IsSubsetOf(reportRequestedPackages);
485+
}
486+
catch (Exception e)
487+
{
488+
this.Logger.LogWarning(e, "PipReport: Failed to validate pre-generated report for {File}", filePath);
489+
return false;
490+
}
491+
}
492+
445493
private PipReportOverrideBehavior GetPipReportOverrideBehavior()
446494
{
447495
if (!this.envVarService.DoesEnvironmentVariableExist(PipReportOverrideBehaviorEnvVar))
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
namespace Microsoft.ComponentDetection.Detectors.Pip;
2+
3+
using System.Collections.Generic;
4+
using System.Linq;
5+
using Microsoft.ComponentDetection.Contracts.TypedComponent;
6+
7+
public static class SharedPipUtilities
8+
{
9+
/// <summary>
10+
/// Converts a list of parsed packages to a list of PipDependencySpecifications. Also performs some validations,
11+
/// such as filtering out unsafe packages and checking if the package conditions are met.
12+
/// </summary>
13+
/// <param name="parsedPackages">List of packages and git components.</param>
14+
/// <param name="pythonEnvironmentVariables">Python environment specifiers.</param>
15+
/// <returns>Enumerable containing the converted, sanitized Pip dependency specs.</returns>
16+
public static IEnumerable<PipDependencySpecification> ParsedPackagesToPipDependencies(
17+
IList<(string PackageString, GitComponent Component)> parsedPackages,
18+
Dictionary<string, string> pythonEnvironmentVariables) =>
19+
parsedPackages.Where(tuple => tuple.PackageString != null)
20+
.Select(tuple => tuple.PackageString)
21+
.Where(x => !string.IsNullOrWhiteSpace(x))
22+
.Select(x => new PipDependencySpecification(x))
23+
.Where(x => !x.PackageIsUnsafe())
24+
.Where(x => x.PackageConditionsMet(pythonEnvironmentVariables));
25+
}

test/Microsoft.ComponentDetection.Detectors.Tests/Microsoft.ComponentDetection.Detectors.Tests.csproj

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@
4646
<None Update="Mocks\pip_report_single_pkg_bad_version.json">
4747
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
4848
</None>
49+
<None Update="Mocks\Invalid\invalid.component-detection-pip-report.json">
50+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
51+
</None>
4952
<None Update="Mocks\test.component-detection-pip-report.json">
5053
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
5154
</None>

test/Microsoft.ComponentDetection.Detectors.Tests/Mocks/Invalid/invalid.component-detection-pip-report.json

Lines changed: 251 additions & 0 deletions
Large diffs are not rendered by default.

test/Microsoft.ComponentDetection.Detectors.Tests/PipReportComponentDetectorTests.cs

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -535,6 +535,10 @@ public async Task TestPipReportDetector_OverrideSkipAsync()
535535
[TestMethod]
536536
public async Task TestPipReportDetector_SimplePregeneratedFile_Async()
537537
{
538+
this.pythonCommandService
539+
.Setup(x => x.ParseFileAsync(It.IsAny<string>(), It.IsAny<string>()))
540+
.ReturnsAsync(new List<(string PackageString, GitComponent Component)> { ("requests", null) });
541+
538542
var file1 = Path.Join(Directory.GetCurrentDirectory(), "Mocks", "requirements.txt");
539543
var pregeneratedFile = Path.Join(Directory.GetCurrentDirectory(), "Mocks", "test.component-detection-pip-report.json");
540544

@@ -561,6 +565,59 @@ public async Task TestPipReportDetector_SimplePregeneratedFile_Async()
561565
idnaComponent.Version.Should().Be("3.7");
562566
}
563567

568+
[TestMethod]
569+
public async Task TestPipReportDetector_InvalidPregeneratedFile_Async()
570+
{
571+
this.pipCommandService.Setup(x => x.GenerateInstallationReportAsync(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<CancellationToken>()))
572+
.ReturnsAsync((this.simpleExtrasReport, null));
573+
574+
this.pythonCommandService
575+
.Setup(x => x.ParseFileAsync(It.IsAny<string>(), It.IsAny<string>()))
576+
.ReturnsAsync(new List<(string PackageString, GitComponent Component)> { ("requests", null) });
577+
578+
var file1 = Path.Join(Directory.GetCurrentDirectory(), "Mocks", "Invalid", "requirements.txt");
579+
580+
// this pre-generated file does not contains the 'requests' package, and so the report
581+
// validator should fail and the detector should continue as if no pre-generated file was found
582+
var pregeneratedFile = Path.Join(Directory.GetCurrentDirectory(), "Mocks", "Invalid", "invalid.component-detection-pip-report.json");
583+
584+
var (result, componentRecorder) = await this.DetectorTestUtility
585+
.WithFile("requirements.txt", string.Empty, fileLocation: file1)
586+
.ExecuteDetectorAsync();
587+
588+
// found invalid pre-generated file
589+
this.mockLogger.Verify(x => x.Log(
590+
LogLevel.Information,
591+
It.IsAny<EventId>(),
592+
It.Is<It.IsAnyType>((o, t) => o.ToString().Contains("is invalid")),
593+
It.IsAny<Exception>(),
594+
(Func<It.IsAnyType, Exception, string>)It.IsAny<object>()));
595+
596+
// fell back to generating the report itself
597+
this.mockLogger.Verify(x => x.Log(
598+
LogLevel.Information,
599+
It.IsAny<EventId>(),
600+
It.Is<It.IsAnyType>((o, t) => o.ToString().StartsWith("PipReport: Generating pip installation report")),
601+
It.IsAny<Exception>(),
602+
(Func<It.IsAnyType, Exception, string>)It.IsAny<object>()));
603+
604+
this.pipCommandService.Verify(
605+
x => x.GenerateInstallationReportAsync(It.IsAny<string>(), It.IsAny<string>(), It.IsAny<CancellationToken>()),
606+
Times.Once);
607+
608+
// verify results
609+
result.ResultCode.Should().Be(ProcessingResultCode.Success);
610+
611+
var detectedComponents = componentRecorder.GetDetectedComponents();
612+
var pipComponents = detectedComponents.Where(detectedComponent => detectedComponent.Component.Id.Contains("pip")).ToList();
613+
614+
var requestsComponent = pipComponents.Single(x => ((PipComponent)x.Component).Name.Equals("requests")).Component as PipComponent;
615+
requestsComponent.Version.Should().Be("2.32.3");
616+
617+
var idnaComponent = pipComponents.Single(x => ((PipComponent)x.Component).Name.Equals("idna")).Component as PipComponent;
618+
idnaComponent.Version.Should().Be("3.7");
619+
}
620+
564621
private List<(string PackageString, GitComponent Component)> ToGitTuple(IList<string> components)
565622
{
566623
return components.Select<string, (string, GitComponent)>(dep => (dep, null)).ToList();

0 commit comments

Comments
 (0)