@@ -2,6 +2,7 @@ namespace Microsoft.ComponentDetection.Detectors.Pip;
2
2
3
3
using System ;
4
4
using System . Collections . Generic ;
5
+ using System . Collections . Immutable ;
5
6
using System . Diagnostics ;
6
7
using System . IO ;
7
8
using System . Linq ;
@@ -158,6 +159,10 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
158
159
}
159
160
160
161
var stopwatch = Stopwatch . StartNew ( ) ;
162
+ using var pipReportTypeRecord = new PipReportTypeTelemetryRecord
163
+ {
164
+ FilePath = file . Location ,
165
+ } ;
161
166
162
167
// Search for a pre-generated pip report file in the same directory as the file being scanned.
163
168
var fileParentDirectory = Path . GetDirectoryName ( file . Location ) ;
@@ -190,12 +195,27 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
190
195
this . Logger . LogInformation ( "PipReport: Using pre-generated pip report '{ReportFile}' for package file '{File}'." , existingReport . FullName , file . Location ) ;
191
196
var reportOutput = await this . fileUtilityService . ReadAllTextAsync ( existingReport ) ;
192
197
var report = JsonConvert . DeserializeObject < PipInstallationReport > ( reportOutput ) ;
193
- reports . Add ( report ) ;
198
+
199
+ if ( await this . IsValidPreGeneratedReportAsync ( report , pythonExePath , file . Location ) )
200
+ {
201
+ reports . Add ( report ) ;
202
+ }
203
+ else
204
+ {
205
+ this . Logger . LogInformation (
206
+ "PipReport: Pre-generated pip report '{ReportFile}' is invalid. Did not contain all requested components in package file '{File}'." ,
207
+ existingReport . FullName ,
208
+ file . Location ) ;
209
+ }
194
210
}
195
211
}
196
- else
212
+
213
+ var foundPreGeneratedReport = reports . Any ( ) ;
214
+ pipReportTypeRecord . PreGenerated = foundPreGeneratedReport ;
215
+ if ( ! foundPreGeneratedReport )
197
216
{
198
217
this . Logger . LogInformation ( "PipReport: Generating pip installation report for {File}" , file . Location ) ;
218
+ pipReportTypeRecord . PreGenerated = false ;
199
219
200
220
// create linked cancellation token that will cancel if the file level timeout is reached, or if the parent token is cancelled.
201
221
// default to only using parent token if the env var is not set or is invalid
@@ -240,12 +260,6 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
240
260
return ;
241
261
}
242
262
243
- this . Logger . LogInformation (
244
- "PipReport: Pip installation report for {File} completed in {TotalSeconds} seconds with {PkgCount} detected packages." ,
245
- file . Location ,
246
- stopwatch . ElapsedMilliseconds / 1000.0 ,
247
- report . InstallItems ? . Length ?? 0 ) ;
248
-
249
263
// Now that all installed packages are known, we can build a graph of the dependencies.
250
264
if ( report . InstallItems is not null )
251
265
{
@@ -254,6 +268,14 @@ protected override async Task OnFileFoundAsync(ProcessRequest processRequest, ID
254
268
}
255
269
}
256
270
271
+ var packageCount = singleFileComponentRecorder . GetDetectedComponents ( ) ? . Keys ? . ToImmutableHashSet ( ) . Count ?? 0 ;
272
+ pipReportTypeRecord . PackageCount = packageCount ;
273
+ this . Logger . LogInformation (
274
+ "PipReport: Pip installation report for {File} completed in {TotalSeconds} seconds with {PkgCount} detected packages." ,
275
+ file . Location ,
276
+ stopwatch . ElapsedMilliseconds / 1000.0 ,
277
+ packageCount ) ;
278
+
257
279
stopwatch . Stop ( ) ;
258
280
}
259
281
catch ( Exception e )
@@ -421,12 +443,9 @@ private async Task RegisterExplicitComponentsInFileAsync(
421
443
return ;
422
444
}
423
445
424
- var listedPackage = initialPackages . Where ( tuple => tuple . PackageString != null )
425
- . Select ( tuple => tuple . PackageString )
426
- . Where ( x => ! string . IsNullOrWhiteSpace ( x ) )
427
- . Select ( x => new PipDependencySpecification ( x ) )
428
- . Where ( x => ! x . PackageIsUnsafe ( ) )
429
- . Where ( x => x . PackageConditionsMet ( this . pythonResolver . GetPythonEnvironmentVariables ( ) ) )
446
+ var listedPackage = SharedPipUtilities . ParsedPackagesToPipDependencies (
447
+ initialPackages ,
448
+ this . pythonResolver . GetPythonEnvironmentVariables ( ) )
430
449
. ToList ( ) ;
431
450
432
451
listedPackage . Select ( x => ( x . Name , Version : x . GetHighestExplicitPackageVersion ( ) ) )
@@ -442,6 +461,35 @@ private async Task RegisterExplicitComponentsInFileAsync(
442
461
. ForEach ( gitComponent => recorder . RegisterUsage ( gitComponent , isExplicitReferencedDependency : true ) ) ;
443
462
}
444
463
464
/// <summary>
/// Confirms that the detected report at least contains all of the packages directly requested
/// in the pip file. This prevents invalid reports from being used to create the dependency graph.
/// </summary>
/// <param name="report">The deserialized pre-generated pip installation report; may be null.</param>
/// <param name="pythonExePath">Path to the python executable, forwarded to the package file parser.</param>
/// <param name="filePath">Location of the package file the report is validated against.</param>
/// <returns>
/// <c>true</c> when every package listed in the file appears among the report's requested install items;
/// <c>false</c> otherwise, including when the report has no install items or validation throws.
/// </returns>
private async Task<bool> IsValidPreGeneratedReportAsync(PipInstallationReport report, string pythonExePath, string filePath)
{
    // A report without install items cannot be validated. Reject it up front instead of
    // relying on a NullReferenceException being swallowed by the catch block below.
    if (report?.InstallItems is null)
    {
        return false;
    }

    // Python distribution names are compared case-insensitively, with runs of '-', '_' and '.'
    // treated as equivalent (PyPA name normalization). Without this, "Flask" in the package file
    // and "flask" in the report would be seen as different packages.
    static string NormalizeName(string name) =>
        string.Join("-", name.Split(new[] { '-', '_', '.' }, StringSplitOptions.RemoveEmptyEntries))
            .ToLowerInvariant();

    try
    {
        var initialPackages = await this.pythonCommandService.ParseFileAsync(filePath, pythonExePath);
        var listedPackages = SharedPipUtilities.ParsedPackagesToPipDependencies(
                initialPackages,
                this.pythonResolver.GetPythonEnvironmentVariables())
            .Select(x => x.Name)
            .Where(name => !string.IsNullOrWhiteSpace(name))
            .Select(NormalizeName)
            .ToImmutableHashSet(StringComparer.Ordinal);

        // Entries with missing metadata are skipped rather than aborting the whole validation;
        // a listed package that only matches such an entry still fails the subset check.
        var reportRequestedPackages = report.InstallItems
            .Where(package => package is not null && package.Requested)
            .Select(package => package.Metadata?.Name)
            .Where(name => !string.IsNullOrWhiteSpace(name))
            .Select(NormalizeName)
            .ToImmutableHashSet(StringComparer.Ordinal);

        return listedPackages.IsSubsetOf(reportRequestedPackages);
    }
    catch (Exception e)
    {
        // Validation is best-effort: any failure means the pre-generated report is not trusted.
        this.Logger.LogWarning(e, "PipReport: Failed to validate pre-generated report for {File}", filePath);
        return false;
    }
}
492
+
445
493
private PipReportOverrideBehavior GetPipReportOverrideBehavior ( )
446
494
{
447
495
if ( ! this . envVarService . DoesEnvironmentVariableExist ( PipReportOverrideBehaviorEnvVar ) )
0 commit comments