diff --git a/private/bufpkg/bufmodule/added_module.go b/private/bufpkg/bufmodule/added_module.go index bcb6a21ff7..3e6d0e3d26 100644 --- a/private/bufpkg/bufmodule/added_module.go +++ b/private/bufpkg/bufmodule/added_module.go @@ -182,16 +182,51 @@ func (a *addedModule) ToModule( // e.proto // f.proto // g.proto + // module-d:c1 + // h.proto // // Then, you have this dependency graph: // // module-a -> module-b:c1, module-c:c2 // module-b:c1 -> module-c:c1 + // module-b:c2 -> module-c:c2 + // module-c:c1 -> module-d:c1 // - // Note that module-b depends on an earlier commit of module-c than module-a does. + // Note that module-a depends on module-b:c1, which in turn depends on module-c:c1, but module-a also depends on + // module-c:c2. When resolving the dependency graph, module-c:c1 is replaced with the newer module-c:c2. The commit + // module-c:c2 doesn't depend on module-d, so module-d:c1 is dropped. Graphically, this graph: // - // If we were to just use the dependencies in the ModuleSet to compute the digest, the following - // would happen as a calculation: + // +-------------+ +-------------+ +-------------+ +-------------+ + // | | | | | | | | + // | module-a:c1 |--->| module-b:c1 |--->| module-c:c1 |--->| module-d:c1 | + // | | | | | | | | + // +-------------+ +-------------+ +-------------+ +-------------+ + // \ + // \ + // \ + // \ +-------------+ + // \ | | + // ------>| module-c:c2 | + // | | + // +-------------+ + // + // is pruned to this graph: + // + // +-------------+ +-------------+ + // | | | | + // | module-a:c1 |--->| module-b:c1 | + // | | | | + // +-------------+ +-------------+ + // \ | + // \ | + // \ v + // \ +-------------+ + // \ | | + // ------>| module-c:c2 | + // | | + // +-------------+ + // + // Calculating the b5 digest for module-a would result in the calculation: // // DIGEST(module-a) = digest( // // module-a contents @@ -220,11 +255,12 @@ func (a *addedModule) ToModule( // ), // ) // - // Note that to compute the digest of module-b:c1, we would actually use the digest of - // module-c:c2, as opposed to module-c:c1, since within the ModuleSet, we would resolve - // to use module-c:c2 instead of module-c:c1. + // Note that to compute the digest of module-b:c1 within the context of module-a, we would actually + // - use the digest of module-c:c2, as opposed to module-c:c1, since within the ModuleSet, we would resolve + // to use module-c:c2 instead of module-c:c1; + // - not even consider the digest of module-d:c1, since within the ModuleSet, module-d doesn't exist. // - // We should be using module-c:c1 to compute the digest of module-b:c1: + // We should be using module-c:c1 and module-d:c1 to compute the digest of module-b:c1, always: // // DIGEST(module-a) = digest( // // module-a contents @@ -241,6 +277,11 @@ func (a *addedModule) ToModule( // README.md, // e.proto, // f.proto, + // // module-d:c1 digest + // DIGEST( + // README.md, + // h.proto, + // ), // ), // ), // // module-c:c2 digest @@ -252,61 +293,44 @@ func (a *addedModule) ToModule( // ), // ) // - // To accomplish this, we need to take the dependencies of the declared ModuleKeys (ie what - // the Module actually says is in its buf.lock). This function enables us to do that for - // digest calculations. Within the Module, we say that if we get a remote Module, use the - // declared ModuleKeys instead of whatever Module we have resolved to for a given ModuleFullName. - getDeclaredDepModuleKeysB5 := func() ([]ModuleKey, error) { - moduleData, err := getModuleData() - if err != nil { - return nil, err - } - declaredDepModuleKeys, err := moduleData.DeclaredDepModuleKeys() + // However, module-d:c1 is not even present in the ModuleSet because it is not a dependency of module-c:c2. + // Effectively, when calculating the digest of a remote module, we need to be certain of two things: + // 1. That all dependencies of the remote module are in the ModuleSet, at exactly the commit that the remote module + // requires. + // 2. That (1) also holds transitively for dependencies of the remote module. + // + // These properties are impossible to guarantee all the time. For that reason, we cannot predictably calculate the B5 + // digest of a remote module within a different ModuleSet from the one it was originally calculated. Instead, we must + // defer to its pre-calculated digest (i.e., the one in the ModuleKey). + // + // Note that this _does not_ side-step tamper-proofing. Tamper-proofing occurs at the ModuleData level. + getRemoteB5Digest := func() (Digest, error) { + remoteDigest, err := a.remoteModuleKey.Digest() if err != nil { return nil, err } - if len(declaredDepModuleKeys) == 0 { - return nil, nil - } - var digestType DigestType - for i, moduleKey := range declaredDepModuleKeys { - digest, err := moduleKey.Digest() - if err != nil { - return nil, err - } - if i == 0 { - digestType = digest.Type() - } else if digestType != digest.Type() { - return nil, syserror.Newf("multiple digest types found in DeclaredDepModuleKeys: %v, %v", digestType, digest.Type()) - } - } + digestType := remoteDigest.Type() switch digestType { case DigestTypeB4: - // The declared ModuleKey dependencies for a commit may be stored in v1 buf.lock file, - // in which case they will use B4 digests. B4 digests aren't allowed to be used as - // input to the B5 digest calculation, so we perform a call to convert all ModuleKeys - // from B4 to B5 by using the commit provider. - commitKeysToFetch := make([]CommitKey, len(declaredDepModuleKeys)) - for i, declaredDepModuleKey := range declaredDepModuleKeys { - commitKey, err := NewCommitKey(declaredDepModuleKey.ModuleFullName().Registry(), declaredDepModuleKey.CommitID(), DigestTypeB5) - if err != nil { - return nil, err - } - commitKeysToFetch[i] = commitKey + commitKey, err := NewCommitKey( + a.remoteModuleKey.ModuleFullName().Registry(), + a.remoteModuleKey.CommitID(), + DigestTypeB5, + ) + if err != nil { + return nil, err } - commits, err := commitProvider.GetCommitsForCommitKeys(ctx, commitKeysToFetch) + commits, err := commitProvider.GetCommitsForCommitKeys(ctx, []CommitKey{commitKey}) if err != nil { return nil, err } - if len(commits) != len(commitKeysToFetch) { - return nil, syserror.Newf("expected %d commit(s), got %d", commitKeysToFetch, len(commits)) + if len(commits) != 1 { + return nil, syserror.Newf("expected %d commit(s), got %d", 1, len(commits)) } - return slicesext.Map(commits, func(commit Commit) ModuleKey { - return commit.ModuleKey() - }), nil + return commits[0].ModuleKey().Digest() case DigestTypeB5: - // No need to fetch b5 digests - we've already got them stored in the module's declared dependencies. - return declaredDepModuleKeys, nil + // We already have a remote B5 digest, simply return it. + return remoteDigest, nil default: return nil, syserror.Newf("unsupported digest type: %v", digestType) } @@ -321,7 +345,7 @@ func (a *addedModule) ToModule( false, getV1BufYAMLObjectData, getV1BufLockObjectData, - getDeclaredDepModuleKeysB5, + getRemoteB5Digest, a.remoteTargetPaths, a.remoteTargetExcludePaths, "", diff --git a/private/bufpkg/bufmodule/module.go b/private/bufpkg/bufmodule/module.go index 18eb3d3940..a46c360c7b 100644 --- a/private/bufpkg/bufmodule/module.go +++ b/private/bufpkg/bufmodule/module.go @@ -241,16 +241,16 @@ func ModuleDirectModuleDeps(module Module) ([]ModuleDep, error) { type module struct { ModuleReadBucket - ctx context.Context - getBucket func() (storage.ReadBucket, error) - bucketID string - moduleFullName ModuleFullName - commitID uuid.UUID - isTarget bool - isLocal bool - getV1BufYAMLObjectData func() (ObjectData, error) - getV1BufLockObjectData func() (ObjectData, error) - getDeclaredDepModuleKeysB5 func() ([]ModuleKey, error) + ctx context.Context + getBucket func() (storage.ReadBucket, error) + bucketID string + moduleFullName ModuleFullName + commitID uuid.UUID + isTarget bool + isLocal bool + getV1BufYAMLObjectData func() (ObjectData, error) + getV1BufLockObjectData func() (ObjectData, error) + getRemoteB5Digest func() (Digest, error) moduleSet ModuleSet @@ -270,7 +270,7 @@ func newModule( isLocal bool, getV1BufYAMLObjectData func() (ObjectData, error), getV1BufLockObjectData func() (ObjectData, error), - getDeclaredDepModuleKeysB5 func() ([]ModuleKey, error), + getRemoteB5Digest func() (Digest, error), targetPaths []string, targetExcludePaths []string, protoFileTargetPath string, @@ -313,16 +313,16 @@ func newModule( } module := &module{ - ctx: ctx, - getBucket: syncOnceValuesGetBucketWithStorageMatcherApplied, - bucketID: bucketID, - moduleFullName: moduleFullName, - commitID: commitID, - isTarget: isTarget, - isLocal: isLocal, - getV1BufYAMLObjectData: syncext.OnceValues(getV1BufYAMLObjectData), - getV1BufLockObjectData: syncext.OnceValues(getV1BufLockObjectData), - getDeclaredDepModuleKeysB5: syncext.OnceValues(getDeclaredDepModuleKeysB5), + ctx: ctx, + getBucket: syncOnceValuesGetBucketWithStorageMatcherApplied, + bucketID: bucketID, + moduleFullName: moduleFullName, + commitID: commitID, + isTarget: isTarget, + isLocal: isLocal, + getV1BufYAMLObjectData: syncext.OnceValues(getV1BufYAMLObjectData), + getV1BufLockObjectData: syncext.OnceValues(getV1BufLockObjectData), + getRemoteB5Digest: syncext.OnceValues(getRemoteB5Digest), } moduleReadBucket, err := newModuleReadBucketForModule( ctx, @@ -399,16 +399,15 @@ func (m *module) ModuleSet() ModuleSet { func (m *module) withIsTarget(isTarget bool) (Module, error) { // We don't just call newModule directly as we don't want to double syncext.OnceValues stuff. newModule := &module{ - ctx: m.ctx, - getBucket: m.getBucket, - bucketID: m.bucketID, - moduleFullName: m.moduleFullName, - commitID: m.commitID, - isTarget: isTarget, - isLocal: m.isLocal, - getV1BufYAMLObjectData: m.getV1BufYAMLObjectData, - getV1BufLockObjectData: m.getV1BufLockObjectData, - getDeclaredDepModuleKeysB5: m.getDeclaredDepModuleKeysB5, + ctx: m.ctx, + getBucket: m.getBucket, + bucketID: m.bucketID, + moduleFullName: m.moduleFullName, + commitID: m.commitID, + isTarget: isTarget, + isLocal: m.isLocal, + getV1BufYAMLObjectData: m.getV1BufYAMLObjectData, + getV1BufLockObjectData: m.getV1BufLockObjectData, } moduleReadBucket, ok := m.ModuleReadBucket.(*moduleReadBucket) if !ok { @@ -452,34 +451,22 @@ func newGetDigestFuncForModuleAndDigestType(module *module, digestType DigestTyp } return getB4Digest(module.ctx, bucket, v1BufYAMLObjectData, v1BufLockObjectData) case DigestTypeB5: + if !module.isLocal { + // For remote modules to have consistent B5 digests, they must not change their + // dependencies -- the considered dependencies must be exactly the dependencies the remote module was originally + // built with. This means that we cannot consider the modules within the current ModuleSet, as those may not + // match. Instead, there are two ways to do this deterministically: + // 1. Construct a new ModuleSet with just this module and its declared deps. + // 2. Fetch the module's digest remotely from the BSR. + // + // Given that we are not tamper proofing here (tamper-proofing is done at the ModuleData level), we opt to fetch + // the digest remotely from the BSR. + return module.getRemoteB5Digest() + } moduleDeps, err := module.ModuleDeps() if err != nil { return nil, err } - // For remote modules to have consistent B5 digests, they must not change the digests of their - // dependencies based on the local workspace. Use the pruned b5 module keys from - // ModuleData.DeclaredDepModuleKeys to calculate the digest. - if !module.isLocal { - declaredDepModuleKeys, err := module.getDeclaredDepModuleKeysB5() - if err != nil { - return nil, err - } - moduleDepFullNames := make(map[string]struct{}, len(moduleDeps)) - for _, dep := range moduleDeps { - fullName := dep.ModuleFullName() - if fullName == nil { - return nil, syserror.Newf("remote module dependencies should have full names") - } - moduleDepFullNames[fullName.String()] = struct{}{} - } - prunedDepModuleKeys := make([]ModuleKey, 0, len(declaredDepModuleKeys)) - for _, dep := range declaredDepModuleKeys { - if _, ok := moduleDepFullNames[dep.ModuleFullName().String()]; ok { - prunedDepModuleKeys = append(prunedDepModuleKeys, dep) - } - } - return getB5DigestForBucketAndDepModuleKeys(module.ctx, bucket, prunedDepModuleKeys) - } return getB5DigestForBucketAndModuleDeps(module.ctx, bucket, moduleDeps) default: return nil, syserror.Newf("unknown DigestType: %v", digestType) diff --git a/private/bufpkg/bufmodule/module_set_builder.go b/private/bufpkg/bufmodule/module_set_builder.go index fe39aa3755..1cbb9df86e 100644 --- a/private/bufpkg/bufmodule/module_set_builder.go +++ b/private/bufpkg/bufmodule/module_set_builder.go @@ -335,10 +335,10 @@ func (b *moduleSetBuilder) AddLocalModule( true, func() (ObjectData, error) { return localModuleOptions.v1BufYAMLObjectData, nil }, func() (ObjectData, error) { return localModuleOptions.v1BufLockObjectData, nil }, - func() ([]ModuleKey, error) { + func() (Digest, error) { // See comment in added_module.go when we construct remote Modules for why // we have this function in the first place. - return nil, syserror.Newf("getDeclaredDepModuleKeysB5 should never be called for a local Module") + return nil, syserror.Newf("getRemoteB5Digest should never be called for a local Module") }, localModuleOptions.targetPaths, localModuleOptions.targetExcludePaths,