Skip to content

Commit af63d65

Browse files
committed
Merged PR 12878148: bridge: Force sequential message handling for confidential containers
[cherry-picked from f81b450894206a79fff4d63182ff034ba503ebdb] This PR contains 2 commits. The first one is the fix: **bridge: Force sequential message handling for confidential containers** This fixes a vulnerability (and reduces the surface for other similar potential vulnerabilities) in confidential containers where if the host sends a mount/unmount modification request concurrently with an ongoing CreateContainer request, the host could re-order or skip image layers for the container to be started. While this could be fixed by adding mutex lock/unlock around the individual modifyMappedVirtualDisk/modifyCombinedLayers/CreateContainer functions, we decided that in order to prevent any more of this class of issues, the UVM, when running in confidential mode, should just not allow concurrent requests (with exception for any actually long-running requests, which for now is just waitProcess). The second one adds a log entry for when the processing thread blocks. This will make it easier to debug should the gcs "hung" on a request. This PR is created on ADO targeting the conf branch as this security vulnerability is not public yet. This fix should be backported to main once deployed. Related work items: #33357501, #34327300 Signed-off-by: Tingmao Wang <tingmaowang@microsoft.com>
1 parent 0c7325f commit af63d65

File tree

2 files changed

+76
-23
lines changed

2 files changed

+76
-23
lines changed

cmd/gcs/main.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"github.com/Microsoft/hcsshim/internal/log"
3232
"github.com/Microsoft/hcsshim/internal/oc"
3333
"github.com/Microsoft/hcsshim/internal/version"
34+
"github.com/Microsoft/hcsshim/pkg/amdsevsnp"
3435
"github.com/Microsoft/hcsshim/pkg/securitypolicy"
3536
)
3637

@@ -362,6 +363,10 @@ func main() {
362363
b := bridge.Bridge{
363364
Handler: mux,
364365
EnableV4: *v4,
366+
367+
// For confidential containers, we protect ourselves against attacks caused
368+
// by concurrent modifications, by processing one request at a time.
369+
ForceSequential: amdsevsnp.IsSNP(),
365370
}
366371
h := hcsv2.NewHost(rtime, tport, initialEnforcer, logWriter)
367372
// Initialize virtual pod support in the host

internal/guest/bridge/bridge.go

Lines changed: 71 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,10 @@ type Bridge struct {
177177
Handler Handler
178178
// EnableV4 enables the v4+ bridge and the schema v2+ interfaces.
179179
EnableV4 bool
180+
// Setting ForceSequential to true will force the bridge to only process one
181+
// request at a time, except for certain long-running operations (as defined
182+
// in asyncMessages).
183+
ForceSequential bool
180184

181185
// responseChan is the response channel used for both request/response
182186
// and publish notification workflows.
@@ -191,6 +195,14 @@ type Bridge struct {
191195
protVer prot.ProtocolVersion
192196
}
193197

198+
// Messages that will be processed asynchronously even in sequential mode. Note
199+
// that in sequential mode, these messages will still wait for any in-progress
200+
// non-async messages to be handled before they are processed, but once they are
201+
// "acknowledged", the rest will be done asynchronously.
202+
var alwaysAsyncMessages map[prot.MessageIdentifier]bool = map[prot.MessageIdentifier]bool{
203+
prot.ComputeSystemWaitForProcessV1: true,
204+
}
205+
194206
// AssignHandlers creates and assigns the appropriate bridge
195207
// events to be listen for and intercepted on `mux` before forwarding
196208
// to `gcs` for handling.
@@ -238,6 +250,10 @@ func (b *Bridge) ListenAndServe(bridgeIn io.ReadCloser, bridgeOut io.WriteCloser
238250
defer close(requestErrChan)
239251
defer bridgeIn.Close()
240252

253+
if b.ForceSequential {
254+
log.G(context.Background()).Info("bridge: ForceSequential enabled")
255+
}
256+
241257
// Receive bridge requests and schedule them to be processed.
242258
go func() {
243259
var recverr error
@@ -340,30 +356,36 @@ func (b *Bridge) ListenAndServe(bridgeIn io.ReadCloser, bridgeOut io.WriteCloser
340356
}()
341357
// Process each bridge request async and create the response writer.
342358
go func() {
343-
for req := range requestChan {
344-
go func(r *Request) {
345-
br := bridgeResponse{
346-
ctx: r.Context,
347-
header: &prot.MessageHeader{
348-
Type: prot.GetResponseIdentifier(r.Header.Type),
349-
ID: r.Header.ID,
350-
},
351-
}
352-
resp, err := b.Handler.ServeMsg(r)
353-
if resp == nil {
354-
resp = &prot.MessageResponseBase{}
355-
}
356-
resp.Base().ActivityID = r.ActivityID
357-
if err != nil {
358-
span := trace.FromContext(r.Context)
359-
if span != nil {
360-
oc.SetSpanStatus(span, err)
361-
}
362-
setErrorForResponseBase(resp.Base(), err, "gcs" /* moduleName */)
359+
doOneRequest := func(r *Request) {
360+
br := bridgeResponse{
361+
ctx: r.Context,
362+
header: &prot.MessageHeader{
363+
Type: prot.GetResponseIdentifier(r.Header.Type),
364+
ID: r.Header.ID,
365+
},
366+
}
367+
resp, err := b.Handler.ServeMsg(r)
368+
if resp == nil {
369+
resp = &prot.MessageResponseBase{}
370+
}
371+
resp.Base().ActivityID = r.ActivityID
372+
if err != nil {
373+
span := trace.FromContext(r.Context)
374+
if span != nil {
375+
oc.SetSpanStatus(span, err)
363376
}
364-
br.response = resp
365-
b.responseChan <- br
366-
}(req)
377+
setErrorForResponseBase(resp.Base(), err, "gcs" /* moduleName */)
378+
}
379+
br.response = resp
380+
b.responseChan <- br
381+
}
382+
383+
for req := range requestChan {
384+
if b.ForceSequential && !alwaysAsyncMessages[req.Header.Type] {
385+
runSequentialRequestHandler(req, doOneRequest)
386+
} else {
387+
go doOneRequest(req)
388+
}
367389
}
368390
}()
369391
// Process each bridge response sync. This channel is for request/response and publish workflows.
@@ -423,6 +445,32 @@ func (b *Bridge) ListenAndServe(bridgeIn io.ReadCloser, bridgeOut io.WriteCloser
423445
}
424446
}
425447

448+
// Do handleFn(r), but prints a warning if handleFn does not, or takes too long
449+
// to return.
450+
func runSequentialRequestHandler(r *Request, handleFn func(*Request)) {
451+
// Note that this is only a context used for triggering the blockage
452+
// warning, the request processing still uses r.Context. We don't want to
453+
// cancel the request handling itself when we reach the 5s timeout.
454+
timeoutCtx, cancel := context.WithTimeout(r.Context, 5*time.Second)
455+
go func() {
456+
<-timeoutCtx.Done()
457+
if errors.Is(timeoutCtx.Err(), context.DeadlineExceeded) {
458+
log.G(timeoutCtx).WithFields(logrus.Fields{
459+
// We want to log those even though we're providing r.Context, since if
460+
// the request never finishes the span end log will never get written,
461+
// and we may therefore not be able to find out about the following info
462+
// otherwise:
463+
"message-type": r.Header.Type.String(),
464+
"message-id": r.Header.ID,
465+
"activity-id": r.ActivityID,
466+
"container-id": r.ContainerID,
467+
}).Warnf("bridge: request processing thread in sequential mode blocked on the current request for more than 5 seconds")
468+
}
469+
}()
470+
defer cancel()
471+
handleFn(r)
472+
}
473+
426474
// PublishNotification writes a specific notification to the bridge.
427475
func (b *Bridge) PublishNotification(n *prot.ContainerNotification) {
428476
ctx, span := oc.StartSpan(context.Background(),

0 commit comments

Comments
 (0)