From 857c5e1b7a81de152a23289f214f5844cdb8de06 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 22 Dec 2025 15:07:45 -0800 Subject: [PATCH 01/30] adds req/rsp logging --- cmd/server/logging_middleware.go | 89 ++++++++++++++++++++++++++++++++ cmd/server/main.go | 4 ++ 2 files changed, 93 insertions(+) create mode 100644 cmd/server/logging_middleware.go diff --git a/cmd/server/logging_middleware.go b/cmd/server/logging_middleware.go new file mode 100644 index 0000000..b550095 --- /dev/null +++ b/cmd/server/logging_middleware.go @@ -0,0 +1,89 @@ +// Package main contains the HTTP server entrypoint and middleware, including +// logging middleware that redacts sensitive authentication headers and logs +// both request and response metadata. +package main + +import ( + "bytes" + "io" + "log" + + "github.com/gin-gonic/gin" +) + +// bodyLogWriter wraps gin.ResponseWriter to capture the response body +// while still writing it through to the underlying writer. +// +// It is used by logging middleware to log the response payload after +// the handler has executed. +type bodyLogWriter struct { + gin.ResponseWriter + // body accumulates the bytes written to the response. + body *bytes.Buffer +} + +// Write implements the io.Writer interface for bodyLogWriter. +// +// It appends the written bytes to the internal buffer so they can be +// logged later, and then forwards the write to the embedded +// gin.ResponseWriter so the client still receives the response. +func (w bodyLogWriter) Write(b []byte) (int, error) { + // Capture response body. + w.body.Write(b) + // Write through to original writer. + return w.ResponseWriter.Write(b) +} + +// RequestLogRedactingAuth returns a gin.HandlerFunc middleware that logs +// incoming HTTP requests and outgoing HTTP responses while redacting +// sensitive authentication headers. +// +// Behavior: +// - Clones the incoming request headers and replaces values for +// `Authorization` and `X-Api-Key` with the literal value `[REDACTED]` +// before logging. +// - Logs the request method, path, query string, and sanitized headers +// at the start of the request. +// - Wraps gin's ResponseWriter to capture the response body. +// - After the handler chain completes (c.Next), logs the response status +// code, path, and the captured response body. +// +// This middleware is best placed early in the gin engine's middleware +// chain so that it can observe all modifications performed by subsequent +// handlers. +func RequestLogRedactingAuth() gin.HandlerFunc { + return func(c *gin.Context) { + // Clone and sanitize headers to avoid logging secrets. + h := c.Request.Header.Clone() + if h.Get("Authorization") != "" { + h.Set("Authorization", "[REDACTED]") + } + if h.Get("X-Api-Key") != "" { + h.Set("X-Api-Key", "[REDACTED]") + } + + // Read and log request body, then restore it. + var reqBodyBuf bytes.Buffer + if c.Request.Body != nil { + _, _ = io.Copy(&reqBodyBuf, c.Request.Body) + // Restore body for handlers. + c.Request.Body = io.NopCloser(bytes.NewReader(reqBodyBuf.Bytes())) + } + + log.Printf("http request method=%s path=%s query=%s headers=%v body=%s", + c.Request.Method, c.Request.URL.Path, c.Request.URL.RawQuery, h, reqBodyBuf.String()) + + // Wrap ResponseWriter to capture body for logging. + blw := &bodyLogWriter{ + ResponseWriter: c.Writer, + body: &bytes.Buffer{}, + } + c.Writer = blw + + // Proceed with the remaining handlers. + c.Next() + + log.Printf("http response status=%d path=%s body=%s", + c.Writer.Status(), c.Request.URL.Path, blw.body.String()) + } +} diff --git a/cmd/server/main.go b/cmd/server/main.go index 97104fa..c41444f 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -86,6 +86,10 @@ func main() { // actual endpoint handlers. r.Use(validator) + // Add middleware AFTER NewRouter (it returns *gin.Engine) + var requestLogger = RequestLogRedactingAuth() + r.Use(requestLogger) + // Register HTTP routes on the Gin engine. // Health endpoint: typically used by Kubernetes or other systems to From 3d332d7b45fde37e090d502fb3f00189c0359c7c Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 22 Dec 2025 15:08:21 -0800 Subject: [PATCH 02/30] chore: update generated --- internal/apigen/go.mod | 8 +++---- internal/apigen/go.sum | 49 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 4 deletions(-) create mode 100644 internal/apigen/go.sum diff --git a/internal/apigen/go.mod b/internal/apigen/go.mod index d99a9be..0270e51 100644 --- a/internal/apigen/go.mod +++ b/internal/apigen/go.mod @@ -1,6 +1,6 @@ module github.com/calypr/drs-server -go 1.19 +go 1.24.0 require github.com/gin-gonic/gin v1.9.1 @@ -24,9 +24,9 @@ require ( github.com/ugorji/go/codec v1.2.11 // indirect golang.org/x/arch v0.3.0 // indirect golang.org/x/crypto v0.45.0 // indirect - golang.org/x/net v0.23.0 // indirect - golang.org/x/sys v0.8.0 // indirect - golang.org/x/text v0.9.0 // indirect + golang.org/x/net v0.47.0 // indirect + golang.org/x/sys v0.38.0 // indirect + golang.org/x/text v0.31.0 // indirect google.golang.org/protobuf v1.33.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/internal/apigen/go.sum b/internal/apigen/go.sum new file mode 100644 index 0000000..1bbfa6f --- /dev/null +++ b/internal/apigen/go.sum @@ -0,0 +1,49 @@ +github.com/bytedance/sonic v1.5.0/go.mod h1:ED5hyg4y6t3/9Ku1R6dU/4KyJ48DZ4jPhfY1O2AihPM= +github.com/bytedance/sonic v1.9.1/go.mod h1:i736AoUSYt75HyZLoJW9ERYxcy6eaN6h4BZXU064P/U= +github.com/chenzhuoyu/base64x v0.0.0-20211019084208-fb5309c8db06/go.mod h1:DH46F32mSOjUmXrMHnKwZdA8wcEefY7UVqBKYGjpdQY= +github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311/go.mod h1:b583jCggY9gE99b6G5LEC39OIiVsWj+R97kbl5odCEk= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA= +github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= +github.com/gin-gonic/gin v1.9.1/go.mod h1:hPrL7YrpYKXt5YId3A/Tnip5kqbEAP+KLuI3SUcPTeU= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= +github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= +github.com/go-playground/validator/v10 v10.14.0/go.mod h1:9iXMNT7sEkjXb0I+enO7QXmzG6QCsPWY4zveKFVRSyU= +github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= +github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= +github.com/klauspost/cpuid/v2 v2.2.4/go.mod h1:RVVoqg1df56z8g3pUjL/3lE5UfnlrJX8tyFgg4nqhuY= +github.com/leodido/go-urn v1.2.4/go.mod h1:7ZrI8mTSeBSHl/UaRyKQW1qZeMgak41ANeCNaVckg+4= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/pelletier/go-toml/v2 v2.0.8/go.mod h1:vuYfssBdrU2XDZ9bYydBu6t+6a6PYNcZljzZR9VXg+4= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= +github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= +github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= +github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= +github.com/ugorji/go/codec v1.2.11/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/arch v0.3.0/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/sys v0.0.0-20220704084225-05e143d24a9e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= From ee0bb4c62246cf1aec9e1fd72ca5d451ace1abaf Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 22 Dec 2025 17:14:14 -0800 Subject: [PATCH 03/30] adds req/rsp validation; relaxes path --- cmd/server/buffering_writer.go | 123 ++++++++++++ cmd/server/main.go | 24 ++- cmd/server/openapi_response_log.go | 187 +++++++++++++++++ cmd/server/openapi_response_validator.go | 165 +++++++++++++++ cmd/server/openapi_validator.go | 35 +++- cmd/server/service_info.go | 66 ++++-- docs/adr-0001-request-response-validation.md | 201 +++++++++++++++++++ 7 files changed, 777 insertions(+), 24 deletions(-) create mode 100644 cmd/server/buffering_writer.go create mode 100644 cmd/server/openapi_response_log.go create mode 100644 cmd/server/openapi_response_validator.go create mode 100644 docs/adr-0001-request-response-validation.md diff --git a/cmd/server/buffering_writer.go b/cmd/server/buffering_writer.go new file mode 100644 index 0000000..601684c --- /dev/null +++ b/cmd/server/buffering_writer.go @@ -0,0 +1,123 @@ +package main + +import ( + "bufio" + "bytes" + "net" + "net/http" + + "github.com/gin-gonic/gin" +) + +type bufferingWriter struct { + underlying gin.ResponseWriter + + header http.Header + status int + body bytes.Buffer + maxBody int64 + tooLarge bool + + wroteHeader bool + committed bool +} + +func newBufferingWriter(w gin.ResponseWriter, maxBody int64) *bufferingWriter { + // Start with a copy of current headers (if any) + h := make(http.Header) + for k, v := range w.Header() { + vs := make([]string, len(v)) + copy(vs, v) + h[k] = vs + } + return &bufferingWriter{ + underlying: w, + header: h, + maxBody: maxBody, + } +} + +func (w *bufferingWriter) Header() http.Header { + return w.header +} + +func (w *bufferingWriter) WriteHeader(statusCode int) { + if w.committed { + return + } + w.status = statusCode + w.wroteHeader = true +} + +func (w *bufferingWriter) Write(p []byte) (int, error) { + if w.committed { + return w.underlying.Write(p) + } + // Default status if never set + if !w.wroteHeader && w.status == 0 { + w.status = http.StatusOK + w.wroteHeader = true + } + + // Enforce max buffer + if w.maxBody > 0 && int64(w.body.Len()+len(p)) > w.maxBody { + w.tooLarge = true + // We still buffer up to maxBody, and then ignore the rest (or you can stop buffering entirely) + remaining := int(w.maxBody - int64(w.body.Len())) + if remaining > 0 { + _, _ = w.body.Write(p[:remaining]) + } + return len(p), nil + } + + _, _ = w.body.Write(p) + return len(p), nil +} + +func (w *bufferingWriter) WriteString(s string) (int, error) { + return w.Write([]byte(s)) +} + +// commit writes the buffered response to the underlying writer exactly once. +func (w *bufferingWriter) commit() error { + if w.committed { + return nil + } + w.committed = true + + // Copy headers + dst := w.underlying.Header() + for k := range dst { + dst.Del(k) + } + for k, v := range w.header { + vs := make([]string, len(v)) + copy(vs, v) + dst[k] = vs + } + + // Write status + body + if w.status == 0 { + w.status = http.StatusOK + } + w.underlying.WriteHeader(w.status) + _, err := w.underlying.Write(w.body.Bytes()) + return err +} + +// Gin interfaces passthrough (important in production) +func (w *bufferingWriter) Status() int { + if w.status == 0 { + return w.underlying.Status() + } + return w.status +} +func (w *bufferingWriter) Size() int { return w.body.Len() } +func (w *bufferingWriter) Written() bool { return w.wroteHeader || w.body.Len() > 0 } +func (w *bufferingWriter) WriteHeaderNow() { /* no-op until commit */ } +func (w *bufferingWriter) Pusher() http.Pusher { return w.underlying.Pusher() } +func (w *bufferingWriter) Flush() { /* buffering: no flush until commit */ } +func (w *bufferingWriter) CloseNotify() <-chan bool { return w.underlying.CloseNotify() } +func (w *bufferingWriter) Hijack() (net.Conn, *bufio.ReadWriter, error) { + return w.underlying.Hijack() +} diff --git a/cmd/server/main.go b/cmd/server/main.go index c41444f..988ef7c 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -63,16 +63,23 @@ func main() { // Ensure any buffered log entries are flushed before the process exits. defer func() { _ = log.Sync() }() - // Build the OpenAPI validator middleware using the resolved spec path. + // Build the OpenAPI requestValidator middleware using the resolved spec path. // The second argument (true) can be used to enable strict mode or // similar behavior, depending on newSpecValidator's implementation. - // If the validator fails to initialize, the server cannot safely start, + // If the requestValidator fails to initialize, the server cannot safely start, // so the process is terminated with a fatal log. - validator, err := newSpecValidator(specPath, true) + requestValidator, err := newSpecValidator(specPath, true) if err != nil { - log.Fatal("openapi validator", zap.Error(err)) + log.Fatal("openapi requestValidator", zap.Error(err)) } + // build the response validator middleware + // using default config + // If the responseValidator fails to initialize, the server cannot safely start, + respCfg := DefaultResponseValidatorConfig() + respCfg.Mode = ResponseValidationAudit // prod default; use Enforce in CI + responseValidator := NewOpenAPIResponseValidator(respCfg) + // Create a new Gin engine instance. Gin provides routing, middleware, // and HTTP handler abstractions. r := gin.New() @@ -81,14 +88,15 @@ func main() { // handlers from crashing the server by recovering and returning a 500. r.Use(gin.Recovery()) - // Attach the OpenAPI validator middleware so that all incoming requests + // Attach the OpenAPI requestValidator middleware so that all incoming requests // are validated against the OpenAPI specification before reaching the // actual endpoint handlers. - r.Use(validator) + r.Use(requestValidator) + r.Use(responseValidator) // Add middleware AFTER NewRouter (it returns *gin.Engine) - var requestLogger = RequestLogRedactingAuth() - r.Use(requestLogger) + //var requestLogger = RequestLogRedactingAuth() + //r.Use(requestLogger) // Register HTTP routes on the Gin engine. diff --git a/cmd/server/openapi_response_log.go b/cmd/server/openapi_response_log.go new file mode 100644 index 0000000..fb71092 --- /dev/null +++ b/cmd/server/openapi_response_log.go @@ -0,0 +1,187 @@ +package main + +import ( + "bytes" + "crypto/sha256" + "encoding/hex" + "log" + "net/http" + "strings" + "time" + + "github.com/gin-gonic/gin" +) + +// logOpenAPIResponseIssue logs an OpenAPI response validation failure. +// Intended for audit mode (and also for enforce-mode diagnostics). +func logOpenAPIResponseIssue(c *gin.Context, cfg ResponseValidatorConfig, detail string, status int, hdr http.Header, body []byte) { + // ---- request metadata ---- + method := c.Request.Method + path := c.Request.URL.Path + query := c.Request.URL.RawQuery + contentType := hdr.Get("Content-Type") + + // Pull common request IDs if present + reqID := firstNonEmpty( + c.GetHeader("X-Request-Id"), + c.GetHeader("X-Request-ID"), + c.GetHeader("X-Correlation-Id"), + c.GetHeader("X-Correlation-ID"), + c.GetString("request_id"), // if you set it earlier in middleware + ) + + // Latency if gin has one stored; otherwise compute from start time if you keep it in context. + latency := time.Duration(0) + if v, ok := c.Get("request_start"); ok { + if t0, ok := v.(time.Time); ok { + latency = time.Since(t0) + } + } + + // ---- header redaction ---- + redacted := redactHeaders(hdr, cfg.RedactHeaders) + + // ---- body logging controls ---- + const maxBodyLog = 4096 // keep logs bounded; tune as needed + snippet, truncated := safeBodySnippet(body, maxBodyLog) + + // In some cases it's helpful to log a body hash for correlation without logging full body + bodyHash := sha256Hex(body) + + // ---- emit ---- + // Keep it single-line and machine-friendly. + // NOTE: Avoid dumping *all* headers by default; it can be very noisy. + if cfg.LogHeaders { + log.Printf( + "openapi_response_invalid mode=%s method=%s path=%s query=%s status=%d content_type=%q req_id=%q latency_ms=%d detail=%q body_sha256=%s body_truncated=%t body=%q headers=%s", + cfg.Mode, method, path, query, status, contentType, reqID, latency.Milliseconds(), + detail, bodyHash, truncated, snippet, headerKVs(redacted), + ) + } else { + log.Printf( + "openapi_response_invalid mode=%s method=%s path=%s query=%s status=%d content_type=%q req_id=%q latency_ms=%d detail=%q body_sha256=%s body_truncated=%t body=%q", + cfg.Mode, method, path, query, status, contentType, reqID, latency.Milliseconds(), + detail, bodyHash, truncated, snippet, + ) + } +} + +func redactHeaders(h http.Header, redactKeys []string) http.Header { + out := cloneHeader(h) + + // Build a canonical set of keys to redact (case-insensitive). + redactSet := map[string]struct{}{} + for _, k := range redactKeys { + k = strings.TrimSpace(k) + if k == "" { + continue + } + redactSet[strings.ToLower(k)] = struct{}{} + redactSet[strings.ToLower(http.CanonicalHeaderKey(k))] = struct{}{} + } + + // Always redact some defaults even if config forgets them. + for _, k := range []string{"Authorization", "X-Api-Key", "X-Amz-Security-Token"} { + redactSet[strings.ToLower(k)] = struct{}{} + redactSet[strings.ToLower(http.CanonicalHeaderKey(k))] = struct{}{} + } + + for k := range out { + if _, ok := redactSet[strings.ToLower(k)]; ok { + out.Set(k, "[REDACTED]") + } + } + + return out +} + +// safeBodySnippet returns a loggable snippet of body (escaped) and whether it was truncated. +// If the body is mostly binary, it emits a short hex prefix instead of raw bytes. +func safeBodySnippet(body []byte, max int) (string, bool) { + if max <= 0 { + return "", len(body) > 0 + } + truncated := false + b := body + if len(b) > max { + b = b[:max] + truncated = true + } + + // If it looks binary, log as hex prefix. + if looksBinary(b) { + // hex expands 2x; keep it bounded + hexBytes := b + if len(hexBytes) > 512 { + hexBytes = hexBytes[:512] + truncated = true + } + return "hex:" + hex.EncodeToString(hexBytes), truncated + } + + // Otherwise, return as a safe quoted string; non-printables are escaped by %q at log.Printf call site. + return string(b), truncated +} + +func looksBinary(b []byte) bool { + // Heuristic: if there are many NULs or too many non-printables, treat as binary. + if len(b) == 0 { + return false + } + nul := bytes.Count(b, []byte{0}) + if nul > 0 { + return true + } + nonPrintable := 0 + for _, c := range b { + if c == '\n' || c == '\r' || c == '\t' { + continue + } + if c < 0x20 || c == 0x7f { + nonPrintable++ + } + } + return nonPrintable > len(b)/10 +} + +func headerKVs(h http.Header) string { + // Produce a compact string like: k1=v1;k2=v2 + // Avoid huge headers; caller already controls cfg.LogHeaders. + var parts []string + for k, vs := range h { + // collapse multi-values + v := strings.Join(vs, ",") + // hard cap per-header + if len(v) > 256 { + v = v[:256] + "…" + } + parts = append(parts, k+"="+v) + } + // Stable ordering is nice but not required; add sort.Strings(parts) if you prefer. + return strings.Join(parts, ";") +} + +func sha256Hex(b []byte) string { + sum := sha256Sum(b) // wrapper to avoid extra imports in other files if desired + return sum +} + +// Minimal wrapper so you can keep imports tidy if you want. +// If you don't care, just inline crypto/sha256 and hex in sha256Hex. +func sha256Sum(b []byte) string { + // local inline implementation to keep this file self-contained + // (uses small helper to avoid extra dependency churn) + // NOTE: If you already imported crypto/sha256 + hex above, you can simplify. + h := sha256.New() + _, _ = h.Write(b) + return hex.EncodeToString(h.Sum(nil)) +} + +func firstNonEmpty(vals ...string) string { + for _, v := range vals { + if strings.TrimSpace(v) != "" { + return v + } + } + return "" +} diff --git a/cmd/server/openapi_response_validator.go b/cmd/server/openapi_response_validator.go new file mode 100644 index 0000000..f676092 --- /dev/null +++ b/cmd/server/openapi_response_validator.go @@ -0,0 +1,165 @@ +package main + +import ( + "bytes" + "fmt" + "io" + "log" + "net/http" + "strings" + + "github.com/getkin/kin-openapi/openapi3filter" + "github.com/getkin/kin-openapi/routers" + "github.com/gin-gonic/gin" +) + +type ResponseValidationMode string + +const ( + ResponseValidationAudit ResponseValidationMode = "audit" // log only + ResponseValidationEnforce ResponseValidationMode = "enforce" // block invalid responses +) + +type ResponseValidatorConfig struct { + Mode ResponseValidationMode + MaxBodyBytes int64 // cap buffering to avoid OOM + LogHeaders bool // include response headers in logs + RedactHeaders []string + AllowEmptyOn204 bool // treat empty body as ok for 204 even if handler wrote nothing + SkipContentTypes []string +} + +func DefaultResponseValidatorConfig() ResponseValidatorConfig { + return ResponseValidatorConfig{ + Mode: ResponseValidationAudit, + MaxBodyBytes: 2 << 20, // 2MiB + LogHeaders: false, + RedactHeaders: []string{"authorization", "x-api-key", "x-amz-security-token"}, + AllowEmptyOn204: true, + SkipContentTypes: []string{"text/event-stream"}, // SSE, etc. + } +} + +// NewOpenAPIResponseValidator validates responses against the OpenAPI operation matched by the request validator. +// Requires the request validator to set: +// - "oapi.route" (routers.Route) +// - "oapi.pathParams" (map[string]string) +func NewOpenAPIResponseValidator(cfg ResponseValidatorConfig) gin.HandlerFunc { + return func(c *gin.Context) { + log.Printf("OpenAPI response validation middleware started") + // If the request validator didn't attach route info, skip. + vRoute, ok := c.Get("oapi.route") + if !ok { + log.Printf("Didn't find oapi.route in context; skipping response validation") + c.Next() + return + } + route, ok := vRoute.(*routers.Route) + if !ok { + log.Printf("oapi.route in context has wrong type; skipping response validation") + c.Next() + return + } + + vPP, _ := c.Get("oapi.pathParams") + pathParams, _ := vPP.(map[string]string) + + // Wrap writer with a buffering, late-commit writer + bw := newBufferingWriter(c.Writer, cfg.MaxBodyBytes) + c.Writer = bw + + // Run downstream handlers + c.Next() + + // If handler aborted with an error and wrote nothing, you may skip validation if desired. + // Here we still validate what was produced (if anything). + status := bw.status + if status == 0 { + // Gin defaults to 200 if never set explicitly + status = http.StatusOK + } + + // Skip streaming / content types you don't want to buffer/validate + ct := bw.header.Get("Content-Type") + if shouldSkipContentType(ct, cfg.SkipContentTypes) { + // Commit without validating + _ = bw.commit() + return + } + + // If response too large to buffer, decide policy + if bw.tooLarge { + msg := fmt.Sprintf("response body exceeded max buffer (%d bytes); skipping OpenAPI response validation", cfg.MaxBodyBytes) + if cfg.Mode == ResponseValidationEnforce { + // In enforce mode, safest is to fail closed *before committing*. + // We can still send a 500 because we haven’t committed yet. + c.Writer = bw.underlying // restore + c.Status(http.StatusInternalServerError) + c.Header("Content-Type", "application/json") + c.Writer.Write([]byte(fmt.Sprintf(`{"error":"openapi response validation failed","detail":%q}`, msg))) + return + } + // audit mode: commit and log + logOpenAPIResponseIssue(c, cfg, msg, status, bw.header, bw.body.Bytes()) + _ = bw.commit() + return + } + + // Validate response using kin-openapi + bodyReader := bytes.NewReader(bw.body.Bytes()) + respInput := &openapi3filter.ResponseValidationInput{ + RequestValidationInput: &openapi3filter.RequestValidationInput{ + Request: c.Request, + PathParams: pathParams, + Route: route, + }, + Status: status, + Header: cloneHeader(bw.header), + Body: io.NopCloser(bodyReader), + } + + log.Printf("OpenAPI response validation before ValidateResponse") + if err := openapi3filter.ValidateResponse(c.Request.Context(), respInput); err != nil { + // Invalid response + log.Printf("OpenAPI response validation failed: %v", err) + detail := err.Error() + logOpenAPIResponseIssue(c, cfg, detail, status, bw.header, bw.body.Bytes()) + + if cfg.Mode == ResponseValidationEnforce { + // Fail closed with a controlled 500, and DO NOT commit the invalid response. + c.Writer = bw.underlying // restore + c.Status(http.StatusInternalServerError) + c.Header("Content-Type", "application/json") + _, _ = c.Writer.Write([]byte(fmt.Sprintf(`{"error":"openapi response validation failed","detail":%q}`, detail))) + return + } + // audit: commit anyway + } + + // Commit the buffered response + _ = bw.commit() + } +} + +func shouldSkipContentType(ct string, skip []string) bool { + ct = strings.ToLower(strings.TrimSpace(strings.Split(ct, ";")[0])) + if ct == "" { + return false + } + for _, s := range skip { + if ct == strings.ToLower(strings.TrimSpace(s)) { + return true + } + } + return false +} + +func cloneHeader(h http.Header) http.Header { + out := make(http.Header, len(h)) + for k, v := range h { + vs := make([]string, len(v)) + copy(vs, v) + out[k] = vs + } + return out +} diff --git a/cmd/server/openapi_validator.go b/cmd/server/openapi_validator.go index 125a68e..221a973 100644 --- a/cmd/server/openapi_validator.go +++ b/cmd/server/openapi_validator.go @@ -3,6 +3,7 @@ package main import ( "context" "fmt" + "log" "net/http" "github.com/getkin/kin-openapi/openapi3" @@ -32,6 +33,26 @@ func newSpecValidator(specPath string, lenientRoutes bool) (gin.HandlerFunc, err return nil, fmt.Errorf("load spec %q: %w", specPath, err) } + // + if len(spec.Servers) > 0 { + for _, server := range spec.Servers { + log.Printf("Loaded OpenAPI server URL: %s", server.URL) + } + spec.Servers = openapi3.Servers{{URL: "/"}} // accept any host, root base path + log.Printf("Neutralize servers so route matching isn’t constrained e.g. /") + } + // Log all paths loaded from the spec for visibility. + if spec.Paths != nil { + for path := range spec.Paths.Map() { + log.Printf("Loaded OpenAPI path: %s", path) + } + } + // spot check service-info + if pi := spec.Paths.Find("/service-info"); pi != nil { + log.Printf("/service-info GET defined? %v", pi.Get != nil) + log.Printf("/service-info POST defined? %v", pi.Post != nil) + } + // Validate the spec once at startup. Some specs with extensions may warn/fail; // adjust as needed (e.g., log warning instead of returning error). if err := spec.Validate(context.Background()); err != nil { @@ -50,6 +71,7 @@ func newSpecValidator(specPath string, lenientRoutes bool) (gin.HandlerFunc, err // Lenient mode: if lenientRoutes is true, skip OpenAPI validation // for unknown routes and continue the handler chain. if lenientRoutes { + log.Printf("error finding route %s, but lenientRoutes - proceeding: %v", c.Request.URL.Path, err) c.Next() return } @@ -61,6 +83,10 @@ func newSpecValidator(specPath string, lenientRoutes bool) (gin.HandlerFunc, err return } + // Attach route and path params to context for downstream handlers + c.Set("oapi.route", route) + c.Set("oapi.pathParams", pathParams) + // Validate request against the OpenAPI operation schema in := &openapi3filter.RequestValidationInput{ Request: c.Request, @@ -69,7 +95,7 @@ func newSpecValidator(specPath string, lenientRoutes bool) (gin.HandlerFunc, err Options: &openapi3filter.Options{ // If you want spec-defined security enforcement, plug in: // AuthenticationFunc: yourAuthFunc, - AuthenticationFunc: nil, + AuthenticationFunc: allowAllAuth, }, } @@ -84,3 +110,10 @@ func newSpecValidator(specPath string, lenientRoutes bool) (gin.HandlerFunc, err c.Next() }, nil } + +// Replace this with real auth logic (e.g., checking Authorization headers, API keys, etc.). +func allowAllAuth(ctx context.Context, input *openapi3filter.AuthenticationInput) error { + // input.SecurityScheme and input.Scopes describe what the spec requires. + // Implement your checks here. Return nil on success, or an error on failure. + return nil +} diff --git a/cmd/server/service_info.go b/cmd/server/service_info.go index d8b896d..28191d1 100644 --- a/cmd/server/service_info.go +++ b/cmd/server/service_info.go @@ -2,8 +2,6 @@ package main import ( "net/http" - "os" - "time" "github.com/gin-gonic/gin" ) @@ -12,6 +10,56 @@ import ( // Gin router. The endpoint returns basic metadata about the running service // as a JSON payload, which can be used for diagnostics and observability. func registerServiceInfoRoute(r *gin.Engine) { + + var serviceInfoResponse = map[string]any{ + "id": "drs-example", + "name": "Example DRS Service", + "type": map[string]any{ + "group": "org.ga4gh", + "artifact": "drs", + "version": "1.0.0", + }, + "description": "GA4GH DRS example service", + "organization": map[string]any{ + "name": "Example Org", + "url": "https://example.org", + }, + "contactUrl": "mailto:support@example.org", + "documentationUrl": "https://example.org/docs", + "createdAt": "2020-01-01T00:00:00.000Z", + "updatedAt": "2020-01-02T00:00:00.000Z", + "environment": "prod", + "version": "1.2.3", + "drs_version": "1.3.0", + "service_url": "https://drs.example.org", + "maxBulkRequestLength": 1000, // Deprecated + "timestamp": "2024-01-01T12:00:00.000Z", + "drs": map[string]any{ + "maxBulkRequestLength": 1000, + "objectCount": 12345, + "totalObjectSize": 987654321, + "uploadRequestSupported": true, + "objectRegistrationSupported": true, + "supportedUploadMethods": []string{ + "s3", + "gs", + "https", + }, + "maxUploadSize": 1099511627776, + "maxUploadRequestLength": 100, + "maxRegisterRequestLength": 100, + "validateUploadChecksums": true, + "validateUploadFileSizes": true, + "relatedFileStorageSupported": true, + "deleteSupported": true, + "maxBulkDeleteLength": 500, + "deleteStorageDataSupported": true, + "accessMethodUpdateSupported": true, + "maxBulkAccessMethodUpdateLength": 250, + "validateAccessMethodUpdates": true, + }, + } + // Register a handler for HTTP GET requests on the `/service-info` path. // Gin will invoke the anonymous function for each incoming request // matching this route. @@ -22,18 +70,6 @@ func registerServiceInfoRoute(r *gin.Engine) { // * `version`: the service version read from the SERVICE_VERSION // environment variable (may be empty if not set). // * `timestamp`: the current UTC time, formatted as an RFC3339Nano string. - c.JSON(http.StatusOK, gin.H{ - // `name` identifies the service. This is currently hard-coded but could - // be wired to build\-time variables in the future. - "name": "my-service", - - // `version` is derived from the SERVICE_VERSION environment variable, - // allowing deployments to inject the build or release version. - "version": os.Getenv("SERVICE_VERSION"), - - // `timestamp` indicates when the response was generated, using UTC and - // RFC3339Nano for high-precision, machine\-readable timestamps. - "timestamp": time.Now().UTC().Format(time.RFC3339Nano), - }) + c.JSON(http.StatusOK, serviceInfoResponse) }) } diff --git a/docs/adr-0001-request-response-validation.md b/docs/adr-0001-request-response-validation.md new file mode 100644 index 0000000..3338bd6 --- /dev/null +++ b/docs/adr-0001-request-response-validation.md @@ -0,0 +1,201 @@ +# ADR-0001: OpenAPI Request and Response Validation in Gin + +## Status + +Accepted + +## Date + +2025-12-22 + +## Context + +The DRS server is implemented in Go (Gin) with server scaffolding generated from OpenAPI. Contract correctness matters for interoperability and operational safety. The OpenAPI spec includes deep schemas and recursive references (e.g., `DrsObject → ContentsObject → DrsObject`), making “generator-only correctness” insufficient. + +We need: + +* Strong request/response contract guarantees +* Clear operational visibility into contract drift +* A safe rollout path (audit-first in prod) +* CI enforcement to prevent regressions + +## Decision + +Implement OpenAPI validation middleware using **kin-openapi** (`openapi3filter`) as the validation engine. + +* **Request validation**: enforce at runtime by default (reject invalid requests) +* **Response validation**: buffering + late-commit middleware with two modes: + + * **Audit mode** (default in production): log/metrics on violations, allow response + * **Enforce mode** (CI / pre-prod): block invalid responses (fail closed with 500) + +Black-box contract testing complements runtime validation in CI. + +## Mermaid diagram + +```mermaid +flowchart TB + Client[Client] -->|HTTP Request| Gin[gin Engine] + + subgraph MiddlewareChain[Middleware Chain] + LogReq[Request Logging redact auth] --> ReqVal[OpenAPI Request Validation enforce] + ReqVal --> Handler[Application Handlers generated] + Handler --> RespVal[OpenAPI Response Validation audit or enforce] + RespVal --> Commit[Late Commit Writer buffered] + end + + Gin --> MiddlewareChain -->|HTTP Response| Client + + Spec[OpenAPI Spec local or embedded] --> ReqVal + Spec --> RespVal + + CI[CI Pipeline] --> Contract[Contract Tests] + Spec --> Contract + Contract --> Report[Fail build on violations] + +``` + +## Configuration and feature gating + +### Should validation be configurable? + +Yes—**validation must be configurable**, but with guardrails. + +Reasons: + +* Response validation has real operational tradeoffs (buffering, streaming endpoints, large payloads) +* Production rollout should support “audit-first” to avoid outages +* Different environments need different strictness (dev vs CI vs prod) +* Some endpoints may need opt-out (SSE, downloads, websocket/hijack) + +### Recommended configuration shape + +A single config block controlling both request and response validation: + +* `validation.enabled` (master switch) +* `validation.request.mode` = `off | enforce` +* `validation.response.mode` = `off | audit | enforce` +* `validation.response.maxBodyBytes` +* `validation.response.skipContentTypes` (e.g., `text/event-stream`) +* `validation.excludeRoutes` (optional allowlist/denylist) + +Example (YAML): + +```yaml +validation: + enabled: true + request: + mode: enforce + response: + mode: audit # audit in prod, enforce in CI + maxBodyBytes: 2097152 + skipContentTypes: + - text/event-stream + - application/octet-stream +``` + +Environment mapping: + +* **CI**: request=enforce, response=enforce +* **Dev**: request=enforce, response=audit (or enforce) +* **Prod**: request=enforce, response=audit + +### Guardrails (what *not* to do) + +* Do not ship prod with validation silently disabled by default. +* Do not make “off” the default for request validation unless you have strong external gateways that already validate. +* Do not enforce response validation on endpoints that stream or return huge bodies without explicit handling. + +### Suggested default behavior + +* Default `validation.enabled=true` +* Default `request.mode=enforce` +* Default `response.mode=audit` +* Default `maxBodyBytes=2MiB` +* Default `skipContentTypes` includes SSE and known streaming types + +## Alternatives Considered + +1. Generator-only validation: rejected (does not validate runtime behavior; fragile with recursion) +2. Fully dereferenced spec + codegen: rejected (can explode recursion and break generators) +3. CI-only contract testing: insufficient alone (doesn’t protect runtime deployments) + +## Consequences + +### Positive + +* Strong contract guarantees with realistic deployment posture +* Audit-first rollout for response validation +* CI enforcement prevents regressions +* Metrics/logs provide contract drift visibility + +### Negative + +* Response validation requires buffering and careful exclusions +* Some endpoints must be skipped or handled specially (streaming/hijack) +* Adds some latency and memory overhead proportional to buffered bodies + +## References + +* kin-openapi: [https://github.com/getkin/kin-openapi](https://github.com/getkin/kin-openapi) + openapi3filter docs: [https://pkg.go.dev/github.com/getkin/kin-openapi/openapi3filter](https://pkg.go.dev/github.com/getkin/kin-openapi/openapi3filter) +* oapi-codegen middleware patterns (net/http): [https://github.com/oapi-codegen/oapi-codegen/tree/main/pkg/middleware/nethttp](https://github.com/oapi-codegen/oapi-codegen/tree/main/pkg/middleware/nethttp) +* Dredd: [https://dredd.org/en/latest/](https://dredd.org/en/latest/) +* Schemathesis: [https://schemathesis.readthedocs.io/](https://schemathesis.readthedocs.io/) +* Background reading: + + * [https://medium.com/@tamas.szabo/validating-openapi-requests-and-responses-in-go-3e2b1c7e5a73](https://medium.com/@tamas.szabo/validating-openapi-requests-and-responses-in-go-3e2b1c7e5a73) + * [https://apisyouwonthate.com/blog/api-contract-testing-vs-runtime-validation/](https://apisyouwonthate.com/blog/api-contract-testing-vs-runtime-validation/) + +## Implementation + +NewOpenAPIResponseValidator **doesn’t** load the YAML itself. + +`NewOpenAPIResponseValidator` validates against the **OpenAPI spec object that was already loaded by the request validator**, indirectly, via the **matched route** stored in Gin’s context. + +### The flow + +1. **Request validator loads the YAML** at startup: + + * `loader.LoadFromFile(specPath)` → `*openapi3.T` +2. It builds an OpenAPI router from that spec: + + * `gorillamux.NewRouter(spec)` +3. For each request, it finds the matching OpenAPI operation: + + * `route, pathParams := r.FindRoute(req)` +4. It **stores that route** on the Gin context: + + * `c.Set("oapi.route", route)` + * `c.Set("oapi.pathParams", pathParams)` +5. **Response validator reads those values**: + + * `route := c.MustGet("oapi.route").(routers.Route)` + * `pathParams := c.MustGet("oapi.pathParams").(map[string]string)` +6. Then it calls: + + * `openapi3filter.ValidateResponse(ctx, ResponseValidationInput{ Route: route, ... })` + +That `route` object contains the operation metadata tied to the spec that was loaded in step 1, so the response validator is effectively validating against the same loaded spec—without reloading YAML. + +### What you need to ensure + +* The **request validator middleware must run before** the response validator middleware. +* The request validator must **store `oapi.route` and `oapi.pathParams`**. + +### If you want response validation without request validation + +You’d need to have the response validator: + +* load the spec itself, and/or +* match the route again (`FindRoute`) using its own OpenAPI router. + +But in practice, sharing the matched route from request validation is the cleanest and fastest approach. + + +## Follow-up Actions + +* Add Prometheus counters for request/response violations +* Add CI job that runs response validation in enforce mode +* Document excluded endpoints (streaming, downloads) and why +* Add dashboards for validation failures by route/status From 92dfa9cd280c32de00a4a2129abfda515ecea787 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Tue, 23 Dec 2025 06:00:26 -0800 Subject: [PATCH 04/30] refactors handlers, middleware pkg --- cmd/server/{ => handlers}/healthz.go | 0 cmd/server/{ => handlers}/service_info.go | 0 cmd/server/{ => middleware}/buffering_writer.go | 0 cmd/server/{ => middleware}/logging_middleware.go | 0 cmd/server/{ => middleware}/openapi_response_log.go | 0 cmd/server/{ => middleware}/openapi_response_validator.go | 0 cmd/server/{ => middleware}/openapi_validator.go | 0 7 files changed, 0 insertions(+), 0 deletions(-) rename cmd/server/{ => handlers}/healthz.go (100%) rename cmd/server/{ => handlers}/service_info.go (100%) rename cmd/server/{ => middleware}/buffering_writer.go (100%) rename cmd/server/{ => middleware}/logging_middleware.go (100%) rename cmd/server/{ => middleware}/openapi_response_log.go (100%) rename cmd/server/{ => middleware}/openapi_response_validator.go (100%) rename cmd/server/{ => middleware}/openapi_validator.go (100%) diff --git a/cmd/server/healthz.go b/cmd/server/handlers/healthz.go similarity index 100% rename from cmd/server/healthz.go rename to cmd/server/handlers/healthz.go diff --git a/cmd/server/service_info.go b/cmd/server/handlers/service_info.go similarity index 100% rename from cmd/server/service_info.go rename to cmd/server/handlers/service_info.go diff --git a/cmd/server/buffering_writer.go b/cmd/server/middleware/buffering_writer.go similarity index 100% rename from cmd/server/buffering_writer.go rename to cmd/server/middleware/buffering_writer.go diff --git a/cmd/server/logging_middleware.go b/cmd/server/middleware/logging_middleware.go similarity index 100% rename from cmd/server/logging_middleware.go rename to cmd/server/middleware/logging_middleware.go diff --git a/cmd/server/openapi_response_log.go b/cmd/server/middleware/openapi_response_log.go similarity index 100% rename from cmd/server/openapi_response_log.go rename to cmd/server/middleware/openapi_response_log.go diff --git a/cmd/server/openapi_response_validator.go b/cmd/server/middleware/openapi_response_validator.go similarity index 100% rename from cmd/server/openapi_response_validator.go rename to cmd/server/middleware/openapi_response_validator.go diff --git a/cmd/server/openapi_validator.go b/cmd/server/middleware/openapi_validator.go similarity index 100% rename from cmd/server/openapi_validator.go rename to cmd/server/middleware/openapi_validator.go From 3de432aca4ea86b2f8b68ea7b14c021e4e042c47 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 23 Dec 2025 09:23:49 -0800 Subject: [PATCH 05/30] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- cmd/server/main.go | 6 ++---- cmd/server/middleware/openapi_response_validator.go | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cmd/server/main.go b/cmd/server/main.go index 988ef7c..3570213 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -73,13 +73,11 @@ func main() { log.Fatal("openapi requestValidator", zap.Error(err)) } - // build the response validator middleware - // using default config - // If the responseValidator fails to initialize, the server cannot safely start, + // Build the response validator middleware using the default configuration. + // The validator runs in audit mode by default (use Enforce in CI). respCfg := DefaultResponseValidatorConfig() respCfg.Mode = ResponseValidationAudit // prod default; use Enforce in CI responseValidator := NewOpenAPIResponseValidator(respCfg) - // Create a new Gin engine instance. Gin provides routing, middleware, // and HTTP handler abstractions. r := gin.New() diff --git a/cmd/server/middleware/openapi_response_validator.go b/cmd/server/middleware/openapi_response_validator.go index f676092..0dbff52 100644 --- a/cmd/server/middleware/openapi_response_validator.go +++ b/cmd/server/middleware/openapi_response_validator.go @@ -32,7 +32,7 @@ type ResponseValidatorConfig struct { func DefaultResponseValidatorConfig() ResponseValidatorConfig { return ResponseValidatorConfig{ Mode: ResponseValidationAudit, - MaxBodyBytes: 2 << 20, // 2MiB + MaxBodyBytes: 2 * 1024 * 1024, // 2MiB LogHeaders: false, RedactHeaders: []string{"authorization", "x-api-key", "x-amz-security-token"}, AllowEmptyOn204: true, From 0eea29bcef40d48dda302f037a0942511856c295 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Thu, 25 Dec 2025 17:13:04 -0800 Subject: [PATCH 06/30] refactors handlers, middleware pkg --- cmd/server/handlers/healthz.go | 4 ++-- cmd/server/handlers/service_info.go | 4 ++-- cmd/server/main.go | 15 ++++++++++++--- cmd/server/middleware/buffering_writer.go | 2 +- cmd/server/middleware/logging_middleware.go | 2 +- cmd/server/middleware/openapi_response_log.go | 2 +- .../middleware/openapi_response_validator.go | 2 +- cmd/server/middleware/openapi_validator.go | 9 ++------- 8 files changed, 22 insertions(+), 18 deletions(-) diff --git a/cmd/server/handlers/healthz.go b/cmd/server/handlers/healthz.go index 2cff29b..bce938d 100644 --- a/cmd/server/handlers/healthz.go +++ b/cmd/server/handlers/healthz.go @@ -1,4 +1,4 @@ -package main +package handlers import ( "net/http" @@ -11,7 +11,7 @@ import ( // This endpoint is typically used by load balancers, orchestration systems // (such as Kubernetes), or monitoring tools to verify that the service is // running and able to respond to HTTP requests. -func registerHealthzRoute(r *gin.Engine) { +func RegisterHealthzRoute(r *gin.Engine) { // Register a handler for HTTP GET requests on the `/healthz` path. // For each incoming request that matches this route, Gin will call // the anonymous function below. diff --git a/cmd/server/handlers/service_info.go b/cmd/server/handlers/service_info.go index 28191d1..d5a7006 100644 --- a/cmd/server/handlers/service_info.go +++ b/cmd/server/handlers/service_info.go @@ -1,4 +1,4 @@ -package main +package handlers import ( "net/http" @@ -9,7 +9,7 @@ import ( // registerServiceInfoRoute adds the `/service-info` endpoint to the provided // Gin router. The endpoint returns basic metadata about the running service // as a JSON payload, which can be used for diagnostics and observability. -func registerServiceInfoRoute(r *gin.Engine) { +func RegisterServiceInfoRoute(r *gin.Engine) { var serviceInfoResponse = map[string]any{ "id": "drs-example", diff --git a/cmd/server/main.go b/cmd/server/main.go index 3570213..85d08bb 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -6,6 +6,8 @@ import ( "os" "time" + "github.com/calypr/drs-server/cmd/server/handlers" + "github.com/calypr/drs-server/cmd/server/middleware" "github.com/gin-gonic/gin" "go.uber.org/zap" ) @@ -68,11 +70,18 @@ func main() { // similar behavior, depending on newSpecValidator's implementation. // If the requestValidator fails to initialize, the server cannot safely start, // so the process is terminated with a fatal log. - requestValidator, err := newSpecValidator(specPath, true) + requestValidator, err := middleware.NewSpecValidator(specPath, true) if err != nil { log.Fatal("openapi requestValidator", zap.Error(err)) } + // build the response validator middleware + // using default config + // If the responseValidator fails to initialize, the server cannot safely start, + respCfg := middleware.DefaultResponseValidatorConfig() + respCfg.Mode = middleware.ResponseValidationAudit // prod default; use Enforce in CI + responseValidator := middleware.NewOpenAPIResponseValidator(respCfg) + // Build the response validator middleware using the default configuration. // The validator runs in audit mode by default (use Enforce in CI). respCfg := DefaultResponseValidatorConfig() @@ -100,11 +109,11 @@ func main() { // Health endpoint: typically used by Kubernetes or other systems to // check if the service is alive and ready to receive traffic. - registerHealthzRoute(r) + handlers.RegisterHealthzRoute(r) // Service info endpoint: exposes basic metadata about the service, // such as name, version, and current timestamp. - registerServiceInfoRoute(r) + handlers.RegisterServiceInfoRoute(r) // Construct the HTTP server using the Gin engine as the handler. // ReadHeaderTimeout limits the time allowed to read request headers, diff --git a/cmd/server/middleware/buffering_writer.go b/cmd/server/middleware/buffering_writer.go index 601684c..f5c582d 100644 --- a/cmd/server/middleware/buffering_writer.go +++ b/cmd/server/middleware/buffering_writer.go @@ -1,4 +1,4 @@ -package main +package middleware import ( "bufio" diff --git a/cmd/server/middleware/logging_middleware.go b/cmd/server/middleware/logging_middleware.go index b550095..61e8414 100644 --- a/cmd/server/middleware/logging_middleware.go +++ b/cmd/server/middleware/logging_middleware.go @@ -1,7 +1,7 @@ // Package main contains the HTTP server entrypoint and middleware, including // logging middleware that redacts sensitive authentication headers and logs // both request and response metadata. -package main +package middleware import ( "bytes" diff --git a/cmd/server/middleware/openapi_response_log.go b/cmd/server/middleware/openapi_response_log.go index fb71092..0d65d17 100644 --- a/cmd/server/middleware/openapi_response_log.go +++ b/cmd/server/middleware/openapi_response_log.go @@ -1,4 +1,4 @@ -package main +package middleware import ( "bytes" diff --git a/cmd/server/middleware/openapi_response_validator.go b/cmd/server/middleware/openapi_response_validator.go index 0dbff52..f1725f8 100644 --- a/cmd/server/middleware/openapi_response_validator.go +++ b/cmd/server/middleware/openapi_response_validator.go @@ -1,4 +1,4 @@ -package main +package middleware import ( "bytes" diff --git a/cmd/server/middleware/openapi_validator.go b/cmd/server/middleware/openapi_validator.go index 221a973..079977a 100644 --- a/cmd/server/middleware/openapi_validator.go +++ b/cmd/server/middleware/openapi_validator.go @@ -1,4 +1,4 @@ -package main +package middleware import ( "context" @@ -12,11 +12,6 @@ import ( "github.com/gin-gonic/gin" ) -// Convenience wrapper: default lenientRoutes to true. -func newDefaultSpecValidator(specPath string) (gin.HandlerFunc, error) { - return newSpecValidator(specPath, true) -} - // newSpecValidator builds a Gin middleware that validates incoming requests // against an OpenAPI 3.x document. // @@ -24,7 +19,7 @@ func newDefaultSpecValidator(specPath string) (gin.HandlerFunc, error) { // - If the request path+method does not exist in the spec => 404 (strict). // - If it exists but the request doesn't conform => 400 with detail. // - Security validation is not enforced unless you plug in AuthenticationFunc. -func newSpecValidator(specPath string, lenientRoutes bool) (gin.HandlerFunc, error) { +func NewSpecValidator(specPath string, lenientRoutes bool) (gin.HandlerFunc, error) { loader := openapi3.NewLoader() loader.IsExternalRefsAllowed = true From 753fab4e04841bc42ad76747b89660f073038ae1 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Thu, 25 Dec 2025 17:13:28 -0800 Subject: [PATCH 07/30] adds passport discussion --- ...r-0002-passports-ControlledAccessGrants.md | 779 ++++++++++++++++++ 1 file changed, 779 insertions(+) create mode 100644 docs/adr-0002-passports-ControlledAccessGrants.md diff --git a/docs/adr-0002-passports-ControlledAccessGrants.md b/docs/adr-0002-passports-ControlledAccessGrants.md new file mode 100644 index 0000000..d5c02c0 --- /dev/null +++ b/docs/adr-0002-passports-ControlledAccessGrants.md @@ -0,0 +1,779 @@ + +# ADR-002: GA4GH Passports, DRS Authorization, ControlledAccessGrants, and Signed Access + +## Status +Accepted + +## Context + +This ADR documents the interaction between **GA4GH Passports**, **Data Repository Service (DRS)**, +**ControlledAccessGrants**, and **signed URLs / short-lived credentials (STS)**. + +The goal is to clearly separate: +- Authentication +- Authorization +- Data-plane access + +while remaining compliant with GA4GH standards and scalable for large controlled-access datasets. + +--- + +## Key Concepts + +### Passports +GA4GH Passports are JWT bundles containing Visas. They represent **who the user is** and +**what they are authorized to access**. + +Identity is defined by: +- `iss` (issuer) +- `sub` (subject) + +### ControlledAccessGrants +A `ControlledAccessGrants` Visa represents an affirmative authorization decision issued by a DAC. +It authorizes access to a **protected dataset**, not individual files. + +The Visa `value` is the **dataset identifier** and must exactly match the identifier used by DRS policy. + +### DRS Objects +DRS exposes immutable objects identified by `object_id`. +Each protected object must be mapped internally to a dataset identifier. + +### Signed URLs / STS +Signed URLs or short-lived credentials provide **temporary data-plane access** to object storage. +They are: +- Time-limited +- Object-scoped +- Identity-agnostic + +--- + +## Decision + +1. **Authorization is enforced at the DRS layer using Passports** +2. **ControlledAccessGrants are evaluated at dataset level** +3. **DRS mints short-lived credentials only after successful authorization** +4. **Object storage never evaluates Passport or identity claims** + +--- + +## Architecture Diagram + +```mermaid +graph TD + User -->|OAuth/OIDC| PassportBroker + PassportBroker -->|Passport JWT| User + User -->|POST /objects/id| DRS + DRS -->|Authorize via ControlledAccessGrants| PolicyEngine + PolicyEngine -->|Allow| DRS + DRS -->|Signed URL / STS| User + User -->|Direct GET| ObjectStorage +``` + +--- + +## Sequence Diagram + +```mermaid +sequenceDiagram + participant U as User + participant P as Passport Broker + participant D as DRS + participant S as Storage + + U->>P: Authenticate & request Passport + P-->>U: Passport-ControlledAccessGrants + + U->>D: POST /objects/{object_id} + Passport + D->>D: Validate Passport & Visas + D->>D: object_id -> dataset_id + D->>D: Check ControlledAccessGrants + D-->>U: Signed URL / STS (short-lived) + + U->>S: GET object (signed) + S-->>U: Object bytes +``` + +--- + +## Authorization Logic + +Authorization succeeds if: +- Passport contains a valid `ControlledAccessGrants` Visa +- `visa.value == dataset_id` +- `visa.iss` is trusted +- `visa.exp` is not expired + +Failure returns `403 Forbidden`. + +--- + +## Interaction with Signed URLs / STS + +- Signed URLs are minted **only after authorization** +- Lifetime must be **shorter than Visa lifetime** +- URLs are **read-only** and **object-scoped** +- Revocation occurs by: + - Visa expiration + - DRS refusal to mint new credentials + +--- + +## Consequences + +### Positive +- Clear separation of governance and storage +- Scales to large datasets +- Minimizes trust surface of object storage + +### Negative +- Signed URLs cannot be revoked mid-lifetime +- Requires strict lifetime management + +--- + +## Invariants + +- Passport authorizes *whether* +- DRS authorizes *what* +- Signed URLs authorize *how long* + +--- + +## Alternatives Considered + +- Passing Passport to storage (rejected) +- Dataset-wide storage credentials (rejected) +- Long-lived signed URLs (rejected) + +--- + +## References + +- GA4GH Passports Specification +- GA4GH DRS Specification +- GA4GH ControlledAccessGrants Visa Profile + +--- + +Below is a practical “wiring diagram” of how **GA4GH Passports** relate to **DRS**, what data “keys” they share, and how **authorization of DRS objects** typically works. + +## 1) What depends on what + +### Passports (AAI layer) → used by DRS (data access layer) + +* **GA4GH Passports** define how to represent a researcher’s identity + permissions as **Visas** (signed JWTs) bundled into a **Passport claim** (`ga4gh_passport_v1`). ([GA4GH][1]) +* **DRS** is a *read-only* object access API that can *optionally* use Passports (or bearer/basic tokens) to authorize calls. ([GA4GH][2]) + +### Passport Broker/UI are *issuers/aggregators*, not “DRS features” + +* DRS docs explicitly note it’s **not DRS’s job to return a Passport**; issuing/aggregating Passports is the broker’s responsibility. ([GA4GH][2]) +* Starter Kit Passport UI/Broker exist to help users obtain a Passport JWT (and the UI is built to work with other passport microservices). ([GA4GH Starter Kit][3]) +* Starter Kit DRS explicitly calls out **“Passport mediated auth to DRS objects”** and an **“Auth info”** feature to discover brokers/visas for controlled-access objects. ([GitHub][4]) + +## 2) Where the “join points” are (primary keys / identifiers) + +There isn’t one single universal primary key shared by both specs; instead you typically join across three axes: + +### A) **User identity key** (Passport-side) + +* A Visa’s user identity is the pair **{`sub`, `iss`}** (“Visa Identity”). ([GA4GH][1]) + + * `iss` = issuer (OIDC / Visa Issuer / Broker depending on flow) + * `sub` = subject identifier within that issuer + +This is what your DRS authorization layer ultimately answers: *“does the user represented by (`iss`,`sub`) have the right visas?”* + +### B) **Object key** (DRS-side) + +* The “thing being protected” is the **DRS object id** (`object_id`, `drs_object_id`) used in: + + * `GET /objects/{object_id}` + * `POST /objects/{object_id}` (Passport-bearing) + * `GET /objects/{object_id}/access/{access_id}` (and Passport-bearing variants) ([GA4GH][2]) + +### C) **Issuer discovery key** (DRS ↔ Passport trust alignment) + +DRS exposes *which auth modes and issuers apply* via OPTIONS “auth discovery”: + +* `OPTIONS /objects/{object_id}` returns an `Authorizations` structure including: + + * `supported_types` (e.g., None/Basic/Bearer/Passport) + * `passport_auth_issuers` + * `bearer_auth_issuers` ([GA4GH][2]) + +This is a key interoperability point: the issuer strings in `passport_auth_issuers` must align with what your Passport clearinghouse layer trusts/accepts (typically Visa `iss`, and/or broker/issuer metadata). + +## 3) How DRS “knows” Passport is required (and how clients discover it) + +DRS supports multiple auth modes, and they can vary per object. The spec recommends indicating required auth via the **OptionsObject** API and via access methods. ([GA4GH][2]) + +Concretely: + +* `GET /objects/{object_id}` may allow `None`, `BasicAuth`, or `BearerAuth` (depending on the object and server policy). ([GA4GH][2]) +* If Passport is required/allowed, the pattern is: + + * client calls `OPTIONS /objects/{object_id}` → sees `supported_types` and `passport_auth_issuers` + * client then calls `POST /objects/{object_id}` and includes Passport token(s) in the request body (PassportAuth is defined that way). ([GA4GH][2]) + +## 4) How “authorization of DRS objects” usually works with Visas + +Think of DRS as the enforcement point, with Passport acting as the portable “proof”: + +1. **DRS object has a policy** (out of band / implementation-defined) + + * Example: object X requires a **ControlledAccessGrants** visa whose `value` points at dataset/resource Y, and issuer must be trusted. + +2. Client obtains Passport (UI/Broker flow) containing Visa JWT(s) + + * Passports carry `ga4gh_passport_v1` with embedded visas. ([GA4GH][1]) + +3. Client calls DRS using Passport-bearing endpoints + + * PassportAuth is explicitly modeled as a POST-body token mechanism in DRS. ([GA4GH][2]) + +4. DRS acts as (or delegates to) a **Passport Clearinghouse** + + * It validates Visa signatures/expiry/trust chain per GA4GH AAI/Passport rules, then evaluates Visa content. + * Visas include required JWT claims (`iss`, `sub`, `iat`, `exp`) and a structured `ga4gh_visa_v1` object (`type`, `asserted`, `value`, `source`, etc.). ([GA4GH][1]) + +5. DRS matches visas to its object policy + + * For dataset/object entitlements, the standard Visa type **ControlledAccessGrants** encodes “access granted” and its `value` is a URL identifying the dataset/object being granted. ([GA4GH][1]) + * The spec also notes how `value` must be matched/validated (case-sensitive full match unless the Visa type defines safe partitioning). ([GA4GH][1]) + +### Key takeaway + +**Authorization is fundamentally:** + +* `DRS object_id` → mapped (by your DRS implementation) to required Visa types/values/issuers +* `Passport` → proves the caller’s (`iss`,`sub`) and includes Visa assertions +* `OPTIONS /objects/{id}` → tells clients what to present (and from which issuers) ([GA4GH][2]) + +## 5) Minimal dependency map you can drop into docs + +* **Passport UI/Broker**: obtains Passport JWT(s) with visa claims (`ga4gh_passport_v1`). ([GitHub][5]) +* **DRS**: protects `DrsObject`s; may require Bearer/Basic headers or Passport POST depending on object; exposes auth discovery via OPTIONS. ([GA4GH][2]) +* **Shared “keys”**: + + * user: `{sub, iss}` (Visa Identity) ([GA4GH][1]) + * object: `object_id` / `drs_object_id` ([GA4GH][2]) + * trust: DRS `passport_auth_issuers` ↔ Visa/issuer/broker trust configuration ([GA4GH][2]) + +## 6) Useful links + +[1]: https://ga4gh.github.io/data-security/ga4gh-passport "GA4GH Passport | GA4GH Data Security" +[2]: https://ga4gh.github.io/data-repository-service-schemas/preview/develop/docs/ "Data Repository Service" +[3]: https://starterkit.ga4gh.org/docs/starter-kit-apis/passport/passport_ui_overview?utm_source=chatgpt.com "Starter Kit Passport UI Overview" +[4]: https://github.com/ga4gh/ga4gh-starter-kit-drs "GitHub - ga4gh/ga4gh-starter-kit-drs: Open, extensible server implementation of the GA4GH Data Repository Service (DRS) specification" +[5]: https://github.com/ga4gh/ga4gh-starter-kit-passport-ui "GitHub - ga4gh/ga4gh-starter-kit-passport-ui: Central UI server connecting to other Passport-related microservices (ory hydra, ory kratos)" + +--- + +## Deep Dive + +Below is a **deep dive on `ControlledAccessGrants`** and how it is *actually* used to authorize access to **DRS objects that belong to a protected dataset**, written from an implementer’s point of view (DRS + Passport + policy engine). + +--- + +## 1. What `ControlledAccessGrants` is (and is not) + +**`ControlledAccessGrants` is a *Visa type*** defined by **GA4GH Passports**. + +It represents **an affirmative authorization decision**: + +> *“The bearer is allowed to access a specific controlled-access resource.”* + +Key clarifications: + +* It **does not identify the user** → identity comes from `iss + sub` +* It **does not authenticate the request** → OAuth / OIDC / Bearer token does that +* It **does not describe the data object directly** → it references a *policy resource* (usually a dataset) + +It is a **portable entitlement**, not an ACL. + +--- + +## 2. Canonical structure of a `ControlledAccessGrants` Visa + +At minimum (simplified): + +```json +{ + "iss": "https://dac.example.org", + "sub": "user-123", + "iat": 1710000000, + "exp": 1712600000, + "ga4gh_visa_v1": { + "type": "ControlledAccessGrants", + "asserted": 1709999000, + "value": "https://datasets.example.org/datasets/DS-001", + "source": "https://dac.example.org" + } +} +``` + +### Semantics of the important fields + +| Field | Meaning | +| -------- | ---------------------------------------------------------------- | +| `type` | Must be exactly `ControlledAccessGrants` | +| `value` | **The protected resource identifier** (most often a dataset URL) | +| `iss` | Who issued the grant (DAC / authorization authority) | +| `sub` | Who the grant applies to | +| `exp` | When the authorization expires | +| `source` | Human/traceable provenance (often same as `iss`) | + +> 🔑 **The `value` is the join key between Passport and DRS policy.** + +--- + +## 3. The critical design choice: *dataset-level* grants, not object-level + +In practice, **`ControlledAccessGrants` almost always point to a dataset**, not to individual files or DRS object IDs. + +Why? + +* Datasets are **stable, reviewable, governance-level entities** +* Files come and go (reprocessing, new versions, re-partitioning) +* DAC approvals are granted **for a dataset under a use agreement** + +So you get this hierarchy: + +``` +Dataset (protected, DAC-approved) + ├── DRS Object A (bam) + ├── DRS Object B (vcf) + ├── DRS Object C (index) + └── DRS Object D (metadata) +``` + +**One Visa → many DRS objects** + +--- + +## 4. How a DRS server uses `ControlledAccessGrants` + +### Step 1: DRS object is mapped to a dataset + +This mapping is **implementation-defined**, but must exist. + +Common patterns: + +* DRS object metadata includes `dataset_id` +* Indexd / metadata store has `dataset` or `program/project` tags +* Object URL structure encodes dataset identity + +Example internal model: + +```yaml +drs_object_id: drs://repo/abc123 +dataset_id: https://datasets.example.org/datasets/DS-001 +``` + +--- + +### Step 2: Client discovers auth requirements + +Client calls: + +``` +OPTIONS /objects/{object_id} +``` + +DRS responds with something like: + +```json +{ + "supported_types": ["PassportAuth"], + "passport_auth_issuers": [ + "https://dac.example.org" + ] +} +``` + +This tells the client: + +* Passport is required +* Visas must be issued by this DAC + +--- + +### Step 3: Client presents Passport + +Client calls: + +``` +POST /objects/{object_id} +``` + +with: + +```json +{ + "passport": [ + "", + "" + ] +} +``` + +--- + +### Step 4: DRS validates the Visa (Clearinghouse role) + +DRS (or a delegated clearinghouse) must: + +1. Verify JWT signature +2. Verify `exp`, `nbf`, `iat` +3. Verify `iss` is trusted +4. Extract `type`, `value`, `sub` + +This is **pure Passport validation** — no dataset logic yet. + +--- + +### Step 5: Authorization decision (this is the key part) + +For the requested DRS object: + +```text +Required dataset = DS-001 +``` + +DRS checks: + +> Does the Passport contain **at least one** valid +> `ControlledAccessGrants` Visa where: +> +> * `visa.type == "ControlledAccessGrants"` +> * `visa.value == dataset_id` +> * `visa.iss` is trusted +> * `visa.exp` is valid + +If **yes** → access allowed +If **no** → `403 Forbidden` + +--- + +## 5. Why the `value` must be an exact, stable identifier + +The Passport spec is very explicit: + +* `value` matching is **exact string match** +* No globbing +* No prefix matching +* No implied hierarchy (unless the Visa type explicitly defines it) + +So **this is a contract**: + +> Whatever string your DAC puts in `value` +> must be exactly what your DRS policy engine checks. + +That’s why most deployments choose: + +* A **dataset URL** +* Or a **stable dataset UUID URL** + +Example good choices: + +``` +https://datasets.example.org/datasets/DS-001 +urn:uuid:3f1b8d2e-... +``` + +Bad choices: + +``` +/projects/foo +dataset-1 +"controlled" +``` + +--- + +## 6. Multiple datasets, multiple grants + +A Passport can (and often does) contain **multiple ControlledAccessGrants**: + +```text +User has access to: +- DS-001 +- DS-017 +- DS-042 +``` + +DRS authorization is then: + +``` +object.dataset_id ∈ { visa.value } +``` + +No per-object logic required. + +--- + +## 7. What `ControlledAccessGrants` deliberately avoids + +This is by design: + +❌ It does *not* encode: + +* File paths +* DRS object IDs +* Bucket names +* Access methods (S3 vs HTTPS) +* Read vs write (DRS is read-only) + +Those remain **local enforcement details**. + +--- + +## 8. Mental model summary (one paragraph) + +> **`ControlledAccessGrants` is the bridge between governance and storage.** +> A DAC issues a Visa saying “this person may access dataset X.” +> A DRS server maps each protected object to dataset X and enforces that rule by requiring a Passport containing a matching Visa before returning object metadata or access URLs. + +--- + +This is the part that usually causes confusion, so let’s be very explicit and concrete. + +**TL;DR** +`ControlledAccessGrants` authorize *whether* a user may access a protected dataset. +**Signed URLs / short-lived credentials control *how* that access is temporarily exercised.** +They are complementary and intentionally decoupled. + +--- + +## 1. Separation of concerns (the core principle) + +Think in **three layers**, each with a different trust boundary and lifetime: + +| Layer | Responsibility | Lifetime | +| ------------------------------------- | ------------------------------------------- | ----------------- | +| **Passport / ControlledAccessGrants** | *Authorization decision* (is user allowed?) | Hours → months | +| **DRS server** | *Policy enforcement & mediation* | Request-scoped | +| **Signed URL / temp creds** | *Data plane access* | Seconds → minutes | + +**Passports never grant direct storage access.** +They only allow the DRS server to *mint* temporary access. + +--- + +## 2. End-to-end flow with signed URLs + +### Step 1: User authenticates and presents Passport + +* Client authenticates with OAuth/OIDC +* Client includes Passport (with `ControlledAccessGrants`) when calling DRS + +``` +POST /objects/{object_id} +``` + +DRS verifies: + +* Passport signature +* Trusted issuer +* `ControlledAccessGrants.value == dataset_id` +* Visa is unexpired + +👉 **Authorization happens here and only here** + +--- + +### Step 2: DRS authorizes the object + +DRS maps: + +``` +object_id → dataset_id +``` + +Checks: + +``` +dataset_id ∈ ControlledAccessGrants.value +``` + +If authorized → proceed +If not → `403 Forbidden` + +--- + +### Step 3: DRS mints *ephemeral* access + +Only **after authorization succeeds**, DRS generates one of: + +* A **pre-signed URL** (S3, GCS, Azure) +* A **short-lived access token** +* Temporary cloud credentials (STS-style) + +Examples: + +* S3 presigned GET (5–15 minutes) +* GCS signed URL +* Azure SAS token + +These credentials: + +* Are **scoped to a single object** +* Are **time-limited** +* Are **not reusable across objects** +* Are **not tied to user identity** + +--- + +### Step 4: Client accesses storage directly + +Client downloads from object storage **without Passport**, **without OAuth**, **without identity**: + +``` +GET https://bucket.s3.amazonaws.com/object?...signature... +``` + +At this point: + +* Storage layer has **no idea who the user is** +* Storage only verifies cryptographic validity + expiry + +This is intentional. + +--- + +## 3. Why Passport is *not* used at the storage layer + +### Security reasons + +* Object stores don’t understand GA4GH Passports +* JWT validation inside storage is infeasible +* Storage credentials must be revocable by time, not identity + +### Scalability reasons + +* Avoid per-request auth checks on multi-GB downloads +* Avoid holding auth state open for hours +* CDN compatibility + +### Governance reasons + +* Access review happens *before* access is granted +* Download is treated as execution of an already-approved decision + +--- + +## 4. Key invariants you must maintain + +### Invariant 1: **Signed URLs must never outlive authorization intent** + +Best practice: + +``` +Visa lifetime >> signed URL lifetime +``` + +Example: + +* Visa valid for 7 days +* Signed URL valid for 5 minutes + +Never reverse this. + +--- + +### Invariant 2: **Signed URLs must be non-escalating** + +A signed URL: + +* Must only allow **read** +* Must only allow **that object** +* Must not allow listing, overwrite, or delete + +If your URL allows more than what DRS authorized → that’s a bug. + +--- + +### Invariant 3: **DRS is the only mint** + +Clients must **never**: + +* Exchange Passport directly for cloud creds +* Reuse creds across objects +* Cache creds beyond expiry + +All minting goes through DRS. + +--- + +## 5. How revocation actually works (important) + +You **cannot revoke a signed URL once issued**. + +So revocation happens at **two earlier points**: + +1. **Visa expiry** + + * DAC revokes access by shortening Visa lifetime or not reissuing +2. **DRS mint refusal** + + * After revocation, new signed URLs cannot be minted + +This is why short lifetimes matter. + +--- + +## 6. Multiple objects, same dataset + +This is the sweet spot of `ControlledAccessGrants`. + +User wants 1,000 files from dataset DS-001: + +* **One Visa** +* **1,000 DRS calls** +* **1,000 short-lived signed URLs** + +No re-approval. No per-file DAC logic. + +--- + +## 7. What happens if a dataset mixes public and protected objects? + +DRS logic becomes: + +``` +if object.public: + return access immediately (no Passport) +else: + require ControlledAccessGrants +``` + +This is surfaced cleanly via: + +``` +OPTIONS /objects/{object_id} +``` + +Clients know *beforehand* whether Passport is required. + +--- + +## 8. Mental model (keep this in your head) + +> **Passport answers “may you?”** +> **DRS answers “here’s how (temporarily).”** +> **Signed URLs answer “prove this cryptographic token right now.”** + +Each layer trusts the one above it and never looks sideways. + +--- + +## 9. Common anti-patterns (avoid these) + +❌ Long-lived signed URLs +❌ Passing Passport to S3/GCS +❌ Dataset-wide bucket credentials +❌ Reusing signed URLs across users +❌ Letting object storage enforce GA4GH policy + +--- + From 655379daf6721246daf41f0dccf481734e33942f Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Thu, 25 Dec 2025 17:27:55 -0800 Subject: [PATCH 08/30] improves docs/ --- .../adr-0001-request-response-validation.md | 0 ...r-0002-passports-ControlledAccessGrants.md | 0 docs/adr/index.md | 75 +++++++++++++++++++ docs/js/mermaid-init.js | 5 ++ mkdocs.yml | 19 +++++ 5 files changed, 99 insertions(+) rename docs/{ => adr}/adr-0001-request-response-validation.md (100%) rename docs/{ => adr}/adr-0002-passports-ControlledAccessGrants.md (100%) create mode 100644 docs/adr/index.md create mode 100644 docs/js/mermaid-init.js diff --git a/docs/adr-0001-request-response-validation.md b/docs/adr/adr-0001-request-response-validation.md similarity index 100% rename from docs/adr-0001-request-response-validation.md rename to docs/adr/adr-0001-request-response-validation.md diff --git a/docs/adr-0002-passports-ControlledAccessGrants.md b/docs/adr/adr-0002-passports-ControlledAccessGrants.md similarity index 100% rename from docs/adr-0002-passports-ControlledAccessGrants.md rename to docs/adr/adr-0002-passports-ControlledAccessGrants.md diff --git a/docs/adr/index.md b/docs/adr/index.md new file mode 100644 index 0000000..79e245f --- /dev/null +++ b/docs/adr/index.md @@ -0,0 +1,75 @@ +An Architecture Decision Record (ADR) is a short document that captures an important engineering decision about a system, along with its context and consequences. It creates a historical log of *why* key architectural choices were made, so future readers can understand the reasoning and avoid repeating old discussions. + +## What an ADR is + +An ADR typically: + +* Describes a *single* significant decision (e.g. "Use Postgres instead of MySQL"). +* Records the context and constraints at the time of the decision. +* Explains the chosen option and alternatives considered. +* States the consequences (both positive and negative). +* Is immutable once accepted; changes are recorded as new ADRs that supersede older ones. + +They are usually stored in a dedicated directory like `docs/adr/` and named with an ordered prefix, for example: + +* `adr/0001-use-rest-api.md` +* `adr/0002-choose-postgres.md` + +This ordering helps track the evolution of architecture over time. + +## How to read an ADR + +Most ADRs follow a consistent template. When you open one (for example via your `docs/adr/index.md` list or nav): + +1. **Title / ID** + At the top, find the ADR number and descriptive title. This tells you *what* decision is being documented. + +2. **Status** + Look for a `Status` section, often one of: + * `Proposed` \- under discussion. + * `Accepted` \- this is the current decision. + * `Superseded by ADR-XXXX` \- replaced by a newer decision. + Focus mainly on `Accepted` ADRs; if it is superseded, go read the newer one. + +3. **Context** + This section explains: + * The problem being solved. + * Relevant constraints (deadlines, existing systems, team skills). + * Requirements or forces that influenced the decision. + Read this to understand *why* there was a decision to make at all. + +4. **Decision** + A concise statement of what was chosen. For example: + *“We will use PostgreSQL as the primary relational database for all services.”* + This is the core answer you are looking for. + +5. **Consequences** + Details the impact of the decision: + * Benefits: what becomes easier or better. + * Drawbacks: trade\-offs, new limitations, or risks. + * Follow\-up tasks or required changes. + This helps you understand how the decision affects current and future work. + +6. **Alternatives / Rationale (if present)** + Some ADRs include: + * Options considered and why they were rejected. + * Links to discussions, tickets, or design docs. + This is useful when you wonder *“Why didn’t we just do X?”*. + +## How to use ADRs in practice + +* **Before changing architecture**: + Search the `adr/` directory for relevant ADRs and read: + * The latest `Accepted` ADR on that topic. + * Any ADRs that *supersede* older ones. + +* **When confused about a pattern or technology choice**: + Find the ADR whose title matches that area (e.g. logging, databases, APIs) to see the original reasoning. + +* **When proposing a change**: + If you need to overturn an existing decision, write a new ADR that: + * References the old one. + * Marks the old one as `Superseded`. + * Explains new context or requirements. + +By consistently reading ADRs this way, you get both the *current architectural rules* and the *history* that explains how the system ended up in its present shape. \ No newline at end of file diff --git a/docs/js/mermaid-init.js b/docs/js/mermaid-init.js new file mode 100644 index 0000000..adf8070 --- /dev/null +++ b/docs/js/mermaid-init.js @@ -0,0 +1,5 @@ +document.addEventListener("DOMContentLoaded", function () { + if (window.mermaid) { + window.mermaid.initialize({ startOnLoad: true }); + } +}); diff --git a/mkdocs.yml b/mkdocs.yml index 9c7dc77..a84e8ac 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -6,3 +6,22 @@ nav: - Configuration: configuration.md - Deployment: deployment.md - Troubleshooting: troubleshooting.md + - Architecture: + - Overview: adr/index.md + - ADRs: + - ADR 0001 - openapi request/response validation: adr/adr-0001-request-response-validation.md + - ADR 0002 - ga4gh passports for authorization: adr/adr-0002-passports-ControlledAccessGrants.md + +markdown_extensions: + - admonition + - toc: + permalink: true + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + +extra_javascript: + - https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js + - js/mermaid-init.js From 9e968b18c27adcc69d8f315f59c08b84b052bb34 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Thu, 25 Dec 2025 17:34:58 -0800 Subject: [PATCH 09/30] fix bad merge --- cmd/server/main.go | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/cmd/server/main.go b/cmd/server/main.go index 85d08bb..6b92550 100644 --- a/cmd/server/main.go +++ b/cmd/server/main.go @@ -75,18 +75,13 @@ func main() { log.Fatal("openapi requestValidator", zap.Error(err)) } - // build the response validator middleware - // using default config + // Build the response validator middleware using the default configuration. + // The validator runs in audit mode by default (use Enforce in CI). // If the responseValidator fails to initialize, the server cannot safely start, respCfg := middleware.DefaultResponseValidatorConfig() respCfg.Mode = middleware.ResponseValidationAudit // prod default; use Enforce in CI responseValidator := middleware.NewOpenAPIResponseValidator(respCfg) - // Build the response validator middleware using the default configuration. - // The validator runs in audit mode by default (use Enforce in CI). - respCfg := DefaultResponseValidatorConfig() - respCfg.Mode = ResponseValidationAudit // prod default; use Enforce in CI - responseValidator := NewOpenAPIResponseValidator(respCfg) // Create a new Gin engine instance. Gin provides routing, middleware, // and HTTP handler abstractions. r := gin.New() From 48addf36cbb8851226086dc40a93b4399e034e42 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Thu, 25 Dec 2025 17:37:12 -0800 Subject: [PATCH 10/30] update status --- docs/adr/adr-0001-request-response-validation.md | 2 +- docs/adr/adr-0002-passports-ControlledAccessGrants.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/adr/adr-0001-request-response-validation.md b/docs/adr/adr-0001-request-response-validation.md index 3338bd6..1d56459 100644 --- a/docs/adr/adr-0001-request-response-validation.md +++ b/docs/adr/adr-0001-request-response-validation.md @@ -2,7 +2,7 @@ ## Status -Accepted +Implemented ## Date diff --git a/docs/adr/adr-0002-passports-ControlledAccessGrants.md b/docs/adr/adr-0002-passports-ControlledAccessGrants.md index d5c02c0..939bfcf 100644 --- a/docs/adr/adr-0002-passports-ControlledAccessGrants.md +++ b/docs/adr/adr-0002-passports-ControlledAccessGrants.md @@ -2,7 +2,7 @@ # ADR-002: GA4GH Passports, DRS Authorization, ControlledAccessGrants, and Signed Access ## Status -Accepted +For discussion ## Context From de6c681ef03ba22311214519cbf606f49ab34b58 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Thu, 25 Dec 2025 17:42:00 -0800 Subject: [PATCH 11/30] github pages --- .github/workflows/gh-pages.yml | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 .github/workflows/gh-pages.yml diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml new file mode 100644 index 0000000..7cc2564 --- /dev/null +++ b/.github/workflows/gh-pages.yml @@ -0,0 +1,33 @@ +name: Publish MkDocs site + +on: + push: + branches: + - main + - master + workflow_dispatch: + +permissions: + contents: write + +jobs: + build-deploy: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install mkdocs mkdocs-material mkdocs-mermaid2-plugin pymdown-extensions + + - name: Build and deploy + run: | + mkdocs gh-deploy --force + From 06db677f5f4bd986d2c8cebcd79255938b5ed417 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Thu, 25 Dec 2025 17:44:53 -0800 Subject: [PATCH 12/30] github pages --- .github/workflows/gh-pages.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/gh-pages.yml b/.github/workflows/gh-pages.yml index 7cc2564..a91f3fa 100644 --- a/.github/workflows/gh-pages.yml +++ b/.github/workflows/gh-pages.yml @@ -4,7 +4,8 @@ on: push: branches: - main - - master + - development + - 'feature/*' workflow_dispatch: permissions: From 0d17e12a082b500d52781cd0d0fe3860d1ea80a2 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Fri, 26 Dec 2025 09:56:33 -0800 Subject: [PATCH 13/30] improve github pages --- .gitignore | 2 ++ Dockerfile.mkdocs | 5 ++++ Makefile | 8 +++++- .../adr-0001-request-response-validation.md | 26 +++++++++---------- ...r-0002-passports-ControlledAccessGrants.md | 3 +++ docs/js/mermaid-init.js | 5 ---- mkdocs.yml | 9 ++++--- 7 files changed, 34 insertions(+), 24 deletions(-) create mode 100644 Dockerfile.mkdocs delete mode 100644 docs/js/mermaid-init.js diff --git a/.gitignore b/.gitignore index 93d618c..8a5a23a 100644 --- a/.gitignore +++ b/.gitignore @@ -28,3 +28,5 @@ coverage.* .idea/ .vscode/ +site/ +ga4gh/ diff --git a/Dockerfile.mkdocs b/Dockerfile.mkdocs new file mode 100644 index 0000000..ea0ea28 --- /dev/null +++ b/Dockerfile.mkdocs @@ -0,0 +1,5 @@ +# syntax=docker/dockerfile:1 +FROM squidfunk/mkdocs-material:latest + +RUN pip install --no-cache-dir mkdocs-mermaid2-plugin + diff --git a/Makefile b/Makefile index 2cda8e4..3323d97 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,8 @@ SHELL := /bin/bash OPENAPI ?= ga4gh/data-repository-service-schemas/openapi/data_repository_service.openapi.yaml OAG_IMAGE ?= openapitools/openapi-generator-cli:latest -MKDOCS_IMAGE ?= squidfunk/mkdocs-material:latest +MKDOCS_IMAGE ?= mkdocs-material-mermaid:latest + .PHONY: gen gen: @@ -32,6 +33,11 @@ test: serve: go run ./cmd/server $(ARGS) + +.PHONY: docs-image +docs-image: + docker build -f Dockerfile.mkdocs -t mkdocs-material-mermaid:latest . + .PHONY: docs docs: docker run --rm -it \ diff --git a/docs/adr/adr-0001-request-response-validation.md b/docs/adr/adr-0001-request-response-validation.md index 1d56459..12c4926 100644 --- a/docs/adr/adr-0001-request-response-validation.md +++ b/docs/adr/adr-0001-request-response-validation.md @@ -31,27 +31,25 @@ Implement OpenAPI validation middleware using **kin-openapi** (`openapi3filter`) Black-box contract testing complements runtime validation in CI. -## Mermaid diagram +## OpenAPI usage ```mermaid flowchart TB - Client[Client] -->|HTTP Request| Gin[gin Engine] - + Spec[OpenAPI Spec local or embedded] --> CI + CI[CI make gen] --> Contract[Contract Tests] + Spec --> Contract + Spec --> MiddlewareChain + Contract --> Report[Fail build on violations] + subgraph MiddlewareChain[Middleware Chain] - LogReq[Request Logging redact auth] --> ReqVal[OpenAPI Request Validation enforce] - ReqVal --> Handler[Application Handlers generated] + LogReq[Logging redact auth] --> ReqVal[OpenAPI Request Validation enforce] + ReqVal --> Handler Handler --> RespVal[OpenAPI Response Validation audit or enforce] - RespVal --> Commit[Late Commit Writer buffered] + RespVal --> Commit end + + MiddlewareChain --> Gin[GIN HTTP Server] - Gin --> MiddlewareChain -->|HTTP Response| Client - - Spec[OpenAPI Spec local or embedded] --> ReqVal - Spec --> RespVal - - CI[CI Pipeline] --> Contract[Contract Tests] - Spec --> Contract - Contract --> Report[Fail build on violations] ``` diff --git a/docs/adr/adr-0002-passports-ControlledAccessGrants.md b/docs/adr/adr-0002-passports-ControlledAccessGrants.md index 939bfcf..a7e431e 100644 --- a/docs/adr/adr-0002-passports-ControlledAccessGrants.md +++ b/docs/adr/adr-0002-passports-ControlledAccessGrants.md @@ -77,10 +77,13 @@ graph TD sequenceDiagram participant U as User participant P as Passport Broker + participant A as Arborist participant D as DRS participant S as Storage U->>P: Authenticate & request Passport + P-->>A: Authenticate, get Roles + P->P: Translate Roles -> Visas P-->>U: Passport-ControlledAccessGrants U->>D: POST /objects/{object_id} + Passport diff --git a/docs/js/mermaid-init.js b/docs/js/mermaid-init.js deleted file mode 100644 index adf8070..0000000 --- a/docs/js/mermaid-init.js +++ /dev/null @@ -1,5 +0,0 @@ -document.addEventListener("DOMContentLoaded", function () { - if (window.mermaid) { - window.mermaid.initialize({ startOnLoad: true }); - } -}); diff --git a/mkdocs.yml b/mkdocs.yml index a84e8ac..0b20148 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -12,6 +12,11 @@ nav: - ADR 0001 - openapi request/response validation: adr/adr-0001-request-response-validation.md - ADR 0002 - ga4gh passports for authorization: adr/adr-0002-passports-ControlledAccessGrants.md +plugins: + - search + - mermaid2 + + markdown_extensions: - admonition - toc: @@ -21,7 +26,3 @@ markdown_extensions: - name: mermaid class: mermaid format: !!python/name:pymdownx.superfences.fence_code_format - -extra_javascript: - - https://cdn.jsdelivr.net/npm/mermaid@10/dist/mermaid.min.js - - js/mermaid-init.js From 976f3c54aeb9d104ade738330bddd3f28cab5074 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Fri, 26 Dec 2025 10:09:50 -0800 Subject: [PATCH 14/30] tweak mermaid config --- mkdocs.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 0b20148..0874f7b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -21,8 +21,8 @@ markdown_extensions: - admonition - toc: permalink: true - - pymdownx.superfences: - custom_fences: - - name: mermaid - class: mermaid - format: !!python/name:pymdownx.superfences.fence_code_format +# - pymdownx.superfences: +# custom_fences: +# - name: mermaid +# class: mermaid +# format: !!python/name:pymdownx.superfences.fence_code_format From e8e5155387021fd808ab6e7b9272908f1bb2832f Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Fri, 26 Dec 2025 10:18:38 -0800 Subject: [PATCH 15/30] tweak mermaid config --- mkdocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yml b/mkdocs.yml index 0874f7b..1b23a58 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -21,6 +21,7 @@ markdown_extensions: - admonition - toc: permalink: true + - pymdownx.superfences # - pymdownx.superfences: # custom_fences: # - name: mermaid From fe9391cbfa919f4c4149028de520181ec3ad93d0 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Fri, 26 Dec 2025 11:45:35 -0800 Subject: [PATCH 16/30] Fix Mermaid rendering on GitHub Pages with correct SRI hash (#7) * Initial plan * Switch to mkdocs-material theme with Mermaid support Co-authored-by: bwalsh <47808+bwalsh@users.noreply.github.com> * Add SRI integrity hash for Mermaid.js security Co-authored-by: bwalsh <47808+bwalsh@users.noreply.github.com> * Add DOMContentLoaded event listener for Mermaid initialization Co-authored-by: bwalsh <47808+bwalsh@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: bwalsh <47808+bwalsh@users.noreply.github.com> --- mkdocs.yml | 12 +++++++++++- overrides/main.html | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 6a13417..820663c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,13 +1,23 @@ site_name: drs-server theme: - name: readthedocs + name: material custom_dir: overrides + features: + - content.code.annotate + +plugins: + - search + markdown_extensions: + - admonition + - toc: + permalink: true - pymdownx.superfences: custom_fences: - name: mermaid class: mermaid format: !!python/name:pymdownx.superfences.fence_code_format + nav: - Home: index.md - Configuration: configuration.md diff --git a/overrides/main.html b/overrides/main.html index d8972c4..be68d8d 100644 --- a/overrides/main.html +++ b/overrides/main.html @@ -2,7 +2,7 @@ {% block scripts %} {{ super() }} - + + From e09d66d0888e9d592091f8d0263c240e7cef021f Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Fri, 26 Dec 2025 12:04:35 -0800 Subject: [PATCH 19/30] remove plugin --- mkdocs.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/mkdocs.yml b/mkdocs.yml index 0a0dd5a..a7a4511 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -19,7 +19,6 @@ nav: plugins: - search - - mermaid2 markdown_extensions: From 0a3e3a0671aacdff0602b3b933655e0949a24216 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Sun, 28 Dec 2025 10:06:13 -0800 Subject: [PATCH 20/30] deprecate --- overrides/main.html | 7 ------- 1 file changed, 7 deletions(-) diff --git a/overrides/main.html b/overrides/main.html index 24b43f2..4e7cef2 100644 --- a/overrides/main.html +++ b/overrides/main.html @@ -2,11 +2,4 @@ {% block scripts %} {{ super() }} - - {% endblock %} From 173e9600d03c6079d2ba26916369342fe81eff59 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Sun, 28 Dec 2025 10:07:55 -0800 Subject: [PATCH 21/30] adds docs-build --- Makefile | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3323d97..b2b5182 100644 --- a/Makefile +++ b/Makefile @@ -44,4 +44,12 @@ docs: -v "$(PWD):/docs" \ -p 8000:8000 \ $(MKDOCS_IMAGE) \ - serve -a 0.0.0.0:8000 \ No newline at end of file + serve -a 0.0.0.0:8000 + +.PHONY: docs-build +docs-build: + docker run --rm -it \ + -v "$(PWD):/docs" \ + -p 8000:8000 \ + $(MKDOCS_IMAGE) \ + build \ No newline at end of file From 02a56445e0c9ddd2f9b49083ad75a94445d52c76 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Sun, 28 Dec 2025 10:08:29 -0800 Subject: [PATCH 22/30] plugin --- mkdocs.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index a7a4511..339eeac 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,16 +13,21 @@ nav: - Troubleshooting: troubleshooting.md - Architecture: - Overview: adr/index.md - - ADRs: - - ADR 0001 - openapi request/response validation: adr/adr-0001-request-response-validation.md - - ADR 0002 - ga4gh passports for authorization: adr/adr-0002-passports-ControlledAccessGrants.md + - Topics: + - openapi request/response validation: adr/adr-0001-request-response-validation.md + - ga4gh passports for authorization: adr/adr-0002-passports-ControlledAccessGrants.md plugins: - search - + - mermaid2 markdown_extensions: - admonition - toc: permalink: true - pymdownx.superfences + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format From f0098dbea39cc2a6420d3396b6b55be948b623ee Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Sun, 28 Dec 2025 10:08:46 -0800 Subject: [PATCH 23/30] misc --- docs/adr/adr-0002-passports-ControlledAccessGrants.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/adr/adr-0002-passports-ControlledAccessGrants.md b/docs/adr/adr-0002-passports-ControlledAccessGrants.md index a7e431e..a0c6272 100644 --- a/docs/adr/adr-0002-passports-ControlledAccessGrants.md +++ b/docs/adr/adr-0002-passports-ControlledAccessGrants.md @@ -2,7 +2,7 @@ # ADR-002: GA4GH Passports, DRS Authorization, ControlledAccessGrants, and Signed Access ## Status -For discussion +Proposed ## Context From 1035a8142bf802e66984036cdcadb85aa14f1f44 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 29 Dec 2025 08:24:42 -0800 Subject: [PATCH 24/30] improve diagram --- docs/adr/adr-0001-request-response-validation.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/adr/adr-0001-request-response-validation.md b/docs/adr/adr-0001-request-response-validation.md index 12c4926..088642d 100644 --- a/docs/adr/adr-0001-request-response-validation.md +++ b/docs/adr/adr-0001-request-response-validation.md @@ -31,14 +31,17 @@ Implement OpenAPI validation middleware using **kin-openapi** (`openapi3filter`) Black-box contract testing complements runtime validation in CI. +## Implementation + +See https://github.com/calypr/drs-server/pull/2 + ## OpenAPI usage ```mermaid flowchart TB - Spec[OpenAPI Spec local or embedded] --> CI - CI[CI make gen] --> Contract[Contract Tests] - Spec --> Contract - Spec --> MiddlewareChain + Spec[OpenAPI Spec] --> CI + CI[CI make gen] --> Contract[Bundle Spec, Model, Handler stubs] + Contract --> MiddlewareChain Contract --> Report[Fail build on violations] subgraph MiddlewareChain[Middleware Chain] From f9fa4ce07eaba5bb70ff4415f4eb59444107cfdf Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 29 Dec 2025 08:32:50 -0800 Subject: [PATCH 25/30] improve diagram --- docs/adr/adr-0001-request-response-validation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/adr/adr-0001-request-response-validation.md b/docs/adr/adr-0001-request-response-validation.md index 088642d..8e183da 100644 --- a/docs/adr/adr-0001-request-response-validation.md +++ b/docs/adr/adr-0001-request-response-validation.md @@ -39,7 +39,7 @@ See https://github.com/calypr/drs-server/pull/2 ```mermaid flowchart TB - Spec[OpenAPI Spec] --> CI + Spec[OpenAPI Spec Git Submodule] --> CI CI[CI make gen] --> Contract[Bundle Spec, Model, Handler stubs] Contract --> MiddlewareChain Contract --> Report[Fail build on violations] From 4408f374b8e0fd007acce0ab408419891774c058 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 29 Dec 2025 08:33:07 -0800 Subject: [PATCH 26/30] adds comments --- Makefile | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index b2b5182..e79a1b6 100644 --- a/Makefile +++ b/Makefile @@ -3,14 +3,13 @@ OPENAPI ?= ga4gh/data-repository-service-schemas/openapi/data_repository_service OAG_IMAGE ?= openapitools/openapi-generator-cli:latest MKDOCS_IMAGE ?= mkdocs-material-mermaid:latest - +# Generate Go server stubs from the OpenAPI spec and post\-process the bundle .PHONY: gen gen: @mkdir -p .tmp internal/apigen - # OpenAPI Generator (Go server stub) # delete previous generated code rm -rf internal/apigen - # generate new code + # generate new Go Gin server code using OpenAPI Generator in Docker docker run --rm \ -v "$(PWD):/local" \ $(OAG_IMAGE) generate \ @@ -20,24 +19,26 @@ gen: --git-user-id calypr \ -i /local/$(OPENAPI) \ -o /local/internal/apigen - # a bundle is created at internal/apigen/openapi.yaml, remove examples from it - # as many are not compliant with the spec or seem to be randomly generated + # remove non\-compliant or random examples from the generated OpenAPI bundle go run ./cmd/openapi-remove-examples +# Run the full Go test suite with a clean test cache .PHONY: test test: go clean -testcache go test -v ./... +# Run the application server locally, passing optional args via ARGS .PHONY: serve serve: go run ./cmd/server $(ARGS) - +# Build the MkDocs Docker image used to serve and build documentation .PHONY: docs-image docs-image: docker build -f Dockerfile.mkdocs -t mkdocs-material-mermaid:latest . +# Serve the MkDocs documentation locally with live reload .PHONY: docs docs: docker run --rm -it \ @@ -46,10 +47,11 @@ docs: $(MKDOCS_IMAGE) \ serve -a 0.0.0.0:8000 +# Build the static MkDocs documentation site into the local site directory .PHONY: docs-build docs-build: docker run --rm -it \ -v "$(PWD):/docs" \ -p 8000:8000 \ $(MKDOCS_IMAGE) \ - build \ No newline at end of file + build From a5a1fa020ee46edf1b7cabc13cec551d32a94f0b Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 29 Dec 2025 08:33:24 -0800 Subject: [PATCH 27/30] improve diagram --- README.md | 49 ++++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index b3e6e35..abe1562 100644 --- a/README.md +++ b/README.md @@ -16,28 +16,31 @@ This project consumes the official GA4GH `data-repository-service-schemas` as a ```mermaid -graph TD - 0[ga4gh/data-repository-service-schemas DRS OpenAPI spec submodule] --> B - A[Makefile] --> B[make gen] - A --> C2[cmd/server] - A --> D[make test] - A --> E[make docs/] - - B --> G[internal/apigen generated DRS server code] - B --> H[cmd/openapi-remove-examples clean OpenAPI helper] --> H2[internal/apigen/api/openapi.yaml] - - H2 --> C2 - D --> C2 - G --> C2 -``` +flowchart TB + Spec[OpenAPI Spec Git Submodule] --> CI + CI[CI make gen] --> Contract[Bundle Spec, Model, Handler stubs] + Contract --> MiddlewareChain + Contract --> Report[Fail build on violations] + + subgraph MiddlewareChain[Middleware Chain] + LogReq[Logging redact auth] --> ReqVal[OpenAPI Request Validation enforce] + ReqVal --> Handler + Handler --> RespVal[OpenAPI Response Validation audit or enforce] + RespVal --> Commit + end + + MiddlewareChain --> Gin[GIN HTTP Server] -* Makefile - targets for generation, tests, docs, and running the server. - * `make gen` - generates the DRS server code from the OpenAPI spec. - * ga4gh/data-repository-service-schemas - GA4GH DRS OpenAPI spec (Git submodule). - * internal/apigen - generated DRS server code. - * cmd/openapi-remove-examples - helper to clean the bundled OpenAPI. - * `make serve` - runs the DRS server. - * cmd/server - main HTTP server (uses gin-gonic/gin). - * `make test` - launches server, runs integration tests. - * `make docs` - serves documentation with MkDocs. +``` + +* Makefile \- targets for generation, tests, docs, and running the server. + * `make gen` \- generates the DRS server code from the OpenAPI spec and cleans bundled examples. + * `ga4gh/data-repository-service-schemas` \- GA4GH DRS OpenAPI spec (Git submodule). + * `internal/apigen` \- generated DRS server code. + * `cmd/openapi-remove-examples` \- helper to clean the bundled OpenAPI. + * `make serve` \- runs the DRS server (`cmd/server`) with optional `ARGS` passed through. + * `make test` \- cleans the Go test cache and runs all tests with verbose output. + * `make docs-image` \- builds the Docker image used for MkDocs docs (`Dockerfile.mkdocs` to `mkdocs-material-mermaid:latest`). + * `make docs` \- serves the documentation locally with MkDocs in Docker on port `8000`. + * `make docs-build` \- builds the static MkDocs site into the local `site` directory using Docker. From 80aa56143b3a6d7b3ca0abf02bf39aa66f1a4554 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 29 Dec 2025 08:58:33 -0800 Subject: [PATCH 28/30] adds persistence --- docs/adr/adr-0003-persistence-layer-design.md | 160 ++++++++++++++++++ mkdocs.yml | 5 +- 2 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 docs/adr/adr-0003-persistence-layer-design.md diff --git a/docs/adr/adr-0003-persistence-layer-design.md b/docs/adr/adr-0003-persistence-layer-design.md new file mode 100644 index 0000000..c77c8fd --- /dev/null +++ b/docs/adr/adr-0003-persistence-layer-design.md @@ -0,0 +1,160 @@ +# ADR 0003: Persistence Layer Choice for DRS +--- +## Prior Art + +See: See https://github.com/ga4gh/ga4gh-starter-kit-drs/blob/develop/database/postgresql/create-tables.sql + +## What the schema tells us (important) + +The DRS starter-kit schema has these defining traits: + +### 1. **DB-first, SQL-authored** + +* Canonical schema lives in SQL (`create-tables.sql`) +* No migration framework implied +* Tables are already normalized and intentional + +### 2. **Protocol-driven, not CRUD** + +Key tables: + +* `drs_object` +* `file_access_object` +* `access_method` +* `bundle_object` +* `passport_visa` +* `drs_object_visa` + +These exist to **implement GA4GH DRS semantics**, not generic CRUD entities. + +### 3. **Authorization is relational** + +* `drs_object_visa` is a join table +* Access decisions require **joins**, not object loading +* Queries like: + + > “Which access methods are visible given a passport?” + +### 4. **Very little ORM-friendly behavior** + +* No cascading business logic +* No polymorphic inheritance +* No lifecycle hooks +* Mostly read-heavy, join-heavy + +This is *exactly* the kind of schema where “smart ORMs” become a liability. + +--- + +## Best choice for THIS application + +### ✅ **sqlc + pgx** (strong recommendation) + +This schema is a textbook case for **explicit SQL with type-safe bindings**. + +### Why sqlc fits perfectly here + +| Schema reality | Why sqlc wins | +| ---------------------------- | ------------------------------------------ | +| DB is source of truth | sqlc generates from SQL, not structs | +| Join-heavy auth | You *want* to write these joins explicitly | +| Stable identifiers | No ORM identity map issues | +| Protocol correctness matters | SQL is precise and auditable | +| Mostly reads | Zero ORM overhead | +| GA4GH compliance | Easier to prove correctness | + +You will inevitably write queries like: + +```sql +SELECT am.* +FROM access_method am +JOIN file_access_object fao ON fao.id = am.file_access_object_id +JOIN drs_object_visa dov ON dov.drs_object_id = fao.drs_object_id +JOIN passport_visa v ON v.id = dov.visa_id +WHERE v.issuer = $1; +``` + +That is **exactly** what sqlc is built for. + +--- + +## What about ORMs? + +### ❌ Ent (not a good fit *here*) + +Ent is excellent — **but only when Go owns the schema**. + +Problems for this project: + +* You’d duplicate the SQL schema in Go +* Risk schema drift +* Authorization joins become awkward +* JSON / protocol semantics aren’t buying you anything + +### ⚠️ sqlboiler (acceptable, but second best) + +If you *must* have ORM-style models: + +* sqlboiler can generate from this schema +* But you’ll still write lots of custom SQL +* You’ll end up halfway between ORM and sqlc anyway + +### ❌ GORM (actively discouraged) + +* Reflection-heavy +* Implicit behavior +* Poor fit for auth-driven joins +* Harder to reason about correctness in a standards-based service + +--- + +## Concrete recommendation + +**Use this stack:** + +```text +Postgres +pgx +sqlc +handwritten SQL +``` + +**Do NOT:** + +* Try to model `passport_visa` or `drs_object_visa` as “objects” +* Hide authorization logic in application code +* Let an ORM invent query behavior for you + +--- + +## Suggested project layout + +```text +database/ + postgresql/ + create-tables.sql + +queries/ + drs_objects.sql + access_methods.sql + bundles.sql + authorization.sql + +internal/db/ + sqlc/ + queries.sql.go + models.go +``` + +Each DRS endpoint maps cleanly to **1–2 SQL queries**. +Handlers stay thin. Semantics stay correct. + +--- + +## Bottom line + +> For *this exact schema* and a GA4GH DRS implementation: +> +> **sqlc + pgx is the best possible choice.** + + diff --git a/mkdocs.yml b/mkdocs.yml index 339eeac..2e32e78 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,7 +1,8 @@ site_name: drs-server theme: name: material - custom_dir: overrides + # In MkDocs, the `overrides/` directory is used to customize the HTML templates and assets provided by the theme without modifying the theme itself. + custom_dir: docs/overrides features: - content.code.annotate @@ -16,6 +17,7 @@ nav: - Topics: - openapi request/response validation: adr/adr-0001-request-response-validation.md - ga4gh passports for authorization: adr/adr-0002-passports-ControlledAccessGrants.md + - persistence layer design: adr/adr-0003-persistence-layer-design.md plugins: - search @@ -25,7 +27,6 @@ markdown_extensions: - admonition - toc: permalink: true - - pymdownx.superfences - pymdownx.superfences: custom_fences: - name: mermaid From 985218300a11a64939d5ff2b99ee6c7ece74d375 Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 29 Dec 2025 08:58:49 -0800 Subject: [PATCH 29/30] TODO --- docs/configuration.md | 4 ++-- docs/deployment.md | 2 +- docs/overrides/main.html | 5 +++++ 3 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 docs/overrides/main.html diff --git a/docs/configuration.md b/docs/configuration.md index 41837af..a74870b 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -2,8 +2,8 @@ ## Environment variables -Describe any environment variables used by `cmd/server` here. +🚧 TODO: Describe any environment variables used by `cmd/server` here. ## Command\-line flags -Document flags accepted by `make serve` / `cmd/server` here. +🚧 TODO: Document flags accepted by `make serve` / `cmd/server` here. diff --git a/docs/deployment.md b/docs/deployment.md index 4814f8c..5e19589 100644 --- a/docs/deployment.md +++ b/docs/deployment.md @@ -7,4 +7,4 @@ ## Production -Describe how to deploy `drs-server` to your target environment (e.g. Kubernetes, Docker, VM). +🚧 TODO: Describe how to deploy `drs-server` to your target environment (e.g. Kubernetes, Docker, VM). diff --git a/docs/overrides/main.html b/docs/overrides/main.html new file mode 100644 index 0000000..4e7cef2 --- /dev/null +++ b/docs/overrides/main.html @@ -0,0 +1,5 @@ +{% extends "base.html" %} + +{% block scripts %} + {{ super() }} +{% endblock %} From e3724718749b0b548e84c302173d0fe44b745eff Mon Sep 17 00:00:00 2001 From: Brian Walsh Date: Mon, 29 Dec 2025 09:04:16 -0800 Subject: [PATCH 30/30] adds link to pages --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index abe1562..c28b8ec 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,7 @@ A lightweight reference implementation of a GA4GH Data Repository Service (DRS) - [Overview](#overview) - [Quickstart](QUICKSTART.md) - [Contributing](CONTRIBUTING.md) +- [GitHub Pages](https://calypr.github.io/drs-server/) - [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)