Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sec: fix s3 and gcs host checks #512

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 109 additions & 1 deletion detect_gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ package getter
import (
"fmt"
"net/url"
"path"
"regexp"
"strings"
"unicode"
)

// GCSDetector implements Detector to detect GCS URLs and turn
Expand All @@ -18,23 +21,39 @@ func (d *GCSDetector) Detect(src, _ string) (string, bool, error) {
return "", false, nil
}

if strings.Contains(src, "googleapis.com/") {
if strings.Contains(src, ".googleapis.com/") {
dduzgun-security marked this conversation as resolved.
Show resolved Hide resolved
return d.detectHTTP(src)
}

return "", false, nil
}

func (d *GCSDetector) detectHTTP(src string) (string, bool, error) {
src = path.Clean(src)

parts := strings.Split(src, "/")
if len(parts) < 5 {
return "", false, fmt.Errorf(
"URL is not a valid GCS URL")
}

version := parts[2]
if !isValidGCSVersion(version) {
return "", false, fmt.Errorf(
"GCS URL version is not valid")
}

bucket := parts[3]
if !isValidGCSBucketName(bucket) {
return "", false, fmt.Errorf(
"GCS URL bucket name is not valid")
}

object := strings.Join(parts[4:], "/")
if !isValidGCSObjectName(object) {
return "", false, fmt.Errorf(
"GCS URL object name is not valid")
}

url, err := url.Parse(fmt.Sprintf("https://www.googleapis.com/storage/%s/%s/%s",
version, bucket, object))
Expand All @@ -44,3 +63,92 @@ func (d *GCSDetector) detectHTTP(src string) (string, bool, error) {

return "gcs::" + url.String(), true, nil
}

func isValidGCSVersion(version string) bool {
versionPattern := `^v\d+$`
if matched, _ := regexp.MatchString(versionPattern, version); !matched {
return false
}
return true
}

// Validate the bucket name using the following rules: https://cloud.google.com/storage/docs/naming-buckets
func isValidGCSBucketName(bucket string) bool {
// Rule 1: Must be between 3 and 63 characters (or up to 222 if it contains dots, each component up to 63 chars)
if len(bucket) < 3 || len(bucket) > 63 {
if len(bucket) > 63 && len(bucket) <= 222 {
// If it contains dots, each segment between dots must be <= 63 chars
components := strings.Split(bucket, ".")
for _, component := range components {
if len(component) > 63 {
return false
}
}
} else {
return false
}
}

// Rule 2: Bucket name cannot start or end with a hyphen, dot, or underscore
if bucket[0] == '-' || bucket[0] == '.' || bucket[len(bucket)-1] == '-' || bucket[len(bucket)-1] == '.' || bucket[len(bucket)-1] == '_' {
dduzgun-security marked this conversation as resolved.
Show resolved Hide resolved
return false
}

// Rule 3: Bucket name cannot contain spaces
if strings.Contains(bucket, " ") {
return false
}

// Rule 4: Bucket name cannot be an IP address (only digits and dots, e.g., 192.168.5.4)
ipPattern := `^(\d{1,3}\.){3}\d{1,3}$`
dduzgun-security marked this conversation as resolved.
Show resolved Hide resolved
if matched, _ := regexp.MatchString(ipPattern, bucket); matched {
return false
}

// Rule 5: Bucket name cannot start with "goog"
if strings.HasPrefix(bucket, "goog") {
return false
}

// Rule 6: Bucket name cannot contain "google" or common misspellings like "g00gle"
googlePattern := `google|g00gle`
dduzgun-security marked this conversation as resolved.
Show resolved Hide resolved
if matched, _ := regexp.MatchString(googlePattern, bucket); matched {
return false
}

// Rule 7: Bucket name can only contain lowercase letters, digits, dashes, underscores, and dots
bucketPattern := `^[a-z0-9\-_\.]+$`
if matched, _ := regexp.MatchString(bucketPattern, bucket); !matched {
return false
}

return true
}

// Validate the object name using the following rules: https://cloud.google.com/storage/docs/naming-objects
func isValidGCSObjectName(object string) bool {
// Rule 1: Object names cannot contain Carriage Return (\r) or Line Feed (\n) characters
if strings.Contains(object, "\r") || strings.Contains(object, "\n") {
return false
}

// Rule 2: Object names cannot start with '.well-known/acme-challenge/'
if strings.HasPrefix(object, ".well-known/acme-challenge/") {
return false
}

// Rule 3: Object names cannot be exactly '.' or '..'
dduzgun-security marked this conversation as resolved.
Show resolved Hide resolved
if object == "." || object == ".." {
return false
}

// Rule 4: Ensure that the object name contains only valid Unicode characters
// (for simplicity, let's ensure it's not empty and does not contain any forbidden control characters)
for _, r := range object {
if !unicode.IsPrint(r) && !unicode.IsSpace(r) && r != '.' && r != '-' && r != '/' {
return false
}
}

return true
}
154 changes: 154 additions & 0 deletions detect_gcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ func TestGCSDetector(t *testing.T) {
"www.googleapis.com/storage/v1/foo/bar.baz",
"gcs::https://www.googleapis.com/storage/v1/foo/bar.baz",
},
{
"www.googleapis.com/storage/v2/foo/bar/toor.baz",
"gcs::https://www.googleapis.com/storage/v2/foo/bar/toor.baz",
},
}

pwd := "/pwd"
Expand All @@ -42,3 +46,153 @@ func TestGCSDetector(t *testing.T) {
}
}
}

func TestGCSDetector_MalformedDetectHTTP(t *testing.T) {
cases := []struct {
Name string
Input string
Expected string
Output string
}{
{
"valid url",
"www.googleapis.com/storage/v1/my-bucket/foo/bar",
"",
"gcs::https://www.googleapis.com/storage/v1/my-bucket/foo/bar",
},
{
"empty url",
"",
"",
"",
},
{
"not valid url length",
"www.googleapis.com.invalid/storage/v1/",
"URL is not a valid GCS URL",
"",
},
{
"not valid version",
"www.googleapis.com/storage/invalid-version/my-bucket/foo",
"GCS URL version is not valid",
"",
},
{
"not valid bucket",
"www.googleapis.com/storage/v1/127.0.0.1/foo",
"GCS URL bucket name is not valid",
"",
},
{
"not valid object",
"www.googleapis.com/storage/v1/my-bucket/.well-known/acme-challenge/foo",
"GCS URL object name is not valid",
"",
},
{
"path traversal on bucket",
"www.googleapis.com/storage/v1/../foo/bar",
"URL is not a valid GCS URL",
"",
}, {
"path traversal on object",
"www.googleapis.com/storage/v1/my-bucket/../../../foo/bar",
"URL is not a valid GCS URL",
"",
},
}

pwd := "/pwd"
f := new(GCSDetector)
for _, tc := range cases {
output, _, err := f.Detect(tc.Input, pwd)
if err != nil {
if err.Error() != tc.Expected {
t.Fatalf("expected error %s, got %s for %s", tc.Expected, err.Error(), tc.Name)
}
}

if output != tc.Output {
t.Fatalf("expected %s, got %s", tc.Output, output)
}
}
}

func TestIsValidGCSVersion(t *testing.T) {
cases := []struct {
Name string
Input string
Expected bool
}{
{
"valid version",
"v1",
true,
},
{
"invalid version",
"invalid1",
false,
},
}

for _, tc := range cases {
output := isValidGCSVersion(tc.Input)
if output != tc.Expected {
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
}
}
}

func TestIsValidGCSBucketName(t *testing.T) {
cases := []struct {
Name string
Input string
Expected bool
}{
{
"valid bucket name",
"my-bucket",
true,
},
{
"invalid bucket name",
"..",
false,
},
}

for _, tc := range cases {
output := isValidGCSBucketName(tc.Input)
if output != tc.Expected {
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
}
}
}

func TestIsValidGCSObjectName(t *testing.T) {
cases := []struct {
Name string
Input string
Expected bool
}{
{
"valid object name",
"my-object",
true,
},
{
"invalid object name",
"..",
false,
},
}

for _, tc := range cases {
output := isValidGCSObjectName(tc.Input)
if output != tc.Expected {
t.Fatalf("expected %t, got %t for test %s", tc.Expected, output, tc.Name)
}
}
}
4 changes: 3 additions & 1 deletion get_gcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ func (g *GCSGetter) getObject(ctx context.Context, client *storage.Client, dst,
}

func (g *GCSGetter) parseURL(u *url.URL) (bucket, path, fragment string, err error) {
if strings.Contains(u.Host, "googleapis.com") {
if strings.HasSuffix(u.Host, ".googleapis.com") {
hostParts := strings.Split(u.Host, ".")
if len(hostParts) != 3 {
err = fmt.Errorf("URL is not a valid GCS URL")
Expand All @@ -208,6 +208,8 @@ func (g *GCSGetter) parseURL(u *url.URL) (bucket, path, fragment string, err err
bucket = pathParts[3]
path = pathParts[4]
fragment = u.Fragment
} else {
err = fmt.Errorf("URL is not a valid GCS URL")
}
return
}
Expand Down
32 changes: 32 additions & 0 deletions get_gcs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,3 +233,35 @@ func TestGCSGetter_GetFile_OAuthAccessToken(t *testing.T) {
}
assertContents(t, dst, "# Main\n")
}

func Test_GCSGetter_ParseUrl_Malformed(t *testing.T) {
dduzgun-security marked this conversation as resolved.
Show resolved Hide resolved
tests := []struct {
name string
url string
}{
{
name: "invalid host suffix",
url: "https://www.googleapis.com.invalid",
},
{
name: "host suffix with a typo",
url: "https://www.googleapi.com.",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := new(GCSGetter)
u, err := url.Parse(tt.url)
if err != nil {
t.Fatalf("unexpected error: %s", err)
}
_, _, _, err = g.parseURL(u)
if err == nil {
t.Fatalf("expected error, got none")
}
if err.Error() != "URL is not a valid GCS URL" {
t.Fatalf("expected error 'URL is not a valid GCS URL', got %s", err.Error())
}
})
}
}
6 changes: 3 additions & 3 deletions get_s3.go
Original file line number Diff line number Diff line change
Expand Up @@ -252,7 +252,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
// This just check whether we are dealing with S3 or
// any other S3 compliant service. S3 has a predictable
// url as others do not
if strings.Contains(u.Host, "amazonaws.com") {
if strings.HasSuffix(u.Host, ".amazonaws.com") {
// Amazon S3 supports both virtual-hosted–style and path-style URLs to access a bucket, although path-style is deprecated
// In both cases few older regions supports dash-style region indication (s3-Region) even if AWS discourages their use.
// The same bucket could be reached with:
Expand Down Expand Up @@ -304,7 +304,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
path = pathParts[1]

}
if len(hostParts) < 3 && len(hostParts) > 5 {
if len(hostParts) < 3 || len(hostParts) > 5 {
err = fmt.Errorf("URL is not a valid S3 URL")
return
}
Expand All @@ -313,7 +313,7 @@ func (g *S3Getter) parseUrl(u *url.URL) (region, bucket, path, version string, c
} else {
pathParts := strings.SplitN(u.Path, "/", 3)
if len(pathParts) != 3 {
err = fmt.Errorf("URL is not a valid S3 compliant URL")
err = fmt.Errorf("URL is not a valid S3 URL")
return
}
bucket = pathParts[1]
Expand Down
Loading
Loading