Skip to content

Commit

Permalink
move git url parsing to github pkg
Browse files Browse the repository at this point in the history
  • Loading branch information
aliculPix4D committed Dec 13, 2023
1 parent a18efc3 commit 483a285
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 227 deletions.
82 changes: 2 additions & 80 deletions cogito/putter.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package cogito

import (
"encoding/json"
"errors"
"fmt"
"io"
"net/url"
Expand All @@ -14,6 +13,7 @@ import (
"github.com/hashicorp/go-hclog"
"github.com/sasbury/mini"

"github.com/Pix4D/cogito/github"
"github.com/Pix4D/cogito/sets"
)

Expand Down Expand Up @@ -255,7 +255,7 @@ func checkGitRepoDir(dir, hostname, owner, repo string) error {
if gitUrl == "" {
return fmt.Errorf(".git/config: key [%s]/%s: not found", section, key)
}
gu, err := parseGitPseudoURL(gitUrl)
gu, err := github.ParseGitPseudoURL(gitUrl)
if err != nil {
return fmt.Errorf(".git/config: remote: %w", err)
}
Expand All @@ -282,84 +282,6 @@ Cogito SOURCE configuration:
return nil
}

// gitURL is the result of successfully parsing a git remote URL with
// parseGitPseudoURL.
type gitURL struct {
// URL is the parsed remote URL. For SSH pseudo URLs this is the rewritten
// ssh:// form, not the original text.
URL *url.URL
// Owner is the first component of the URL path.
Owner string
// Repo is the second component of the URL path, without the ".git" suffix.
Repo string
}

// safeUrlParse is a password-safe wrapper around [url.Parse].
//
// On success it returns the parsed URL unchanged. On failure it returns only the
// underlying cause of the parse error, never the offending URL text, so that
// credentials of the form http://user:password@example.com cannot end up in logs
// or error chains.
//
// From https://github.com/golang/go/issues/53993
func safeUrlParse(rawURL string) (*url.URL, error) {
	u, parseErr := url.Parse(rawURL)
	if parseErr == nil {
		return u, nil
	}

	// url.Parse reports failures as a *url.Error, which embeds the raw URL.
	// Unwrap it and hand back just the inner cause.
	var urlErr *url.Error
	if !errors.As(parseErr, &urlErr) {
		// Unknown error shape: fall back to a generic message rather than risk
		// echoing the input.
		return nil, errors.New("invalid URL")
	}
	return nil, urlErr.Err
}

// parseGitPseudoURL attempts to parse rawURL as a git remote URL compatible with the
// Github naming conventions.
//
// It supports the following types of git pseudo URLs:
//   - ssh: git@github.com:Pix4D/cogito.git; will be rewritten to the valid URL
//     ssh://git@github.com/Pix4D/cogito.git
//   - https: https://github.com/Pix4D/cogito.git
//   - https with u:p: https://username:password@github.com/Pix4D/cogito.git
//   - http: http://github.com/Pix4D/cogito.git
//   - http with u:p: http://username:password@github.com/Pix4D/cogito.git
//
// On success the returned gitURL carries the parsed URL, the owner (first path
// component) and the repository name (second path component, with any ".git" suffix
// removed).
//
// An error is returned if rawURL cannot be parsed, if its scheme is not one of ssh,
// http or https, if its path does not have exactly the form /owner/repo, or if the
// owner or the repository name is empty. Errors about the scheme quote the URL with
// any embedded password redacted.
func parseGitPseudoURL(rawURL string) (gitURL, error) {
	workURL := rawURL
	// If ssh pseudo URL, we need to massage the rawURL ourselves :-(
	if strings.HasPrefix(workURL, "git@") {
		if strings.Count(workURL, ":") != 1 {
			return gitURL{}, fmt.Errorf("invalid git SSH URL %s: want exactly one ':'", rawURL)
		}
		// Make the URL a real URL, ready to be parsed. For example:
		// git@github.com:Pix4D/cogito.git -> ssh://git@github.com/Pix4D/cogito.git
		workURL = "ssh://" + strings.Replace(workURL, ":", "/", 1)
	}

	anyUrl, err := safeUrlParse(workURL)
	if err != nil {
		return gitURL{}, err
	}

	// Text used to quote the URL in error messages. The raw input is kept whenever
	// it carries no credentials, so messages stay unchanged for ordinary URLs.
	shownURL := rawURL
	if _, hasPassword := anyUrl.User.Password(); hasPassword {
		shownURL = anyUrl.Redacted()
	} else if strings.Contains(anyUrl.Opaque, "@") {
		// Input such as "user:password@host/owner/repo" (scheme forgotten) is parsed
		// as scheme "user" plus an opaque remainder that url.URL.Redacted does not
		// touch; drop the remainder entirely.
		shownURL = anyUrl.Scheme + ":<redacted>"
	}

	scheme := anyUrl.Scheme
	if scheme == "" {
		return gitURL{}, fmt.Errorf("invalid git URL %s: missing scheme", shownURL)
	}
	if scheme != "ssh" && scheme != "http" && scheme != "https" {
		return gitURL{}, fmt.Errorf("invalid git URL %s: invalid scheme: %s", shownURL, scheme)
	}

	// Further parse the path component of the URL to see if it complies with the GitHub
	// naming conventions.
	// Example of compliant path: /Pix4D/cogito.git, which splits into the three
	// tokens "", "Pix4D" and "cogito.git".
	tokens := strings.Split(anyUrl.Path, "/")
	if have, want := len(tokens), 3; have != want {
		return gitURL{},
			fmt.Errorf("invalid git URL: path: want: %d components; have: %d %s",
				want, have, tokens)
	}

	owner := tokens[1]
	repo := strings.TrimSuffix(tokens[2], ".git")
	// Paths such as "//cogito.git" or "/Pix4D/.git" have the right number of
	// components but do not name a repository.
	if owner == "" || repo == "" {
		return gitURL{},
			fmt.Errorf("invalid git URL: path: empty owner or repository name in %q",
				anyUrl.Path)
	}

	// All OK. Fill our gitURL struct
	gu := gitURL{
		URL:   anyUrl,
		Owner: owner,
		Repo:  repo,
	}
	return gu, nil
}

// getGitCommit looks into a git repository and extracts the commit SHA of the HEAD.
func getGitCommit(repoPath string) (string, error) {
dotGitPath := filepath.Join(repoPath, ".git")
Expand Down
147 changes: 0 additions & 147 deletions cogito/putter_private_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,10 @@ package cogito
import (
"errors"
"fmt"
"net/url"
"os"
"path/filepath"
"testing"

"github.com/google/go-cmp/cmp"
"gotest.tools/v3/assert"

"github.com/Pix4D/cogito/testhelp"
Expand Down Expand Up @@ -188,151 +186,6 @@ Cogito SOURCE configuration:
}
}

// TestParseGitPseudoURLSuccess checks that every supported flavor of git remote URL
// (SSH pseudo URL, HTTP and HTTPS, with and without credentials) is parsed into the
// expected gitURL.
func TestParseGitPseudoURLSuccess(t *testing.T) {
	// All inputs point at the same repository, so the expected values differ only in
	// scheme and user information.
	expect := func(scheme string, user *url.Userinfo) gitURL {
		return gitURL{
			URL: &url.URL{
				Scheme: scheme,
				User:   user,
				Host:   "github.com",
				Path:   "/Pix4D/cogito.git",
			},
			Owner: "Pix4D",
			Repo:  "cogito",
		}
	}

	testCases := []struct {
		name   string
		inURL  string
		wantGU gitURL
	}{
		{
			name:   "valid SSH URL",
			inURL:  "git@github.com:Pix4D/cogito.git",
			wantGU: expect("ssh", url.User("git")),
		},
		{
			name:   "valid HTTPS URL",
			inURL:  "https://github.com/Pix4D/cogito.git",
			wantGU: expect("https", nil),
		},
		{
			name:   "valid HTTP URL",
			inURL:  "http://github.com/Pix4D/cogito.git",
			wantGU: expect("http", nil),
		},
		{
			name:   "valid HTTPS URL with username:password",
			inURL:  "https://username:password@github.com/Pix4D/cogito.git",
			wantGU: expect("https", url.UserPassword("username", "password")),
		},
		{
			name:   "valid HTTP URL with username:password",
			inURL:  "http://username:password@github.com/Pix4D/cogito.git",
			wantGU: expect("http", url.UserPassword("username", "password")),
		},
	}

	// url.Userinfo has unexported fields, so compare it through its string form.
	sameUserinfo := cmp.Comparer(func(a, b *url.Userinfo) bool {
		return a.String() == b.String()
	})

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			have, err := parseGitPseudoURL(tc.inURL)
			if err != nil {
				t.Fatalf("\nhave: %s\nwant: <no error>", err)
			}

			diff := cmp.Diff(tc.wantGU, have, sameUserinfo)
			if diff != "" {
				t.Errorf("gitURL: (-want +have):\n%s", diff)
			}
		})
	}
}

// TestParseGitPseudoURLFailure checks the exact error reported for each kind of
// malformed git remote URL, including that a password embedded in an unparsable URL
// is not echoed back in the error.
func TestParseGitPseudoURLFailure(t *testing.T) {
	type failureCase struct {
		name    string
		inURL   string
		wantErr string
	}

	testCases := []failureCase{
		{
			"totally invalid URL",
			"hello",
			"invalid git URL hello: missing scheme",
		},
		{
			"invalid SSH URL",
			"git@github.com/Pix4D/cogito.git",
			"invalid git SSH URL git@github.com/Pix4D/cogito.git: want exactly one ':'",
		},
		{
			"invalid HTTPS URL",
			"https://github.com:Pix4D/cogito.git",
			`invalid port ":Pix4D" after host`,
		},
		{
			"invalid HTTP URL",
			"http://github.com:Pix4D/cogito.git",
			`invalid port ":Pix4D" after host`,
		},
		{
			"too few path components",
			"http://github.com/cogito.git",
			"invalid git URL: path: want: 3 components; have: 2 [ cogito.git]",
		},
		{
			"too many path components",
			"http://github.com/1/2/cogito.git",
			"invalid git URL: path: want: 3 components; have: 4 [ 1 2 cogito.git]",
		},
		{
			"No leaked password in invalid URL with username:password",
			"http://username:password@github.com/Pix4D/cogito.git\n",
			`net/url: invalid control character in URL`,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			// Only the error matters here; the returned value is meaningless on failure.
			_, gotErr := parseGitPseudoURL(tc.inURL)

			assert.Error(t, gotErr, tc.wantErr)
		})
	}
}

func TestGitGetCommitSuccess(t *testing.T) {
type testCase struct {
name string
Expand Down
86 changes: 86 additions & 0 deletions github/url.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
package github

import (
"errors"
"fmt"
"net/url"
"strings"
)

// GitURL is the result of successfully parsing a git remote URL with
// ParseGitPseudoURL.
type GitURL struct {
// URL is the parsed remote URL. For SSH pseudo URLs this is the rewritten
// ssh:// form, not the original text.
URL *url.URL
// Owner is the first component of the URL path.
Owner string
// Repo is the second component of the URL path, without the ".git" suffix.
Repo string
}

// safeUrlParse parses rawURL with [url.Parse] but sanitizes the failure path: the
// error it returns is only the underlying cause, stripped of the URL text, so that
// passwords of the form http://user:password@example.com are never leaked through
// error messages.
//
// From https://github.com/golang/go/issues/53993
func safeUrlParse(rawURL string) (*url.URL, error) {
	parsed, err := url.Parse(rawURL)
	if err == nil {
		return parsed, nil
	}

	// The error produced by url.Parse wraps the cause together with the URL itself;
	// keep the cause and discard the rest.
	if urlErr := (*url.Error)(nil); errors.As(err, &urlErr) {
		return nil, urlErr.Err
	}
	// Not a *url.Error: report a generic failure instead of the original error.
	return nil, errors.New("invalid URL")
}

// ParseGitPseudoURL attempts to parse rawURL as a git remote URL compatible with the
// Github naming conventions.
//
// It supports the following types of git pseudo URLs:
//   - ssh: git@github.com:Pix4D/cogito.git; will be rewritten to the valid URL
//     ssh://git@github.com/Pix4D/cogito.git
//   - https: https://github.com/Pix4D/cogito.git
//   - https with u:p: https://username:password@github.com/Pix4D/cogito.git
//   - http: http://github.com/Pix4D/cogito.git
//   - http with u:p: http://username:password@github.com/Pix4D/cogito.git
//
// On success the returned GitURL carries the parsed URL, the owner (first path
// component) and the repository name (second path component, with any ".git" suffix
// removed).
//
// An error is returned if rawURL cannot be parsed, if its scheme is not one of ssh,
// http or https, if its path does not have exactly the form /owner/repo, or if the
// owner or the repository name is empty. Errors about the scheme quote the URL with
// any embedded password redacted.
func ParseGitPseudoURL(rawURL string) (GitURL, error) {
	workURL := rawURL
	// If ssh pseudo URL, we need to massage the rawURL ourselves :-(
	if strings.HasPrefix(workURL, "git@") {
		if strings.Count(workURL, ":") != 1 {
			return GitURL{}, fmt.Errorf("invalid git SSH URL %s: want exactly one ':'", rawURL)
		}
		// Make the URL a real URL, ready to be parsed. For example:
		// git@github.com:Pix4D/cogito.git -> ssh://git@github.com/Pix4D/cogito.git
		workURL = "ssh://" + strings.Replace(workURL, ":", "/", 1)
	}

	anyUrl, err := safeUrlParse(workURL)
	if err != nil {
		return GitURL{}, err
	}

	// Text used to quote the URL in error messages. The raw input is kept whenever
	// it carries no credentials, so messages stay unchanged for ordinary URLs.
	shownURL := rawURL
	if _, hasPassword := anyUrl.User.Password(); hasPassword {
		shownURL = anyUrl.Redacted()
	} else if strings.Contains(anyUrl.Opaque, "@") {
		// Input such as "user:password@host/owner/repo" (scheme forgotten) is parsed
		// as scheme "user" plus an opaque remainder that url.URL.Redacted does not
		// touch; drop the remainder entirely.
		shownURL = anyUrl.Scheme + ":<redacted>"
	}

	scheme := anyUrl.Scheme
	if scheme == "" {
		return GitURL{}, fmt.Errorf("invalid git URL %s: missing scheme", shownURL)
	}
	if scheme != "ssh" && scheme != "http" && scheme != "https" {
		return GitURL{}, fmt.Errorf("invalid git URL %s: invalid scheme: %s", shownURL, scheme)
	}

	// Further parse the path component of the URL to see if it complies with the GitHub
	// naming conventions.
	// Example of compliant path: /Pix4D/cogito.git, which splits into the three
	// tokens "", "Pix4D" and "cogito.git".
	tokens := strings.Split(anyUrl.Path, "/")
	if have, want := len(tokens), 3; have != want {
		return GitURL{},
			fmt.Errorf("invalid git URL: path: want: %d components; have: %d %s",
				want, have, tokens)
	}

	owner := tokens[1]
	repo := strings.TrimSuffix(tokens[2], ".git")
	// Paths such as "//cogito.git" or "/Pix4D/.git" have the right number of
	// components but do not name a repository.
	if owner == "" || repo == "" {
		return GitURL{},
			fmt.Errorf("invalid git URL: path: empty owner or repository name in %q",
				anyUrl.Path)
	}

	// All OK. Fill our GitURL struct
	gu := GitURL{
		URL:   anyUrl,
		Owner: owner,
		Repo:  repo,
	}
	return gu, nil
}
Loading

0 comments on commit 483a285

Please sign in to comment.