Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
/dist/
/state/
/*-source/
95 changes: 79 additions & 16 deletions internal/strategy/git/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package git

import (
"bufio"
"bytes"
"context"
"log/slog"
"net/http"
Expand Down Expand Up @@ -40,17 +41,37 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, c *c
host := r.PathValue("host")
pathValue := r.PathValue("path")

// git http-backend expects the path as-is: /host/repo.git/info/refs
backendPath := "/" + host + "/" + pathValue
// For regular clones, we need to insert /.git before the git protocol paths
// Find where the git operation starts (e.g., /info/refs, /git-upload-pack)
var gitOperation string
var repoPathWithSuffix string

for _, op := range []string{"/info/refs", "/git-upload-pack", "/git-receive-pack"} {
if idx := strings.Index(pathValue, op); idx != -1 {
repoPathWithSuffix = pathValue[:idx]
gitOperation = pathValue[idx:]
break
}
}

// Remove .git suffix from repo path for the filesystem path
repoPath := strings.TrimSuffix(repoPathWithSuffix, ".git")

// Construct backend path with .git directory: /host/repo/.git/info/refs
backendPath := "/" + host + "/" + repoPath + "/.git" + gitOperation

logger.DebugContext(r.Context(), "Serving with git http-backend",
slog.String("original_path", r.URL.Path),
slog.String("backend_path", backendPath),
slog.String("clone_path", c.path))

// Capture stderr from git http-backend to log errors
var stderrBuf bytes.Buffer

handler := &cgi.Handler{
Path: gitPath,
Args: []string{"http-backend"},
Path: gitPath,
Args: []string{"http-backend"},
Stderr: &stderrBuf,
Env: []string{
"GIT_PROJECT_ROOT=" + absRoot,
"GIT_HTTP_EXPORT_ALL=1",
Expand All @@ -63,9 +84,16 @@ func (s *Strategy) serveFromBackend(w http.ResponseWriter, r *http.Request, c *c
r2.URL.Path = backendPath

handler.ServeHTTP(w, r2)

// Log stderr if there was any output (indicates an error)
if stderrBuf.Len() > 0 {
logger.ErrorContext(r.Context(), "git http-backend error",
slog.String("stderr", stderrBuf.String()),
slog.String("path", backendPath))
}
}

// executeClone performs a git clone --bare operation.
// executeClone performs a git clone operation.
func (s *Strategy) executeClone(ctx context.Context, c *clone) error {
logger := logging.FromContext(ctx)

Expand All @@ -75,7 +103,7 @@ func (s *Strategy) executeClone(ctx context.Context, c *clone) error {

// #nosec G204 - c.upstreamURL and c.path are controlled by us
// Configure git for large repositories to avoid network buffer issues
args := []string{"clone", "--bare"}
args := []string{"clone"}
if s.config.CloneDepth > 0 {
args = append(args, "--depth", strconv.Itoa(s.config.CloneDepth))
}
Expand All @@ -96,11 +124,40 @@ func (s *Strategy) executeClone(ctx context.Context, c *clone) error {
return errors.Wrap(err, "git clone")
}

logger.DebugContext(ctx, "git clone succeeded", slog.String("output", string(output)))
// Configure remote to fetch all branches, not just the default branch
// A shallow clone (--depth implies --single-branch) sets
// fetch = +refs/heads/<default>:refs/remotes/origin/<default>,
// so widen the refspec to cover every branch.
// #nosec G204 - c.path is controlled by us
cmd = exec.CommandContext(ctx, "git", "-C", c.path, "config", "remote.origin.fetch", "+refs/heads/*:refs/remotes/origin/*")
output, err = cmd.CombinedOutput()
if err != nil {
logger.ErrorContext(ctx, "git config failed",
slog.String("error", err.Error()),
slog.String("output", string(output)))
return errors.Wrap(err, "configure fetch refspec")
}

// Fetch all branches now that the refspec is configured
cmd, err = gitCommand(ctx, c.upstreamURL, "-C", c.path,
"-c", "http.postBuffer=524288000",
"-c", "http.lowSpeedLimit=1000",
"-c", "http.lowSpeedTime=600",
"fetch", "--all")
if err != nil {
return errors.Wrap(err, "create git command for fetch")
}
output, err = cmd.CombinedOutput()
if err != nil {
logger.ErrorContext(ctx, "git fetch --all failed",
slog.String("error", err.Error()),
slog.String("output", string(output)))
return errors.Wrap(err, "fetch all branches")
}

return nil
}

// executeFetch performs a git fetch --all operation.
// executeFetch performs a git remote update operation.
func (s *Strategy) executeFetch(ctx context.Context, c *clone) error {
logger := logging.FromContext(ctx)

Expand Down Expand Up @@ -193,10 +250,18 @@ func (s *Strategy) ensureRefsUpToDate(ctx context.Context, c *clone) error {
if strings.HasSuffix(ref, "^{}") {
continue
}
localSHA, exists := localRefs[ref]
// Only check refs/heads/* from upstream since those are what we fetch
// (GitHub exposes refs/pull/* and other refs we don't fetch)
if !strings.HasPrefix(ref, "refs/heads/") {
continue
}
// Convert refs/heads/X to refs/remotes/origin/X for local lookup
localRef := "refs/remotes/origin/" + strings.TrimPrefix(ref, "refs/heads/")
localSHA, exists := localRefs[localRef]
if !exists || localSHA != upstreamSHA {
logger.DebugContext(ctx, "Upstream ref differs from local",
slog.String("ref", ref),
slog.String("upstream_ref", ref),
slog.String("local_ref", localRef),
slog.String("upstream_sha", upstreamSHA),
slog.String("local_sha", localSHA))
needsFetch = true
Expand Down Expand Up @@ -225,14 +290,12 @@ func (s *Strategy) ensureRefsUpToDate(ctx context.Context, c *clone) error {
// getLocalRefs returns a map of ref names to SHAs for the local clone.
func (s *Strategy) getLocalRefs(ctx context.Context, c *clone) (map[string]string, error) {
// #nosec G204 - c.path is controlled by us
// Use --head to include HEAD symbolic ref
cmd, err := gitCommand(ctx, "", "-C", c.path, "show-ref", "--head")
if err != nil {
return nil, errors.Wrap(err, "create git command")
}
// Use for-each-ref to get all refs including remote refs
// No need for insteadOf protection since this is purely local
cmd := exec.CommandContext(ctx, "git", "-C", c.path, "for-each-ref", "--format=%(objectname) %(refname)")
output, err := cmd.CombinedOutput()
if err != nil {
return nil, errors.Wrap(err, "git show-ref")
return nil, errors.Wrap(err, "git for-each-ref")
}

return ParseGitRefs(output), nil
Expand Down
16 changes: 14 additions & 2 deletions internal/strategy/git/bundle.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"log/slog"
"net/textproto"
"os"
"strings"
"time"

"github.com/alecthomas/errors"
Expand Down Expand Up @@ -90,8 +91,9 @@ func (s *Strategy) generateAndUploadBundle(ctx context.Context, c *clone) {

// Stream bundle directly to cache
// #nosec G204 - c.path is controlled by us
cmd, err := gitCommand(ctx, "", "-C", c.path,
"bundle", "create", "-", "--branches")
// Use --branches --remotes to include all branches but exclude tags (which can be massive)
args := []string{"-C", c.path, "bundle", "create", "-", "--branches", "--remotes"}
cmd, err := gitCommand(ctx, "", args...)
if err != nil {
logger.ErrorContext(ctx, "Failed to create git command",
slog.String("upstream", c.upstreamURL),
Expand All @@ -109,6 +111,10 @@ func (s *Strategy) generateAndUploadBundle(ctx context.Context, c *clone) {
return
}

logger.DebugContext(ctx, "Starting bundle generation",
slog.String("upstream", c.upstreamURL),
slog.String("command", "git "+strings.Join(args, " ")))

if err := cmd.Start(); err != nil {
logger.ErrorContext(ctx, "Failed to start bundle generation",
slog.String("upstream", c.upstreamURL),
Expand All @@ -126,6 +132,12 @@ func (s *Strategy) generateAndUploadBundle(ctx context.Context, c *clone) {
return
}

if len(stderr) > 0 {
logger.DebugContext(ctx, "Bundle generation stderr",
slog.String("upstream", c.upstreamURL),
slog.String("stderr", string(stderr)))
}

logger.InfoContext(ctx, "Bundle uploaded successfully",
slog.String("upstream", c.upstreamURL))
}
33 changes: 22 additions & 11 deletions internal/strategy/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ func init() {

// Config for the Git strategy.
type Config struct {
MirrorRoot string `hcl:"mirror-root" help:"Directory to store git mirrors." required:""`
MirrorRoot string `hcl:"mirror-root" help:"Directory to store git clones." required:""`
FetchInterval time.Duration `hcl:"fetch-interval,optional" help:"How often to fetch from upstream in minutes." default:"15m"`
RefCheckInterval time.Duration `hcl:"ref-check-interval,optional" help:"How long to cache ref checks." default:"10s"`
BundleInterval time.Duration `hcl:"bundle-interval,optional" help:"How often to generate bundles. 0 disables bundling." default:"0"`
CloneDepth int `hcl:"clone-depth,optional" help:"Depth for shallow clones. 0 means full clone." default:"0"`
}

// cloneState represents the current state of a bare clone.
// cloneState represents the current state of a clone.
type cloneState int

const (
Expand All @@ -43,7 +43,7 @@ const (
stateReady // Clone is ready to serve
)

// clone represents a bare clone of an upstream repository.
// clone represents a checked out clone of an upstream repository.
type clone struct {
mu sync.RWMutex
state cloneState
Expand Down Expand Up @@ -292,7 +292,9 @@ func (s *Strategy) getOrCreateClone(ctx context.Context, upstreamURL string) *cl
}

// Check if clone already exists on disk (from previous run)
if _, err := os.Stat(clonePath); err == nil {
// Verify it has a .git directory to ensure it's a valid clone
gitDir := filepath.Join(clonePath, ".git")
if _, err := os.Stat(gitDir); err == nil {
c.state = stateReady
logging.FromContext(ctx).DebugContext(ctx, "Found existing clone on disk",
slog.String("path", clonePath))
Expand All @@ -315,12 +317,12 @@ func (s *Strategy) clonePathForURL(upstreamURL string) string {
parsed, err := url.Parse(upstreamURL)
if err != nil {
// Fall back to a fixed placeholder path under the mirror root if URL parsing fails
return filepath.Join(s.config.MirrorRoot, "unknown.git")
return filepath.Join(s.config.MirrorRoot, "unknown")
}

// Create path: {mirror_root}/{host}/{path}.git
// Create path: {mirror_root}/{host}/{path}
repoPath := strings.TrimSuffix(parsed.Path, ".git")
return filepath.Join(s.config.MirrorRoot, parsed.Host, repoPath+".git")
return filepath.Join(s.config.MirrorRoot, parsed.Host, repoPath)
}

// discoverExistingClones scans the mirror root for existing clones and starts bundle loops.
Expand All @@ -338,10 +340,19 @@ func (s *Strategy) discoverExistingClones(ctx context.Context) error {
return nil
}

// Check if this directory is a bare git repository by looking for HEAD file
headPath := filepath.Join(path, "HEAD")
// Check if this directory is a git repository by requiring a .git directory that contains a HEAD file
gitDir := filepath.Join(path, ".git")
headPath := filepath.Join(path, ".git", "HEAD")
if _, statErr := os.Stat(gitDir); statErr != nil {
// Skip if .git doesn't exist (not a git repo)
if errors.Is(statErr, os.ErrNotExist) {
return nil
}
// Return other errors
return errors.Wrap(statErr, "stat .git directory")
}
if _, statErr := os.Stat(headPath); statErr != nil {
// Skip if HEAD doesn't exist (not a git repo)
// Skip if HEAD doesn't exist (not a valid git repo)
if errors.Is(statErr, os.ErrNotExist) {
return nil
}
Expand All @@ -365,7 +376,7 @@ func (s *Strategy) discoverExistingClones(ctx context.Context) error {
}

host := parts[0]
repoPath := strings.TrimSuffix(strings.Join(parts[1:], "/"), ".git")
repoPath := strings.Join(parts[1:], "/")
upstreamURL := "https://" + host + "/" + repoPath

// Create clone entry
Expand Down
11 changes: 9 additions & 2 deletions internal/strategy/git/git_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,8 +132,15 @@ func TestNewWithExistingCloneOnDisk(t *testing.T) {
tmpDir := t.TempDir()

// Create a fake clone directory on disk before initializing strategy
clonePath := filepath.Join(tmpDir, "github.com", "org", "repo.git")
err := os.MkdirAll(clonePath, 0o750)
// For regular clones, we need a .git subdirectory with HEAD file
clonePath := filepath.Join(tmpDir, "github.com", "org", "repo")
gitDir := filepath.Join(clonePath, ".git")
err := os.MkdirAll(gitDir, 0o750)
assert.NoError(t, err)

// Create HEAD file to make it look like a valid git repo
headPath := filepath.Join(gitDir, "HEAD")
err = os.WriteFile(headPath, []byte("ref: refs/heads/main\n"), 0o640)
assert.NoError(t, err)

mux := newTestMux()
Expand Down
12 changes: 9 additions & 3 deletions internal/strategy/git/integration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,17 @@ func TestIntegrationGitCloneViaProxy(t *testing.T) {
_, err = os.Stat(readmePath2)
assert.NoError(t, err)

// Verify the bare clone was created
bareClonePath := filepath.Join(clonesDir, "github.com", "octocat", "Hello-World.git")
info, err := os.Stat(bareClonePath)
// Verify the clone was created
clonePath := filepath.Join(clonesDir, "github.com", "octocat", "Hello-World")
info, err := os.Stat(clonePath)
assert.NoError(t, err)
assert.True(t, info.IsDir())

// Verify it has a .git directory (regular clone)
gitDir := filepath.Join(clonePath, ".git")
gitInfo, err := os.Stat(gitDir)
assert.NoError(t, err)
assert.True(t, gitInfo.IsDir())
}

// TestIntegrationGitFetchViaProxy tests fetching updates through the proxy.
Expand Down