Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
7ae334d
initial #141
bwalsh Jan 28, 2026
9dce35a
updated unit coverage
bwalsh Jan 28, 2026
8982f01
test coverage/combined.out
bwalsh Jan 28, 2026
6de4d91
Apply suggestions from code review
bwalsh Jan 28, 2026
3ae9554
PR suggestions
bwalsh Jan 28, 2026
8c9423d
filename input
bwalsh Jan 28, 2026
c322f1a
filename coverage
bwalsh Jan 28, 2026
97438c8
signed url
bwalsh Jan 28, 2026
c87e418
signed url coverage
bwalsh Jan 28, 2026
09b3cd1
rm noop test
bwalsh Jan 28, 2026
04124dd
rm noop test
bwalsh Jan 28, 2026
fbf5028
rm noop test coverage
bwalsh Jan 28, 2026
0469de1
improve comment
bwalsh Jan 28, 2026
41aec59
improves unit tests
bwalsh Jan 28, 2026
44e03f5
adds simple add-url test
bwalsh Jan 28, 2026
0c7b1e6
add-url coverage
bwalsh Jan 28, 2026
2fdd34d
wip add-url xfer
bwalsh Jan 29, 2026
f4fabff
wip: add-url experiments
bwalsh Jan 29, 2026
f482b72
add-url
bwalsh Jan 29, 2026
4a41aa3
improve err log
bwalsh Jan 29, 2026
b496fc8
improve err log
bwalsh Jan 29, 2026
662d345
Improve err msg
bwalsh Jan 29, 2026
8ac2886
improve err log
bwalsh Jan 29, 2026
1da9d6a
improve error msg
bwalsh Jan 29, 2026
1d3470e
TODO - dependency on ~/.gen3/gen3_client_config.ini
bwalsh Jan 29, 2026
16e922c
improve err log
bwalsh Jan 29, 2026
d91a3c4
incremental changes
bwalsh Jan 30, 2026
1ef3f05
incremental changes
bwalsh Jan 30, 2026
364ef47
refactor packages
bwalsh Jan 30, 2026
e36ec2a
refactor packages
bwalsh Jan 30, 2026
7967227
refactor add-url tests and deps
bwalsh Jan 31, 2026
8d8951d
refactor add-url tests and deps
bwalsh Jan 31, 2026
96ce2d1
improve test & doc
bwalsh Jan 31, 2026
c3cf765
improve test & doc
bwalsh Jan 31, 2026
8100e5a
pre-commit-changes #192
bwalsh Feb 1, 2026
f3797e5
pre-commit-changes #192
bwalsh Feb 1, 2026
42b7205
pre-commit-changes #192 tests
bwalsh Feb 1, 2026
203205a
pre-commit-changes #192 tests
bwalsh Feb 1, 2026
6d59b0d
doc changes to precommit/addurl
bwalsh Feb 1, 2026
d9df177
improve tests, doc
bwalsh Feb 1, 2026
eb60c22
improve tests, doc
bwalsh Feb 1, 2026
7c59619
improve precommit test
bwalsh Feb 1, 2026
72aafb1
improve precommit test
bwalsh Feb 1, 2026
3f85ffe
install precommit
bwalsh Feb 1, 2026
90a40fa
install precommit
bwalsh Feb 1, 2026
f35b875
install precommit test
bwalsh Feb 1, 2026
71c1d7b
install precommit test
bwalsh Feb 1, 2026
f99a454
update user doc
bwalsh Feb 1, 2026
1a2e47f
even files 20M
bwalsh Feb 2, 2026
a9b5c6f
2 files in TARGET-ALL-P2
bwalsh Feb 2, 2026
fa5eb43
feature/pre-commit-changes #192
bwalsh Feb 2, 2026
6a629f1
feature/pre-commit-changes #192
bwalsh Feb 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/pr-checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ jobs:
go-version-file: go.mod

- name: Run tests
run: go test -v -race $(go list ./... | grep -v 'tests/integration/calypr' | grep -v 'client/indexd/tests')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is going to bork some integration tests I had on another branch. It's probably better to keep this how it is and add your | grep -v '/cmd/addurl$' string to it.

run: go test -v -race $(go list ./... | grep -v '/cmd/addurl$')
38 changes: 19 additions & 19 deletions client/indexd/add_url.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,19 @@ import (
awsConfig "github.com/aws/aws-sdk-go-v2/config"
"github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/calypr/git-drs/cloud"
"github.com/calypr/git-drs/drs"
"github.com/calypr/git-drs/drs/hash"
"github.com/calypr/git-drs/drslog"
"github.com/calypr/git-drs/drsmap"
"github.com/calypr/git-drs/messages"
"github.com/calypr/git-drs/projectdir"
"github.com/calypr/git-drs/s3_utils"
"github.com/calypr/git-drs/utils"
)

// getBucketDetails fetches bucket details from Gen3, loading config and auth.
// This is the production version that includes all config/auth dependencies.
func (inc *IndexDClient) getBucketDetails(ctx context.Context, bucket string, httpClient *http.Client) (*s3_utils.S3Bucket, error) {
func (inc *IndexDClient) getBucketDetails(ctx context.Context, bucket string, httpClient *http.Client) (*cloud.S3Bucket, error) {
// get all buckets
baseURL := *inc.Base // Create a copy to avoid mutating inc.Base
baseURL.Path = filepath.Join(baseURL.Path, "user/data/buckets")
Expand All @@ -37,7 +37,7 @@ func (inc *IndexDClient) getBucketDetails(ctx context.Context, bucket string, ht

// FetchS3MetadataWithBucketDetails fetches S3 metadata given bucket details.
// This is the core testable logic, separated for easier unit testing.
func FetchS3MetadataWithBucketDetails(ctx context.Context, s3URL, awsAccessKey, awsSecretKey, region, endpoint string, bucketDetails *s3_utils.S3Bucket, s3Client *s3.Client, logger *slog.Logger) (int64, string, error) {
func FetchS3MetadataWithBucketDetails(ctx context.Context, s3URL, awsAccessKey, awsSecretKey, region, endpoint string, bucketDetails *cloud.S3Bucket, s3Client *s3.Client, logger *slog.Logger) (int64, string, error) {

// Parse S3 URL
bucket, key, err := utils.ParseS3URL(s3URL)
Expand Down Expand Up @@ -148,8 +148,8 @@ func FetchS3MetadataWithBucketDetails(ctx context.Context, s3URL, awsAccessKey,
errorMsg.WriteString(fmt.Sprintf(" %d. %s\n", i+1, field))
}
errorMsg.WriteString("\nPlease provide these values via:\n")
errorMsg.WriteString(" - Command-line flags (--" + s3_utils.AWS_KEY_FLAG_NAME + ", --" + s3_utils.AWS_SECRET_FLAG_NAME + ", --" + s3_utils.AWS_REGION_FLAG_NAME + ", --" + s3_utils.AWS_ENDPOINT_URL_FLAG_NAME + ")\n")
errorMsg.WriteString(" - Environment variables (" + s3_utils.AWS_KEY_ENV_VAR + ", " + s3_utils.AWS_SECRET_ENV_VAR + ", " + s3_utils.AWS_REGION_ENV_VAR + ", " + s3_utils.AWS_ENDPOINT_URL_ENV_VAR + ")\n")
errorMsg.WriteString(" - Command-line flags (--" + cloud.AWS_KEY_FLAG_NAME + ", --" + cloud.AWS_SECRET_FLAG_NAME + ", --" + cloud.AWS_REGION_FLAG_NAME + ", --" + cloud.AWS_ENDPOINT_URL_FLAG_NAME + ")\n")
errorMsg.WriteString(" - Environment variables (" + cloud.AWS_KEY_ENV_VAR + ", " + cloud.AWS_SECRET_ENV_VAR + ", " + cloud.AWS_REGION_ENV_VAR + ", " + cloud.AWS_ENDPOINT_URL_ENV_VAR + ")\n")
errorMsg.WriteString(" - AWS credentials file (~/.aws/credentials)\n")
errorMsg.WriteString(" - Gen3 bucket registration (if bucket can be registered in Gen3)\n")
errorMsg.WriteString("\n")
Expand Down Expand Up @@ -199,7 +199,7 @@ func (inc *IndexDClient) fetchS3Metadata(ctx context.Context, s3URL, awsAccessKe
}
if bucketDetails == nil {
logger.Debug("WARNING: no matching bucket found in CALYPR")
bucketDetails = &s3_utils.S3Bucket{}
bucketDetails = &cloud.S3Bucket{}
}

return FetchS3MetadataWithBucketDetails(ctx, s3URL, awsAccessKey, awsSecretKey, region, endpoint, bucketDetails, s3Client, logger)
Expand Down Expand Up @@ -279,13 +279,13 @@ func (inc *IndexDClient) upsertIndexdRecord(url string, sha256 string, fileSize
}

// AddURL adds a file to the Git DRS repo using an S3 URL
func (inc *IndexDClient) AddURL(s3URL, sha256, awsAccessKey, awsSecretKey, regionFlag, endpointFlag string, opts ...s3_utils.AddURLOption) (s3_utils.S3Meta, error) {
func (inc *IndexDClient) AddURL(s3URL, sha256, awsAccessKey, awsSecretKey, regionFlag, endpointFlag string, opts ...cloud.AddURLOption) (cloud.S3Meta, error) {
// Create context with 10-second timeout
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()

// Apply options
cfg := &s3_utils.AddURLConfig{}
cfg := &cloud.AddURLConfig{}
for _, opt := range opts {
opt(cfg)
}
Expand All @@ -296,23 +296,23 @@ func (inc *IndexDClient) AddURL(s3URL, sha256, awsAccessKey, awsSecretKey, regio
}

// Validate inputs
if err := s3_utils.ValidateInputs(s3URL, sha256); err != nil {
return s3_utils.S3Meta{}, err
if err := cloud.ValidateInputs(s3URL, sha256); err != nil {
return cloud.S3Meta{}, err
}

// check that lfs is tracking the file
_, relPath, err := utils.ParseS3URL(s3URL)
if err != nil {
return s3_utils.S3Meta{}, fmt.Errorf("failed to parse S3 URL: %w", err)
return cloud.S3Meta{}, fmt.Errorf("failed to parse S3 URL: %w", err)
}

// confirm file is tracked
isLFS, err := utils.IsLFSTracked(".gitattributes", relPath)
if err != nil {
return s3_utils.S3Meta{}, fmt.Errorf("unable to determine if file is tracked by LFS: %w", err)
return cloud.S3Meta{}, fmt.Errorf("unable to determine if file is tracked by LFS: %w", err)
}
if !isLFS {
return s3_utils.S3Meta{}, fmt.Errorf("file is not tracked by LFS. Please run `git lfs track %s && git add .gitattributes` before proceeding", relPath)
return cloud.S3Meta{}, fmt.Errorf("file is not tracked by LFS. Please run `git lfs track %s && git add .gitattributes` before proceeding", relPath)
}

// Fetch S3 metadata (size, modified date)
Expand All @@ -321,9 +321,9 @@ func (inc *IndexDClient) AddURL(s3URL, sha256, awsAccessKey, awsSecretKey, regio
if err != nil {
// if err contains 403, probably misconfigured credentials
if strings.Contains(err.Error(), "403") {
return s3_utils.S3Meta{}, fmt.Errorf("failed to fetch S3 metadata: %w. Double check your configured AWS credentials and endpoint url", err)
return cloud.S3Meta{}, fmt.Errorf("failed to fetch S3 metadata: %w. Double check your configured AWS credentials and endpoint url", err)
}
return s3_utils.S3Meta{}, fmt.Errorf("failed to fetch S3 metadata: %w", err)
return cloud.S3Meta{}, fmt.Errorf("failed to fetch S3 metadata: %w", err)
}

// logging
Expand All @@ -335,21 +335,21 @@ func (inc *IndexDClient) AddURL(s3URL, sha256, awsAccessKey, awsSecretKey, regio
inc.Logger.Debug("Processing indexd record...")
drsObj, err := inc.upsertIndexdRecord(s3URL, sha256, fileSize, inc.Logger)
if err != nil {
return s3_utils.S3Meta{}, fmt.Errorf("failed to create indexd record: %w", err)
return cloud.S3Meta{}, fmt.Errorf("failed to create indexd record: %w", err)
}

// write to file so push has that file available
drsObjPath, err := drsmap.GetObjectPath(projectdir.DRS_OBJS_PATH, drsObj.Checksums.SHA256)
if err != nil {
return s3_utils.S3Meta{}, fmt.Errorf("failed to get object path: %w", err)
return cloud.S3Meta{}, fmt.Errorf("failed to get object path: %w", err)
}
if err := drsmap.WriteDrsObj(drsObj, sha256, drsObjPath); err != nil {
return s3_utils.S3Meta{}, fmt.Errorf("failed to write DRS object: %w", err)
return cloud.S3Meta{}, fmt.Errorf("failed to write DRS object: %w", err)
}

inc.Logger.Debug("Indexd updated")

return s3_utils.S3Meta{
return cloud.S3Meta{
Size: fileSize,
LastModified: modifiedDate,
}, nil
Expand Down
34 changes: 2 additions & 32 deletions client/indexd/indexd_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ import (
"github.com/calypr/data-client/client/logs"
"github.com/calypr/data-client/client/upload"
"github.com/calypr/git-drs/client"
"github.com/calypr/git-drs/cloud"
"github.com/calypr/git-drs/drs"
"github.com/calypr/git-drs/drs/hash"
"github.com/calypr/git-drs/drslog"
"github.com/calypr/git-drs/drsmap"
"github.com/calypr/git-drs/projectdir"
"github.com/calypr/git-drs/s3_utils"
"github.com/calypr/git-drs/utils"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/go-retryablehttp"
Expand All @@ -39,7 +39,7 @@ type IndexDClient struct {
ProjectId string
BucketName string
Logger *slog.Logger
AuthHandler s3_utils.AuthHandler // Injected for testing/flexibility
AuthHandler cloud.AuthHandler // Injected for testing/flexibility

HttpClient *retryablehttp.Client
SConfig sonic.API
Expand Down Expand Up @@ -1066,36 +1066,6 @@ func (cl *IndexDClient) GetIndexdRecordByDID(did string) (*OutputInfo, error) {
return record, nil
}

// BuildDrsObj assembles a DRS object describing a file stored in the client's
// configured bucket.
//
// The returned object carries drsId as its Id, fileName as its Name, checksum
// as its SHA256 checksum, and size as its Size. It has a single "s3" access
// method whose URL is s3://<bucket>/<drsId>/<checksum>, authorized with the
// value returned by utils.ProjectToResource for the client's project ID.
//
// It returns an error when the client's BucketName is empty or when
// utils.ProjectToResource fails.
func (cl *IndexDClient) BuildDrsObj(fileName string, checksum string, size int64, drsId string) (*drs.DRSObject, error) {
	bucket := cl.BucketName
	if bucket == "" {
		return nil, fmt.Errorf("error: bucket name is empty in config file")
	}

	//TODO: support other storage backends
	// filepath.Join uses the OS path separator, which would put backslashes
	// into the URL on Windows; ToSlash keeps the object key slash-separated.
	objectKey := filepath.ToSlash(filepath.Join(bucket, drsId, checksum))
	fileURL := fmt.Sprintf("s3://%s", objectKey)

	authzStr, err := utils.ProjectToResource(cl.GetProjectId())
	if err != nil {
		return nil, err
	}
	authorizations := drs.Authorizations{
		Value: authzStr,
	}

	// TODO: ensure that we can retrieve the access method during submission (happens in transfer)
	accessMethods := []drs.AccessMethod{{
		Type:           "s3",
		AccessURL:      drs.AccessURL{URL: fileURL},
		Authorizations: &authorizations,
	}}

	return &drs.DRSObject{
		Id:            drsId,
		Name:          fileName,
		AccessMethods: accessMethods,
		Checksums:     hash.HashInfo{SHA256: checksum},
		Size:          size,
	}, nil
}

// Helper function to get indexd record by DID (similar to existing pattern in DeleteIndexdRecord)
func (cl *IndexDClient) getIndexdRecordByDID(did string) (*OutputInfo, error) {
url := fmt.Sprintf("%s/index/%s", cl.Base.String(), did)
Expand Down
64 changes: 0 additions & 64 deletions client/indexd/indexd_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
"testing"
Expand Down Expand Up @@ -301,24 +300,6 @@ func TestIndexdClient_RegisterAndUpdate(t *testing.T) {
}
}

// TestIndexdClient_BuildDrsObj checks that BuildDrsObj carries the DRS ID and
// checksum through to the result and yields exactly one access method whose
// URL contains bucket/did/checksum.
func TestIndexdClient_BuildDrsObj(t *testing.T) {
	client := &IndexDClient{
		ProjectId:  "test-project",
		BucketName: "bucket",
	}

	obj, err := client.BuildDrsObj("file.txt", "sha-256", 12, "did-1")
	if err != nil {
		t.Fatalf("BuildDrsObj error: %v", err)
	}
	if obj.Id != "did-1" || obj.Checksums.SHA256 != "sha-256" {
		t.Fatalf("unexpected drs object: %+v", obj)
	}
	if len(obj.AccessMethods) != 1 {
		t.Fatalf("unexpected access URL: %+v", obj.AccessMethods)
	}

	// The access URL is a URL, not a local path: compare against a
	// slash-separated fragment so the assertion does not depend on the OS
	// path separator.
	gotURL := filepath.ToSlash(obj.AccessMethods[0].AccessURL.URL)
	if !strings.Contains(gotURL, "bucket/did-1/sha-256") {
		t.Fatalf("unexpected access URL: %+v", obj.AccessMethods)
	}
}

func TestIndexdClient_GetProfile(t *testing.T) {
client := &IndexDClient{AuthHandler: &RealAuthHandler{Cred: confCredential("profile")}}
profile, err := client.GetProfile()
Expand Down Expand Up @@ -464,48 +445,3 @@ func chdirForTest(t *testing.T, dir string) func() {
}
}
}

// TestBuildDrsObj_Success verifies every field of the DRS object produced by
// BuildDrsObj for a client with a project ID and bucket name set: Id, Name,
// checksum, size, and the single "s3" access method with its URL.
func TestBuildDrsObj_Success(t *testing.T) {
	client := &IndexDClient{
		ProjectId:  "test-project",
		BucketName: "bucket",
	}

	obj, err := client.BuildDrsObj("file.txt", "sha-256", 12, "did-1")
	if err != nil {
		t.Fatalf("BuildDrsObj error: %v", err)
	}
	if obj.Id != "did-1" {
		t.Fatalf("unexpected Id: %s", obj.Id)
	}
	if obj.Name != "file.txt" {
		t.Fatalf("unexpected Name: %s", obj.Name)
	}
	if obj.Checksums.SHA256 != "sha-256" {
		t.Fatalf("unexpected checksum: %v", obj.Checksums)
	}
	if obj.Size != 12 {
		t.Fatalf("unexpected size: %d", obj.Size)
	}
	if len(obj.AccessMethods) != 1 {
		t.Fatalf("expected 1 access method, got %d", len(obj.AccessMethods))
	}

	method := obj.AccessMethods[0]
	// The access URL is a URL, not a local path: normalise separators and
	// compare against a slash-separated fragment so the check is independent
	// of the OS path separator.
	if !strings.Contains(filepath.ToSlash(method.AccessURL.URL), "bucket/did-1/sha-256") {
		t.Fatalf("unexpected access URL: %s", method.AccessURL.URL)
	}
	if method.Type != "s3" {
		t.Fatalf("unexpected access method type: %s", method.Type)
	}
}

// TestBuildDrsObj_EmptyBucket verifies that BuildDrsObj returns an error when
// the client's BucketName is empty.
func TestBuildDrsObj_EmptyBucket(t *testing.T) {
	noBucket := &IndexDClient{ProjectId: "test-project", BucketName: ""}

	if _, err := noBucket.BuildDrsObj("file.txt", "sha-256", 12, "did-1"); err == nil {
		t.Fatalf("expected error when BucketName is empty")
	}
}
Loading