-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathfetch.go
More file actions
140 lines (118 loc) · 3.53 KB
/
fetch.go
File metadata and controls
140 lines (118 loc) · 3.53 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package httpmirror
import (
"context"
"fmt"
"io"
"io/fs"
"net/http"
"time"
)
// httpHead performs an HTTP HEAD request to retrieve file metadata without downloading the content.
// It returns file information as an fs.FileInfo interface.
//
// Returns ErrNotOK if the response status is not 200 OK.
func httpHead(ctx context.Context, client *http.Client, p string) (fs.FileInfo, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodHead, p, nil)
if err != nil {
return nil, err
}
resp, err := client.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return nil, fmt.Errorf("%w: http status %d", ErrNotOK, resp.StatusCode)
}
return &fileInfo{
name: p,
resp: resp,
}, nil
}
// httpGet performs an HTTP GET request for p to download file content.
// It returns the GET response body together with file information built
// from the response metadata.
//
// When teeHf is true, the GET response reports no usable Content-Length
// (<= 0) and the request host is listed in hfHosts, an extra HEAD request
// is issued and its metadata is used for the returned fileInfo instead;
// the returned body is always the GET body.
//
// The caller is responsible for closing the returned io.ReadCloser.
// Returns an error wrapping ErrNotOK if the GET status is not 200 OK. On
// any error no body is returned and the GET body has already been closed.
func httpGet(ctx context.Context, client *http.Client, p string, teeHf bool) (io.ReadCloser, *fileInfo, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, p, nil)
	if err != nil {
		return nil, nil, err
	}
	resp, err := client.Do(req)
	if err != nil {
		return nil, nil, err
	}
	if resp.StatusCode != http.StatusOK {
		resp.Body.Close()
		return nil, nil, fmt.Errorf("%w: http status %d", ErrNotOK, resp.StatusCode)
	}
	body := resp.Body

	// huggingface HEAD and GET headers may differ, so we perform a HEAD
	// request to get accurate metadata for caching decisions.
	if teeHf && resp.ContentLength <= 0 {
		if _, ok := hfHosts[req.URL.Host]; ok {
			reqHead, err := http.NewRequestWithContext(ctx, http.MethodHead, p, nil)
			if err != nil {
				// The caller never sees body on this path, so release it here.
				body.Close()
				return nil, nil, err
			}
			headResp, err := client.Do(reqHead)
			if err != nil {
				body.Close()
				return nil, nil, err
			}
			// Only the HEAD headers are of interest; close its body right
			// away so the underlying connection is not leaked.
			headResp.Body.Close()
			// Use the HEAD metadata only when the HEAD request succeeded;
			// otherwise keep the metadata of the successful GET rather
			// than the headers of an error response.
			if headResp.StatusCode == http.StatusOK {
				resp = headResp
			}
		}
	}

	return body, &fileInfo{
		name: p,
		resp: resp,
	}, nil
}
var (
	// ErrNotOK is the sentinel error wrapped by httpHead and httpGet when
	// the server answers with any status other than 200 OK. Match it with
	// errors.Is.
	ErrNotOK = fmt.Errorf("http status not ok")

	// Compile-time check that *fileInfo satisfies fs.FileInfo.
	_ fs.FileInfo = (*fileInfo)(nil)
)
// fileInfo implements the fs.FileInfo interface on top of an HTTP response.
// Every piece of metadata is read from the stored response on demand.
type fileInfo struct {
	// name is the value reported by Name (the URL in this case).
	name string
	// resp is the HTTP response whose headers supply the metadata.
	resp *http.Response
}

// Name returns the name of the file (the URL in this case).
func (f fileInfo) Name() string {
	return f.name
}

// IsDir always returns false as HTTP responses represent files, not directories.
func (f fileInfo) IsDir() bool {
	return false
}

// Mode returns the file mode (always 0 for HTTP responses).
func (f fileInfo) Mode() fs.FileMode {
	return 0
}

// Sys returns the underlying *http.Response object.
func (f fileInfo) Sys() any {
	return f.resp
}

// Size returns the ContentLength field of the HTTP response.
// net/http reports -1 there when the length is unknown.
func (f fileInfo) Size() int64 {
	return f.resp.ContentLength
}

// ETag returns the ETag header from the HTTP response, which can be used
// for caching and validation. It is empty when the header is absent.
func (f fileInfo) ETag() string {
	return f.resp.Header.Get("ETag")
}

// ModTime returns the modification time from the Last-Modified header.
// Returns zero time if the header is missing or cannot be parsed.
func (f fileInfo) ModTime() time.Time {
	lastModified := f.resp.Header.Get("Last-Modified")
	if lastModified == "" {
		return time.Time{}
	}
	// http.ParseTime accepts every date form HTTP/1.1 allows (the preferred
	// http.TimeFormat plus the obsolete RFC 850 and ANSI C asctime forms),
	// not just http.TimeFormat.
	t, err := http.ParseTime(lastModified)
	if err != nil {
		return time.Time{}
	}
	return t
}

// String returns a string representation of the file info:
// the name, the modification time and the size, separated by spaces.
func (f fileInfo) String() string {
	return fmt.Sprintf("%s %s %d", f.Name(), f.ModTime(), f.Size())
}