Skip to content

Commit bb123d1

Browse files
authored
support upload folder (#66)
* support upload folder * support upload file to folder
1 parent 113eeb4 commit bb123d1

File tree

8 files changed

+192
-64
lines changed

8 files changed

+192
-64
lines changed

README.md

+33-5
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,17 @@ csghub-cli download wanghh2000/myprivate1
9191
# download dataset
9292
csghub-cli download wanghh2000/myds1 -t dataset
9393

94-
# upload a single file
95-
csghub-cli upload wanghh2000/myprivate1 abc/3.txt
94+
# upload a single file to folder1
95+
csghub-cli upload wanghh2000/myprivate1 abc/3.txt folder1
9696

97-
# upload files
98-
csghub-cli upload wanghh2000/myds1 abc/4.txt abc/5.txt -t dataset
97+
# upload local folder '/Users/hhwang/temp/jsonl' to root path of repo 'wanghh2000/m01' with default branch
98+
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl
99+
100+
# upload local folder '/Users/hhwang/temp/jsonl' to path 'test/files' of repo 'wanghh2000/m01' with branch v1
101+
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl test/files --revision v1
102+
103+
# upload local folder '/Users/hhwang/temp/jsonl' to path 'test/files' of repo 'wanghh2000/m01' with token 'xxxxxx'
104+
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl test/files -k xxxxxx
99105
```
100106

101107
Download location is `~/.cache/csg/` by default.
@@ -184,7 +190,7 @@ for item in repo_files:
184190
http_upload_file(repo_id=repo_id, repo_type=repo_type, file_path=item, endpoint=endpoint, token=token)
185191
```
186192

187-
### Upload repo
193+
### Upload the local path to repo
188194

189195
Before starting, please make sure you have Git-LFS installed (see [here](https://git-lfs.github.com/) for installation instructions).
190196

@@ -204,6 +210,28 @@ r = Repository(
204210
r.upload()
205211
```
206212

213+
### Upload the local path to the specified path in the repo
214+
215+
Before starting, please make sure you have Git-LFS installed (see [here](https://git-lfs.github.com/) for installation instructions).
216+
217+
```python
218+
from pycsghub.repository import Repository
219+
220+
token = "your access token"
221+
222+
r = Repository(
223+
repo_id="wanghh2000/model01",
224+
upload_path="/Users/hhwang/temp/jsonl",
225+
path_in_repo="test/abc",
226+
user_name="wanghh2000",
227+
token=token,
228+
repo_type="model",
229+
branch_name="v1",
230+
)
231+
232+
r.upload()
233+
```
234+
207235
### Model loading compatible with huggingface
208236

209237
The transformers library supports directly inputting the repo_id from Hugging Face to download and load related models, as shown below:

README_cn.md

+33-5
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,17 @@ csghub-cli download wanghh2000/myprivate1
9090
# 数据集下载
9191
csghub-cli download wanghh2000/myds1 -t dataset
9292

93-
# 上传单个文件
94-
csghub-cli upload wanghh2000/myprivate1 abc/3.txt
93+
# 上传单个文件到仓库目录folder1
94+
csghub-cli upload wanghh2000/myprivate1 abc/3.txt folder1
9595

96-
# 上传多个文件
97-
csghub-cli upload wanghh2000/myds1 abc/4.txt abc/5.txt -t dataset
96+
# 上传本地目录'/Users/hhwang/temp/jsonl'到仓库'wanghh2000/m01'的默认分支根目录下
97+
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl
98+
99+
# 上传本地目录'/Users/hhwang/temp/jsonl'到仓库'wanghh2000/m01'的v1分支的'test/files'目录下
100+
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl test/files --revision v1
101+
102+
# 上传本地目录'/Users/hhwang/temp/jsonl'到仓库'wanghh2000/m01'的默认分支'test/files'目录下并使用指定token
103+
csghub-cli upload wanghh2000/m01 /Users/hhwang/temp/jsonl test/files -k xxxxxx
98104
```
99105

100106
文件默认下载路径为`~/.cache/csg/`
@@ -184,7 +190,7 @@ for item in repo_files:
184190
http_upload_file(repo_id=repo_id, repo_type=repo_type, file_path=item, endpoint=endpoint, token=token)
185191
```
186192

187-
### 上传仓库
193+
### 上传本地目录到仓库
188194

189195
在开始之前,请确保您已安装 Git-LFS(安装说明请参见 [这里](https://git-lfs.github.com/))。
190196

@@ -204,6 +210,28 @@ r = Repository(
204210
r.upload()
205211
```
206212

213+
### 上传本地目录到仓库的指定目录
214+
215+
在开始之前,请确保您已安装 Git-LFS(安装说明请参见 [这里](https://git-lfs.github.com/))。
216+
217+
```python
218+
from pycsghub.repository import Repository
219+
220+
token = "your access token"
221+
222+
r = Repository(
223+
repo_id="wanghh2000/model01",
224+
upload_path="/Users/hhwang/temp/jsonl",
225+
path_in_repo="test/abc",
226+
user_name="wanghh2000",
227+
token=token,
228+
repo_type="model",
229+
branch_name="v1",
230+
)
231+
232+
r.upload()
233+
```
234+
207235
### 兼容huggingface的模型加载
208236

209237
huggingface的transformers库支持直接输入huggingface上的repo_id以下载并读取相关模型,如下列所示:

pycsghub/cli.py

+30-10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import typer
2+
import os
23
from typing import Annotated, List, Optional
34
from pycsghub.cmd import repo
45
from pycsghub.cmd.repo_types import RepoType
@@ -15,11 +16,13 @@ def version_callback(value: bool):
1516

1617
# Shared Typer parameter declarations, looked up by short key and attached to
# command parameters through ``Annotated[...]`` in the command signatures.
OPTIONS = {
    # Positional arguments.
    "repoID": typer.Argument(
        help="The ID of the repo. (e.g. `username/repo-name`).",
    ),
    "localPath": typer.Argument(
        help=(
            "Local path to the file or folder to upload. "
            "Defaults to the relative path of the file of repo of OpenCSG Hub."
        ),
    ),
    "pathInRepo": typer.Argument(
        help=(
            "Path of the folder in the repo. "
            "Defaults to the relative path of the file or folder."
        ),
    ),
    # Named options.
    "repoType": typer.Option(
        "-t",
        "--repo-type",
        help="Specify the repository type.",
    ),
    "revision": typer.Option(
        "-r",
        "--revision",
        help="An optional Git revision id which can be a branch name",
    ),
    "cache_dir": typer.Option(
        "-cd",
        "--cache-dir",
        help="Path to the directory where to save the downloaded files.",
    ),
    "endpoint": typer.Option(
        "-e",
        "--endpoint",
        help="The address of the request to be sent.",
    ),
    "username": typer.Option(
        "-u",
        "--username",
        help="Logon account of OpenCSG Hub.",
    ),
    "token": typer.Option(
        "-k",
        "--token",
        help="A User Access Token generated from https://opencsg.com/settings/access-token",
    ),
    # Eager so that the callback runs before the other parameters are processed.
    "version": typer.Option(
        None,
        "-V",
        "--version",
        callback=version_callback,
        is_eager=True,
        help="Show the version and exit.",
    ),
}
@@ -45,20 +48,37 @@ def download(
4548
@app.command(name="upload", help="Upload repository files to opencsg.com.")
def upload(
    repo_id: Annotated[str, OPTIONS["repoID"]],
    local_path: Annotated[str, OPTIONS["localPath"]],
    path_in_repo: Annotated[str, OPTIONS["pathInRepo"]] = "",
    repo_type: Annotated[RepoType, OPTIONS["repoType"]] = RepoType.MODEL,
    revision: Annotated[Optional[str], OPTIONS["revision"]] = DEFAULT_REVISION,
    endpoint: Annotated[Optional[str], OPTIONS["endpoint"]] = DEFAULT_CSGHUB_DOMAIN,
    token: Annotated[Optional[str], OPTIONS["token"]] = None,
    user_name: Annotated[Optional[str], OPTIONS["username"]] = "",
):
    """Upload a local file or folder to a repo.

    A regular file is handed to ``repo.upload_files``; a directory is handed
    to ``repo.upload_folder``. ``user_name`` is only forwarded on the folder
    path.

    Raises:
        typer.BadParameter: if ``local_path`` is neither an existing file nor
            an existing directory.
    """
    # File upload
    if os.path.isfile(local_path):
        repo.upload_files(
            repo_id=repo_id,
            repo_type=repo_type,
            repo_file=local_path,
            path_in_repo=path_in_repo,
            revision=revision,
            endpoint=endpoint,
            token=token,
        )
        return

    # Reject anything that is not a directory up front, so a missing or
    # mistyped path is reported here instead of being passed on to the
    # folder upload.
    if not os.path.isdir(local_path):
        raise typer.BadParameter(
            f"local path '{local_path}' does not exist or is not a file or folder"
        )

    # Folder upload
    repo.upload_folder(
        repo_id=repo_id,
        repo_type=repo_type,
        local_path=local_path,
        path_in_repo=path_in_repo,
        revision=revision,
        endpoint=endpoint,
        token=token,
        user_name=user_name,
    )
6282

6383
@app.callback(invoke_without_command=True)
6484
def main(version: bool = OPTIONS["version"]):

pycsghub/cmd/repo.py

+46-11
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from typing import Optional, Union, List
55
from pycsghub.constants import DEFAULT_REVISION
66
import requests
7+
from pycsghub.repository import Repository
78

89
def download(
910
repo_id: str,
@@ -22,20 +23,54 @@ def download(
2223
token=token,
2324
)
2425

25-
def upload(
26+
def upload_files(
    repo_id: str,
    repo_type: str,
    repo_file: str,
    path_in_repo: Optional[str] = "",
    revision: Optional[str] = DEFAULT_REVISION,
    endpoint: Optional[str] = None,
    token: Optional[str] = None
):
    """Upload one local file to a repo by delegating to ``http_upload_file``.

    Every argument is forwarded unchanged; ``repo_file`` is passed to the
    helper as its ``file_path`` argument.
    """
    upload_kwargs = dict(
        repo_id=repo_id,
        repo_type=repo_type,
        file_path=repo_file,
        path_in_repo=path_in_repo,
        revision=revision,
        endpoint=endpoint,
        token=token,
    )
    http_upload_file(**upload_kwargs)
44+
45+
def upload_folder(
    repo_id: str,
    repo_type: str,
    local_path: str,
    path_in_repo: Optional[str] = "",
    work_dir: Optional[str] = "/tmp/csg",
    nickname: Optional[str] = "",
    description: Optional[str] = "",
    license: Optional[str] = "apache-2.0",
    revision: Optional[str] = DEFAULT_REVISION,
    endpoint: Optional[str] = None,
    user_name: Optional[str] = "",
    token: Optional[str] = None,
    auto_create: Optional[bool] = True,
):
    """Upload a local folder to a repo through ``Repository``.

    Builds a ``Repository`` from the given arguments and calls its
    ``upload()`` method. ``local_path`` is passed as ``upload_path`` and
    ``revision`` as ``branch_name``; all other arguments keep their names.
    """
    repository_options = {
        "repo_id": repo_id,
        "upload_path": local_path,
        "path_in_repo": path_in_repo,
        "work_dir": work_dir,
        "repo_type": repo_type,
        "nickname": nickname,
        "description": description,
        "license": license,
        "branch_name": revision,
        "endpoint": endpoint,
        "user_name": user_name,
        "token": token,
        "auto_create": auto_create,
    }
    Repository(**repository_options).upload()
76+

pycsghub/constants.py

+3
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,6 @@
8686

8787

8888
S3_INTERNAL = os.environ.get("S3_INTERNAL", '')
89+
90+
GIT_HIDDEN_DIR = ".git"
91+
GIT_ATTRIBUTES_FILE = ".gitattributes"

pycsghub/file_upload.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,21 @@ def http_upload_file(
88
repo_id: str,
99
repo_type: Optional[str] = None,
1010
file_path: str = None,
11+
path_in_repo: Optional[str] = "",
1112
revision: Optional[str] = DEFAULT_REVISION,
1213
endpoint: Optional[str] = None,
1314
token: Optional[str] = None,
1415
):
1516
if not os.path.exists(file_path):
1617
raise ValueError(f"file '{file_path}' does not exist")
18+
destination_path = os.path.join(path_in_repo, os.path.basename(file_path)) if path_in_repo else file_path
1719
http_endpoint = endpoint if endpoint is not None else get_endpoint()
1820
if not http_endpoint.endswith("/"):
1921
http_endpoint += "/"
2022
http_url = http_endpoint + "api/v1/" + repo_type + "s/" + repo_id + "/upload_file"
2123
post_headers = build_csg_headers(token=token)
2224
file_data = {'file': open(file_path, 'rb')}
23-
form_data = {'file_path': file_path, 'branch': revision, 'message': 'upload' + file_path}
25+
form_data = {'file_path': destination_path, 'branch': revision, 'message': 'upload' + file_path}
2426
response = requests.post(http_url, headers=post_headers, data=form_data, files=file_data)
2527
if response.status_code == 200:
2628
print(f"file '{file_path}' upload successfully.")

0 commit comments

Comments
 (0)