@@ -232,7 +232,6 @@ def convert_to_gguf(
             model_files,
             f"{username}/{modelname}-gguf",
             source_model_id,
-            quanttype,
             private,
         )

@@ -241,21 +240,18 @@ def convert_to_gguf(
             raise

     @method()
-    def upload_to_hf(self, file_path: str, repo_id: str, source_model_id: str, quant_type: str, private: bool = False):
+    def upload_to_hf(self, model_files: List[tuple], repo_id: str, source_model_id: str, private: bool = False):
         logger.info("Reloading volume before upload...")
         volume.reload()

-        logger.info(f"Uploading GGUF model to HuggingFace repo {repo_id}...")
+        logger.info(f"Uploading GGUF models to HuggingFace repo {repo_id}...")
         from huggingface_hub import HfApi, ModelCard
         from textwrap import dedent

         try:
             api = HfApi()
-
-            # Create repo first
             api.create_repo(repo_id, exist_ok=True, private=private, repo_type="model")

-            # Generate model card
             try:
                 card = ModelCard.load(source_model_id)
             except Exception:
@@ -266,31 +262,37 @@ def upload_to_hf(self, file_path: str, repo_id: str, source_model_id: str, quant
             card.data.tags.extend(["llama-cpp", "gguf"])
             card.data.base_model = source_model_id

-            filename = os.path.basename(file_path)
-            card.text = dedent(
-                f"""
+            # Generate model card with all versions
+            versions_text = "\n                ".join([
+                f"- `{os.path.basename(file)}` ({quant_type})"
+                for file, quant_type in model_files
+            ])
+
+            card.text = dedent(f"""
                 # {repo_id}
                 This model was converted to GGUF format from [`{source_model_id}`](https://huggingface.co/{source_model_id}) using llama.cpp.
                 Refer to the [original model card](https://huggingface.co/{source_model_id}) for more details on the model.

+                ## Available Versions
+                {versions_text}
+
                 ## Use with llama.cpp
+                Replace `FILENAME` with one of the above filenames.

                 ### CLI:
                 ```bash
-                llama-cli --hf-repo {repo_id} --hf-file {filename} -p "Your prompt here"
+                llama-cli --hf-repo {repo_id} --hf-file FILENAME -p "Your prompt here"
                 ```

                 ### Server:
                 ```bash
-                llama-server --hf-repo {repo_id} --hf-file {filename} -c 2048
+                llama-server --hf-repo {repo_id} --hf-file FILENAME -c 2048
                 ```

                 ## Model Details
-                - **Quantization Type:** {quant_type}
                 - **Original Model:** [{source_model_id}](https://huggingface.co/{source_model_id})
                 - **Format:** GGUF
-            """
-            )
+            """)

             # Save and upload README
             readme_path = "/tmp/README.md"
@@ -301,13 +303,15 @@ def upload_to_hf(self, file_path: str, repo_id: str, source_model_id: str, quant
                 repo_id=repo_id
             )

-            # Upload the model file
-            logger.info(f"Uploading quantized model: {file_path}")
-            api.upload_file(
-                path_or_fileobj=file_path,
-                path_in_repo=filename,
-                repo_id=repo_id
-            )
+            # Upload all model files
+            for file_path, _ in model_files:
+                filename = os.path.basename(file_path)
+                logger.info(f"Uploading quantized model: {filename}")
+                api.upload_file(
+                    path_or_fileobj=file_path,
+                    path_in_repo=filename,
+                    repo_id=repo_id
+                )

             # Upload imatrix.dat if it exists
             imatrix_path = "/root/llama.cpp/imatrix.dat"
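For context, the reworked `upload_to_hf` now takes a list of `(file_path, quant_type)` tuples instead of a single file. Below is a minimal, self-contained sketch of that data shape and the version list the card generator derives from it; the file paths and quant types are illustrative, not taken from this change:

```python
import os

# Hypothetical example of the (file_path, quant_type) tuples the new
# model_files parameter expects; these paths are not from this diff.
model_files = [
    ("/root/llama.cpp/out/model-Q4_K_M.gguf", "Q4_K_M"),
    ("/root/llama.cpp/out/model-Q8_0.gguf", "Q8_0"),
]

# Mirrors the card-generation snippet above: one bullet per quantized
# file (the dedent-matching indentation in the separator is omitted here).
versions_text = "\n".join(
    f"- `{os.path.basename(file)}` ({quant_type})"
    for file, quant_type in model_files
)
print(versions_text)
# - `model-Q4_K_M.gguf` (Q4_K_M)
# - `model-Q8_0.gguf` (Q8_0)
```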