Hi, yeah, this is something important that we wanted to add, but it got lost in the backlog. Technically it's very simple to do: you just need to override these functions. For the moment, you can do it via monkey patching like this:

```python
# Do this at program start
import os

import torch

from hqq.models.base import BaseHQQModel


# Get the number of available weight chunks in save_dir
def get_num_weight_chunks(cls, save_dir: str) -> int:
    name, ext = os.path.splitext(os.path.basename(cls.get_weight_file(save_dir)))
    num_chunks = 0
    for file in os.listdir(save_dir):
        c_name, c_ext = os.path.splitext(file)
        # Chunk files are named <name>_<chunk_id><ext>
        if c_name.startswith(name + '_') and c_ext == ext:
            num_chunks += 1
    return num_chunks


# Get the path of a single chunk file
def get_weight_file_chunk(cls, save_dir: str, chunk_id: int) -> str:
    path, ext = os.path.splitext(cls.get_weight_file(save_dir))
    return path + '_' + str(chunk_id) + ext


# Split the weights dictionary into a list of smaller dictionaries
def split_weights_into_chunks(cls, weights: dict, num_chunks="auto") -> list:
    if num_chunks == "auto":
        num_chunks = 2  # pick a value that keeps each file under your size limit
    keys = list(weights.keys())
    chunk_size = max(1, (len(keys) + num_chunks - 1) // num_chunks)
    return [{k: weights[k] for k in keys[i:i + chunk_size]} for i in range(0, len(keys), chunk_size)]


# Save weights to disk, one file per chunk
def save_weights_chunked(cls, weights: dict, save_dir: str) -> None:
    # weights is just a dictionary; split it into weights_chunks: list
    weights_chunks = cls.split_weights_into_chunks(weights)
    for i in range(len(weights_chunks)):
        torch.save(weights_chunks[i], cls.get_weight_file_chunk(save_dir, i))


# Load weights from disk by merging all the chunks
def load_weights_chunked(cls, save_dir: str, map_location=None) -> dict:
    weights = {}
    num_chunks = cls.get_num_weight_chunks(save_dir)
    for i in range(num_chunks):
        weights.update(torch.load(cls.get_weight_file_chunk(save_dir, i), map_location=map_location))
    return weights


# The original save_weights / load_weights hooks are classmethods, so wrap the patches accordingly
BaseHQQModel.get_num_weight_chunks = classmethod(get_num_weight_chunks)
BaseHQQModel.get_weight_file_chunk = classmethod(get_weight_file_chunk)
BaseHQQModel.split_weights_into_chunks = classmethod(split_weights_into_chunks)
BaseHQQModel.save_weights = classmethod(save_weights_chunked)
BaseHQQModel.load_weights = classmethod(load_weights_chunked)
```
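If you need each chunk file to stay under the Hub's 50GB per-file limit, you can swap the even split above for a size-aware one. A minimal sketch (the `max_bytes` parameter and the `split_weights_by_size` name are just illustrative, not part of hqq):

```python
import torch

# Rough size estimate: assumes values are tensors or (possibly nested)
# dicts/lists containing tensors, which is what the HQQ weight dicts hold.
def _approx_nbytes(obj) -> int:
    if isinstance(obj, torch.Tensor):
        return obj.numel() * obj.element_size()
    if isinstance(obj, dict):
        return sum(_approx_nbytes(v) for v in obj.values())
    if isinstance(obj, (list, tuple)):
        return sum(_approx_nbytes(v) for v in obj)
    return 0


# Start a new chunk whenever the current one would exceed max_bytes
def split_weights_by_size(cls, weights: dict, max_bytes: int = 40 * 1024**3) -> list:
    chunks, current, current_bytes = [], {}, 0
    for key, value in weights.items():
        value_bytes = _approx_nbytes(value)
        if current and current_bytes + value_bytes > max_bytes:
            chunks.append(current)
            current, current_bytes = {}, 0
        current[key] = value
        current_bytes += value_bytes
    if current:
        chunks.append(current)
    return chunks


# Use it instead of the even split:
# BaseHQQModel.split_weights_into_chunks = classmethod(split_weights_by_size)
```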
Later, we will do a refactoring to fully support safetensors as well. Let me know if the solution above works!
Serialization will be fully supported directly in HF after this PR: huggingface/transformers#33141
I quantized Mixtral 8x22B. It produces a .pt model file of approximately 60GB, and I can't push the model to Hugging Face because of the per-file size limit (50GB max). Is there a way to shard it like we can with safetensors?
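For reference, the kind of sharding I mean is what `transformers` already does for regular (non-quantized) checkpoints; the model name and shard size below are just for illustration:

```python
from transformers import AutoModelForCausalLM

# Regular checkpoints get split into multiple safetensors files under a size cap
model = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x22B-v0.1")
model.save_pretrained("sharded_model", safe_serialization=True, max_shard_size="40GB")
# -> model-00001-of-000NN.safetensors, model-00002-of-000NN.safetensors, ...
```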