A repo of detailed steps for deploying Vicuna using FastChat.
https://github.com/thisserand/FastChat
https://github.com/oobabooga/GPTQ-for-LLaMa
GPTQ (Post-Training Quantization for Generative Pre-trained Transformers) is a state-of-the-art (SOTA) one-shot weight quantization method.
# Confirm that an NVIDIA device is visible on the PCI bus.
lspci | grep -i nvidia
# Remove previously installed NVIDIA packages. The patterns are quoted so that
# apt, not the shell, expands them: an unquoted nvidia* would be replaced by
# matching file names if any exist in the current directory.
sudo apt-get purge 'nvidia*'
sudo apt remove 'nvidia-*'
# -f: do not report an error when no CUDA apt source list is present.
sudo rm -f /etc/apt/sources.list.d/cuda*
sudo apt-get autoremove && sudo apt-get autoclean
# Delete any CUDA toolkit directories left under /usr/local.
sudo rm -rf /usr/local/cuda*
- update the system
# Refresh the package index; upgrade installed packages only if that succeeds.
sudo apt-get update \
  && sudo apt-get upgrade --yes
- install other important packages
# Compiler toolchain plus the X11 / GLU / freeglut development packages.
sudo apt-get install \
  g++ \
  freeglut3-dev \
  build-essential \
  libx11-dev \
  libxmu-dev \
  libxi-dev \
  libglu1-mesa \
  libglu1-mesa-dev
- add the graphics-drivers PPA repository and install the NVIDIA driver from it
# Register the graphics-drivers PPA and refresh the package index.
sudo add-apt-repository ppa:graphics-drivers/ppa
sudo apt update
# Install the 515-series packages one at a time, in this order.
for pkg in libnvidia-common-515 libnvidia-gl-515 nvidia-driver-515; do
  sudo apt install "${pkg}"
done
# Base URL of NVIDIA's CUDA apt repository for Ubuntu 20.04 (x86_64).
CUDA_REPO_URL="https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64"
# Download the pin file and move it into apt's preferences directory.
wget "${CUDA_REPO_URL}/cuda-ubuntu2004.pin"
sudo mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
# Fetch the repository signing key, register the repository, refresh the index.
sudo apt-key adv --fetch-keys "${CUDA_REPO_URL}/3bf863cc.pub"
sudo add-apt-repository "deb ${CUDA_REPO_URL}/ /"
sudo apt-get update
- install CUDA-11.7
sudo apt install cuda-11-7
# Persist the CUDA 11.7 binary and library paths for future shells.
# ${VAR:+:${VAR}} appends the previous value only when it is non-empty, so an
# unset LD_LIBRARY_PATH does not leave a trailing ':' (an empty entry, which
# the dynamic loader interprets as the current directory).
echo 'export PATH=/usr/local/cuda-11.7/bin${PATH:+:${PATH}}' >> ~/.bashrc
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-11.7/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}' >> ~/.bashrc
# Apply the new settings to the current shell and refresh the linker cache.
source ~/.bashrc
sudo ldconfig
# Name of the cuDNN archive to download and unpack.
CUDNN_TAR_FILE="cudnn-linux-x86_64-8.5.0.96_cuda11-archive.tar.xz"
wget "https://developer.download.nvidia.com/compute/redist/cudnn/v8.5.0/local_installers/11.7/${CUDNN_TAR_FILE}"
# The archive is xz-compressed: use -J (xz). -z forces gzip and fails on .tar.xz.
tar -xJvf "${CUDNN_TAR_FILE}"
- copy the following files into the cuda toolkit directory.
# The cuDNN 8.x archive unpacks into a directory named after the tarball, with
# include/ and lib/ inside it; it does not create a top-level cuda/ directory.
CUDNN_DIR="cudnn-linux-x86_64-8.5.0.96_cuda11-archive"
# cuDNN 8 ships several headers (cudnn_version.h, cudnn_ops_infer.h, ...), so
# copy every cudnn*.h rather than cudnn.h alone.
sudo cp -P "${CUDNN_DIR}"/include/cudnn*.h /usr/local/cuda-11.7/include
# -P keeps the libcudnn symlinks as symlinks instead of copying their targets.
sudo cp -P "${CUDNN_DIR}"/lib/libcudnn* /usr/local/cuda-11.7/lib64/
# Make the installed headers and libraries readable by all users.
sudo chmod a+r /usr/local/cuda-11.7/include/cudnn*.h /usr/local/cuda-11.7/lib64/libcudnn*
# Sanity check: query the driver, then print the CUDA compiler version.
nvidia-smi
nvcc --version
# Installer script name, reused by the download, checksum and run steps below.
MINICONDA_INSTALLER="Miniconda3-latest-Linux-x86_64.sh"
wget "https://repo.anaconda.com/miniconda/${MINICONDA_INSTALLER}"
# Print the SHA-256 digest so it can be compared by hand with the published one.
sha256sum "${MINICONDA_INSTALLER}"
bash "${MINICONDA_INSTALLER}"
# Reload the shell configuration (presumably to pick up the changes the
# installer made to ~/.bashrc -- verify on your system).
. ~/.bashrc
# Create and enter a dedicated Python 3.9 environment named "vicuna".
conda create --name vicuna python=3.9
conda activate vicuna
# Install the dependencies, each from its own channel.
conda install --channel anaconda pip
conda install --channel conda-forge tqdm
# Logging in to Hugging Face with an access token may be required.
conda install --channel conda-forge huggingface_hub
conda install --channel huggingface transformers
# PyTorch packages paired with the CUDA 11.7 runtime.
conda install --channel pytorch --channel nvidia pytorch torchvision torchaudio pytorch-cuda=11.7
- replace "[your account]" in the paths below with your own user name, so that they point into your home directory
# Use the pip that belongs to the vicuna environment. pip requires the
# `install` subcommand; without it the package names are parsed as pip
# commands and nothing is installed.
/home/[your account]/miniconda3/envs/vicuna/bin/pip install safetensors protobuf sentencepiece
# Install transformers straight from its GitHub repository.
/home/[your account]/miniconda3/envs/vicuna/bin/pip install git+https://github.com/huggingface/transformers
# Clone the FastChat fork and install it in editable (development) mode.
git clone https://github.com/thisserand/FastChat.git
cd FastChat
pip3 install --editable .
# Clone the cuda branch of GPTQ-for-LLaMa into a new repositories/ directory.
mkdir repositories
cd repositories
git clone --branch cuda https://github.com/oobabooga/GPTQ-for-LLaMa.git
cd GPTQ-for-LLaMa
# Run the repository's setup_cuda.py install step.
python setup_cuda.py install
- the original code has a bug: the 'cursor' might be None; use the updated version (download-model_updated.py), which solves the problem.
# Go back up two directory levels.
cd ../..
# Model identifier shared by the download step and the chat CLI.
MODEL_NAME="anon8231489123/vicuna-13b-GPTQ-4bit-128g"
python download-model_updated.py "${MODEL_NAME}"
# Start the FastChat command-line chat with 4-bit weights and group size 128.
python -m fastchat.serve.cli --model-name "${MODEL_NAME}" --wbits 4 --groupsize 128