From abf44cceacac0fcdae19f39c8d6e57f1f99a1a23 Mon Sep 17 00:00:00 2001
From: Casper
Date: Thu, 2 Nov 2023 19:38:25 +0100
Subject: [PATCH] Bump to v0.1.6 (#139)

---
 README.md       | 2 +-
 awq/__init__.py | 2 +-
 setup.py        | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index ca631ef5..c9b285ac 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ AutoAWQ is an easy-to-use package for 4-bit quantized models. AutoAWQ speeds up
 models by 2x while reducing memory requirements by 3x compared to FP16. AutoAWQ implements the Activation-aware Weight Quantization (AWQ) algorithm for quantizing LLMs. AutoAWQ was created and improved upon from the [original work](https://github.com/mit-han-lab/llm-awq) from MIT.
 
 *Latest News* 🔥
-- [2023/11] AutoAWQ has been merged into 🤗 transformers. Example found in: [examples/basic_transformers](examples/basic_transformers.py).
+- [2023/11] AutoAWQ has been merged into 🤗 transformers. Now includes CUDA 12.1 wheels.
 - [2023/10] Mistral (Fused Modules), Bigcode, Turing support, Memory Bug Fix (Saves 2GB VRAM)
 - [2023/09] 1.6x-2.5x speed boost on fused models (now including MPT and Falcon).
 - [2023/09] Multi-GPU support, bug fixes, and better benchmark scripts available
diff --git a/awq/__init__.py b/awq/__init__.py
index c12525ec..446e7aad 100644
--- a/awq/__init__.py
+++ b/awq/__init__.py
@@ -1,2 +1,2 @@
-__version__ = "0.1.5"
+__version__ = "0.1.6"
 from awq.models.auto import AutoAWQForCausalLM
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 6dcc9cfc..650f3c7d 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@
     raise RuntimeError("Your system must have an Nvidia GPU for installing AutoAWQ")
 
 common_setup_kwargs = {
-    "version": f"0.1.5+cu{CUDA_VERSION}",
+    "version": f"0.1.6+cu{CUDA_VERSION}",
     "name": "autoawq",
     "author": "Casper Hansen",
     "license": "MIT",
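
Note on the setup.py hunk above: the package version carries a `+cu{CUDA_VERSION}` local version suffix (PEP 440 style), so a 0.1.6 build against CUDA 12.1 would publish as `0.1.6+cu121`. The code that computes `CUDA_VERSION` sits outside this hunk, so the sketch below is only an illustration of how such a suffix is commonly derived from the installed PyTorch build; the helper name and fallback behavior are assumptions, not the patch's actual logic.

```python
# Illustrative sketch only (assumed logic, not taken from this patch):
# derive a "+cuXXX" local version suffix from the installed PyTorch build.
import torch

AUTOAWQ_VERSION = "0.1.6"  # matches the bump in awq/__init__.py and setup.py

def cuda_version_suffix() -> str:
    """Return e.g. '121' for CUDA 12.1, or '' when torch has no CUDA build."""
    cuda = torch.version.cuda  # e.g. "12.1"; None on CPU-only torch installs
    return "".join(cuda.split(".")) if cuda else ""

suffix = cuda_version_suffix()
version = f"{AUTOAWQ_VERSION}+cu{suffix}" if suffix else AUTOAWQ_VERSION
print(version)  # e.g. "0.1.6+cu121"
```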