New release (#8)
* new new_release

* format with black

* update converter

* update converter

* update test

---------

Co-authored-by: michaelfeil <me@michaelfeil.eu>
michaelfeil authored Jun 15, 2023
1 parent 49730fe commit 69a3dfd
Showing 6 changed files with 511 additions and 122 deletions.
287 changes: 214 additions & 73 deletions conversion_utils/convert.py
@@ -1,128 +1,269 @@
import os


def call(*args, **kwargs):
    import subprocess

    out = subprocess.call(*args, **kwargs)
    if out != 0:
        raise ValueError(f"Output: {out}")


model_description_generator = """
from hf_hub_ctranslate2 import GeneratorCT2fromHfHub
model = GeneratorCT2fromHfHub(
    # load in int8 on CUDA
    model_name_or_path=model_name,
    device="cuda",
    compute_type="int8_float16",
    # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
)
outputs = model.generate(
    text=["def fibonacci(", "User: How are you doing? Bot:"],
    max_length=64,
    include_prompt_in_result=False,
)
print(outputs)"""

model_description_translator = """
from hf_hub_ctranslate2 import TranslatorCT2fromHfHub
model = TranslatorCT2fromHfHub(
    # load in int8 on CUDA
    model_name_or_path=model_name,
    device="cuda",
    compute_type="int8_float16",
    # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
)
outputs = model.generate(
    text=["def fibonacci(", "User: How are you doing? Bot:"],
    max_length=64,
)
print(outputs)"""

model_description_encoder = """
from hf_hub_ctranslate2 import EncoderCT2fromHfHub
model = EncoderCT2fromHfHub(
    # load in int8 on CUDA
    model_name_or_path=model_name,
    device="cuda",
    compute_type="float16",
    # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
)
embeddings = model.encode(
    ["I like soccer", "I like tennis", "The Eiffel Tower is in Paris"],
    batch_size=32,
    convert_to_numpy=True,
    normalize_embeddings=True,
)
print(embeddings.shape, embeddings)
scores = (embeddings @ embeddings.T) * 100
"""


def convert(NAME="opus-mt-en-fr", ORG="Helsinki-NLP", description="generator"):
    print(f"converting {ORG}/{NAME}")
    import re
    import datetime
    from huggingface_hub import HfApi, snapshot_download

    api = HfApi()
    HUB_NAME = f"ct2fast-{NAME}"
    repo_id = f"michaelfeil/{HUB_NAME}"
    api.create_repo(repo_id=repo_id, exist_ok=True, repo_type="model")
    tmp_dir = os.path.join(os.path.expanduser("~"), f"tmp-{HUB_NAME}")
    os.chdir(os.path.expanduser("~"))

    path = snapshot_download(
        f"{ORG}/{NAME}",
    )
    files = [f for f in os.listdir(path) if "." in f]
    filtered_f = [
        f
        for f in files
        if not ("model" in f or "config.json" == f or f.endswith(".py"))
    ]

    conv_arg = (
        [
            "ct2-transformers-converter",
            "--model",
            f"{ORG}/{NAME}",
            "--output_dir",
            str(tmp_dir),
            "--force",
            "--copy_files",
        ]
        + filtered_f
        + [
            "--quantization",
            "float16" if description == "encoder" else "int8_float16",
            "--trust_remote_code",
        ]
    )
    call(conv_arg)
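    # For reference (an illustrative expansion, not part of this commit):
    # for an encoder checkpoint such as intfloat/e5-small-v2, conv_arg
    # resolves to roughly
    #   ct2-transformers-converter --model intfloat/e5-small-v2
    #       --output_dir ~/tmp-ct2fast-e5-small-v2 --force
    #       --copy_files tokenizer.json special_tokens_map.json ...
    #       --quantization float16 --trust_remote_code
    # where the --copy_files list depends on the downloaded snapshot.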

    if "vocabulary.txt" not in os.listdir(tmp_dir) and "vocab.txt" in os.listdir(
        tmp_dir
    ):
        # mirror vocab.txt to vocabulary.txt when only the former exists
        import shutil

        shutil.copyfile(
            os.path.join(tmp_dir, "vocab.txt"),
            os.path.join(tmp_dir, "vocabulary.txt"),
        )

    with open(os.path.join(tmp_dir, "README.md"), "r") as f:
        content = f.read()
    if "tags:" in content:
        content = content.replace("tags:", "tags:\n- ctranslate2\n- int8\n- float16", 1)
    else:
        content = content.replace(
            "---", "---\ntags:\n- ctranslate2\n- int8\n- float16\n", 1
        )

    end_header = [m.start() for m in re.finditer(r"---", content)]
    if len(end_header) > 1:
        end_header = end_header[1] + 3
    else:
        end_header = 0
    conv_arg_nice = " ".join(conv_arg)
    conv_arg_nice = conv_arg_nice.replace(os.path.expanduser("~"), "~")
    if description == "generator":
        model_description = model_description_generator
    elif description == "encoder":
        model_description = model_description_encoder
    elif description == "translator":
        model_description = model_description_translator
    add_string = f"""
# Fast-Inference with Ctranslate2
Speedup inference while reducing memory by 2x-4x using int8 inference in C++ on CPU or GPU.
quantized version of [{ORG}/{NAME}](https://huggingface.co/{ORG}/{NAME})
```bash
pip install hf-hub-ctranslate2>=2.10.0 ctranslate2>=3.16.0
```
```python
# from transformers import AutoTokenizer
model_name = "{repo_id}"
{model_description}
```
Checkpoint compatible to [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
and [hf-hub-ctranslate2>=2.10.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
- `compute_type=int8_float16` for `device="cuda"`
- `compute_type=int8` for `device="cpu"`
Converted on {str(datetime.datetime.now())[:10]} using
```
{conv_arg_nice}
```
# Licence and other remarks:
This is just a quantized version. Licence conditions are intended to be identical to the original huggingface repo.
# Original description
"""
    with open(os.path.join(tmp_dir, "README.md"), "w") as f:
        f.write(content[:end_header] + add_string + content[end_header:])

    api.upload_folder(
        folder_path=tmp_dir,
        repo_id=repo_id,
        repo_type="model",
        commit_message=f"Upload {ORG}/{NAME} ctranslate fp16 weights",
    )

    call(["rm", "-rf", tmp_dir])


if __name__ == "__main__":
    generators = [
        # "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
        # "togethercomputer/GPT-JT-6B-v0",
        # "togethercomputer/RedPajama-INCITE-7B-Instruct",
        # "togethercomputer/RedPajama-INCITE-7B-Chat",
        # "EleutherAI/pythia-160m",
        # "EleutherAI/pythia-2.8b",
        # "EleutherAI/pythia-6.9b",
        # "EleutherAI/pythia-12b",
        # "togethercomputer/Pythia-Chat-Base-7B",
        # "stabilityai/stablelm-base-alpha-7b",
        # "stabilityai/stablelm-tuned-alpha-7b",
        # "stabilityai/stablelm-base-alpha-3b",
        # "stabilityai/stablelm-tuned-alpha-3b",
        # "OpenAssistant/stablelm-7b-sft-v7-epoch-3",
        # "EleutherAI/gpt-j-6b",
        # "EleutherAI/gpt-neox-20b",
        # "OpenAssistant/pythia-12b-sft-v8-7k-steps",
        # "Salesforce/codegen-350M-mono",
        # "Salesforce/codegen-350M-multi",
        # "Salesforce/codegen-2B-mono",
        # "Salesforce/codegen-2B-multi",
        # "Salesforce/codegen-6B-multi",
        # "Salesforce/codegen-6B-mono",
        # "Salesforce/codegen-16B-mono",
        # "Salesforce/codegen-16B-multi",
        # "Salesforce/codegen2-1B",
        # "Salesforce/codegen2-3_7B",
        # "Salesforce/codegen2-7B",
        # "Salesforce/codegen2-16B",
        # "bigcode/gpt_bigcode-santacoder",
        # "bigcode/starcoder",
        # "mosaicml/mpt-7b",
        # "mosaicml/mpt-7b-instruct",
        # "mosaicml/mpt-7b-chat",
        "VMware/open-llama-7b-open-instruct",
        # "tiiuae/falcon-7b-instruct",
        # "tiiuae/falcon-7b",
        "tiiuae/falcon-40b-instruct",
        "tiiuae/falcon-40b",
        "OpenAssistant/falcon-7b-sft-top1-696",
        "OpenAssistant/falcon-7b-sft-mix-2000",
        "OpenAssistant/falcon-40b-sft-mix-1226",
        # "HuggingFaceH4/starchat-beta",
        "WizardLM/WizardCoder-15B-V1.0",
    ]
    translators = [
        # "Salesforce/codet5p-770m-py", "Salesforce/codet5p-770m"
    ]
    encoders = [
        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        "intfloat/e5-small-v2",
        "intfloat/e5-large-v2",
        "intfloat/e5-large",
        "sentence-transformers/all-MiniLM-L6-v2",
        "setu4993/LaBSE",
    ]
    for m in encoders:
        ORG, NAME = m.split("/")
        convert(NAME=NAME, ORG=ORG, description="encoder")

    for m in translators:
        ORG, NAME = m.split("/")
        convert(NAME=NAME, ORG=ORG, description="translator")

    for m in generators:
        ORG, NAME = m.split("/")
        # import huggingface_hub
        # huggingface_hub.snapshot_download(
        #     m
        # )
        convert(NAME=NAME, ORG=ORG, description="generator")

        from hf_hub_ctranslate2 import GeneratorCT2fromHfHub
        from transformers import AutoTokenizer

        # smoke-test the freshly uploaded checkpoint
        model_name = f"michaelfeil/ct2fast-{NAME}"
        # use either TranslatorCT2fromHfHub or GeneratorCT2fromHfHub here, depending on model.
        model = GeneratorCT2fromHfHub(
            # load in int8 on CUDA
            model_name_or_path=model_name,
            device="cuda",
            compute_type="int8",
            tokenizer=AutoTokenizer.from_pretrained(m),
        )
        outputs = model.generate(
            text=["def print_hello_world():", "def hello_name(name:"], max_length=64
        )
        print(outputs)
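For context, a minimal sketch of driving the updated script for a single checkpoint (the model and the import path are illustrative assumptions; `convert` and its `description` parameter are defined above):

from conversion_utils.convert import convert  # assumes the repo root is on sys.path

ORG, NAME = "intfloat/e5-small-v2".split("/")
# "encoder" selects float16 quantization and the EncoderCT2fromHfHub README snippet
convert(NAME=NAME, ORG=ORG, description="encoder")

Note that convert creates and uploads a michaelfeil/ct2fast-* Hub repo, so running it requires Hugging Face write credentials.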
10 changes: 8 additions & 2 deletions hf_hub_ctranslate2/__init__.py
@@ -1,5 +1,11 @@
# -*- coding: utf-8 -*-
"""Compatibility between Huggingface and Ctranslate2."""
# __all__ = ["__version__", "TranslatorCT2fromHfHub", "GeneratorCT2fromHfHub", "MultiLingualTranslatorCT2fromHfHub"]
from hf_hub_ctranslate2.translate import (
    TranslatorCT2fromHfHub,
    GeneratorCT2fromHfHub,
    MultiLingualTranslatorCT2fromHfHub,
    EncoderCT2fromHfHub,
)

__version__ = "2.0.10"
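With the new export in place, `EncoderCT2fromHfHub` is importable from the package root. A minimal sketch, reusing the encode call from the README template above (the model repo is one of the encoders converted by this release and is illustrative; CPU int8 settings follow the template's compatibility notes):

from hf_hub_ctranslate2 import EncoderCT2fromHfHub

model = EncoderCT2fromHfHub(
    model_name_or_path="michaelfeil/ct2fast-e5-small-v2",  # illustrative converted repo
    device="cpu",
    compute_type="int8",
)
embeddings = model.encode(
    ["I like soccer", "I like tennis"],
    batch_size=32,
    convert_to_numpy=True,
    normalize_embeddings=True,
)
print(embeddings.shape)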