Commit
fix e2e tests
dan-garvey committed Mar 20, 2024
1 parent d24fee2 commit 55cfd04
Showing 3 changed files with 220 additions and 145 deletions.
11 changes: 7 additions & 4 deletions models/turbine_models/custom_models/sd_inference/clip.py
@@ -56,17 +56,20 @@ def export_clip_model(
     max_alloc=None,
     upload_ir=False,
 ):
+    input_len = 77
     if "google/t5" in hf_model_name:
         from transformers import T5Tokenizer, T5Model

         tokenizer = T5Tokenizer.from_pretrained(hf_model_name)
         text_encoder_model = T5Model.from_pretrained(hf_model_name)
+        input_len = 512

     else:
         # TODO: Add better filtering mechanism for things that require CLIPProcessor
-        if hf_model_name == "openai/clip-vit-large-patch14":
+        if "openai" in hf_model_name:
             tokenizer = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
             hf_subfolder = ""  # CLIPProcessor does not have a subfolder
+            input_len = 10
         else:
             # Load the tokenizer and text encoder to tokenize and encode the text.
             tokenizer = CLIPTokenizer.from_pretrained(
@@ -102,8 +105,8 @@ class CompiledClip(CompiledModule):

         def main(
             self,
-            inp=AbstractTensor(1, 77, dtype=torch.int64),
-            decoder_input_ids=AbstractTensor(1, 77, dtype=torch.int64),
+            inp=AbstractTensor(1, input_len, dtype=torch.int64),
+            decoder_input_ids=AbstractTensor(1, input_len, dtype=torch.int64),
         ):
             return jittable(text_encoder_model.forward)(
                 input_ids=inp, decoder_input_ids=decoder_input_ids
@@ -122,7 +125,7 @@ class CompiledClip(CompiledModule):
         else:
             params = export_parameters(text_encoder_model)

-        def main(self, inp=AbstractTensor(1, 77, dtype=torch.int64)):
+        def main(self, inp=AbstractTensor(1, input_len, dtype=torch.int64)):
             return jittable(text_encoder_model.forward)(input_ids=inp)

     import_to = "INPUT" if compile_to == "linalg" else "IMPORT"
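With this change, the shape of the exported main() inputs follows a per-model input_len instead of the hard-coded CLIP length of 77. A minimal sketch of the selection logic introduced above, written as a standalone function — select_input_len is a hypothetical name, not part of the commit; the values come straight from the diff:

def select_input_len(hf_model_name: str) -> int:
    # Mirrors the branches added to export_clip_model.
    input_len = 77  # default: the CLIP tokenizer's model_max_length
    if "google/t5" in hf_model_name:
        input_len = 512  # T5 prompts are padded to a longer sequence
    elif "openai" in hf_model_name:
        input_len = 10  # short fixed length used on the CLIPProcessor path
    return input_len

AbstractTensor(1, input_len, dtype=torch.int64) then pins a static input shape that matches what the corresponding tokenizer produces at runtime.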
109 changes: 84 additions & 25 deletions models/turbine_models/custom_models/sd_inference/clip_runner.py
@@ -3,6 +3,7 @@
 from transformers import CLIPTokenizer
 from iree import runtime as ireert
 import torch
+from PIL import Image

 parser = argparse.ArgumentParser()

@@ -52,21 +53,54 @@ def run_clip(
 ):
     runner = vmfbRunner(device, vmfb_path, external_weight_path)

-    tokenizer = CLIPTokenizer.from_pretrained(
-        hf_model_name,
-        subfolder="tokenizer",
-        token=hf_auth_token,
-    )
-    text_input = tokenizer(
-        prompt,
-        padding="max_length",
-        max_length=tokenizer.model_max_length,
-        truncation=True,
-        return_tensors="pt",
-    )
+    if "google/t5" in hf_model_name:
+        from transformers import T5Tokenizer, T5Model
+
+        tokenizer = T5Tokenizer.from_pretrained(hf_model_name)
+        text_input = tokenizer(
+            prompt,
+            padding="max_length",
+            max_length=tokenizer.model_max_length,
+            truncation=True,
+            return_tensors="pt",
+        )
+    # TODO: Integrate with HFTransformerBuilder
+    else:
+        if "openai" in hf_model_name:
+            from transformers import CLIPProcessor
+            import requests
+
+            url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+            image = Image.open(requests.get(url, stream=True).raw)
+            tokenizer = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
+            text_input = tokenizer(
+                text=prompt,
+                images=image,
+                truncation=True,
+                padding=True,
+                return_tensors="pt",
+            )
+        else:
+            hf_subfolder = "tokenizer"
+
+            tokenizer = CLIPTokenizer.from_pretrained(
+                hf_model_name,
+                subfolder=hf_subfolder,
+                token=hf_auth_token,
+            )
+
+            text_input = tokenizer(
+                prompt,
+                padding="max_length",
+                max_length=tokenizer.model_max_length,
+                truncation=True,
+                return_tensors="pt",
+            )
     example_input = text_input.input_ids
     inp = [ireert.asdevicearray(runner.config.device, example_input)]

+    if "google/t5" in hf_model_name:
+        inp += [ireert.asdevicearray(runner.config.device, example_input)]
     results = runner.ctx.modules.compiled_clip["main"](*inp)
     return results

@@ -77,13 +111,38 @@ def run_torch_clip(hf_model_name, hf_auth_token, prompt):

         tokenizer = T5Tokenizer.from_pretrained(hf_model_name)
         model = T5Model.from_pretrained(hf_model_name)
         text_input = tokenizer(
             prompt,
             padding="max_length",
             max_length=tokenizer.model_max_length,
             truncation=True,
             return_tensors="pt",
         )
+    # TODO: Integrate with HFTransformerBuilder
     else:
+        if hf_model_name == "openai/clip-vit-large-patch14":
+            from transformers import CLIPProcessor
+            import requests
+
+            url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+            image = Image.open(requests.get(url, stream=True).raw)
+
+            tokenizer = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
+            hf_subfolder = ""  # CLIPProcessor does not have a subfolder
+            from transformers import CLIPTextModel
+
+            model = CLIPTextModel.from_pretrained(
+                hf_model_name,
+                subfolder=hf_subfolder,
+                token=hf_auth_token,
+            )
+            text_input = tokenizer(
+                text=prompt,
+                images=image,
+                truncation=True,
+                padding=True,
+                return_tensors="pt",
+            )
+        else:
             hf_subfolder = "text_encoder"

@@ -93,20 +152,20 @@ def run_torch_clip(hf_model_name, hf_auth_token, prompt):
                 token=hf_auth_token,
             )

-    from transformers import CLIPTextModel
+            from transformers import CLIPTextModel

-    model = CLIPTextModel.from_pretrained(
-        hf_model_name,
-        subfolder=hf_subfolder,
-        token=hf_auth_token,
-    )
-    text_input = tokenizer(
-        prompt,
-        padding="max_length",
-        max_length=tokenizer.model_max_length,
-        truncation=True,
-        return_tensors="pt",
-    )
+            model = CLIPTextModel.from_pretrained(
+                hf_model_name,
+                subfolder=hf_subfolder,
+                token=hf_auth_token,
+            )
+            text_input = tokenizer(
+                prompt,
+                padding="max_length",
+                max_length=tokenizer.model_max_length,
+                truncation=True,
+                return_tensors="pt",
+            )
     example_input = text_input.input_ids

     if "google/t5" in hf_model_name:
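The runner changes mirror the export: T5 models get max-length padding and, because the exported T5 main() also expects decoder_input_ids, the same token ids are placed on the device twice; the "openai" path tokenizes the prompt together with a sample COCO image via CLIPProcessor. A rough sketch of the end-to-end comparison these two runners support, in the spirit of the tests this commit fixes — run_clip's parameter list is collapsed in the diff above, so the argument order, output indexing, and tolerances below are assumptions:

import numpy as np

# Placeholder arguments, for illustration only.
device = "local-task"  # IREE runtime driver
vmfb_path = "clip.vmfb"  # module compiled from the export_clip_model output
external_weight_path = "clip.safetensors"
hf_model_name = "openai/clip-vit-large-patch14"
hf_auth_token = None
prompt = "a photo of two cats"

turbine_out = run_clip(
    device, vmfb_path, external_weight_path, hf_model_name, hf_auth_token, prompt
)
torch_out = run_torch_clip(hf_model_name, hf_auth_token, prompt)

# Assumes both results convert to numpy; the tolerances are placeholders.
np.testing.assert_allclose(
    np.asarray(turbine_out[0]), np.asarray(torch_out[0]), rtol=1e-2, atol=1e-2
)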
(Diff for the third changed file did not load.)
