-
Notifications
You must be signed in to change notification settings - Fork 3
/
rating.py
34 lines (28 loc) · 1.05 KB
/
rating.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os

import taqyim as tq
def post_process(example):
    """Return *example* with tatweel and hyphen characters removed.

    Every 'ـ' and '-' character is deleted, then leading and trailing
    whitespace is trimmed from what remains.
    """
    # One translation table deletes both characters in a single pass.
    deletions = str.maketrans('', '', 'ـ-')
    cleaned = example.translate(deletions)
    return cleaned.strip()
def map_prompt(sample):
    """Store a combined rating input under ``sample["text"]`` and return sample.

    The text starts with the sample's instruction, followed by the cleaned
    values found under the "CIDAR", "Chat" and "AlpaGasus" keys, labelled
    A, B and C respectively. The sample mapping is modified in place.
    """
    labelled_columns = (("A", "CIDAR"), ("B", "Chat"), ("C", "AlpaGasus"))
    header = f"Instruction: {sample['instruction']} Generated Text:"
    choices = "".join(
        f" {label}. {post_process(sample[column])}"
        for label, column in labelled_columns
    )
    sample["text"] = header + choices
    return sample
# Instruction text passed to tq.Pipeline as `prompt`: it asks for a choice
# between the three labelled responses (A, B, C) and a reply consisting of
# that letter only. strip() removes the leading/trailing newlines that the
# triple-quoted layout introduces.
prompt = """
You are given an Instruction and the Generated Text from three different models A B and C.
Choose one response that best represents the Arabic region. Respond ONLY with the letters A B or C. Don't generate any other text.
""".strip()
# Build the evaluation pipeline, run it, and print its results.
# The meaning of each keyword is defined by taqyim's Pipeline.
#
# The API key is read from the OPENAI_API_KEY environment variable so a real
# key never has to be written into this file; the original placeholder is
# kept as the fallback when the variable is not set.
#
# NOTE(review): task_description mentions sentiment analysis although the
# prompt asks for a preference choice between three responses -- confirm
# whether this description should be updated.
# NOTE(review): train_split and test_split both name "train" -- presumably the
# dataset exposes a single split; verify.
pipeline = tq.Pipeline(
    eval_name="cidar-test",
    dataset_name="arbml/cidar_alpag_chat",
    task_class="rater",
    task_description="Arabic text sentiment analysis",
    input_column_name='text',
    prompt=prompt,
    api_key=os.environ.get("OPENAI_API_KEY", '<openai-key>'),
    preprocessing_fn=map_prompt,  # adds the 'text' column read above
    train_split="train",
    test_split="train",
    model_name="gpt-3.5-turbo-0301",
    max_samples=100,
)
pipeline.run()
print(pipeline.show_results())