# scorers.py
import concurrent.futures
from collections import defaultdict

import numpy as np
from liquid import Template
from tqdm import tqdm

import utils


def predict_on_example(inputs):
    """Run the predictor on a single (example, prompt) pair."""
    ex, predictor, prompt = inputs
    pred = predictor.inference(ex, prompt)
    return prompt, ex, pred


class Cached01Scorer:
    """Scores prompts by 0/1 accuracy, caching results per (example, prompt) pair."""

    def __init__(self):
        self.cache = {}

    def __call__(self, predictor, prompts, data, agg='mean', max_threads=1):
        # max_threads is accepted for interface parity with the log-likelihood
        # scorer below; scoring here runs serially.
        def compute_scores(prompts_exs):
            out_scores = {}
            inputs = [(ex, predictor, prompt) for prompt, ex in prompts_exs]
            for inp in inputs:
                prompt, ex, pred = predict_on_example(inp)
                # 1 if the prediction matches the gold label, else 0
                out_scores[f'{ex}-{prompt}'] = 1 if pred == ex['label'] else 0
            return out_scores

        cached_scores = defaultdict(list)
        prompts_exs_to_compute = []
        for ex, prompt in [(ex, prompt) for ex in data for prompt in prompts]:
            if f'{ex}-{prompt}' in self.cache:
                cached_scores[prompt].append(self.cache[f'{ex}-{prompt}'])
            else:
                prompts_exs_to_compute.append((prompt, ex))
        computed_scores = compute_scores(prompts_exs_to_compute)
        for prompt, ex in prompts_exs_to_compute:
            self.cache[f'{ex}-{prompt}'] = computed_scores[f'{ex}-{prompt}']
            cached_scores[prompt].append(computed_scores[f'{ex}-{prompt}'])

        if agg == 'mean':
            return [np.mean(cached_scores[prompt]) for prompt in prompts]
        else:
            raise ValueError(f'Unknown aggregation: {agg}')
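
# Example usage (illustrative sketch only; `predictor` is assumed to expose an
# `inference(ex, prompt)` method returning a label, as used above, and `dev_exs`
# stands in for a list of {'text': ..., 'label': ...} dicts):
#
#     scorer = Cached01Scorer()
#     accuracies = scorer(predictor, prompts, dev_exs)  # one mean accuracy per prompt
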
def logprob_on_example(inputs):
    """Query the API for the log-probability of the gold answer token."""
    ex, predictor, base_prompt, prompt, temperature = inputs
    lps = utils.instructGPT_logprobs(prompt, temperature=temperature)
    # The last log prob is the log prob of the answer (assuming single-token responses).
    return base_prompt, ex, lps[0]['logprobs']['token_logprobs'][-1]


class CachedLogLikelihoodScorer:
    """Scores prompts by the log-likelihood the model assigns to the gold label,
    caching results per (example, prompt) pair."""

    def __init__(self):
        self.cache = {}

    def __call__(self, predictor, prompts, data, agg='mean', max_threads=1):
        def compute_scores(prompts_exs):
            out_scores = {}
            inputs = []
            for prompt, ex in prompts_exs:
                inputs.append((
                    ex,
                    predictor,
                    prompt,
                    # Render the prompt with the example text, appending the gold
                    # label's category name so its log prob can be read off.
                    Template(
                        prompt + ' ' + predictor.categories[ex['label']]
                    ).render(text=ex['text']),
                    predictor.opt['temperature'],
                ))
            with concurrent.futures.ProcessPoolExecutor(max_workers=max_threads) as executor:
                futures = [executor.submit(logprob_on_example, inp) for inp in inputs]
                for future in tqdm(concurrent.futures.as_completed(futures),
                                   total=len(futures), desc='ll scorer'):
                    prompt, ex, pred = future.result()
                    out_scores[f'{ex}-{prompt}'] = pred
            return out_scores

        cached_scores = defaultdict(list)
        prompts_exs_to_compute = []
        for ex, prompt in [(ex, prompt) for ex in data for prompt in prompts]:
            if f'{ex}-{prompt}' in self.cache:
                cached_scores[prompt].append(self.cache[f'{ex}-{prompt}'])
            else:
                prompts_exs_to_compute.append((prompt, ex))
        computed_scores = compute_scores(prompts_exs_to_compute)
        for prompt, ex in prompts_exs_to_compute:
            self.cache[f'{ex}-{prompt}'] = computed_scores[f'{ex}-{prompt}']
            cached_scores[prompt].append(computed_scores[f'{ex}-{prompt}'])

        if agg == 'mean':
            return [np.mean(cached_scores[prompt]) for prompt in prompts]
        else:
            raise ValueError(f'Unknown aggregation: {agg}')
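

if __name__ == '__main__':
    # Minimal smoke test for Cached01Scorer with a stub predictor. This is an
    # illustrative sketch only: the real predictor implementation lives
    # elsewhere in the repo, and the stub simply echoes the gold label.
    class _StubPredictor:
        def inference(self, ex, prompt):
            return ex['label']

    exs = [
        {'text': 'great movie', 'label': 1},
        {'text': 'awful movie', 'label': 0},
    ]
    scorer = Cached01Scorer()
    print(scorer(_StubPredictor(), prompts=['Classify: {{ text }}'], data=exs))
    # Expected output: [1.0] (perfect accuracy, since the stub always agrees)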