-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpredict.py
76 lines (59 loc) · 2.59 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#pylint: disable = redefined-outer-name, invalid-name
# inbuilt lib imports:
from typing import List, Dict
import json
import os
import argparse
# external lib imports:
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras import models
# project imports
from data import read_instances, load_vocabulary, index_instances, generate_batches
from util import load_pretrained_model
def predict(model: models.Model,
            instances: List[Dict],
            batch_size: int,
            save_to_file: str = None) -> List[int]:
    """
    Makes predictions using model on instances and saves them in save_to_file.

    Arguments:
        model: Callable invoked as ``model(**batch_inputs, training=False)``;
            its result is indexed with ``"logits"``.
        instances: Instances handed to ``generate_batches`` together with
            ``batch_size``.
        batch_size: Batch size forwarded to ``generate_batches``.
        save_to_file: When truthy, path of a text file that receives one
            predicted label per line. When falsy (the default), the labels
            are printed to stdout instead.

    Returns:
        The predicted labels (argmax over the last axis of the logits) of all
        batches, concatenated in iteration order, as plain Python ints.
    """
    batches = generate_batches(instances, batch_size)

    all_predicted_labels: List[int] = []
    print("Making predictions")
    for batch_inputs in tqdm(batches):
        # Labels are not passed to the model at prediction time. A default is
        # supplied so that a batch without a "labels" entry (e.g. unlabeled
        # data) does not raise KeyError.
        batch_inputs.pop("labels", None)
        logits = model(**batch_inputs, training=False)["logits"]
        # .tolist() converts NumPy scalars to built-in ints, matching the
        # declared List[int] return type.
        batch_labels = tf.argmax(logits, axis=-1).numpy().tolist()
        all_predicted_labels.extend(batch_labels)

    if save_to_file:
        print(f"Saving predictions to filepath: {save_to_file}")
        with open(save_to_file, "w", encoding="utf-8") as file:
            file.writelines(f"{predicted_label}\n"
                            for predicted_label in all_predicted_labels)
    else:
        for predicted_label in all_predicted_labels:
            # The explicit "\n" plus print's own newline leaves a blank line
            # after every label; this output format is kept unchanged.
            print(str(predicted_label) + "\n")
    return all_predicted_labels
if __name__ == '__main__':
    # Command-line interface: two positional arguments (model directory and
    # data file) plus optional output file and batch size.
    arg_parser = argparse.ArgumentParser(
        description='Predict with trained Main/Probing Model')
    arg_parser.add_argument(
        'load_serialization_dir',
        type=str,
        help='serialization directory from which to load the trained model.')
    arg_parser.add_argument(
        'data_file_path',
        type=str,
        help='data file path to predict on.')
    arg_parser.add_argument(
        '--predictions-file',
        type=str,
        help='output predictions file.')
    arg_parser.add_argument(
        '--batch-size',
        type=int,
        default=32,
        help='batch size')
    args = arg_parser.parse_args()
    serialization_dir = args.load_serialization_dir

    # Set some constants
    MAX_NUM_TOKENS = 250

    # Read the raw instances, then index them with the vocabulary stored
    # next to the trained model.
    raw_instances = read_instances(args.data_file_path, MAX_NUM_TOKENS)
    vocabulary_path = os.path.join(serialization_dir, "vocab.txt")
    vocab_token_to_id, _ = load_vocabulary(vocabulary_path)
    indexed_instances = index_instances(raw_instances, vocab_token_to_id)

    # Load Config
    # NOTE: `config` is parsed here but not referenced again in this script.
    config_path = os.path.join(serialization_dir, "config.json")
    with open(config_path, "r") as config_file:
        config = json.load(config_file)

    # Load Model and run prediction; predictions go to --predictions-file
    # when given, otherwise predict() prints them.
    classifier = load_pretrained_model(serialization_dir)
    predict(classifier,
            indexed_instances,
            args.batch_size,
            args.predictions_file)