|
14 | 14 | # ==============================================================================
|
15 | 15 | """Demo script to train and evaluate a model.
|
16 | 16 |
|
17 |
| -This scripts contains boilerplate code to train a DNNClassifier |
| 17 | +This script contains boilerplate code to train a Keras Text Classifier |
18 | 18 | and evaluate it using Tensorflow Model Analysis. Evaluation
|
19 | 19 | results can be visualized using tools like TensorBoard.
|
20 |
| -
|
21 |
| -Usage: |
22 |
| -
|
23 |
| -1. Train model: |
24 |
| - demo_script.train_model(...) |
25 |
| -
|
26 |
| -2. Evaluate: |
27 |
| - demo_script.evaluate_model(...) |
28 | 20 | """
|
29 | 21 |
|
30 |
| -import os |
31 |
| -import tempfile |
| 22 | +from tensorflow import keras |
32 | 23 | import tensorflow.compat.v1 as tf
|
33 |
| -from tensorflow.compat.v1 import estimator as tf_estimator |
34 |
| -import tensorflow_hub as hub |
35 | 24 | import tensorflow_model_analysis as tfma
|
36 | 25 | from tensorflow_model_analysis.addons.fairness.post_export_metrics import fairness_indicators # pylint: disable=unused-import
|
37 | 26 |
|
38 | 27 |
|
39 |
# Feature keys used to parse the serialized tf.Examples of the demo dataset.
TEXT_FEATURE = 'comment_text'  # Free-text feature fed to the classifier.
LABEL = 'toxicity'  # Ground-truth label feature.
SLICE = 'slice'  # Identity slice feature used for fairness evaluation.

# Parsing schema for tf.io.parse_single_example: maps each feature name to
# its tf.Example feature spec.  SLICE is variable-length because an example
# may carry zero or more identity slice values.
FEATURE_MAP = {
    LABEL: tf.io.FixedLenFeature([], tf.float32),
    TEXT_FEATURE: tf.io.FixedLenFeature([], tf.string),
    SLICE: tf.io.VarLenFeature(tf.string),
}
class ExampleParser(keras.layers.Layer):
  """A Keras layer that parses the tf.Example.

  Each input element is a serialized tf.Example proto; the layer parses it
  against FEATURE_MAP and emits only the single feature named by
  `input_feature_key`, so downstream layers receive a plain tensor.
  """

  def __init__(self, input_feature_key):
    # Call super().__init__() before assigning any attributes so that the
    # Keras base Layer's attribute tracking is initialized first.
    super().__init__()
    self._input_feature_key = input_feature_key

  def call(self, serialized_examples):
    def get_feature(serialized_example):
      parsed_example = tf.io.parse_single_example(
          serialized_example, features=FEATURE_MAP
      )
      return parsed_example[self._input_feature_key]

    # Parse every serialized example in the batch independently.
    return tf.map_fn(get_feature, serialized_examples)
| 54 | + |
class ExampleModel(keras.Model):
  """An example Keras NLP model.

  Pipeline: parse serialized tf.Examples to extract the text feature,
  vectorize the text to integer token ids, then apply two dense layers to
  produce a single logit per example.
  """

  def __init__(self, input_feature_key):
    super().__init__()
    self.parser = ExampleParser(input_feature_key)
    # Tiny vocabulary and sequence length keep the demo model fast.
    self.text_vectorization = keras.layers.TextVectorization(
        max_tokens=32,
        output_mode='int',
        output_sequence_length=32,
    )
    # Adapt on a small fixed corpus so the model works out of the box
    # without a separate vocabulary-building step.
    self.text_vectorization.adapt(
        ['nontoxic', 'toxic comment', 'test comment', 'abc', 'abcdef', 'random']
    )
    self.dense1 = keras.layers.Dense(32, activation='relu')
    self.dense2 = keras.layers.Dense(1)

  def call(self, inputs, training=True, mask=None):
    parsed_example = self.parser(inputs)
    text_vector = self.text_vectorization(parsed_example)
    # TextVectorization emits int token ids; cast to float for the dense stack.
    output1 = self.dense1(tf.cast(text_vector, tf.float32))
    output2 = self.dense2(output1)
    return output2
| 79 | + |
def evaluate_model(
    classifier_model_path,
    validate_tf_file_path,
    tfma_eval_result_path,
    eval_config,
):
  """Evaluate Model using Tensorflow Model Analysis.

  Args:
    classifier_model_path: Path to the trained classifier model to be
      evaluated.
    validate_tf_file_path: File containing the validation TFRecordDataset.
    tfma_eval_result_path: Path to which TFMA eval results are written.
    eval_config: tfma.EvalConfig describing model specs, metrics, and slices.
  """

  eval_shared_model = tfma.default_eval_shared_model(
      eval_saved_model_path=classifier_model_path, eval_config=eval_config
  )

  # Run the fairness evaluation; results land in tfma_eval_result_path.
  tfma.run_model_analysis(
      eval_shared_model=eval_shared_model,
      data_location=validate_tf_file_path,
      output_path=tfma_eval_result_path,
      eval_config=eval_config,
  )
0 commit comments