-
Notifications
You must be signed in to change notification settings - Fork 80
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Showing
1 changed file
with
263 additions
and
0 deletions.
There are no files selected for viewing
263 changes: 263 additions & 0 deletions
263
fairness_indicators/test_cases/dlvm/fairness_indicators_dlvm_test_case.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,263 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "Tce3stUlHN0L" | ||
}, | ||
"source": [ | ||
"##### Copyright 2020 The TensorFlow Authors." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"cellView": "form", | ||
"id": "tuOe1ymfHZPu" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"#@title Licensed under the Apache License, Version 2.0 (the \"License\");\n", | ||
"# you may not use this file except in compliance with the License.\n", | ||
"# You may obtain a copy of the License at\n", | ||
"#\n", | ||
"# https://www.apache.org/licenses/LICENSE-2.0\n", | ||
"#\n", | ||
"# Unless required by applicable law or agreed to in writing, software\n", | ||
"# distributed under the License is distributed on an \"AS IS\" BASIS,\n", | ||
"# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n", | ||
"# See the License for the specific language governing permissions and\n", | ||
"# limitations under the License." | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "aalPefrUUplk" | ||
}, | ||
"source": [ | ||
"# Fairness Indicators DLVM Test Case" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "u33JXdluZ2lG" | ||
}, | ||
"source": [ | ||
"## Setup" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "B8dlyTyiTe-9" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import tensorflow as tf\n", | ||
"print('TF version: {}'.format(tf.__version__))\n", | ||
"\n", | ||
"import tensorflow_model_analysis as tfma\n", | ||
"print('TFMA version: {}'.format(tfma.__version__))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "HG4ww5SwVUaq" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Download the tar file from GCP and extract it\n", | ||
"import io, os, tempfile\n", | ||
"TAR_NAME = 'saved_models-2.2'\n", | ||
"BASE_DIR = tempfile.mkdtemp()\n", | ||
"DATA_DIR = os.path.join(BASE_DIR, TAR_NAME, 'data')\n", | ||
"MODELS_DIR = os.path.join(BASE_DIR, TAR_NAME, 'models')\n", | ||
"SCHEMA = os.path.join(BASE_DIR, TAR_NAME, 'schema.pbtxt')\n", | ||
"OUTPUT_DIR = os.path.join(BASE_DIR, 'output')\n", | ||
"\n", | ||
"!curl -O https://storage.googleapis.com/artifacts.tfx-oss-public.appspot.com/datasets/{TAR_NAME}.tar\n", | ||
"!tar xf {TAR_NAME}.tar\n", | ||
"!mv {TAR_NAME} {BASE_DIR}\n", | ||
"!rm {TAR_NAME}.tar\n", | ||
"\n", | ||
"print(\"Here's what we downloaded:\")\n", | ||
"!ls -R {BASE_DIR}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "h8i1NGecVZv1" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"from google.protobuf import text_format\n", | ||
"from tensorflow.python.lib.io import file_io\n", | ||
"from tensorflow_metadata.proto.v0 import schema_pb2\n", | ||
"from tensorflow.core.example import example_pb2\n", | ||
"\n", | ||
"schema = schema_pb2.Schema()\n", | ||
"contents = file_io.read_file_to_string(SCHEMA)\n", | ||
"schema = text_format.Parse(contents, schema)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "BPg2wEx_Vk3o" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"import csv\n", | ||
"\n", | ||
"datafile = os.path.join(DATA_DIR, 'eval', 'data.csv')\n", | ||
"reader = csv.DictReader(open(datafile, 'r'))\n", | ||
"examples = []\n", | ||
"for line in reader:\n", | ||
" example = example_pb2.Example()\n", | ||
" for feature in schema.feature:\n", | ||
" key = feature.name\n", | ||
" if feature.type == schema_pb2.FLOAT:\n", | ||
" example.features.feature[key].float_list.value[:] = (\n", | ||
" [float(line[key])] if len(line[key]) \u003e 0 else [])\n", | ||
" elif feature.type == schema_pb2.INT:\n", | ||
" example.features.feature[key].int64_list.value[:] = (\n", | ||
" [int(line[key])] if len(line[key]) \u003e 0 else [])\n", | ||
" elif feature.type == schema_pb2.BYTES:\n", | ||
" example.features.feature[key].bytes_list.value[:] = (\n", | ||
" [line[key].encode('utf8')] if len(line[key]) \u003e 0 else [])\n", | ||
" # Add a new column 'big_tipper' that indicates if tips was \u003e 20% of the fare. \n", | ||
" # TODO(b/157064428): Remove after label transformation is supported for Keras.\n", | ||
" big_tipper = float(line['tips']) \u003e float(line['fare']) * 0.2\n", | ||
" example.features.feature['big_tipper'].float_list.value[:] = [big_tipper]\n", | ||
" examples.append(example)\n", | ||
"\n", | ||
"tfrecord_file = os.path.join(BASE_DIR, 'train_data.rio')\n", | ||
"with tf.io.TFRecordWriter(tfrecord_file) as writer:\n", | ||
" for example in examples:\n", | ||
" writer.write(example.SerializeToString())\n", | ||
"\n", | ||
"!ls {tfrecord_file}" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "B8AQqw20YAB9" | ||
}, | ||
"source": [ | ||
"## Run Fairness Indicators and TFMA" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "RhN80nIvVn49" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"# Setup tfma.EvalConfig settings\n", | ||
"keras_eval_config = text_format.Parse(\"\"\"\n", | ||
" ## Model information\n", | ||
" model_specs {\n", | ||
" # For keras (and serving models) we need to add a `label_key`.\n", | ||
" label_key: \"big_tipper\"\n", | ||
" }\n", | ||
"\n", | ||
" ## Post training metric information. These will be merged with any built-in\n", | ||
" ## metrics from training.\n", | ||
" metrics_specs {\n", | ||
" metrics { class_name: \"ExampleCount\" }\n", | ||
" metrics { class_name: \"BinaryAccuracy\" }\n", | ||
" metrics { class_name: \"AUC\" }\n", | ||
" metrics { class_name: \"MeanLabel\" }\n", | ||
" metrics { class_name: \"MeanPrediction\" }\n", | ||
" metrics {\n", | ||
" class_name: \"FairnessIndicators\"\n", | ||
" config: '{ \"thresholds\": [0.3, 0.5, 0.7] }'\n", | ||
" }\n", | ||
" }\n", | ||
"\n", | ||
" ## Slicing information\n", | ||
" slicing_specs {} # overall slice\n", | ||
" slicing_specs {\n", | ||
" feature_keys: [\"trip_start_hour\"]\n", | ||
" }\n", | ||
" slicing_specs {\n", | ||
" feature_keys: [\"trip_start_day\"]\n", | ||
" }\n", | ||
" slicing_specs {\n", | ||
" feature_values: {\n", | ||
" key: \"trip_start_month\"\n", | ||
" value: \"1\"\n", | ||
" }\n", | ||
" }\n", | ||
" slicing_specs {\n", | ||
" feature_keys: [\"trip_start_hour\", \"trip_start_day\"]\n", | ||
" }\n", | ||
"\"\"\", tfma.EvalConfig())\n", | ||
"\n", | ||
"# Create a tfma.EvalSharedModel that points at our keras model.\n", | ||
"keras_model_path = os.path.join(MODELS_DIR, 'keras', '2')\n", | ||
"keras_eval_shared_model = tfma.default_eval_shared_model(\n", | ||
" eval_saved_model_path=keras_model_path,\n", | ||
" eval_config=keras_eval_config)\n", | ||
"\n", | ||
"keras_output_path = os.path.join(OUTPUT_DIR, 'keras')\n", | ||
"\n", | ||
"# Run TFMA\n", | ||
"keras_eval_result = tfma.run_model_analysis(\n", | ||
" eval_shared_model=keras_eval_shared_model,\n", | ||
" eval_config=keras_eval_config,\n", | ||
" data_location=tfrecord_file,\n", | ||
" output_path=keras_output_path)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": { | ||
"id": "ktlASJQIzE3l" | ||
}, | ||
"source": [ | ||
"## Render Fairness Indicators" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": { | ||
"id": "ul0Ud9TVWB_b" | ||
}, | ||
"outputs": [], | ||
"source": [ | ||
"tfma.addons.fairness.view.widget_view.render_fairness_indicator(eval_result=keras_eval_result)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"accelerator": "GPU", | ||
"colab": { | ||
"collapsed_sections": [], | ||
"name": "Fairness Indicators DLVM Test Case.ipynb", | ||
"private_outputs": true, | ||
"provenance": [ | ||
], | ||
"toc_visible": true | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"name": "python3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 0 | ||
} |