chore(visualization): Revert to tensorflow image because tfx image is too big. Fix #6053 (#6061)

* quit using tfx library for visualization
* revert to tensorflow
* fix read file issue on FE
kubeflow · Jul 16, 2021 · ce40a2e · ce40a2e
1 parent 24c551d
commit ce40a2e
Show file tree

Hide file tree

Showing 6 changed files with 73 additions and 54 deletions.
diff --git a/backend/Dockerfile.visualization b/backend/Dockerfile.visualization
@@ -18,7 +18,7 @@
 # and exporter.py files in the directory specified above.
 
 # This image should be in sync with image in backend/src/apiserver/visualization/update_requirements.sh.
-FROM tensorflow/tfx:0.30.1
+FROM tensorflow/tensorflow:2.4.0
 
 RUN apt-get update \
   && apt-get install -y wget curl tar openssl

diff --git a/backend/src/apiserver/visualization/requirements.in b/backend/src/apiserver/visualization/requirements.in
@@ -8,6 +8,7 @@ jupyter_client==5.3.*
 nbconvert==5.5.0
 nbformat==4.4.0
 scikit_learn==0.21.2
-tensorflow-metadata==0.30.*
-tensorflow-model-analysis==0.30.*
-tensorflow-data-validation==0.30.*
+tensorflow-metadata==0.26.*
+tensorflow-model-analysis==0.26.*
+tensorflow-data-validation==0.26.*
+tensorflow-serving-api==2.3.*
diff --git a/backend/src/apiserver/visualization/requirements.txt b/backend/src/apiserver/visualization/requirements.txt
@@ -4,45 +4,44 @@
 #
 #    pip-compile --output-file=- -
 #
-absl-py==0.12.0           # via tensorboard, tensorflow, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis, tfx-bsl
-apache-beam[gcp]==2.31.0  # via tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
+absl-py==0.10.0           # via tensorboard, tensorflow, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis, tensorflow-transform, tfx-bsl
+apache-beam[gcp]==2.28.0  # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx-bsl
 argon2-cffi==20.1.0       # via notebook
 astunparse==1.6.3         # via tensorflow
 attrs==21.2.0             # via jsonschema
 avro-python3==1.9.2.1     # via apache-beam
 backcall==0.2.0           # via ipython
-bleach==3.3.0             # via nbconvert
+bleach==3.3.1             # via nbconvert
 bokeh==1.2.0              # via -r -
-cached-property==1.5.2    # via h5py
 cachetools==4.2.2         # via apache-beam, google-auth
 certifi==2021.5.30        # via requests
 cffi==1.14.6              # via argon2-cffi, google-crc32c
-chardet==4.0.0            # via requests
+charset-normalizer==2.0.2  # via requests
 crcmod==1.7               # via apache-beam
-decorator==5.0.9          # via gcsfs, ipython
+dataclasses==0.8          # via libcst, werkzeug
+decorator==5.0.9          # via gcsfs, ipython, traitlets
 defusedxml==0.7.1         # via nbconvert
 dill==0.3.1.1             # via apache-beam
 docopt==0.6.2             # via hdfs
 entrypoints==0.3          # via nbconvert
 fastavro==1.4.2           # via apache-beam
 fasteners==0.16.3         # via google-apitools
-flatbuffers==1.12         # via tensorflow
 future==0.18.2            # via apache-beam
-gast==0.4.0               # via tensorflow
+gast==0.3.3               # via tensorflow
 gcsfs==0.2.3              # via -r -
-google-api-core[grpc,grpcgcp]==1.30.0  # via google-cloud-bigquery, google-cloud-bigtable, google-cloud-core, google-cloud-datastore, google-cloud-dlp, google-cloud-language, google-cloud-pubsub, google-cloud-spanner, google-cloud-videointelligence, google-cloud-vision
-google-api-python-client==1.7.12  # via -r -, google-cloud-profiler, tfx-bsl
+google-api-core[grpc,grpcgcp]==1.31.0  # via google-cloud-bigquery, google-cloud-bigtable, google-cloud-build, google-cloud-core, google-cloud-datastore, google-cloud-dlp, google-cloud-language, google-cloud-pubsub, google-cloud-spanner, google-cloud-videointelligence, google-cloud-vision
+google-api-python-client==1.7.12  # via -r -, tfx-bsl
 google-apitools==0.5.31   # via apache-beam
-google-auth-httplib2==0.1.0  # via google-api-python-client, google-cloud-profiler
+google-auth-httplib2==0.1.0  # via google-api-python-client
 google-auth-oauthlib==0.4.4  # via gcsfs, tensorboard
-google-auth==1.32.1       # via apache-beam, gcsfs, google-api-core, google-api-python-client, google-auth-httplib2, google-auth-oauthlib, google-cloud-core, google-cloud-profiler, tensorboard
-google-cloud-bigquery==2.20.0  # via apache-beam
+google-auth==1.33.0       # via apache-beam, gcsfs, google-api-core, google-api-python-client, google-auth-httplib2, google-auth-oauthlib, google-cloud-core, tensorboard
+google-cloud-bigquery==1.28.0  # via apache-beam
 google-cloud-bigtable==1.7.0  # via apache-beam
+google-cloud-build==2.0.0  # via apache-beam
 google-cloud-core==1.7.1  # via apache-beam, google-cloud-bigquery, google-cloud-bigtable, google-cloud-datastore, google-cloud-spanner
 google-cloud-datastore==1.15.3  # via apache-beam
 google-cloud-dlp==1.0.0   # via apache-beam
 google-cloud-language==1.3.0  # via apache-beam
-google-cloud-profiler==3.0.4  # via apache-beam
 google-cloud-pubsub==1.7.0  # via apache-beam
 google-cloud-spanner==1.19.1  # via apache-beam
 google-cloud-videointelligence==1.16.1  # via apache-beam
@@ -53,11 +52,11 @@ google-resumable-media==1.3.1  # via google-cloud-bigquery
 googleapis-common-protos[grpc]==1.53.0  # via google-api-core, grpc-google-iam-v1, tensorflow-metadata
 grpc-google-iam-v1==0.12.3  # via google-cloud-bigtable, google-cloud-pubsub, google-cloud-spanner
 grpcio-gcp==0.2.2         # via apache-beam, google-api-core
-grpcio==1.34.1            # via apache-beam, google-api-core, googleapis-common-protos, grpc-google-iam-v1, grpcio-gcp, tensorboard, tensorflow, tensorflow-serving-api
-h5py==3.1.0               # via tensorflow
+grpcio==1.38.1            # via apache-beam, google-api-core, googleapis-common-protos, grpc-google-iam-v1, grpcio-gcp, tensorboard, tensorflow, tensorflow-serving-api
+h5py==2.10.0              # via tensorflow
 hdfs==2.6.0               # via apache-beam
-httplib2==0.19.1          # via apache-beam, google-api-python-client, google-apitools, google-auth-httplib2, oauth2client
-idna==2.10                # via requests
+httplib2==0.17.4          # via apache-beam, google-api-python-client, google-apitools, google-auth-httplib2, oauth2client
+idna==3.2                 # via requests
 importlib-metadata==4.6.1  # via jsonschema, markdown
 ipykernel==5.1.1          # via -r -, ipywidgets, notebook
 ipython-genutils==0.2.0   # via nbformat, notebook, traitlets
@@ -71,66 +70,71 @@ jsonschema==3.2.0         # via nbformat
 jupyter-client==5.3.5     # via -r -, ipykernel, notebook
 jupyter-core==4.7.1       # via jupyter-client, nbconvert, nbformat, notebook
 jupyterlab-widgets==1.0.0  # via ipywidgets
-keras-nightly==2.5.0.dev2021032900  # via tensorflow
 keras-preprocessing==1.1.2  # via tensorflow
+libcst==0.3.19            # via google-cloud-build
 markdown==3.3.4           # via tensorboard
 markupsafe==2.0.1         # via jinja2
 mistune==0.8.4            # via nbconvert
+mock==2.0.0               # via apache-beam
+mypy-extensions==0.4.3    # via typing-inspect
 nbconvert==5.5.0          # via -r -, notebook
 nbformat==4.4.0           # via -r -, ipywidgets, nbconvert, notebook
 notebook==6.4.0           # via widgetsnbextension
-numpy==1.19.5             # via apache-beam, bokeh, h5py, keras-preprocessing, opt-einsum, pandas, pyarrow, scikit-learn, scipy, tensorboard, tensorflow, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
+numpy==1.18.5             # via apache-beam, bokeh, h5py, keras-preprocessing, opt-einsum, pandas, pyarrow, scikit-learn, scipy, tensorboard, tensorflow, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx-bsl
 oauth2client==4.1.3       # via apache-beam, google-apitools
 oauthlib==3.1.1           # via requests-oauthlib
 opt-einsum==3.3.0         # via tensorflow
-packaging==21.0           # via bleach, bokeh, google-api-core, google-cloud-bigquery
-pandas==1.3.0             # via itables, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
+packaging==21.0           # via bleach, bokeh, google-api-core
+pandas==1.1.5             # via itables, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
 pandocfilters==1.4.3      # via nbconvert
 parso==0.8.2              # via jedi
+pbr==5.6.0                # via mock
 pexpect==4.8.0            # via ipython
 pickleshare==0.7.5        # via ipython
 pillow==8.3.1             # via bokeh
 prometheus-client==0.11.0  # via notebook
 prompt-toolkit==3.0.19    # via ipython
-proto-plus==1.19.0        # via google-cloud-bigquery
-protobuf==3.17.3          # via apache-beam, google-api-core, google-cloud-bigquery, google-cloud-profiler, googleapis-common-protos, proto-plus, tensorboard, tensorflow, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis, tensorflow-serving-api, tfx-bsl
+proto-plus==1.19.0        # via google-cloud-build
+protobuf==3.17.3          # via apache-beam, google-api-core, googleapis-common-protos, proto-plus, tensorboard, tensorflow, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis, tensorflow-serving-api, tensorflow-transform, tfx-bsl
 ptyprocess==0.7.0         # via pexpect, terminado
-pyarrow==2.0.0            # via apache-beam, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
+pyarrow==0.17.1           # via apache-beam, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx-bsl
 pyasn1-modules==0.2.8     # via google-auth, oauth2client
 pyasn1==0.4.8             # via oauth2client, pyasn1-modules, rsa
 pycparser==2.20           # via cffi
-pydot==1.4.2              # via apache-beam
+pydot==1.4.2              # via apache-beam, tensorflow-transform
 pygments==2.9.0           # via ipython, nbconvert
-pymongo==3.11.4           # via apache-beam
-pyparsing==2.4.7          # via httplib2, packaging, pydot
+pymongo==3.12.0           # via apache-beam
+pyparsing==2.4.7          # via packaging, pydot
 pyrsistent==0.18.0        # via jsonschema
-python-dateutil==2.8.1    # via apache-beam, bokeh, jupyter-client, pandas
+python-dateutil==2.8.2    # via apache-beam, bokeh, jupyter-client, pandas
 pytz==2021.1              # via apache-beam, google-api-core, pandas
-pyyaml==5.4.1             # via bokeh
+pyyaml==5.4.1             # via bokeh, libcst
 pyzmq==22.1.0             # via jupyter-client, notebook
 requests-oauthlib==1.3.0  # via google-auth-oauthlib
-requests==2.25.1          # via apache-beam, gcsfs, google-api-core, google-cloud-bigquery, google-cloud-profiler, hdfs, requests-oauthlib, tensorboard
+requests==2.26.0          # via apache-beam, gcsfs, google-api-core, hdfs, requests-oauthlib, tensorboard
 rsa==4.7.2                # via google-auth, oauth2client
 scikit_learn==0.21.2      # via -r -
-scipy==1.7.0              # via scikit-learn, tensorflow-model-analysis
+scipy==1.5.4              # via scikit-learn, tensorflow-model-analysis
 send2trash==1.7.1         # via notebook
-six==1.15.0               # via absl-py, argon2-cffi, astunparse, bleach, bokeh, fasteners, google-api-core, google-api-python-client, google-apitools, google-auth, google-auth-httplib2, google-cloud-core, google-pasta, google-resumable-media, grpcio, hdfs, jsonschema, keras-preprocessing, oauth2client, protobuf, python-dateutil, tensorflow, tensorflow-data-validation, tensorflow-model-analysis
+six==1.16.0               # via absl-py, argon2-cffi, astunparse, bleach, bokeh, fasteners, google-api-core, google-api-python-client, google-apitools, google-auth, google-auth-httplib2, google-cloud-bigquery, google-cloud-core, google-pasta, google-resumable-media, grpcio, h5py, hdfs, jsonschema, keras-preprocessing, mock, oauth2client, protobuf, python-dateutil, tensorflow, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, traitlets
 tensorboard-data-server==0.6.1  # via tensorboard
 tensorboard-plugin-wit==1.8.0  # via tensorboard
 tensorboard==2.5.0        # via tensorflow
-tensorflow-data-validation==0.30.0  # via -r -
-tensorflow-estimator==2.5.0  # via tensorflow
-tensorflow-metadata==0.30.0  # via -r -, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
-tensorflow-model-analysis==0.30.0  # via -r -
-tensorflow-serving-api==2.5.1  # via tfx-bsl
-tensorflow==2.5.0         # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-serving-api, tfx-bsl
+tensorflow-data-validation==0.26.1  # via -r -
+tensorflow-estimator==2.3.0  # via tensorflow
+tensorflow-metadata==0.26.0  # via -r -, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx-bsl
+tensorflow-model-analysis==0.26.1  # via -r -
+tensorflow-serving-api==2.3.0  # via -r -, tfx-bsl
+tensorflow-transform==0.26.0  # via tensorflow-data-validation
+tensorflow==2.3.3         # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-serving-api, tensorflow-transform, tfx-bsl
 termcolor==1.1.0          # via tensorflow
 terminado==0.10.1         # via notebook
 testpath==0.5.0           # via nbconvert
-tfx-bsl==0.30.0           # via tensorflow-data-validation, tensorflow-model-analysis
+tfx-bsl==0.26.1           # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform
 tornado==6.1              # via bokeh, ipykernel, jupyter-client, notebook, terminado
-traitlets==5.0.5          # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook
-typing-extensions==3.7.4.3  # via apache-beam, importlib-metadata, tensorflow
+traitlets==4.3.3          # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook
+typing-extensions==3.7.4.3  # via apache-beam, importlib-metadata, libcst, typing-inspect
+typing-inspect==0.7.1     # via libcst
 uritemplate==3.0.1        # via google-api-python-client
 urllib3==1.26.6           # via requests
 wcwidth==0.2.5            # via prompt-toolkit

diff --git a/backend/src/apiserver/visualization/update_requirements.sh b/backend/src/apiserver/visualization/update_requirements.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 # This image should be in sync with Dockerfile.visualization.
-IMAGE="tensorflow/tfx:0.30.1"
+IMAGE="tensorflow/tensorflow:2.4.0"
 # tensorflow/tfx default entrypoint is Apache BEAM, because Apache BEAM doesn't
 # support custom entrypoint for now. We need to override with --entrypoint ""
 # for other `docker run` usecase.

diff --git a/developer_guide.md b/developer_guide.md
@@ -62,6 +62,16 @@ $ gcloud auth configure-docker
 $ docker push gcr.io/<your-gcp-project>/persistenceagent:latest
 ```
 
+To build the visualization server image and upload it to GCR:
+
+```bash
+# Run in the repository root directory
+$ docker build -t gcr.io/<your-gcp-project>/visualization:latest -f backend/Dockerfile.visualization .
+# Push to GCR
+$ gcloud auth configure-docker
+$ docker push gcr.io/<your-gcp-project>/visualization:latest
+```
+
 To build the frontend image and upload it to GCR:
 
 ```bash

diff --git a/frontend/src/lib/OutputArtifactLoader.ts b/frontend/src/lib/OutputArtifactLoader.ts
@@ -298,10 +298,13 @@ export class OutputArtifactLoader {
       viewers = viewers.concat(
         [evalUri, trainUri].map(async specificUri => {
           const script = [
-            'from tfx.utils import io_utils',
             'import tensorflow_data_validation as tfdv',
-            `stats_path = io_utils.get_only_uri_in_dir('${specificUri}')`,
-            'stats = tfdv.load_stats_binary(stats_path)',
+            'import os',
+            'import tensorflow as tf',
+            `files = tf.io.gfile.listdir('${specificUri}')`,
+            `filename = os.path.dirname(os.path.join(files[0], ''))`,
+            `filePath = os.path.join('${specificUri}', filename)`,
+            'stats = tfdv.load_stats_binary(filePath)',
             'tfdv.visualize_statistics(stats)',
           ];
           return buildArtifactViewer({ script, namespace });
@@ -343,12 +346,13 @@ export class OutputArtifactLoader {
           return splitNames.map(name => {
             const script = [
               'import tensorflow_data_validation as tfdv',
-              'from tfx.utils import io_utils',
               'from tensorflow_metadata.proto.v0 import anomalies_pb2',
               'anomalies = anomalies_pb2.Anomalies()',
-              `anomalies_bytes = io_utils.read_bytes_file('${artifact.getUri()}/Split-${name}')`,
-              'anomalies.ParseFromString(anomalies_bytes)',
-              'tfdv.display_anomalies(anomalies)',
+              'import tensorflow as tf',
+              `with tf.io.gfile.GFile('${artifact.getUri()}/Split-${name}', mode='rb') as f:`,
+              `  anomalies_bytes = f.read()`,
+              '  anomalies.ParseFromString(anomalies_bytes)',
+              '  tfdv.display_anomalies(anomalies)',
             ];
             return buildArtifactViewer({ script, namespace });
           });