Skip to content

Commit

Permalink
chore(visualization): Revert to tensorflow image because tfx image is…
Browse files Browse the repository at this point in the history
… too big. Fix #6053 (#6061)

* quit using tfx library for visualization

* revert to tensorflow

* fix read file issue on FE
  • Loading branch information
zijianjoy authored Jul 16, 2021
1 parent 24c551d commit ce40a2e
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 54 deletions.
2 changes: 1 addition & 1 deletion backend/Dockerfile.visualization
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
# and exporter.py files in the directory specified above.

# This image should be in sync with the image in backend/src/apiserver/visualization/update_requirements.sh.
FROM tensorflow/tfx:0.30.1
FROM tensorflow/tensorflow:2.4.0

RUN apt-get update \
&& apt-get install -y wget curl tar openssl
Expand Down
7 changes: 4 additions & 3 deletions backend/src/apiserver/visualization/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ jupyter_client==5.3.*
nbconvert==5.5.0
nbformat==4.4.0
scikit_learn==0.21.2
tensorflow-metadata==0.30.*
tensorflow-model-analysis==0.30.*
tensorflow-data-validation==0.30.*
tensorflow-metadata==0.26.*
tensorflow-model-analysis==0.26.*
tensorflow-data-validation==0.26.*
tensorflow-serving-api==2.3.*
88 changes: 46 additions & 42 deletions backend/src/apiserver/visualization/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,44 @@
#
# pip-compile --output-file=- -
#
absl-py==0.12.0 # via tensorboard, tensorflow, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis, tfx-bsl
apache-beam[gcp]==2.31.0 # via tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
absl-py==0.10.0 # via tensorboard, tensorflow, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis, tensorflow-transform, tfx-bsl
apache-beam[gcp]==2.28.0 # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx-bsl
argon2-cffi==20.1.0 # via notebook
astunparse==1.6.3 # via tensorflow
attrs==21.2.0 # via jsonschema
avro-python3==1.9.2.1 # via apache-beam
backcall==0.2.0 # via ipython
bleach==3.3.0 # via nbconvert
bleach==3.3.1 # via nbconvert
bokeh==1.2.0 # via -r -
cached-property==1.5.2 # via h5py
cachetools==4.2.2 # via apache-beam, google-auth
certifi==2021.5.30 # via requests
cffi==1.14.6 # via argon2-cffi, google-crc32c
chardet==4.0.0 # via requests
charset-normalizer==2.0.2 # via requests
crcmod==1.7 # via apache-beam
decorator==5.0.9 # via gcsfs, ipython
dataclasses==0.8 # via libcst, werkzeug
decorator==5.0.9 # via gcsfs, ipython, traitlets
defusedxml==0.7.1 # via nbconvert
dill==0.3.1.1 # via apache-beam
docopt==0.6.2 # via hdfs
entrypoints==0.3 # via nbconvert
fastavro==1.4.2 # via apache-beam
fasteners==0.16.3 # via google-apitools
flatbuffers==1.12 # via tensorflow
future==0.18.2 # via apache-beam
gast==0.4.0 # via tensorflow
gast==0.3.3 # via tensorflow
gcsfs==0.2.3 # via -r -
google-api-core[grpc,grpcgcp]==1.30.0 # via google-cloud-bigquery, google-cloud-bigtable, google-cloud-core, google-cloud-datastore, google-cloud-dlp, google-cloud-language, google-cloud-pubsub, google-cloud-spanner, google-cloud-videointelligence, google-cloud-vision
google-api-python-client==1.7.12 # via -r -, google-cloud-profiler, tfx-bsl
google-api-core[grpc,grpcgcp]==1.31.0 # via google-cloud-bigquery, google-cloud-bigtable, google-cloud-build, google-cloud-core, google-cloud-datastore, google-cloud-dlp, google-cloud-language, google-cloud-pubsub, google-cloud-spanner, google-cloud-videointelligence, google-cloud-vision
google-api-python-client==1.7.12 # via -r -, tfx-bsl
google-apitools==0.5.31 # via apache-beam
google-auth-httplib2==0.1.0 # via google-api-python-client, google-cloud-profiler
google-auth-httplib2==0.1.0 # via google-api-python-client
google-auth-oauthlib==0.4.4 # via gcsfs, tensorboard
google-auth==1.32.1 # via apache-beam, gcsfs, google-api-core, google-api-python-client, google-auth-httplib2, google-auth-oauthlib, google-cloud-core, google-cloud-profiler, tensorboard
google-cloud-bigquery==2.20.0 # via apache-beam
google-auth==1.33.0 # via apache-beam, gcsfs, google-api-core, google-api-python-client, google-auth-httplib2, google-auth-oauthlib, google-cloud-core, tensorboard
google-cloud-bigquery==1.28.0 # via apache-beam
google-cloud-bigtable==1.7.0 # via apache-beam
google-cloud-build==2.0.0 # via apache-beam
google-cloud-core==1.7.1 # via apache-beam, google-cloud-bigquery, google-cloud-bigtable, google-cloud-datastore, google-cloud-spanner
google-cloud-datastore==1.15.3 # via apache-beam
google-cloud-dlp==1.0.0 # via apache-beam
google-cloud-language==1.3.0 # via apache-beam
google-cloud-profiler==3.0.4 # via apache-beam
google-cloud-pubsub==1.7.0 # via apache-beam
google-cloud-spanner==1.19.1 # via apache-beam
google-cloud-videointelligence==1.16.1 # via apache-beam
Expand All @@ -53,11 +52,11 @@ google-resumable-media==1.3.1 # via google-cloud-bigquery
googleapis-common-protos[grpc]==1.53.0 # via google-api-core, grpc-google-iam-v1, tensorflow-metadata
grpc-google-iam-v1==0.12.3 # via google-cloud-bigtable, google-cloud-pubsub, google-cloud-spanner
grpcio-gcp==0.2.2 # via apache-beam, google-api-core
grpcio==1.34.1 # via apache-beam, google-api-core, googleapis-common-protos, grpc-google-iam-v1, grpcio-gcp, tensorboard, tensorflow, tensorflow-serving-api
h5py==3.1.0 # via tensorflow
grpcio==1.38.1 # via apache-beam, google-api-core, googleapis-common-protos, grpc-google-iam-v1, grpcio-gcp, tensorboard, tensorflow, tensorflow-serving-api
h5py==2.10.0 # via tensorflow
hdfs==2.6.0 # via apache-beam
httplib2==0.19.1 # via apache-beam, google-api-python-client, google-apitools, google-auth-httplib2, oauth2client
idna==2.10 # via requests
httplib2==0.17.4 # via apache-beam, google-api-python-client, google-apitools, google-auth-httplib2, oauth2client
idna==3.2 # via requests
importlib-metadata==4.6.1 # via jsonschema, markdown
ipykernel==5.1.1 # via -r -, ipywidgets, notebook
ipython-genutils==0.2.0 # via nbformat, notebook, traitlets
Expand All @@ -71,66 +70,71 @@ jsonschema==3.2.0 # via nbformat
jupyter-client==5.3.5 # via -r -, ipykernel, notebook
jupyter-core==4.7.1 # via jupyter-client, nbconvert, nbformat, notebook
jupyterlab-widgets==1.0.0 # via ipywidgets
keras-nightly==2.5.0.dev2021032900 # via tensorflow
keras-preprocessing==1.1.2 # via tensorflow
libcst==0.3.19 # via google-cloud-build
markdown==3.3.4 # via tensorboard
markupsafe==2.0.1 # via jinja2
mistune==0.8.4 # via nbconvert
mock==2.0.0 # via apache-beam
mypy-extensions==0.4.3 # via typing-inspect
nbconvert==5.5.0 # via -r -, notebook
nbformat==4.4.0 # via -r -, ipywidgets, nbconvert, notebook
notebook==6.4.0 # via widgetsnbextension
numpy==1.19.5 # via apache-beam, bokeh, h5py, keras-preprocessing, opt-einsum, pandas, pyarrow, scikit-learn, scipy, tensorboard, tensorflow, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
numpy==1.18.5 # via apache-beam, bokeh, h5py, keras-preprocessing, opt-einsum, pandas, pyarrow, scikit-learn, scipy, tensorboard, tensorflow, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx-bsl
oauth2client==4.1.3 # via apache-beam, google-apitools
oauthlib==3.1.1 # via requests-oauthlib
opt-einsum==3.3.0 # via tensorflow
packaging==21.0 # via bleach, bokeh, google-api-core, google-cloud-bigquery
pandas==1.3.0 # via itables, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
packaging==21.0 # via bleach, bokeh, google-api-core
pandas==1.1.5 # via itables, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
pandocfilters==1.4.3 # via nbconvert
parso==0.8.2 # via jedi
pbr==5.6.0 # via mock
pexpect==4.8.0 # via ipython
pickleshare==0.7.5 # via ipython
pillow==8.3.1 # via bokeh
prometheus-client==0.11.0 # via notebook
prompt-toolkit==3.0.19 # via ipython
proto-plus==1.19.0 # via google-cloud-bigquery
protobuf==3.17.3 # via apache-beam, google-api-core, google-cloud-bigquery, google-cloud-profiler, googleapis-common-protos, proto-plus, tensorboard, tensorflow, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis, tensorflow-serving-api, tfx-bsl
proto-plus==1.19.0 # via google-cloud-build
protobuf==3.17.3 # via apache-beam, google-api-core, googleapis-common-protos, proto-plus, tensorboard, tensorflow, tensorflow-data-validation, tensorflow-metadata, tensorflow-model-analysis, tensorflow-serving-api, tensorflow-transform, tfx-bsl
ptyprocess==0.7.0 # via pexpect, terminado
pyarrow==2.0.0 # via apache-beam, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
pyarrow==0.17.1 # via apache-beam, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx-bsl
pyasn1-modules==0.2.8 # via google-auth, oauth2client
pyasn1==0.4.8 # via oauth2client, pyasn1-modules, rsa
pycparser==2.20 # via cffi
pydot==1.4.2 # via apache-beam
pydot==1.4.2 # via apache-beam, tensorflow-transform
pygments==2.9.0 # via ipython, nbconvert
pymongo==3.11.4 # via apache-beam
pyparsing==2.4.7 # via httplib2, packaging, pydot
pymongo==3.12.0 # via apache-beam
pyparsing==2.4.7 # via packaging, pydot
pyrsistent==0.18.0 # via jsonschema
python-dateutil==2.8.1 # via apache-beam, bokeh, jupyter-client, pandas
python-dateutil==2.8.2 # via apache-beam, bokeh, jupyter-client, pandas
pytz==2021.1 # via apache-beam, google-api-core, pandas
pyyaml==5.4.1 # via bokeh
pyyaml==5.4.1 # via bokeh, libcst
pyzmq==22.1.0 # via jupyter-client, notebook
requests-oauthlib==1.3.0 # via google-auth-oauthlib
requests==2.25.1 # via apache-beam, gcsfs, google-api-core, google-cloud-bigquery, google-cloud-profiler, hdfs, requests-oauthlib, tensorboard
requests==2.26.0 # via apache-beam, gcsfs, google-api-core, hdfs, requests-oauthlib, tensorboard
rsa==4.7.2 # via google-auth, oauth2client
scikit_learn==0.21.2 # via -r -
scipy==1.7.0 # via scikit-learn, tensorflow-model-analysis
scipy==1.5.4 # via scikit-learn, tensorflow-model-analysis
send2trash==1.7.1 # via notebook
six==1.15.0 # via absl-py, argon2-cffi, astunparse, bleach, bokeh, fasteners, google-api-core, google-api-python-client, google-apitools, google-auth, google-auth-httplib2, google-cloud-core, google-pasta, google-resumable-media, grpcio, hdfs, jsonschema, keras-preprocessing, oauth2client, protobuf, python-dateutil, tensorflow, tensorflow-data-validation, tensorflow-model-analysis
six==1.16.0 # via absl-py, argon2-cffi, astunparse, bleach, bokeh, fasteners, google-api-core, google-api-python-client, google-apitools, google-auth, google-auth-httplib2, google-cloud-bigquery, google-cloud-core, google-pasta, google-resumable-media, grpcio, h5py, hdfs, jsonschema, keras-preprocessing, mock, oauth2client, protobuf, python-dateutil, tensorflow, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, traitlets
tensorboard-data-server==0.6.1 # via tensorboard
tensorboard-plugin-wit==1.8.0 # via tensorboard
tensorboard==2.5.0 # via tensorflow
tensorflow-data-validation==0.30.0 # via -r -
tensorflow-estimator==2.5.0 # via tensorflow
tensorflow-metadata==0.30.0 # via -r -, tensorflow-data-validation, tensorflow-model-analysis, tfx-bsl
tensorflow-model-analysis==0.30.0 # via -r -
tensorflow-serving-api==2.5.1 # via tfx-bsl
tensorflow==2.5.0 # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-serving-api, tfx-bsl
tensorflow-data-validation==0.26.1 # via -r -
tensorflow-estimator==2.3.0 # via tensorflow
tensorflow-metadata==0.26.0 # via -r -, tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform, tfx-bsl
tensorflow-model-analysis==0.26.1 # via -r -
tensorflow-serving-api==2.3.0 # via -r -, tfx-bsl
tensorflow-transform==0.26.0 # via tensorflow-data-validation
tensorflow==2.3.3 # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-serving-api, tensorflow-transform, tfx-bsl
termcolor==1.1.0 # via tensorflow
terminado==0.10.1 # via notebook
testpath==0.5.0 # via nbconvert
tfx-bsl==0.30.0 # via tensorflow-data-validation, tensorflow-model-analysis
tfx-bsl==0.26.1 # via tensorflow-data-validation, tensorflow-model-analysis, tensorflow-transform
tornado==6.1 # via bokeh, ipykernel, jupyter-client, notebook, terminado
traitlets==5.0.5 # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook
typing-extensions==3.7.4.3 # via apache-beam, importlib-metadata, tensorflow
traitlets==4.3.3 # via ipykernel, ipython, ipywidgets, jupyter-client, jupyter-core, nbconvert, nbformat, notebook
typing-extensions==3.7.4.3 # via apache-beam, importlib-metadata, libcst, typing-inspect
typing-inspect==0.7.1 # via libcst
uritemplate==3.0.1 # via google-api-python-client
urllib3==1.26.6 # via requests
wcwidth==0.2.5 # via prompt-toolkit
Expand Down
2 changes: 1 addition & 1 deletion backend/src/apiserver/visualization/update_requirements.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

# This image should be in sync with Dockerfile.visualization.
IMAGE="tensorflow/tfx:0.30.1"
IMAGE="tensorflow/tensorflow:2.4.0"
# The tensorflow/tfx default entrypoint is Apache Beam, because Apache Beam doesn't
# support a custom entrypoint for now. We need to override it with --entrypoint ""
# for other `docker run` use cases.
Expand Down
10 changes: 10 additions & 0 deletions developer_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@ $ gcloud auth configure-docker
$ docker push gcr.io/<your-gcp-project>/persistenceagent:latest
```

To build the visualization server image and upload it to GCR:

```bash
# Run in the repository root directory
$ docker build -t gcr.io/<your-gcp-project>/visualization:latest -f backend/Dockerfile.visualization .
# Push to GCR
$ gcloud auth configure-docker
$ docker push gcr.io/<your-gcp-project>/visualization:latest
```

To build the frontend image and upload it to GCR:

```bash
Expand Down
18 changes: 11 additions & 7 deletions frontend/src/lib/OutputArtifactLoader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,13 @@ export class OutputArtifactLoader {
viewers = viewers.concat(
[evalUri, trainUri].map(async specificUri => {
const script = [
'from tfx.utils import io_utils',
'import tensorflow_data_validation as tfdv',
`stats_path = io_utils.get_only_uri_in_dir('${specificUri}')`,
'stats = tfdv.load_stats_binary(stats_path)',
'import os',
'import tensorflow as tf',
`files = tf.io.gfile.listdir('${specificUri}')`,
`filename = os.path.dirname(os.path.join(files[0], ''))`,
`filePath = os.path.join('${specificUri}', filename)`,
'stats = tfdv.load_stats_binary(filePath)',
'tfdv.visualize_statistics(stats)',
];
return buildArtifactViewer({ script, namespace });
Expand Down Expand Up @@ -343,12 +346,13 @@ export class OutputArtifactLoader {
return splitNames.map(name => {
const script = [
'import tensorflow_data_validation as tfdv',
'from tfx.utils import io_utils',
'from tensorflow_metadata.proto.v0 import anomalies_pb2',
'anomalies = anomalies_pb2.Anomalies()',
`anomalies_bytes = io_utils.read_bytes_file('${artifact.getUri()}/Split-${name}')`,
'anomalies.ParseFromString(anomalies_bytes)',
'tfdv.display_anomalies(anomalies)',
'import tensorflow as tf',
`with tf.io.gfile.GFile('${artifact.getUri()}/Split-${name}', mode='rb') as f:`,
` anomalies_bytes = f.read()`,
' anomalies.ParseFromString(anomalies_bytes)',
' tfdv.display_anomalies(anomalies)',
];
return buildArtifactViewer({ script, namespace });
});
Expand Down

0 comments on commit ce40a2e

Please sign in to comment.