generated from RedHatQuickCourses/course-starter-template
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #10 from RedHatQuickCourses/updates_beta
Updates beta
- Loading branch information
Showing
24 changed files
with
1,940 additions
and
139 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,255 @@ | ||
# PIPELINE DEFINITION | ||
# Name: 7-get-data-train-upload | ||
components: | ||
comp-get-data: | ||
executorLabel: exec-get-data | ||
outputDefinitions: | ||
artifacts: | ||
data_output_path: | ||
artifactType: | ||
schemaTitle: system.Artifact | ||
schemaVersion: 0.0.1 | ||
comp-train-model: | ||
executorLabel: exec-train-model | ||
inputDefinitions: | ||
artifacts: | ||
data_input_path: | ||
artifactType: | ||
schemaTitle: system.Artifact | ||
schemaVersion: 0.0.1 | ||
outputDefinitions: | ||
artifacts: | ||
model_output_path: | ||
artifactType: | ||
schemaTitle: system.Artifact | ||
schemaVersion: 0.0.1 | ||
comp-upload-model: | ||
executorLabel: exec-upload-model | ||
inputDefinitions: | ||
artifacts: | ||
input_model_path: | ||
artifactType: | ||
schemaTitle: system.Artifact | ||
schemaVersion: 0.0.1 | ||
deploymentSpec: | ||
executors: | ||
exec-get-data: | ||
container: | ||
args: | ||
- --executor_input | ||
- '{{$}}' | ||
- --function_to_execute | ||
- get_data | ||
command: | ||
- sh | ||
- -c | ||
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | ||
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | ||
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.5.0'\ | ||
\ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ | ||
$0\" \"$@\"\n" | ||
- sh | ||
- -ec | ||
- 'program_path=$(mktemp -d) | ||
printf "%s" "$0" > "$program_path/ephemeral_component.py" | ||
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | ||
' | ||
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | ||
\ *\n\ndef get_data(data_output_path: OutputPath()):\n import urllib.request\n\ | ||
\ print(\"starting download...\")\n url = \"https://raw.githubusercontent.com/rh-aiservices-bu/fraud-detection/main/data/card_transdata.csv\"\ | ||
\n urllib.request.urlretrieve(url, data_output_path)\n print(\"done\"\ | ||
)\n\n" | ||
image: quay.io/modh/runtime-images:runtime-cuda-tensorflow-ubi9-python-3.9-2023b-20240301 | ||
exec-train-model: | ||
container: | ||
args: | ||
- --executor_input | ||
- '{{$}}' | ||
- --function_to_execute | ||
- train_model | ||
command: | ||
- sh | ||
- -c | ||
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | ||
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | ||
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.5.0'\ | ||
\ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ | ||
\ python3 -m pip install --quiet --no-warn-script-location 'tf2onnx' 'seaborn'\ | ||
\ && \"$0\" \"$@\"\n" | ||
- sh | ||
- -ec | ||
- 'program_path=$(mktemp -d) | ||
printf "%s" "$0" > "$program_path/ephemeral_component.py" | ||
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | ||
' | ||
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | ||
\ *\n\ndef train_model(data_input_path: InputPath(), model_output_path:\ | ||
\ OutputPath()):\n import numpy as np\n import pandas as pd\n from\ | ||
\ keras.models import Sequential\n from keras.layers import Dense, Dropout,\ | ||
\ BatchNormalization, Activation\n from sklearn.model_selection import\ | ||
\ train_test_split\n from sklearn.preprocessing import StandardScaler\n\ | ||
\ from sklearn.utils import class_weight\n import tf2onnx\n import\ | ||
\ onnx\n import pickle\n from pathlib import Path\n\n # Load the\ | ||
\ CSV data which we will use to train the model.\n # It contains the\ | ||
\ following fields:\n # distancefromhome - The distance from home where\ | ||
\ the transaction happened.\n # distancefromlast_transaction - The\ | ||
\ distance from last transaction happened.\n # ratiotomedianpurchaseprice\ | ||
\ - Ratio of purchased price compared to median purchase price.\n # \ | ||
\ repeat_retailer - If it's from a retailer that already has been purchased\ | ||
\ from before.\n # used_chip - If the (credit card) chip was used.\n\ | ||
\ # usedpinnumber - If the PIN number was used.\n # online_order\ | ||
\ - If it was an online order.\n # fraud - If the transaction is fraudulent.\n\ | ||
\ Data = pd.read_csv(data_input_path)\n\n # Set the input (X) and\ | ||
\ output (Y) data.\n # The only output data we have is if it's fraudulent\ | ||
\ or not, and all other fields go as inputs to the model.\n\n X = Data.drop(columns\ | ||
\ = ['repeat_retailer','distance_from_home', 'fraud'])\n y = Data['fraud']\n\ | ||
\n # Split the data into training and testing sets so we have something\ | ||
\ to test the trained model with.\n\n # X_train, X_test, y_train, y_test\ | ||
\ = train_test_split(X,y, test_size = 0.2, stratify = y)\n X_train, X_test,\ | ||
\ y_train, y_test = train_test_split(X,y, test_size = 0.2, shuffle = False)\n\ | ||
\n X_train, X_val, y_train, y_val = train_test_split(X_train,y_train,\ | ||
\ test_size = 0.2, stratify = y_train)\n\n # Scale the data to remove\ | ||
\ mean and have unit variance. This means that the data will be between\ | ||
\ -1 and 1, which makes it a lot easier for the model to learn than random\ | ||
\ potentially large values.\n # It is important to only fit the scaler\ | ||
\ to the training data, otherwise you are leaking information about the\ | ||
\ global distribution of variables (which is influenced by the test set)\ | ||
\ into the training set.\n\n scaler = StandardScaler()\n\n X_train\ | ||
\ = scaler.fit_transform(X_train.values)\n\n Path(\"artifact\").mkdir(parents=True,\ | ||
\ exist_ok=True)\n with open(\"artifact/test_data.pkl\", \"wb\") as handle:\n\ | ||
\ pickle.dump((X_test, y_test), handle)\n with open(\"artifact/scaler.pkl\"\ | ||
, \"wb\") as handle:\n pickle.dump(scaler, handle)\n\n # Since\ | ||
\ the dataset is unbalanced (it has many more non-fraud transactions than\ | ||
\ fraudulent ones), we set a class weight to weight the few fraudulent transactions\ | ||
\ higher than the many non-fraud transactions.\n\n class_weights = class_weight.compute_class_weight('balanced',classes\ | ||
\ = np.unique(y_train),y = y_train)\n class_weights = {i : class_weights[i]\ | ||
\ for i in range(len(class_weights))}\n\n\n # Build the model, the model\ | ||
\ we build here is a simple fully connected deep neural network, containing\ | ||
\ 3 hidden layers and one output layer.\n\n model = Sequential()\n \ | ||
\ model.add(Dense(32, activation = 'relu', input_dim = len(X.columns)))\n\ | ||
\ model.add(Dropout(0.2))\n model.add(Dense(32))\n model.add(BatchNormalization())\n\ | ||
\ model.add(Activation('relu'))\n model.add(Dropout(0.2))\n model.add(Dense(32))\n\ | ||
\ model.add(BatchNormalization())\n model.add(Activation('relu'))\n\ | ||
\ model.add(Dropout(0.2))\n model.add(Dense(1, activation = 'sigmoid'))\n\ | ||
\ model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])\n\ | ||
\ model.summary()\n\n\n # Train the model and get performance\n\n\ | ||
\ epochs = 2\n history = model.fit(X_train, y_train, epochs=epochs,\ | ||
\ \\\n validation_data=(scaler.transform(X_val.values),y_val),\ | ||
\ \\\n verbose = True, class_weight = class_weights)\n\ | ||
\n # Save the model as ONNX for easy use of ModelMesh\n\n model_proto,\ | ||
\ _ = tf2onnx.convert.from_keras(model)\n print(model_output_path)\n\ | ||
\ onnx.save(model_proto, model_output_path)\n\n" | ||
image: quay.io/modh/runtime-images:runtime-cuda-tensorflow-ubi9-python-3.9-2023b-20240301 | ||
exec-upload-model: | ||
container: | ||
args: | ||
- --executor_input | ||
- '{{$}}' | ||
- --function_to_execute | ||
- upload_model | ||
command: | ||
- sh | ||
- -c | ||
- "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ | ||
\ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ | ||
\ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.5.0'\ | ||
\ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' &&\ | ||
\ python3 -m pip install --quiet --no-warn-script-location 'boto3' 'botocore'\ | ||
\ && \"$0\" \"$@\"\n" | ||
- sh | ||
- -ec | ||
- 'program_path=$(mktemp -d) | ||
printf "%s" "$0" > "$program_path/ephemeral_component.py" | ||
_KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" | ||
' | ||
- "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ | ||
\ *\n\ndef upload_model(input_model_path: InputPath()):\n import os\n\ | ||
\ import boto3\n import botocore\n\n aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID')\n\ | ||
\ aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY')\n \ | ||
\ endpoint_url = os.environ.get('AWS_S3_ENDPOINT')\n region_name =\ | ||
\ os.environ.get('AWS_DEFAULT_REGION')\n bucket_name = os.environ.get('AWS_S3_BUCKET')\n\ | ||
\n s3_key = os.environ.get(\"S3_KEY\")\n\n session = boto3.session.Session(aws_access_key_id=aws_access_key_id,\n\ | ||
\ aws_secret_access_key=aws_secret_access_key)\n\ | ||
\n s3_resource = session.resource(\n 's3',\n config=botocore.client.Config(signature_version='s3v4'),\n\ | ||
\ endpoint_url=endpoint_url,\n region_name=region_name)\n\n\ | ||
\ bucket = s3_resource.Bucket(bucket_name)\n\n print(f\"Uploading\ | ||
\ {s3_key}\")\n bucket.upload_file(input_model_path, s3_key)\n\n" | ||
env: | ||
- name: S3_KEY | ||
value: models/fraud/1/model.onnx | ||
image: quay.io/modh/runtime-images:runtime-cuda-tensorflow-ubi9-python-3.9-2023b-20240301 | ||
pipelineInfo: | ||
name: 7-get-data-train-upload | ||
root: | ||
dag: | ||
tasks: | ||
get-data: | ||
cachingOptions: | ||
enableCache: true | ||
componentRef: | ||
name: comp-get-data | ||
taskInfo: | ||
name: get-data | ||
train-model: | ||
cachingOptions: | ||
enableCache: true | ||
componentRef: | ||
name: comp-train-model | ||
dependentTasks: | ||
- get-data | ||
inputs: | ||
artifacts: | ||
data_input_path: | ||
taskOutputArtifact: | ||
outputArtifactKey: data_output_path | ||
producerTask: get-data | ||
taskInfo: | ||
name: train-model | ||
upload-model: | ||
cachingOptions: | ||
enableCache: true | ||
componentRef: | ||
name: comp-upload-model | ||
dependentTasks: | ||
- train-model | ||
inputs: | ||
artifacts: | ||
input_model_path: | ||
taskOutputArtifact: | ||
outputArtifactKey: model_output_path | ||
producerTask: train-model | ||
taskInfo: | ||
name: upload-model | ||
schemaVersion: 2.1.0 | ||
sdkVersion: kfp-2.5.0 | ||
--- | ||
platforms: | ||
kubernetes: | ||
deploymentSpec: | ||
executors: | ||
exec-upload-model: | ||
secretAsEnv: | ||
- keyToEnv: | ||
- envVar: AWS_ACCESS_KEY_ID | ||
secretKey: AWS_ACCESS_KEY_ID | ||
- envVar: AWS_SECRET_ACCESS_KEY | ||
secretKey: AWS_SECRET_ACCESS_KEY | ||
- envVar: AWS_DEFAULT_REGION | ||
secretKey: AWS_DEFAULT_REGION | ||
- envVar: AWS_S3_BUCKET | ||
secretKey: AWS_S3_BUCKET | ||
- envVar: AWS_S3_ENDPOINT | ||
secretKey: AWS_S3_ENDPOINT | ||
secretName: aws-connection-my-storage |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.