diff --git a/6 Feature Store Workflow.ipynb b/6 Feature Store Workflow.ipynb new file mode 100644 index 0000000..d62be68 --- /dev/null +++ b/6 Feature Store Workflow.ipynb @@ -0,0 +1,637 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Feature Store - Unified storage of curated features\n", + "\n", + "This notebook is intended to help you get started with Feature Store in the H2O AI Cloud using Python.\n", + "\n", + "* **Product Documentation:** https://h2oai.github.io/featurestore/" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from featurestore import CSVFile, Schema\n", + "from pyspark.sql import SparkSession\n", + "from h2o_ai_cloud import fs_client" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Configure User Spark session for Feature Store" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Set up the Java Environment for Spark " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from jdk4py import JAVA_HOME\n", + "os.environ['JAVA_HOME'] = str(JAVA_HOME)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ":: loading settings :: url = jar:file:/opt/conda/lib/python3.9/site-packages/pyspark/jars/ivy-2.5.1.jar!/org/apache/ivy/core/settings/ivysettings.xml\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Ivy Default Cache set to: /home/jovyan/.ivy2/cache\n", + "The jars for the packages stored in: /home/jovyan/.ivy2/jars\n", + "org.apache.hadoop#hadoop-aws added as a dependency\n", + "org.apache.hadoop#hadoop-azure added as a dependency\n", + "io.delta#delta-core_2.12 added as a dependency\n", + ":: resolving dependencies :: org.apache.spark#spark-submit-parent-67b05020-a1d9-41be-9fae-9f8b614c61c3;1.0\n", + "\tconfs: [default]\n", + "\tfound org.apache.hadoop#hadoop-aws;3.3.1 in central\n", + "\tfound com.amazonaws#aws-java-sdk-bundle;1.11.901 in central\n", + "\tfound org.wildfly.openssl#wildfly-openssl;1.0.7.Final in central\n", + "\tfound org.apache.hadoop#hadoop-azure;3.3.1 in central\n", + "\tfound org.apache.httpcomponents#httpclient;4.5.13 in central\n", + "\tfound org.apache.httpcomponents#httpcore;4.4.13 in central\n", + "\tfound commons-logging#commons-logging;1.1.3 in central\n", + "\tfound commons-codec#commons-codec;1.11 in central\n", + "\tfound com.microsoft.azure#azure-storage;7.0.1 in central\n", + "\tfound com.fasterxml.jackson.core#jackson-core;2.10.5 in central\n", + "\tfound org.slf4j#slf4j-api;1.7.30 in central\n", + "\tfound com.microsoft.azure#azure-keyvault-core;1.0.0 in central\n", + "\tfound com.google.guava#guava;27.0-jre in central\n", + "\tfound com.google.guava#failureaccess;1.0 in central\n", + "\tfound com.google.guava#listenablefuture;9999.0-empty-to-avoid-conflict-with-guava in central\n", + "\tfound com.google.code.findbugs#jsr305;3.0.2 in central\n", + "\tfound org.checkerframework#checker-qual;2.5.2 in central\n", + "\tfound com.google.errorprone#error_prone_annotations;2.2.0 in central\n", + "\tfound com.google.j2objc#j2objc-annotations;1.1 in central\n", + "\tfound 
org.codehaus.mojo#animal-sniffer-annotations;1.17 in central\n", + "\tfound org.apache.hadoop.thirdparty#hadoop-shaded-guava;1.1.1 in central\n", + "\tfound org.eclipse.jetty#jetty-util-ajax;9.4.40.v20210413 in central\n", + "\tfound org.eclipse.jetty#jetty-util;9.4.40.v20210413 in central\n", + "\tfound org.codehaus.jackson#jackson-mapper-asl;1.9.13 in central\n", + "\tfound org.codehaus.jackson#jackson-core-asl;1.9.13 in central\n", + "\tfound io.delta#delta-core_2.12;2.2.0 in central\n", + "\tfound io.delta#delta-storage;2.2.0 in central\n", + "\tfound org.antlr#antlr4-runtime;4.8 in central\n", + ":: resolution report :: resolve 423ms :: artifacts dl 16ms\n", + "\t:: modules in use:\n", + "\tcom.amazonaws#aws-java-sdk-bundle;1.11.901 from central in [default]\n", + "\tcom.fasterxml.jackson.core#jackson-core;2.10.5 from central in [default]\n", + "\tcom.google.code.findbugs#jsr305;3.0.2 from central in [default]\n", + "\tcom.google.errorprone#error_prone_annotations;2.2.0 from central in [default]\n", + "\tcom.google.guava#failureaccess;1.0 from central in [default]\n", + "\tcom.google.guava#guava;27.0-jre from central in [default]\n", + "\tcom.google.guava#listenablefuture;9999.0-empty-to-avoid-conflict-with-guava from central in [default]\n", + "\tcom.google.j2objc#j2objc-annotations;1.1 from central in [default]\n", + "\tcom.microsoft.azure#azure-keyvault-core;1.0.0 from central in [default]\n", + "\tcom.microsoft.azure#azure-storage;7.0.1 from central in [default]\n", + "\tcommons-codec#commons-codec;1.11 from central in [default]\n", + "\tcommons-logging#commons-logging;1.1.3 from central in [default]\n", + "\tio.delta#delta-core_2.12;2.2.0 from central in [default]\n", + "\tio.delta#delta-storage;2.2.0 from central in [default]\n", + "\torg.antlr#antlr4-runtime;4.8 from central in [default]\n", + "\torg.apache.hadoop#hadoop-aws;3.3.1 from central in [default]\n", + "\torg.apache.hadoop#hadoop-azure;3.3.1 from central in [default]\n", + "\torg.apache.hadoop.thirdparty#hadoop-shaded-guava;1.1.1 from central in [default]\n", + "\torg.apache.httpcomponents#httpclient;4.5.13 from central in [default]\n", + "\torg.apache.httpcomponents#httpcore;4.4.13 from central in [default]\n", + "\torg.checkerframework#checker-qual;2.5.2 from central in [default]\n", + "\torg.codehaus.jackson#jackson-core-asl;1.9.13 from central in [default]\n", + "\torg.codehaus.jackson#jackson-mapper-asl;1.9.13 from central in [default]\n", + "\torg.codehaus.mojo#animal-sniffer-annotations;1.17 from central in [default]\n", + "\torg.eclipse.jetty#jetty-util;9.4.40.v20210413 from central in [default]\n", + "\torg.eclipse.jetty#jetty-util-ajax;9.4.40.v20210413 from central in [default]\n", + "\torg.slf4j#slf4j-api;1.7.30 from central in [default]\n", + "\torg.wildfly.openssl#wildfly-openssl;1.0.7.Final from central in [default]\n", + "\t---------------------------------------------------------------------\n", + "\t| | modules || artifacts |\n", + "\t| conf | number| search|dwnlded|evicted|| number|dwnlded|\n", + "\t---------------------------------------------------------------------\n", + "\t| default | 28 | 0 | 0 | 0 || 28 | 0 |\n", + "\t---------------------------------------------------------------------\n", + ":: retrieving :: org.apache.spark#spark-submit-parent-67b05020-a1d9-41be-9fae-9f8b614c61c3\n", + "\tconfs: [default]\n", + "\t0 artifacts copied, 28 already retrieved (0kB/10ms)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "23/04/17 17:15:56 WARN NativeCodeLoader: Unable to 
load native-hadoop library for your platform... using builtin-java classes where applicable\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Setting default log level to \"WARN\".\n", + "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n" + ] + } + ], + "source": [ + "spark_dependencies_jar = \"https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/feature-store/release/0.15.0/spark-dependencies/featurestore-azure-gen2-spark-dependencies-0.15.0.jar\"\n", + "spark = SparkSession.builder \\\n", + " .master(\"local\") \\\n", + " .config(\"spark.jars.packages\", \"org.apache.hadoop:hadoop-aws:3.3.1,org.apache.hadoop:hadoop-azure:3.3.1,io.delta:delta-core_2.12:2.2.0\") \\\n", + " .config(\"spark.jars\", spark_dependencies_jar) \\\n", + " .config(\"spark.sql.extensions\", \"io.delta.sql.DeltaSparkSessionExtension\") \\\n", + " .config(\"spark.sql.catalog.spark_catalog\", \"org.apache.spark.sql.delta.catalog.DeltaCatalog\") \\\n", + " .getOrCreate()\n", + "spark.sparkContext.setLogLevel(\"ERROR\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Connect to Feature Store\n", + "We first connect to the Feature Store cloud endpoint, using the appropriate H2O Cloud Discovery Service to initialize the client. We can then authenticate to Feature Store." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17-04-2023 05:16:00 : INFO : client : Connecting to the server featurestore-api.cloud-qa.h2o.ai ...\n", + "17-04-2023 05:16:02 : ERROR : auth : Browser is not supported: Please visit https://auth.demo.h2o.ai/auth/realms/q8s-qa/protocol/openid-connect/auth?client_id=feature-store-qa&code_challenge=-IFutm4_E4ZeZiDe_Iqf35D1BPBCYcKGyKJGuxPWtwM&code_challenge_method=S256&redirect_uri=https://featurestore.cloud-qa.h2o.ai/Callback&response_type=code&scope=openid%20offline_access&state=gK8R62SM7l to continue authentication.\n" + ] + } + ], + "source": [ + "client = fs_client()\n", + "client.auth.login()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Understand the environment" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'0.15.0'" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client.get_version()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Define the data source\n", + "Feature Store supports a variety of data sources - see https://h2oai.github.io/featurestore/supported_data_sources" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "source = CSVFile(\"s3a://h2o-public-test-data/smalldata/gbm_test/titanic.csv\")" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Extract the schema from the data source\n", + "The schema describes the features that make up the feature set" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17-04-2023 05:16:52 : INFO : interactive_console : Job ID: 01gy83ewsh7zhcmg8zcjewmh7e, Status: Finished setting up spark session.\n", + "17-04-2023 05:17:06 : INFO : interactive_console : Job ID: 01gy83ewsh7zhcmg8zcjewmh7e, 
Status: Finished reading data from source location to extract schema.\n", + "17-04-2023 05:17:06 : INFO : interactive_console : Job ID: 01gy83ewsh7zhcmg8zcjewmh7e, Status: Schema generation completed.\n", + "17-04-2023 05:17:06 : INFO : interactive_console : \n", + "\n", + "Time taken - 60.696 seconds\n" + ] + } + ], + "source": [ + "schema = client.extract_schema_from_source(source)" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a project\n", + "Users must follow the project naming conventions outlined on https://h2oai.github.io/featurestore/api/naming_conventions" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "project = client.projects.create(\"sample_project\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create a feature set" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "feature_set = project.feature_sets.register(schema, \"sample_fs\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ingest data from source\n", + "Uploading data into Feature Store" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17-04-2023 05:18:14 : INFO : interactive_console : Job ID: 01gy83j2wbpdkg9cfsqt77v5pq, Status: Finished setting up spark session.\n", + "17-04-2023 05:18:14 : INFO : interactive_console : Job ID: 01gy83j2wbpdkg9cfsqt77v5pq, Status: Finished reading data to ingest.\n", + "17-04-2023 05:18:22 : INFO : interactive_console : Job ID: 01gy83j2wbpdkg9cfsqt77v5pq, Status: Finished extracting scope from the data.\n", + "17-04-2023 05:18:42 : INFO : interactive_console : Job ID: 01gy83j2wbpdkg9cfsqt77v5pq, Status: Finished computation of incremental statistics.\n", + "17-04-2023 05:19:57 : INFO : interactive_console : Job ID: 01gy83j2wbpdkg9cfsqt77v5pq, Status: Finished writing data to main storage.\n", + "17-04-2023 05:20:01 : INFO : interactive_console : \n", + "\n", + "Time taken - 131.536 seconds\n" + ] + }, + { + "data": { + "text/plain": [ + "{\n", + " \"rawCacheLocation\": \"01879039-0b43-d3e8-f649-3007b595fde7/01879039-0b5c-68d9-de71-de543814f45b-f6b4c38b-f891-4346-acef-4bd76e0d0476-raw\",\n", + " \"ingestionTimestamp\": \"2023-04-17T17:18:13.096298028Z\",\n", + " \"ingestScope\": {\n", + " \"startDateTime\": \"2023-04-17T17:18:13.096298028Z\",\n", + " \"endDateTime\": \"2023-04-17T17:18:13.096298028Z\"\n", + " },\n", + " \"ingestId\": \"01gy83js39km6f7ke8tjqgy8d0\",\n", + " \"cacheLocation\": \"\",\n", + " \"message\": \"\"\n", + "}" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "feature_set.ingest(source)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Retrieve the data" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "reference = feature_set.retrieve()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Download features\n", + "Download the files from Feature Store" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17-04-2023 05:20:28 : INFO : interactive_console : Job ID: 01gy83p3bk6x9qgegky2n3pqbe, Status: 
Finished setting up spark session.\n", + "17-04-2023 05:21:06 : INFO : interactive_console : Job ID: 01gy83p3bk6x9qgegky2n3pqbe, Status: Finished reading data from main storage.\n", + "17-04-2023 05:21:34 : INFO : interactive_console : Job ID: 01gy83p3bk6x9qgegky2n3pqbe, Status: Finished writing data to retrieve storage.\n", + "17-04-2023 05:21:34 : INFO : interactive_console : Job ID: 01gy83p3bk6x9qgegky2n3pqbe, Status: Finished generating pre-signed urls.\n" + ] + }, + { + "data": { + "text/plain": [ + "'/tmp/tmpbbu36pmt'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "reference.download()" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Obtain data as a Spark Frame \n", + "Download features as a Spark dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[Stage 8:> (0 + 1) / 1]\r" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "+------+--------+--------------------+------+----+-----+-----+----------------+--------+-----------+--------+----+----+--------------------+---------------------------------+\n", + "|pclass|survived| name| sex| age|sibsp|parch| ticket| fare| cabin|embarked|boat|body| home.dest|time_travel_column_auto_generated|\n", + "+------+--------+--------------------+------+----+-----+-----+----------------+--------+-----------+--------+----+----+--------------------+---------------------------------+\n", + "| 1| 1|Cardeza Mr. Thom...| male|36.0| 0| 1| PC 17755|512.3292|B51 B53 B55| C| 3|null|Austria-Hungary /...| 2023-04-17 17:18:13|\n", + "| 2| 0|Hickman Mr. Stan...| male|21.0| 2| 0| S.O.C. 14879| 73.5| null| S|null|null|West Hampstead L...| 2023-04-17 17:18:13|\n", + "| 2| 0| Hold Mr. Stephen| male|44.0| 1| 0| 26707| 26.0| null| S|null|null|England / Sacrame...| 2023-04-17 17:18:13|\n", + "| 2| 0|Parkes Mr. Franc...| male|null| 0| 0| 239853| 0.0| null| S|null|null| Belfast| 2023-04-17 17:18:13|\n", + "| 2| 1|Sinkkonen Miss. ...|female|30.0| 0| 0| 250648| 13.0| null| S| 10|null|Finland / Washing...| 2023-04-17 17:18:13|\n", + "| 3| 1|Abrahamsson Mr. ...| male|20.0| 0| 0|SOTON/O2 3101284| 7.925| null| S| 15|null|Taalintehdas Fin...| 2023-04-17 17:18:13|\n", + "| 3| 0| Barry Miss. Julia|female|27.0| 0| 0| 330844| 7.8792| null| Q|null|null| New York NY| 2023-04-17 17:18:13|\n", + "| 3| 0| Lockyer Mr. Edward| male|null| 0| 0| 1222| 7.8792| null| S|null| 153| null| 2023-04-17 17:18:13|\n", + "| 3| 1|Nilsson Miss. He...|female|26.0| 0| 0| 347470| 7.8542| null| S| 13|null| null| 2023-04-17 17:18:13|\n", + "| 3| 0|Robins Mrs. Alex...|female|47.0| 1| 0| A/5. 3337| 14.5| null| S|null| 7| null| 2023-04-17 17:18:13|\n", + "| 3| 0|Skoog Miss. Marg...|female| 2.0| 3| 2| 347088| 27.9| null| S|null|null| null| 2023-04-17 17:18:13|\n", + "| 1| 1|Chambers Mrs. No...|female|33.0| 1| 0| 113806| 53.1| E8| S| 5|null|New York NY / It...| 2023-04-17 17:18:13|\n", + "| 1| 0|Douglas Mr. Walt...| male|50.0| 1| 0| PC 17761| 106.425| C86| C|null| 62|Deephaven MN / C...| 2023-04-17 17:18:13|\n", + "| 1| 1|Duff Gordon Sir....| male|49.0| 1| 0| PC 17485| 56.9292| A20| C| 1|null| London / Paris| 2023-04-17 17:18:13|\n", + "| 1| 0|Hilliard Mr. Her...| male|null| 0| 0| 17463| 51.8625| E46| S|null|null| Brighton MA| 2023-04-17 17:18:13|\n", + "| 1| 1|Longley Miss. 
Gr...|female|21.0| 0| 0| 13502| 77.9583| D9| S| 10|null| Hudson NY| 2023-04-17 17:18:13|\n", + "| 2| 0|Corey Mrs. Percy...|female|null| 0| 0| F.C.C. 13534| 21.0| null| S|null|null|Upper Burma Indi...| 2023-04-17 17:18:13|\n", + "| 2| 0|Sobey Mr. Samuel...| male|25.0| 0| 0| C.A. 29178| 13.0| null| S|null|null|Cornwall / Hought...| 2023-04-17 17:18:13|\n", + "| 3| 0|Ibrahim Shawah M...| male|30.0| 0| 0| 2685| 7.2292| null| C|null|null| null| 2023-04-17 17:18:13|\n", + "| 3| 0| Kink Mr. Vincenz| male|26.0| 2| 0| 315151| 8.6625| null| S|null|null| null| 2023-04-17 17:18:13|\n", + "+------+--------+--------------------+------+----+-----+-----+----------------+--------+-----------+--------+----+----+--------------------+---------------------------------+\n", + "only showing top 20 rows\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + " \r" + ] + } + ], + "source": [ + "reference.as_spark_frame(spark).show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare a schema from a string\n", + "A schema can also be created from a string definition" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "schema_str = \"id integer, value string\"\n", + "schema = Schema.create_from(schema_str)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create another feature set" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "fs_online = project.feature_sets.register(schema, \"sample_fs_online\", primary_key=\"id\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ingest data into the Online Feature Store" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "fs_online.ingest_online('{\"id\": 1, \"value\": \"test\"}')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Retrieve data from the Online Feature Store" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 1, 'value': 'test'}" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fs_online.retrieve_online(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Delete a feature set" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17-04-2023 05:22:06 : INFO : feature_set : Feature set 'sample_fs_online' is deleted\n" + ] + } + ], + "source": [ + "fs_online.delete()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Delete a project" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "17-04-2023 05:22:06 : INFO : project : Project 'sample_project' is deleted\n" + ] + } + ], + "source": [ + "project.delete()" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "b5803137338cb19a16337a87a205be3b478f8cca74095ec6d83bca1ed0847cec" + }, + "kernelspec": { + "display_name": "Python 3.8.16 64-bit ('3.8.16': pyenv)", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.8.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/README.md b/README.md index 9ae40ca..a881215 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ pip install h2o_authn==0.1.1 pip install https://enterprise-steam.s3.amazonaws.com/release/1.8.12/python/h2osteam-1.8.12-py2.py3-none-any.whl pip install https://s3.amazonaws.com/artifacts.h2o.ai/releases/ai/h2o/mlops/rel-0.56.1/2/h2o_mlops_client-0.56.1%2Bdd66f93.rel0.56.1.2-py2.py3-none-any.whl pip install https://h2o-release.s3.amazonaws.com/h2o/rel-zumbo/2/Python/h2o-3.36.1.2-py2.py3-none-any.whl +pip install pyspark==3.2.1 h2o-featurestore==0.14.4 ``` ### Setup your connection @@ -37,4 +38,5 @@ Update the `h2o_ai_cloud.py` file with the connection parameters for your H2O AI * H2O-3 additional tutorials: https://github.com/h2oai/h2o-tutorials * MLOps product documentation: https://docs.h2o.ai/mlops/ * MLOps python documentation: https://docs.h2o.ai/mlops/py-client-installing/ +* Feature Store product documentation: https://docs.h2o.ai/feature-store/latest-stable/docs/index.html diff --git a/h2o_ai_cloud.py b/h2o_ai_cloud.py index cae4c84..a768edc 100644 --- a/h2o_ai_cloud.py +++ b/h2o_ai_cloud.py @@ -1,8 +1,11 @@ +import os import getpass import h2o_authn import h2o_mlops_client import h2osteam +import h2o_discovery +import featurestore # The URL you use to access the H2O AI Cloud's UI - do not include the `https://` - ex: cloud.h2o.ai @@ -44,7 +47,6 @@ def mlops_client(): ) - def steam_client(): """ Connect to Enterprise Steam, Driverless AI, and H2O-3 @@ -58,3 +60,14 @@ def steam_client(): ) +def fs_client(): + """ + Connect to Feature Store + """ + discovery = h2o_discovery.discover(os.environ['H2O_CLOUD_ENVIRONMENT']) + client = featurestore.Client( + url=discovery.services['feature-store-grpc-api'].uri, + secure=True + ) + return client +