diff --git a/Makefile b/Makefile index d9f3bb686..1b0c5948d 100644 --- a/Makefile +++ b/Makefile @@ -10,9 +10,9 @@ build-ui: node_modules/.bin/gulp --cwd ui bundle clean: - rm -r build + rm -rf build # 'cache' is the configured cache dir in the playbook - rm -r cache + rm -rf cache # The netlify repo is checked out without any blobs. This script # iterates through the release branches and checks them out one-by-one diff --git a/antora-playbook.yml b/antora-playbook.yml index dc616ae89..3b0e62c5f 100644 --- a/antora-playbook.yml +++ b/antora-playbook.yml @@ -21,13 +21,18 @@ content: - release/23.7 - release/23.4 - release/23.1 - - release/22.11 - - release/22.09 - - release/22.06 - # stackablectl - - url: https://github.com/stackabletech/stackablectl.git + # management tools + - url: https://github.com/stackabletech/stackable-cockpit.git start_path: docs branches: main + # demos + - url: https://github.com/stackabletech/demos.git + start_path: docs + branches: + - main + - release-23.7 + - release-23.4 + - release-23.1 # internal operators - url: https://github.com/stackabletech/commons-operator.git start_path: docs @@ -36,10 +41,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat # (required for 0.4 and below) - - old-release-docs/0.4 - - old-release-docs/0.3 - - old-release-docs/0.2 - url: https://github.com/stackabletech/secret-operator.git start_path: docs branches: @@ -47,10 +48,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat # (required for 0.6 and below) - tags: - - docs/0.6 - - docs/0.5 - url: https://github.com/stackabletech/listener-operator.git start_path: docs branches: @@ -66,11 +63,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat # (required for 0.6 and below) - - old-release-docs/0.6 - - old-release-docs/0.5 - tags: - - docs/0.4 - url: https://github.com/stackabletech/druid-operator.git start_path: docs branches: @@ -78,11 
+70,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.8 - - old-release-docs/0.7 - tags: - - docs/0.6 - url: https://github.com/stackabletech/hbase-operator.git start_path: docs branches: @@ -90,10 +77,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.5 - - old-release-docs/0.4 - - old-release-docs/0.3 - url: https://github.com/stackabletech/hdfs-operator.git start_path: docs branches: @@ -101,11 +84,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.6 - - old-release-docs/0.5 - tags: - - docs/0.4 - url: https://github.com/stackabletech/hive-operator.git start_path: docs branches: @@ -113,11 +91,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - tags: - - docs/0.8 - - docs/0.7 - - docs/0.6 - url: https://github.com/stackabletech/kafka-operator.git start_path: docs branches: @@ -125,11 +98,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - tags: - - docs/0.8 - - docs/0.7 - - docs/0.6 - url: https://github.com/stackabletech/nifi-operator.git start_path: docs branches: @@ -137,10 +105,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat # (required for 0.8 and below) - - old-release-docs/0.8 - - old-release-docs/0.7 - - old-release-docs/0.6 - url: https://github.com/stackabletech/opa-operator.git start_path: docs branches: @@ -148,11 +112,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - tags: - - docs/0.11 - - docs/0.10 - - docs/0.9 - url: https://github.com/stackabletech/spark-k8s-operator.git start_path: docs branches: @@ -160,13 +119,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - tags: - - docs/0.6 - - docs/0.5 - - docs/0.4 - - docs/0.3 - - docs/0.2 - url: 
https://github.com/stackabletech/superset-operator.git start_path: docs branches: @@ -174,10 +126,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.7 - - old-release-docs/0.6 - - old-release-docs/0.5 - url: https://github.com/stackabletech/trino-operator.git start_path: docs branches: @@ -185,13 +133,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.8 - - old-release-docs/0.7 - tags: - - docs/0.6 - - docs/0.5 - - docs/0.4 - url: https://github.com/stackabletech/zookeeper-operator.git start_path: docs branches: @@ -199,10 +140,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.12 - - old-release-docs/0.11 - - old-release-docs/0.10 ui: bundle: diff --git a/antora.yml b/antora.yml index 8ecace773..acba04396 100644 --- a/antora.yml +++ b/antora.yml @@ -4,6 +4,7 @@ title: Stackable Documentation nav: - modules/ROOT/nav.adoc - modules/concepts/nav.adoc + - modules/demos/nav.adoc - modules/tutorials/nav.adoc - modules/reference/nav.adoc - modules/operators/nav.adoc diff --git a/gitpod-antora-playbook.yml b/gitpod-antora-playbook.yml index 62f8dd943..106d406de 100644 --- a/gitpod-antora-playbook.yml +++ b/gitpod-antora-playbook.yml @@ -18,8 +18,11 @@ content: - url: /workspace/documentation tags: [] branches: [HEAD, release/*] - # stackablectl - - url: https://github.com/stackabletech/stackablectl.git + # management tools + - url: https://github.com/stackabletech/stackable-cockpit.git + start_path: docs + # demos + - url: https://github.com/stackabletech/demos.git start_path: docs # internal operators - url: https://github.com/stackabletech/commons-operator.git diff --git a/local-antora-playbook.yml b/local-antora-playbook.yml index c84995e51..d2c6ba958 100644 --- a/local-antora-playbook.yml +++ b/local-antora-playbook.yml @@ -15,18 +15,23 @@ urls: content: sources: - url: ./ - 
branches: + branches: - HEAD - release/23.7 - release/23.4 - release/23.1 - - release/22.11 - - release/22.09 - - release/22.06 - # stackablectl - - url: https://github.com/stackabletech/stackablectl.git + # management tools + - url: https://github.com/stackabletech/stackable-cockpit.git start_path: docs branches: main + # demos + - url: https://github.com/stackabletech/demos.git + start_path: docs + branches: + - main + - release-23.7 + - release-23.4 + - release-23.1 # internal operators - url: https://github.com/stackabletech/commons-operator.git start_path: docs @@ -35,10 +40,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat # (required for 0.4 and below) - - old-release-docs/0.4 - - old-release-docs/0.3 - - old-release-docs/0.2 - url: https://github.com/stackabletech/secret-operator.git start_path: docs branches: @@ -46,10 +47,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat # (required for 0.6 and below) - tags: - - docs/0.6 - - docs/0.5 - url: https://github.com/stackabletech/listener-operator.git start_path: docs branches: @@ -65,11 +62,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat # (required for 0.6 and below) - - old-release-docs/0.6 - - old-release-docs/0.5 - tags: - - docs/0.4 - url: https://github.com/stackabletech/druid-operator.git start_path: docs branches: @@ -77,11 +69,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.8 - - old-release-docs/0.7 - tags: - - docs/0.6 - url: https://github.com/stackabletech/hbase-operator.git start_path: docs branches: @@ -89,10 +76,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.5 - - old-release-docs/0.4 - - old-release-docs/0.3 - url: https://github.com/stackabletech/hdfs-operator.git start_path: docs branches: @@ -100,11 +83,6 @@ content: - release-23.7 
- release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.6 - - old-release-docs/0.5 - tags: - - docs/0.4 - url: https://github.com/stackabletech/hive-operator.git start_path: docs branches: @@ -112,11 +90,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - tags: - - docs/0.8 - - docs/0.7 - - docs/0.6 - url: https://github.com/stackabletech/kafka-operator.git start_path: docs branches: @@ -124,11 +97,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - tags: - - docs/0.8 - - docs/0.7 - - docs/0.6 - url: https://github.com/stackabletech/nifi-operator.git start_path: docs branches: @@ -136,10 +104,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat # (required for 0.8 and below) - - old-release-docs/0.8 - - old-release-docs/0.7 - - old-release-docs/0.6 - url: https://github.com/stackabletech/opa-operator.git start_path: docs branches: @@ -147,11 +111,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - tags: - - docs/0.11 - - docs/0.10 - - docs/0.9 - url: https://github.com/stackabletech/spark-k8s-operator.git start_path: docs branches: @@ -159,13 +118,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - tags: - - docs/0.6 - - docs/0.5 - - docs/0.4 - - docs/0.3 - - docs/0.2 - url: https://github.com/stackabletech/superset-operator.git start_path: docs branches: @@ -173,10 +125,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.7 - - old-release-docs/0.6 - - old-release-docs/0.5 - url: https://github.com/stackabletech/trino-operator.git start_path: docs branches: @@ -184,13 +132,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.8 - - old-release-docs/0.7 - tags: - - docs/0.6 - - docs/0.5 - - docs/0.4 - url: 
https://github.com/stackabletech/zookeeper-operator.git start_path: docs branches: @@ -198,10 +139,6 @@ content: - release-23.7 - release-23.4 - release-23.1 - - distributed-component-compat - - old-release-docs/0.12 - - old-release-docs/0.11 - - old-release-docs/0.10 ui: bundle: diff --git a/modules/ROOT/pages/getting_started.adoc b/modules/ROOT/pages/getting_started.adoc index 01f4de8cd..e5e7614a4 100644 --- a/modules/ROOT/pages/getting_started.adoc +++ b/modules/ROOT/pages/getting_started.adoc @@ -4,28 +4,32 @@ One of the best ways of getting started with a new platform is to try it out. An == About this guide -Firstly, let’s cover whether this *Getting Started* guide is right for you. This is intended as a learning tool to discover more about Stackable, its deployment and architecture. +Firstly, let's cover whether this *Getting Started* guide is right for you. This is intended as a learning tool to discover more about Stackable, its deployment and architecture. -* If you want to build a production cluster then this is not for you. This tutorial is to familiarise you with the Stackable architecture and is not a guide for building robust clusters. +* If you want to build a production cluster then this is not for you. This tutorial is to familiarize you with the Stackable architecture and is not a guide for building robust clusters. * This is intended for use in a private network or lab; it doesn't enable many security features such as authentication or encryption and should not be directly connected to the Internet. Be careful if you're deploying in the cloud as your instances may default to using public IPs. == Overview + Stackable is based on Kubernetes and uses this as the control plane to manage clusters. In this guide we will build a simple cluster with 3 services; Apache ZooKeeper, Apache Kafka and Apache NiFi. == Installing Kubernetes and kubectl -Stackable’s control plane is built around Kubernetes. 
Follow the xref:kubernetes.adoc#local-installation[instructions] on how to set up a local Kubernetes instance if you do not have access to a cluster and install kubectl. + +Stackable's control plane is built around Kubernetes. Follow the xref:kubernetes.adoc#local-installation[instructions] on how to set up a local Kubernetes instance if you do not have access to a cluster and install kubectl. If you already have kubectl installed, and have access to a Kubernetes cluster, you can skip this step. == Installing Stackable + === Install stackablectl -Install the Stackable command line utility xref:stackablectl::index.adoc[stackablectl] by following the installation steps for your platform on the xref:stackablectl::installation.adoc[installation] page. +Install the Stackable command line utility xref:management:stackablectl:index.adoc[stackablectl] by following the installation steps for your platform on the xref:management:stackablectl:installation.adoc[installation] page. === Installing Stackable Operators + The Stackable operators are components that translate the service definitions deployed via Kubernetes into deploy services on the worker nodes. These can be installed on any node that has access to the Kubernetes control plane. In this example we will install them on the controller node. -Stackable operators can be installed using stackablectl. Run the following commands to install ZooKeeper, Kafka and NiFi from the Stackable 23.7 release. +Stackable operators can be installed using `stackablectl`. Run the following commands to install ZooKeeper, Kafka and NiFi from the Stackable 23.7 release. 
[source,bash] ---- @@ -58,19 +62,29 @@ helm install nifi-operator stackable-stable/nifi-operator --version=23.7 You can check which operators are installed using `stackablectl operator installed`: +[source,console] ---- -OPERATOR VERSION NAMESPACE STATUS LAST UPDATED -commons 23.7.0 default deployed 2023-07-27 09:41:05.769685041 +0200 CEST -kafka 23.7.0 default deployed 2023-07-27 09:41:27.685845379 +0200 CEST -nifi 23.7.0 default deployed 2023-07-27 09:41:40.928558978 +0200 CEST -secret 23.7.0 default deployed 2023-07-27 09:41:51.820834174 +0200 CEST -zookeeper 23.7.0 default deployed 2023-07-27 09:41:54.972145417 +0200 CEST +┌────────────────────┬─────────┬─────────────────────┬──────────┬──────────────────────────────────────────┐ +│ OPERATOR ┆ VERSION ┆ NAMESPACE ┆ STATUS ┆ LAST UPDATED │ +╞════════════════════╪═════════╪═════════════════════╪══════════╪══════════════════════════════════════════╡ +│ commons-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-09-26 14:59:10.447836367 +0200 CEST │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ kafka-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-09-26 14:59:25.162058457 +0200 CEST │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ nifi-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-09-26 14:59:35.881227443 +0200 CEST │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ secret-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-09-26 14:59:44.51273442 +0200 CEST │ +├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ zookeeper-operator ┆ 23.7.0 ┆ stackable-operators ┆ deployed ┆ 2023-09-26 14:59:58.196949027 +0200 CEST │ +└────────────────────┴─────────┴─────────────────────┴──────────┴──────────────────────────────────────────┘ ---- 
== Deploying Stackable Services -At this point you’ve successfully deployed Kubernetes and the Stackable operators we need and are ready to deploy services to the cluster. To do this we provide service descriptions to Kubernetes for each of the services we wish to deploy. + +At this point you've successfully deployed Kubernetes and the Stackable operators we need and are ready to deploy services to the cluster. To do this we provide service descriptions to Kubernetes for each of the services we wish to deploy. === Apache ZooKeeper + We will deploy an Apache ZooKeeper instance to our cluster. [source,bash] @@ -105,6 +119,7 @@ EOF ---- === Apache Kafka + We will deploy an Apache Kafka broker that depends on the ZooKeeper service we just deployed. The zookeeperReference property below points to the namespace and name we gave to the ZooKeeper service deployed previously. [source,bash] @@ -139,6 +154,7 @@ EOF ---- === Apache NiFi + We will next deploy an Apache NiFi server. [source,bash] @@ -203,9 +219,11 @@ zookeeper-operator-deployment-64fcccc797-pckhf 1/1 Running 0 Since this is the first time that each of these services has been deployed to these nodes, it will take some time to download the software from the Stackable repository and deploy the services. Once all the pods are in the running state your cluster is ready to use. == Testing your cluster + If all has gone well then you will have successfully deployed a Stackable cluster and used it to start three services that should now be ready for you. === Apache ZooKeeper + We can test ZooKeeper by running the ZooKeeper CLI shell. The easiest way to do this is to run the CLI shell on the pod that is running ZooKeeper. [source,bash] @@ -221,6 +239,7 @@ The shell should connect automatically to the ZooKeeper server running on the po ---- === Apache Kafka + To test Kafka we'll create a topic, and verify that it was created. 
First create the topic with the following command: @@ -239,6 +258,7 @@ kubectl exec -i -t simple-kafka-broker-brokers-0 -c kafka -- \ ---- === Apache NiFi + Apache NiFi provides a web interface and the easiest way to test it is to view this in a web browser. To access the web interface we first need to get the ip address and port Nifi is listening on. To get the IP address we need to connect to (in this case `172.18.0.2`), run: diff --git a/modules/ROOT/pages/index.adoc b/modules/ROOT/pages/index.adoc index cbd1a4b1d..fa931e719 100644 --- a/modules/ROOT/pages/index.adoc +++ b/modules/ROOT/pages/index.adoc @@ -1,8 +1,14 @@ = Stackable Documentation :page-layout: landing +:k8s-operators: https://kubernetes.io/docs/concepts/extend-kubernetes/operator/ +:docs-discussion: https://github.com/stackabletech/community/discussions +:docs-issues: https://github.com/stackabletech/documentation/issues +:docs-repo: https://github.com/stackabletech/documentation + Welcome to Stackable! -This documentation gives you an overview of the Stackable Data Platform, how to install and manage it as well as some tutorials. +This documentation gives you an overview of the Stackable Data Platform, how to install and manage it as well as some +tutorials. ++++
@@ -16,11 +22,14 @@ This documentation gives you an overview of the Stackable Data Platform, how to

Introduction

++++ -The Stackable Data Platform allows you to deploy, scale and manage Data infrastructure in any environment running https://kubernetes.io/[Kubernetes]. +The Stackable Data Platform allows you to deploy, scale and manage Data infrastructure in any environment running +https://kubernetes.io/[Kubernetes]. -You can find an overview of the supported components <>, as well as a full list of all supported product versions xref:operators:supported_versions.adoc[here]. +You can find an overview of the supported components <>, as well as a full list of all supported +product versions xref:operators:supported_versions.adoc[here]. -If you have any feedback regarding the documentation please either open an https://github.com/stackabletech/documentation/issues[issue], ask a https://github.com/stackabletech/documentation/discussions[question] or look at the source for this documentation in its https://github.com/stackabletech/documentation[repository]. +If you have any feedback regarding the documentation please either open an {docs-issues}[issue], ask a +{docs-discussion}[question] or look at the source for this documentation in its {docs-repo}[repository]. ++++
@@ -34,16 +43,20 @@ If you have any feedback regarding the documentation please either open an https

Goal of the project

++++ -We are building a distribution of existing Open Source tools that together comprise the components of a modern data platform. +We are building a distribution of existing Open Source tools that together comprise the components of a modern data +platform. -There are components to ingest data, to store data, to process and visualize and much more. -While the platform got started in the _Big Data_ ecosystem we are in no way limited to big data workloads. +There are components to ingest data, to store data, to process and visualize and much more. While the platform got +started in the _Big Data_ ecosystem we are in no way limited to big data workloads. -You can declaratively build these environments, and we don't stop at the tool level as we also provide ways for the users to interact with the platform in the "as Code"-approach. +You can declaratively build these environments, and we don't stop at the tool level as we also provide ways for the +users to interact with the platform in the "as Code"-approach. We are leveraging the https://www.openpolicyagent.org/[Open Policy Agent] to provide Security-as-Code. -We are building a distribution that includes the “best of breed” of existing Open Source tools, but bundles them in a way, so it is easy to deploy a fully working stack of software. Most of the existing tools are “single purpose” tools, which often do not play nicely together out-of-the-box. +We are building a distribution that includes the “best of breed” of existing Open Source tools, but bundles them in a +way, so it is easy to deploy a fully working stack of software. Most of the existing tools are “single purpose” tools, +which often do not play nicely together out-of-the-box. ++++ @@ -53,12 +66,16 @@ We are building a distribution that includes the “best of breed” of existing ++++ -== Components +== Release Notes -We are using Kubernetes as our deployment platform. 
-And we're building https://kubernetes.io/docs/concepts/extend-kubernetes/operator/[Operators] for each of the products we support. +The Stackable platform consists of multiple operators that work together. Periodically a platform release is made, +including all components of the platform at a specific version. See the latest release notes for 23.7 +xref:release_notes.adoc[here]. + +== Components -The Stackable Data Platform supports the following products: +We are using Kubernetes as our deployment platform. And we're building {k8s-operators}[Operators] for each of the +products we support. The Stackable Data Platform supports the following products: ++++
@@ -78,7 +95,7 @@ The Stackable Data Platform supports the following products: Airflow is a workflow engine and your replacement should you be using Apache Oozie. -xref:airflow::index.adoc[Read more] +xref:airflow:index.adoc[Read more] ++++ @@ -94,7 +111,7 @@ xref:airflow::index.adoc[Read more] Apache Druid is a real-time database to power modern analytics applications. -xref:druid::index.adoc[Read more] +xref:druid:index.adoc[Read more] ++++ @@ -110,7 +127,7 @@ xref:druid::index.adoc[Read more] HBase is a distributed, scalable, big data store. -xref:hbase::index.adoc[Read more] +xref:hbase:index.adoc[Read more] ++++ @@ -126,7 +143,7 @@ xref:hbase::index.adoc[Read more] HDFS is a distributed file system that provides high-throughput access to application data. -xref:hdfs::index.adoc[Read more] +xref:hdfs:index.adoc[Read more] ++++ @@ -140,9 +157,10 @@ xref:hdfs::index.adoc[Read more]

Apache Hive

++++ -The Apache Hive data warehouse software facilitates reading, writing, and managing large datasets residing in distributed storage using SQL. We support the Hive Metastore. +The Apache Hive data warehouse software facilitates reading, writing, and managing large datasets residing in +distributed storage using SQL. We support the Hive Metastore. -xref:hive::index.adoc[Read more] +xref:hive:index.adoc[Read more] ++++ @@ -156,9 +174,10 @@ xref:hive::index.adoc[Read more]

Apache Kafka

++++ -Apache Kafka is an open-source distributed event streaming platform used by thousands of companies for high-performance data pipelines, streaming analytics, data integration, and mission-critical applications. +Apache Kafka is an open-source distributed event streaming platform used by thousands of companies for high-performance +data pipelines, streaming analytics, data integration, and mission-critical applications. -xref:kafka::index.adoc[Read more] +xref:kafka:index.adoc[Read more] ++++ @@ -174,7 +193,7 @@ xref:kafka::index.adoc[Read more] An easy to use, powerful, and reliable system to process and distribute data. -xref:nifi::index.adoc[Read more] +xref:nifi:index.adoc[Read more] ++++ @@ -188,9 +207,10 @@ xref:nifi::index.adoc[Read more]

Apache Spark

++++ -Apache Spark is a multi-language engine for executing data engineering, data science, and machine learning on single-node machines or clusters. +Apache Spark is a multi-language engine for executing data engineering, data science, and machine learning on +single-node machines or clusters. -xref:spark-k8s::index.adoc[Read more] +xref:spark-k8s:index.adoc[Read more] ++++ @@ -206,7 +226,7 @@ xref:spark-k8s::index.adoc[Read more] Apache Superset is a modern data exploration and visualization platform. -xref:superset::index.adoc[Read more] +xref:superset:index.adoc[Read more] ++++ @@ -222,7 +242,7 @@ xref:superset::index.adoc[Read more] Fast distributed SQL query engine for big data analytics that helps you explore your data universe. -xref:trino::index.adoc[Read more] +xref:trino:index.adoc[Read more] ++++ @@ -236,9 +256,10 @@ xref:trino::index.adoc[Read more]

Apache ZooKeeper

++++ -ZooKeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services. +ZooKeeper is a centralized service for maintaining configuration information, naming, providing distributed +synchronization, and providing group services. -xref:zookeeper::index.adoc[Read more] +xref:zookeeper:index.adoc[Read more] ++++ diff --git a/modules/ROOT/pages/product-information.adoc b/modules/ROOT/pages/product-information.adoc index 220edef07..5fea81db8 100644 --- a/modules/ROOT/pages/product-information.adoc +++ b/modules/ROOT/pages/product-information.adoc @@ -1,15 +1,21 @@ = Product information -This page contains concrete specifications about the Stackable Data Platform (SDP) as a product, which components are included, how they are supplied and which external dependencies exist that you as a customer need to take care of. +This page contains concrete specifications about the Stackable Data Platform (SDP) as a product, which components are +included, how they are supplied and which external dependencies exist that you as a customer need to take care of. [#platform-components] == Platform components -The Stackable Platform is made up of multiple components. Operators for data products, Operators for additional functionality to facilitate easy integration between the different products, as well as the data products themselves. Auxiliary software to interact with the platform, as well as software to ease the deployment of platform components. +The Stackable Platform is made up of multiple components. Operators for data products, Operators for additional +functionality to facilitate easy integration between the different products, as well as the data products themselves. +Auxiliary software to interact with the platform, as well as software to ease the deployment of platform components. 
=== Data products and Kubernetes Operators -The main components of the SDP are a set of open source data products. The SDP control plane consists of corresponding Kubernetes Operators. Each Operator is supplied in a Docker <> image. For every Operator there is also a Helm Chart to facilitate installation via Helm. Operators for Products supported by the Platform deploy these products as <> in Kubernetes. +The main components of the SDP are a set of open source data products. The SDP control plane consists of corresponding +Kubernetes Operators. Each Operator is supplied in a Docker <> image. For every Operator there is +also a Helm Chart to facilitate installation via Helm. Operators for Products supported by the Platform deploy these +products as <> in Kubernetes. Supported products: @@ -25,36 +31,43 @@ Supported products: * xref:trino:index.adoc[Trino] * xref:zookeeper:index.adoc[Apache Zookeeper] -The product also includes the xref:opa:index.adoc[Open Policy Agent], and operators that provide additional functionality for managing and control SDP: xref:commons-operator:index.adoc[Commons], xref:secret-operator:index.adoc[Secret] and xref:listener-operator:index.adoc[Listener] Operator. +The product also includes the xref:opa:index.adoc[Open Policy Agent], and operators that provide additional +functionality for managing and controlling SDP: xref:commons-operator:index.adoc[Commons], +xref:secret-operator:index.adoc[Secret] and xref:listener-operator:index.adoc[Listener] Operator. -The pages linked above also detail the use cases and features supported by each component. You can find additional information in the xref:release_notes.adoc[release notes]. Refer to the xref:operators:supported_versions.adoc[list of supported product versions] to find out which product versions are supported. +The pages linked above also detail the use cases and features supported by each component. You can find additional +information in the xref:release_notes.adoc[release notes].
Refer to the xref:operators:supported_versions.adoc[list of +supported product versions] to find out which product versions are supported. [#stackablectl] === stackablectl -xref:stackablectl::index.adoc[stackablectl] is a commandline utility that makes it easier to install and interact with stackable components. +xref:management:stackablectl:index.adoc[] is a command line utility that makes it easier to install and interact with +Stackable components. == Supported installation methods [#containers] === Operators and products -All operators are supplied in container images. The products are also deployed in container images. -The docker images are available for download here: https://repo.stackable.tech/#browse/browse:docker +All operators are supplied in container images. The products are also deployed in container images. The docker images +are available for download here: https://repo.stackable.tech/#browse/browse:docker[] -Stackable supports installing the Operators via https://helm.sh/[Helm] or with <>. -Every Operator includes installation instructions in the Getting started guide. +Stackable supports installing the Operators via https://helm.sh/[Helm] or with <>. Every Operator includes +installation instructions in the Getting started guide. ==== Helm Charts -The Helm Charts can be found here: https://repo.stackable.tech/#browse/browse:helm-stable Using the Helm Charts requires Helm version 3 or above. +The Helm Charts can be found here: https://repo.stackable.tech/#browse/browse:helm-stable[] Using the Helm Charts +requires Helm version 3 or above. [#stackablectl-installation] -=== stackablectl +==== stackablectl -stackablectl is available for download, pre-built binaries are available on GitHub. The download link and installation steps are provided in the xref:stackablectl::installation.adoc[installation documentation]. +`stackablectl` is available for download, pre-built binaries are available on GitHub. 
The download link and installation +steps are provided in the xref:management:stackablectl:installation.adoc[installation]. == System requirements @@ -67,15 +80,23 @@ Every Operator needs: === stackablectl -Hardware requirements and supported operating systems can be found in the xref:stackablectl::installation.adoc#system-requirements[system requirements section] of the stackablectl documentation. +Hardware requirements and supported operating systems can be found in the +xref:management:stackablectl:installation.adoc#system-requirements[system requirements section] of the `stackablectl` +documentation. === Stackable Data Platform Open Source Products -The system requirements of each open source product depend on your specific use case. In the xref:stackablectl::demos/index.adoc[demos] you can find example use cases; every demo also has a _System requirements_ section which can provide a guideline for system sizing. Further example sizings can be found at https://ci.stackable.tech/job/<product>-operator-it-summary/ (where `<product>` is i.e. _druid_, _hbase_, etc.) in the `cluster-info.txt` file. These are the cluster sizes used for integration tests, and can be regarded as working cluster configurations. +The system requirements of each open source product depend on your specific use case. In the xref:demos:index.adoc[demos] +you can find example use cases; every demo also has a _System requirements_ section which can provide a guideline for +system sizing. Further example sizings can be found at https://ci.stackable.tech/job/<product>-operator-it-summary/ +(where `<product>` is e.g. _druid_, _hbase_, etc.) in the `cluster-info.txt` file. These are the cluster sizes used for +integration tests, and can be regarded as working cluster configurations. == Prerequisites and required external components -Required external components are components that are required for the platform or parts of it to operate, but are not part of the Stackable Data Platform.
Note that Stackable does neither distribute nor offer support for the external components. +Required external components are components that are required for the platform or parts of it to operate, but are not +part of the Stackable Data Platform. Note that Stackable neither distributes nor offers support for the external +components. === Kubernetes @@ -100,8 +121,17 @@ The following products have required external components to run: === Optional components -Stackable software can be used with xref:stackablectl::demos/index.adoc[sample configurations] and third-party components, as outlined in the relevant documentation. These external components are not part of of the stackable products and can be used by the customer at their own risk. Stackable does not distribute these components and does not offer support for them. (See <> above for a list of the supported components that are part of the platform) +Stackable software can be used with xref:demos:index.adoc[sample configurations] and third-party components, as +outlined in the relevant documentation. These external components are not part of the Stackable products and can be +used by the customer at their own risk. Stackable does not distribute these components and does not offer support for +them. (See <> above for a list of the supported components that are part of the +platform) === Technology preview components -Some functionality of the platform might be labelled as _technology preview_. Stackable does not offer support for technology preview components, and therefore discourages their use in a production setting. Using these features/components is done so at your own risk. Customers are encouraged to provide feedback and suggestions for improvements on preview components. Neither Stackable nor third parties or licensees are obligated to distribute technology preview components or include them into the product. Technology preview components might be discontinued at any time.
+Some functionality of the platform might be labelled as _technology preview_. Stackable does not offer support for +technology preview components, and therefore discourages their use in a production setting. Use of these +features/components is at your own risk. Customers are encouraged to provide feedback and suggestions for +improvements on preview components. Neither Stackable nor third parties or licensees are obligated to distribute +technology preview components or include them in the product. Technology preview components might be discontinued at +any time. diff --git a/modules/ROOT/pages/quickstart.adoc b/modules/ROOT/pages/quickstart.adoc index 366180e13..7bd52654e 100644 --- a/modules/ROOT/pages/quickstart.adoc +++ b/modules/ROOT/pages/quickstart.adoc @@ -1,52 +1,52 @@ = Quickstart -This is the super-short getting started guide that should enable you to get something up and running in less than three minutes (excluding download times). +:latest-release: https://github.com/stackabletech/stackable-cockpit/releases/tag/stackablectl-1.0.0-rc1 +:cockpit-releases: https://github.com/stackabletech/stackable-cockpit/releases + +This is the super-short getting started guide that should enable you to get something up and running in less than three +minutes (excluding download times). == Setup Install `stackablectl`, the Stackable CLI utility. -On Linux (Windows and MacOS instructions below): +=== Installation on Linux -[source,console] ----- -$ curl -L -o stackablectl https://github.com/stackabletech/stackablectl/releases/latest/download/stackablectl-x86_64-unknown-linux-gnu ----- - -and mark it as executable: +Download the `stackablectl-x86_64-unknown-linux-gnu` binary file from the link:{latest-release}[latest release], then +rename the file to `stackablectl`.
You can also use the following command: [source,console] ---- -$ chmod +x stackablectl +wget -O stackablectl https://github.com/stackabletech/stackable-cockpit/releases/download/stackablectl-1.0.0-rc1/stackablectl-x86_64-unknown-linux-gnu +# or +curl -L -o stackablectl https://github.com/stackabletech/stackable-cockpit/releases/download/stackablectl-1.0.0-rc1/stackablectl-x86_64-unknown-linux-gnu ---- -.Instruction for Windows and MacOS -[%collapsible] -==== -Download `stackablectl-x86_64-pc-windows-gnu.exe` (for Windows) or `stackablectl-x86_64-apple-darwin` (Mac OS Intel) or `stackablectl-aarch64-apple-darwin` (MacOS ARM) from the https://github.com/stackabletech/stackablectl/releases/latest[latest release] and save it as `stackablectl`. - -For windows, you can simply execute it. For MacOS mark it as executable: +Mark the binary as executable: [source,console] ---- -$ chmod +x stackablectl +chmod +x stackablectl ---- -If you want to execute it from anywhere in your system, you need to add it to the system `PATH`. +Then, make sure it is present in your `$PATH`, like `/usr/local/bin`. + +=== Installation on macOS and Windows -NOTE: If macOS denies the execution of stackablectl go to `Settings` -> `Security & Privacy` -> `General`. Here you will see a pop up asking if you want to allow access for `stackablectl`. You must allow access. -==== +See the xref:management:stackablectl:installation.adoc[guide] for detailed information about the installation process on macOS and Windows. 
== Install the Taxi data demo -The xref:stackablectl::demos/trino-taxi-data.adoc[`trino-taxi-data`] Demo installs the latest Stackable platform release and a visualization of https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page[New York City Taxi Data] using Trino and Superset: +The xref:demos:trino-taxi-data.adoc[`trino-taxi-data`] Demo installs the latest Stackable platform release and a +visualization of https://www.nyc.gov/site/tlc/about/tlc-trip-record-data.page[New York City Taxi Data] using Trino and +Superset: [source,console] ---- -./stackablectl demo install trino-taxi-data +stackablectl demo install trino-taxi-data ---- -TIP: Learn more about this demo in the demo docs: xref:stackablectl::demos/trino-taxi-data.adoc[] +TIP: Learn more about this demo in the demo docs: xref:demos:trino-taxi-data.adoc[] == Connect @@ -54,28 +54,30 @@ To list the installed installed Stackable services run the following command: [source,console] ---- -./stackablectl services list --all-namespaces +stackablectl stacklet list ---- It will output something like: [source] ---- - PRODUCT NAME NAMESPACE ENDPOINTS EXTRA INFOS - - hive hive default hive 172.18.0.5:30298 - metrics 172.18.0.5:31633 - - opa opa default http http://172.18.0.5:30316 - - superset superset default external-superset http://172.18.0.4:32295 Admin user: admin, password: adminadmin - - trino trino default coordinator-http http://172.18.0.3:30167 - coordinator-metrics 172.18.0.3:31818 - coordinator-https https://172.18.0.3:30141 - - minio minio-trino default http http://172.18.0.3:31062 Third party service - console-http http://172.18.0.3:30503 Admin user: admin, password: adminadmin +┌──────────┬───────────────┬───────────┬──────────────────────────────────────────────┬─────────────────────────────────┐ +│ PRODUCT ┆ NAME ┆ NAMESPACE ┆ ENDPOINTS ┆ CONDITIONS │ +╞══════════╪═══════════════╪═══════════╪══════════════════════════════════════════════╪═════════════════════════════════╡ +│ hive ┆ hive ┆ 
default ┆ ┆ Available, Reconciling, Running │ +├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ opa ┆ opa ┆ default ┆ ┆ Available, Reconciling, Running │ +├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ superset ┆ superset ┆ default ┆ external-superset http://172.18.0.3:31974 ┆ Available, Reconciling, Running │ +├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ trino ┆ trino ┆ default ┆ coordinator-metrics 172.18.0.3:30788 ┆ Available, Reconciling, Running │ +│ ┆ ┆ ┆ coordinator-https https://172.18.0.3:31010 ┆ │ +├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤ +│ minio ┆ minio-console ┆ default ┆ http http://172.18.0.3:32650 ┆ │ +└──────────┴───────────────┴───────────┴──────────────────────────────────────────────┴─────────────────────────────────┘ + +Use "stackablectl stacklet credentials [OPTIONS] " to display credentials for deployed stacklets. ---- -To connect to Superset, open the listed endpoint in your browser and log in with the username `admin` and the password `adminadmin`. +To connect to Superset, open the listed endpoint in your browser and log in with the username `admin` and the password +`adminadmin`. Use the `stackablectl stacklet credentials` command to retrieve credentials for various stacklets. diff --git a/modules/ROOT/pages/release_notes.adoc b/modules/ROOT/pages/release_notes.adoc index d259d6746..a1d7d9ea1 100644 --- a/modules/ROOT/pages/release_notes.adoc +++ b/modules/ROOT/pages/release_notes.adoc @@ -1,11 +1,12 @@ = Release notes for the Stackable Data Platform -The Stackable platform consists of multiple operators that work together. -Periodically a platform release is made, including all components of the platform at a specific version. 
+The Stackable platform consists of multiple operators that work together. Periodically a platform release is made, +including all components of the platform at a specific version. == Release 23.7 -This release introduces the specification of resource quotas and pod overrides and updates the product versions supported by SDP. +This release introduces the specification of resource quotas and pod overrides and updates the product versions +supported by SDP. === New / extended platform features @@ -13,11 +14,17 @@ The following new major platform features were added: Resource Quotas:: -Explicit resources are now applied to all containers, for both operators and products. This allows running the Stackable data platform on Kubernetes clusters with a ResourceQuota or LimitRange set. Where these are not specified directly, defaults will be used. See https://github.com/stackabletech/issues/issues/368[this issue] for more information. +Explicit resources are now applied to all containers, for both operators and products. This allows running the Stackable +Data Platform on Kubernetes clusters with a ResourceQuota or LimitRange set. Where these are not specified directly, +defaults will be used. See https://github.com/stackabletech/issues/issues/368[this issue] for more information. Pod Overrides:: -It is now possible to add custom settings which specify elements of a pod template (Service, StatefulSet etc.) on roles or rolegroups, which the operator then merges with the objects it writes before actually applying them. This provides the user with a possibility for specifying any property that can be set on a regular Kubernetes Pod, but which is not directly exposed via the Stackable custom resource definition. Have a look at xref:concepts:overrides.adoc[the documentation] for more details. +It is now possible to add custom settings which specify elements of a pod template (Service, StatefulSet etc.) 
on roles +or rolegroups, which the operator then merges with the objects it writes before actually applying them. This provides +the user with a possibility for specifying any property that can be set on a regular Kubernetes Pod, but which is not +directly exposed via the Stackable custom resource definition. Have a look at xref:concepts:overrides.adoc[the +documentation] for more details. For example, with HDFS: @@ -38,11 +45,14 @@ For example, with HDFS: Openshift certification:: -OLM bundles - a pre-requisite for the Openshift certification process - have been created for each operator. All 15 SDP operators in release 23.4.1 are now Openshift-certified and deployable directly from within an Openshift cluster. +OLM bundles - a pre-requisite for the Openshift certification process - have been created for each operator. All 15 SDP +operators in release 23.4.1 are now Openshift-certified and deployable directly from within an Openshift cluster. Signed SDP operator images:: -As of this release all Stackable operator images are signed (this feature will be added to product images in a subsequent release). More information about this, including how to verify the image signatures, can be found in this xref:tutorials:enabling_verification_of_image_signatures.adoc[tutorial]. +As of this release all Stackable operator images are signed (this feature will be added to product images in a +subsequent release). More information about this, including how to verify the image signatures, can be found in this +xref:tutorials:enabling_verification_of_image_signatures.adoc[tutorial]. 
New Versions:: @@ -84,8 +94,10 @@ Product features:: Additionally, there are some individual product features that are noteworthy: * https://github.com/stackabletech/hdfs-operator/issues/334[HDFS: support for enabling secure mode with Kerberos] -* https://github.com/stackabletech/spark-k8s-operator/issues/247[Spark-k8s: support for using custom certificates when accessing S3 with TLS] -* https://github.com/stackabletech/trino-operator/issues/436[Trino: support for arbitrary connectors using the generic connector for e.g. access to PostgreSQL] +* https://github.com/stackabletech/spark-k8s-operator/issues/247[Spark-k8s: support for using custom certificates when + accessing S3 with TLS] +* https://github.com/stackabletech/trino-operator/issues/436[Trino: support for arbitrary connectors using the generic + connector for e.g. access to PostgreSQL] * https://github.com/stackabletech/zookeeper-operator/issues/334[ZooKeeper: expose ZOOKEEPER_CLIENT_PORT in discovery CM] === stackablectl @@ -113,11 +125,13 @@ This release supports the following OpenShift versions: === Breaking changes -The re-structuring of configuration definitions in certain operators will require you to adapt your existing CRDs as shown below. +The re-structuring of configuration definitions in certain operators will require you to adapt your existing CRDs as +shown below. ==== Stackable Operator for Apache Airflow -* https://github.com/stackabletech/airflow-operator/issues/271[Consolidated remaining top level configuration to clusterConfig] +* https://github.com/stackabletech/airflow-operator/issues/271[Consolidated remaining top level configuration to + clusterConfig] CRDs should be changed from e.g. @@ -146,7 +160,8 @@ spec: ==== Stackable Operator for Apache Superset -* https://github.com/stackabletech/superset-operator/issues/379[Moved all top level config options to clusterConfig. 
Authentication is now provided via an array of AuthenticationClasses and additional properties] +* https://github.com/stackabletech/superset-operator/issues/379[Moved all top level config options to clusterConfig. + Authentication is now provided via an array of AuthenticationClasses and additional properties] CRDs should be changed from e.g. @@ -173,7 +188,9 @@ spec: ==== Stackable Operator for Trino -* https://github.com/stackabletech/trino-operator/issues/434[Reworked authentication mechanism: The clusterConfig.authentication now requires a list of AuthenticationClass references instead of the MultiUser and LDAP separation] +* https://github.com/stackabletech/trino-operator/issues/434[Reworked authentication mechanism: The + `clusterConfig.authentication` now requires a list of AuthenticationClass references instead of the MultiUser and + LDAP separation] CRDs should be changed from e.g. @@ -372,16 +389,22 @@ helm install --wait zookeeper-operator stackable-stable/zookeeper-operator --ver ==== Known upgrade issues -In the case of the breaking changes detailed above it will be necessary to update the custom resources for Airflow, Superset and Trino clusters and re-apply them. +In the case of the breaking changes detailed above it will be necessary to update the custom resources for Airflow, +Superset and Trino clusters and re-apply them. Additionally, please note the following: ===== All operators -* If the default PVC size has been changed, then the StatefulSet must be deleted: it is not possible to change the PVC in the StatefulSet specification. -** The error message is similar to: `StatefulSet.apps "trino-worker-default" is invalid: spec: Forbidden: updates to statefulset spec for fields other than 'replicas', 'template', 'updateStrategy', [...]` + +* If the default PVC size has been changed, then the StatefulSet must be deleted: it is not possible to change the PVC + in the StatefulSet specification. 
+** The error message is similar to: `StatefulSet.apps "trino-worker-default" is invalid: spec: Forbidden: updates to + statefulset spec for fields other than 'replicas', 'template', 'updateStrategy', [...]` ===== ZooKeeper operator -* The ZooKeeper operator in this release expects a product image with the same version. An existing ZooKeeper cluster resource should be deleted and re-applied with the corresponding `stackableVersion` e.g. + +* The ZooKeeper operator in this release expects a product image with the same version. An existing ZooKeeper cluster + resource should be deleted and re-applied with the corresponding `stackableVersion` e.g.: [source,yaml] ---- @@ -391,45 +414,78 @@ spec: stackableVersion: "23.7" ---- - == Release 23.4 -The focus in this platform release is on the support of default/custom affinities and the status field, as well as the rollout of log aggregation across the remaining operators. Additionally, all operators have been updated and tested for compatibility with OpenShift clusters (versions 4.10 and 4.11). Several operators from the 23.1 platform release were already certified against OpenShift. +The focus in this platform release is on the support of default/custom affinities and the status field, as well as the +rollout of log aggregation across the remaining operators. Additionally, all operators have been updated and tested for +compatibility with OpenShift clusters (versions 4.10 and 4.11). Several operators from the 23.1 platform release were +already certified against OpenShift. === Release 23.4.0 -This was the first release in the 23.4 release line. -It is recommended to install <> instead, as it contains relevant bugfixes. + +This was the first release in the 23.4 release line. It is recommended to install <> instead, as it +contains relevant bugfixes. === Release 23.4.1 + This is a bugfix/patch-level release that fixes the following issues: -* Fix missing custom resource defaults that are required for a release update.
See https://github.com/stackabletech/issues/issues/388[here]. -* Specify the security context to run as a member of the root group (this has been implemented for the Stackable operators where it had not previously been implemented i.e. Apache HBase, Apache HDFS, Apache ZooKeeper and Apache Spark on Kubernetes). This is required by Openshift clusters so that the product can be run with a random UID. This is a https://airflow.apache.org/docs/docker-stack/entrypoint.html#allowing-arbitrary-user-to-run-the-container[requirement] for at least Airflow, but is Openshift policy as described https://docs.openshift.com/container-platform/4.11/openshift_images/create-images.html#images-create-guide-openshift_create-images[here] and https://developers.redhat.com/blog/2020/10/26/adapting-docker-and-kubernetes-containers-to-run-on-red-hat-openshift-container-platform[here]. -* Automatically migrate the name used for the bundle-builder container for OPA daemonsets. See https://github.com/stackabletech/opa-operator/issues/444[here]. -* Automatically shorten the registration socket path used in listener-operator for Microk8s compatibility, migrated during upgrade. See https://github.com/stackabletech/listener-operator/issues/76[here]. +* Fix missing custom resource defaults that are required for a release update. See + https://github.com/stackabletech/issues/issues/388[here]. +* Specify the security context to run as a member of the root group (this has been implemented for the Stackable + operators where it had not previously been implemented i.e. Apache HBase, Apache HDFS, Apache ZooKeeper and Apache + Spark on Kubernetes). This is required by Openshift clusters so that the product can be run with a random UID. 
This is + a https://airflow.apache.org/docs/docker-stack/entrypoint.html#allowing-arbitrary-user-to-run-the-container[requirement] + for at least Airflow, but is Openshift policy as described https://docs.openshift.com/container-platform/4.11/openshift_images/create-images.html#images-create-guide-openshift_create-images[here] + and https://developers.redhat.com/blog/2020/10/26/adapting-docker-and-kubernetes-containers-to-run-on-red-hat-openshift-container-platform[here]. +* Automatically migrate the name used for the bundle-builder container for OPA daemonsets. See + https://github.com/stackabletech/opa-operator/issues/444[here]. +* Automatically shorten the registration socket path used in listener-operator for Microk8s compatibility, migrated + during upgrade. See https://github.com/stackabletech/listener-operator/issues/76[here]. === New / extended platform features The following new major platform features were added: Cluster Operation:: -The first part of xref:concepts:cluster_operations.adoc[Cluster operations] was rolled out in every applicable Stackable Operator. This supports pausing the cluster reconciliation and stopping the cluster completely. Pausing reconciliation will not apply any changes to the Kubernetes resources (e.g. when changing the custom resource). Stopping the cluster will set all replicas of StatefulSets, Deployments or DaemonSets to zero and therefore deleting all Pods belonging to that cluster (not the PVCs). +The first part of xref:concepts:operations/cluster_operations.adoc[Cluster operations] was rolled out in every applicable Stackable +Operator. This supports pausing the cluster reconciliation and stopping the cluster completely. Pausing reconciliation +will not apply any changes to the Kubernetes resources (e.g. when changing the custom resource). 
Stopping the cluster +will set all replicas of StatefulSets, Deployments or DaemonSets to zero and therefore result in the deletion of all Pods +belonging to that cluster (not the PVCs). Status Field:: -Operators of the Stackable Data Platform create, manage and delete Kubernetes resources: in order to easily query the health state of the products - and react accordingly - Stackable Operators use several predefined condition types to capture different aspects of a product's availability. See this xref:contributor:adr/ADR027-status[ADR] for more information. +Operators of the Stackable Data Platform create, manage and delete Kubernetes resources: in order to easily query the +health state of the products - and react accordingly - Stackable Operators use several predefined condition types to +capture different aspects of a product's availability. See this xref:contributor:adr/ADR027-status[ADR] for more +information. Default / Custom Affinities:: -In Kubernetes there are different ways to influence how Pods are assigned to Nodes. In some cases it makes sense to co-locate certain services that communicate a lot with each other, such as HBase regionservers with HDFS datanodes. In other cases it makes sense to distribute the Pods among as many Nodes as possible. There may also be additional requirements e.g. placing important services - such as HDFS namenodes - in different racks, datacenter rooms or even datacenters. This release implements default affinities that should suffice for many scenarios out-of-the box, while also allowing for custom affinity rules at a role and/or role-group level. See this xref:contributor:adr/ADR026-affinities.adoc[ADR] for more information. +In Kubernetes there are different ways to influence how Pods are assigned to Nodes. In some cases it makes sense to +co-locate certain services that communicate a lot with each other, such as HBase regionservers with HDFS datanodes.
In +other cases it makes sense to distribute the Pods among as many Nodes as possible. There may also be additional +requirements e.g. placing important services - such as HDFS namenodes - in different racks, datacenter rooms or even +datacenters. This release implements default affinities that should suffice for many scenarios out of the box, while +also allowing for custom affinity rules at a role and/or role-group level. See this +xref:contributor:adr/ADR026-affinities.adoc[ADR] for more information. Log Aggregation:: -The logging framework (added to the platform in Release 23.1) offers a consistent custom resource configuration and a separate, persisted sink (defaulting to OpenSearch). This has now been rolled out across all products. See this xref:contributor:adr/adr025-logging_architecture[ADR] and this xref:concepts:logging.adoc[concepts page] for more information. +The logging framework (added to the platform in Release 23.1) offers a consistent custom resource configuration and a +separate, persisted sink (defaulting to OpenSearch). This has now been rolled out across all products. See this +xref:contributor:adr/adr025-logging_architecture[ADR] and this xref:concepts:logging.adoc[concepts page] for more +information. Service Type:: -The Service type can now be specified in all products. This currently differentiates between the internal ClusterIP and the external NodePort and is forward compatible with the xref:listener-operator:listenerclass.adoc[ListenerClass] for the automatic exposure of Services via the Listener Operator. This change is not backwards compatible with older platform releases. For security reasons, the default is set to the cluster-internal (ClusterIP) ListenerClass. A cluster can be exposed outside of Kubernetes by setting clusterConfig.listenerClass to external-unstable (NodePort) or external-stable (LoadBalancer). +The Service type can now be specified in all products.
This currently differentiates between the internal ClusterIP and +the external NodePort and is forward compatible with the xref:listener-operator:listenerclass.adoc[ListenerClass] for +the automatic exposure of Services via the Listener Operator. This change is not backwards compatible with older +platform releases. For security reasons, the default is set to the cluster-internal (ClusterIP) ListenerClass. A cluster +can be exposed outside of Kubernetes by setting clusterConfig.listenerClass to external-unstable (NodePort) or +external-stable (LoadBalancer). New Versions:: @@ -456,14 +512,19 @@ The following have been added to `stackablectl`: ==== Trino-iceberg demo -This is a condensed form of the xref:stackablectl::demos/data-lakehouse-iceberg-trino-spark.adoc[] demo focusing on using the lakehouse to store and modify data. It demonstrates how to integrate Trino and Iceberg and should run on a local workstation. +This is a condensed form of the xref:demos:data-lakehouse-iceberg-trino-spark.adoc[] demo focusing on using the +lakehouse to store and modify data. It demonstrates how to integrate Trino and Iceberg and should run on a local +workstation. ==== Jupyterhub/Spark demo -This demo showcases the integration between Jupyter and Apache Hadoop deployed on the Stackable Data Platform (SDP) Kubernetes cluster. This demo can be installed on most cloud managed Kubernetes clusters as well as on premise or on a reasonably provisioned laptop. +This demo showcases the integration between Jupyter and Apache Hadoop deployed on the Stackable Data Platform (SDP) +Kubernetes cluster. This demo can be installed on most cloud managed Kubernetes clusters as well as on premise or on a +reasonably provisioned laptop. -The xref:stackablectl::quickstart.adoc[quickstart guide] shows how to get started with `stackablectl`. This link lists the xref:stackablectl::demos/index.adoc[available demos]. 
+The xref:management:stackablectl:quickstart.adoc[quickstart guide] shows how to get started with `stackablectl`. This +link lists the xref:demos:index.adoc[available demos]. === Supported Kubernetes versions @@ -488,9 +549,9 @@ You will need to adapt your existing CRDs due to the following breaking changes ==== All Stackable Operators -As mentioned above, specifying the service type is a breaking change for all operators. -The default value is set to the `cluster-internal` `ListenerClass`: if the cluster requires external access outside of Kubernetes then set `clusterConfig.listenerClass` - to `external-unstable` or `external-stable`: +As mentioned above, specifying the service type is a breaking change for all operators. The default value is set to the +`cluster-internal` `ListenerClass`: if the cluster requires external access outside of Kubernetes then set +`clusterConfig.listenerClass` to `external-unstable` or `external-stable`: ``` spec: @@ -504,9 +565,11 @@ This is an example for Trino, but the pattern is the same across all operators. ==== Stackable Operator for Apache Airflow -Existing Airflow clusters need to be deleted and recreated. Airflow metadata held in the database and DAGs saved on disk are not affected. +Existing Airflow clusters need to be deleted and recreated. Airflow metadata held in the database and DAGs saved on disk +are not affected. -This is required because the UID of the Airflow user has https://github.com/stackabletech/airflow-operator/pull/219[changed] to be in line with the rest of the platform. +This is required because the UID of the Airflow user has +https://github.com/stackabletech/airflow-operator/pull/219[changed] to be in line with the rest of the platform. ==== Stackable Operator for Apache HBase @@ -850,17 +913,21 @@ The following have been added to `stackablectl`: ==== Logging demo -This illustrates how to set up logging for Zookeeper and browse the results in an Open Search dashboard. 
This has been implemented for HBase, Hadoop and Zookeeper and will eventually be available for all Stackable operators. +This illustrates how to set up logging for Zookeeper and browse the results in an Open Search dashboard. This has been +implemented for HBase, Hadoop and Zookeeper and will eventually be available for all Stackable operators. ==== LDAP stack and tutorial -LDAP support has now been added to multiple products. An explanation of the overall approach is given xref:concepts:authentication.adoc[here] but in order to make the configuration steps a little clearer a xref:tutorials:authentication_with_openldap.adoc[tutorial] has been added that uses a dedicated Stackable xref:stackablectl::commands/stack.adoc[stack] for OpenLDAP and shows its usage. - - -The xref:stackablectl::quickstart.adoc[quickstart guide] shows how to get started with `stackablectl`. This link lists the xref:stackablectl::demos/index.adoc[available demos]. +LDAP support has now been added to multiple products. An explanation of the overall approach is given +xref:concepts:authentication.adoc[here] but in order to make the configuration steps a little clearer a +xref:tutorials:authentication_with_openldap.adoc[tutorial] has been added that uses a dedicated Stackable +xref:management:stackablectl:commands/stack.adoc[stack] for OpenLDAP and shows its usage. +The xref:management:stackablectl:quickstart.adoc[quickstart guide] shows how to get started with `stackablectl`. This +link lists the xref:demos:index.adoc[available demos]. === Supported Kubernetes versions + This release supports the following Kubernetes versions: * `1.25` @@ -869,11 +936,14 @@ This release supports the following Kubernetes versions: * `1.22` === Breaking changes -This release brings with it several breaking changes needed to future-proof the platform. 
You will need to adapt your existing CRDs due to the following breaking changes: + +This release brings with it several breaking changes needed to future-proof the platform. You will need to adapt your +existing CRDs due to the following breaking changes: ==== All Stackable Operators -As mentioned above, product image selection is a breaking for all operators. Previously the product image was declared using `spec.version`: +As mentioned above, product image selection is a breaking for all operators. Previously the product image was declared +using `spec.version`: ``` spec: @@ -914,7 +984,8 @@ This means a stackable version >= 23.1 has to be used for the product image. * https://github.com/stackabletech/druid-operator/pull/333[Reworked top level configuration to support TLS changes] -Deep storage, Ingestion spec, discovery config maps, authentication etc. are now subfields of spec.clusterConfig instead of being top level under spec. Change the resource from e.g. +Deep storage, Ingestion spec, discovery config maps, authentication etc. are now subfields of spec.clusterConfig instead +of being top level under spec. Change the resource from e.g.: ``` zookeeperConfigMapName: simple-druid-znode @@ -945,11 +1016,12 @@ to ``` ==== Stackable Operator for Apache Hadoop -* https://github.com/stackabletech/hdfs-operator/issues/290[Enable Log Aggregation for HDFS] -As part of the change mentioned above we also did some code cleanup that allowed us to remove arbitrary hard-coded values from the operator. +* https://github.com/stackabletech/hdfs-operator/issues/290[Enable Log Aggregation for HDFS] -This change affects the directory structure the operator creates inside of the PersistentVolumes used for permanent storage. +As part of the change mentioned above we also did some code cleanup that allowed us to remove arbitrary hard-coded +values from the operator. 
This change affects the directory structure the operator creates inside of the +`PersistentVolumes` used for permanent storage. The old folder naming was: @@ -964,11 +1036,14 @@ which has now been adopted to match the actual rolename: - NameNode -> `namenode` -Unfortunately, this means that for cluster that where initially rolled out with an older operator version, a one-time migration step becomes necessary to rename these directories. +Unfortunately, this means that for clusters that were initially rolled out with an older operator version, a one-time +migration step becomes necessary to rename these directories. -You can either do this manually by attaching the PVs to a pod and performing the rename (cluster needs to be stopped for this) or use the script provided below. +You can either do this manually by attaching the PVs to a pod and performing the rename (cluster needs to be stopped for +this) or use the script provided below. -WARNING: Please be aware that if this script runs after the cluster was already restarted with the newer operator version it will delete any data that was written to the empty post-upgrade HDFS that was stood up by the new operator. +WARNING: Please be aware that if this script runs after the cluster was already restarted with the newer operator +version it will delete any data that was written to the empty post-upgrade HDFS that was stood up by the new operator.
[source,bash] ---- @@ -983,6 +1058,7 @@ The migration process for this now becomes: * Install newer version of Stackable Operator for Apache Hadoop ==== Stackable Operator for Apache Hive + * https://github.com/stackabletech/hive-operator/pull/292[Moved database specification from role/role-group level to top-level clusterConfig] * https://github.com/stackabletech/hive-operator/pull/292[Moved s3, serviceType and hdfs discovery to top-level clusterConfig] @@ -1018,6 +1094,7 @@ will now be defined like this: ``` ==== Stackable Operator for Apache Kafka + * https://github.com/stackabletech/kafka-operator/pull/527[Remove the tools image and add kcat to the product image] This means a stackable version >= 23.1 has to be used for the product image. @@ -1052,16 +1129,19 @@ spec: ``` ==== Stackable Operator for Apache Nifi + * https://github.com/stackabletech/nifi-operator/pull/397[Removed tools image] This means a stackable version >= 23.1 has to be used for the product image. ==== Stackable Operator for Trino + * https://github.com/stackabletech/trino-operator/pull/357[Removed tools image] This means a stackable version >= 23.1 has to be used for the product image. -* https://github.com/stackabletech/trino-operator/pull/362[Use user and password Secret keys for LDAP bind credentials Secrets, instead of env var names] +* https://github.com/stackabletech/trino-operator/pull/362[Use user and password Secret keys for LDAP bind credentials + Secrets, instead of env var names] This changes the secret definition from: ``` @@ -1492,8 +1572,9 @@ The following operators added support for LDAP authentication: === stackablectl -`stackablectl` now supports deploying ready-to-use demos, which give an end-to-end demonstration of the usage of the Stackable Data Platform. -The xref:stackablectl::quickstart.adoc[quickstart guide] shows how to get started with `stackablectl`. Here you can see the xref:stackablectl::demos/index.adoc[available demos]. 
+`stackablectl` now supports deploying ready-to-use demos, which give an end-to-end demonstration of the usage of the +Stackable Data Platform. The xref:management:stackablectl:quickstart.adoc[quickstart guide] shows how to get started +with `stackablectl`. Here you can see the xref:demos:index.adoc[available demos]. === Supported Kubernetes versions This release supports the following Kubernetes versions: @@ -1772,9 +1853,8 @@ All our operators reuse the same CRD structure as well. [#stackablectl] === stackablectl -`stackablectl` is used to install and interact with the operators, either individually or with multiple at once. -// -xref:stackablectl::index.adoc[Learn more] +xref:management:stackablectl:index.adoc[stackablectl] is used to install and interact with the operators, either +individually or with multiple at once. [#operators] === Operators @@ -1782,39 +1862,39 @@ xref:stackablectl::index.adoc[Learn more] This is the list of all operators in this current release, with their versions for this release. 
.*Products* -* xref:airflow::index.adoc[] (0.4.0) +* xref:airflow:index.adoc[] (0.4.0) ** Load DAGs from ConfigMaps or PersistentVolumeClaims -* xref:druid::index.adoc[] (0.6.0) +* xref:druid:index.adoc[] (0.6.0) ** S3 and HDFS as deep storage options ** ingestion from S3 buckets ** authorization using OPA -* xref:hbase::index.adoc[] (0.3.0) -* xref:hdfs::index.adoc[] (0.4.0) -* xref:hive::index.adoc[] (0.6.0) +* xref:hbase:index.adoc[] (0.3.0) +* xref:hdfs:index.adoc[] (0.4.0) +* xref:hive:index.adoc[] (0.6.0) ** Hive Metastore can index S3 -* xref:kafka::index.adoc[] (0.6.0) +* xref:kafka:index.adoc[] (0.6.0) ** Seamless integration with NiFi and Druid ** Supports OPA authorization -* xref:nifi::index.adoc[] (0.6.0) -* xref:spark-k8s::index.adoc[] (0.3.0) -* xref:superset::index.adoc[] (0.5.0) +* xref:nifi:index.adoc[] (0.6.0) +* xref:spark-k8s:index.adoc[] (0.3.0) +* xref:superset:index.adoc[] (0.5.0) ** connects to Druid as a backend ** Supports LDAP authentication -* xref:trino::index.adoc[] (0.4.0) +* xref:trino:index.adoc[] (0.4.0) ** Supports OPA and file-based authorization ** Connects to the Hive Metastore ** Query data from S3 ** TLS support -* xref:zookeeper::index.adoc[] (0.10.0) +* xref:zookeeper:index.adoc[] (0.10.0) ** Supports creating ZNodes with CRDs Read up on the xref:operators:supported_versions.adoc[supported versions] for each of these products. 
.*Supporting operators* -* xref:opa::index.adoc[] (0.9.0) +* xref:opa:index.adoc[] (0.9.0) ** Create RegoRules in ConfigMaps -* xref:commons-operator::index.adoc[] (0.2.0) -* xref:secret-operator::index.adoc[] (0.5.0) +* xref:commons-operator:index.adoc[] (0.2.0) +* xref:secret-operator:index.adoc[] (0.5.0) === Supported Kubernetes versions This release supports the following Kubernetes versions: diff --git a/modules/concepts/examples/cluster-operations.yaml b/modules/concepts/examples/cluster-operations.yaml index 66fcd191c..31882a693 100644 --- a/modules/concepts/examples/cluster-operations.yaml +++ b/modules/concepts/examples/cluster-operations.yaml @@ -5,5 +5,5 @@ metadata: name: my-cluster spec: clusterOperation: - reconcilePaused: false # <1> + reconciliationPaused: false # <1> stopped: false # <2> diff --git a/modules/concepts/nav.adoc b/modules/concepts/nav.adoc index 0e49bc2b2..220213e91 100644 --- a/modules/concepts/nav.adoc +++ b/modules/concepts/nav.adoc @@ -11,5 +11,8 @@ ** xref:s3.adoc[] ** xref:tls_server_verification.adoc[] ** xref:pod_placement.adoc[] -** xref:cluster_operations.adoc[] ** xref:overrides.adoc[] +** xref:operations/index.adoc[] +*** xref:operations/cluster_operations.adoc[] +*** xref:operations/pod_placement.adoc[] +*** xref:operations/pod_disruptions.adoc[] diff --git a/modules/concepts/pages/cluster_operations.adoc b/modules/concepts/pages/cluster_operations.adoc deleted file mode 100644 index f27d27a9e..000000000 --- a/modules/concepts/pages/cluster_operations.adoc +++ /dev/null @@ -1,22 +0,0 @@ - -= Cluster operations - -Stackable operators offer different cluster operations to control the reconciliation process. This is useful when updating operators, debugging or testing of new settings: - -* `reconcilePaused` - Stop the operator from reconciling the cluster spec. The status will still be updated. -* `stopped` - Stop all running pods but keep updating all deployed resources like `ConfigMaps`, `Services` and the cluster status. 
- -== Example - -[source,yaml] ----- -include::example$cluster-operations.yaml[] ----- -<1> The `clusterOperation.reconcilePaused` flag set to `true` stops the operator from reconciling any changes to the cluster spec. The cluster status is still updated. -<2> The `clusterOperation.stopped` flag set to `true` stops all pods in the cluster. This is done by setting all deployed `StatefulSet` replicas to 0. - -== Notes - -If not specified, `clusterOperation.reconcilePaused` and `clusterOperation.stopped` default to `false`. - -IMPORTANT: When setting `clusterOperation.reconcilePaused` and `clusterOperation.stopped` to true in the same step, `clusterOperation.reconcilePaused` will take precedence. This means the cluster will stop reconciling immediately and the `stopped` field is ignored. To avoid this, the cluster should first be stopped and then paused. diff --git a/modules/concepts/pages/logging.adoc b/modules/concepts/pages/logging.adoc index 686823cc9..dd1822b5d 100644 --- a/modules/concepts/pages/logging.adoc +++ b/modules/concepts/pages/logging.adoc @@ -52,7 +52,7 @@ vectorAggregatorConfigMapName: vector-aggregator-discovery // <1> ---- <1> The discovery ConfigMap of the Vector aggregator to publish the logs to. This is set at cluster level. <2> The logging configuration fragment, can be set at role or role group level. -<3> Enable the Vector agent to aggregate logs. +<3> Enable the Vector agent to aggregate logs. <4> Logging is defined for each container. <5> Console and file appenders can have different log level thresholds. <6> Setting log levels for individual modules is also possible. @@ -87,4 +87,4 @@ logging: == Further Reading -To get some hands on experience and see logging in action, try out the xref:stackablectl::demos/logging.adoc[logging demo]. +To get some hands on experience and see logging in action, try out the xref:demos:logging.adoc[logging demo]. 
diff --git a/modules/concepts/pages/operations/cluster_operations.adoc b/modules/concepts/pages/operations/cluster_operations.adoc new file mode 100644 index 000000000..027f0b19d --- /dev/null +++ b/modules/concepts/pages/operations/cluster_operations.adoc @@ -0,0 +1,22 @@ += Cluster operations +:page-aliases: ../cluster_operations.adoc + +Stackable operators offer different cluster operations to control the reconciliation process. This is useful when updating operators, debugging or testing of new settings: + +* `reconciliationPaused` - Stop the operator from reconciling the cluster spec. The status will still be updated. +* `stopped` - Stop all running pods but keep updating all deployed resources like `ConfigMaps`, `Services` and the cluster status. + +== Example + +[source,yaml] +---- +include::example$cluster-operations.yaml[] +---- +<1> The `clusterOperation.reconciliationPaused` flag set to `true` stops the operator from reconciling any changes to the cluster spec. The cluster status is still updated. +<2> The `clusterOperation.stopped` flag set to `true` stops all pods in the cluster. This is done by setting all deployed `StatefulSet` replicas to 0. + +== Notes + +If not specified, `clusterOperation.reconciliationPaused` and `clusterOperation.stopped` default to `false`. + +IMPORTANT: When setting `clusterOperation.reconciliationPaused` and `clusterOperation.stopped` to true in the same step, `clusterOperation.reconciliationPaused` will take precedence. This means the cluster will stop reconciling immediately and the `stopped` field is ignored. To avoid this, the cluster should first be stopped and then paused. 
diff --git a/modules/concepts/pages/operations/index.adoc b/modules/concepts/pages/operations/index.adoc new file mode 100644 index 000000000..ac50f4f5b --- /dev/null +++ b/modules/concepts/pages/operations/index.adoc @@ -0,0 +1,49 @@ += Operations + +This section of the documentation is intended for the operations teams that maintain a Stackable Data Platform installation. +It provides you with the necessary details to operate it in a production environment. + +== Service availability + +Make sure to go through the following checklist to achieve the maximum level of availability for your services. + +1. Make setup highly available (HA): In case the product supports running in an HA fashion, our operators will automatically + configure it for you. You only need to make sure that you deploy a sufficient number of replicas. Please note that + some products don't support HA. +2. Reduce the number of simultaneous pod disruptions (unavailable replicas). The Stackable operators write defaults + based upon knowledge about the fault tolerance of the product, which should cover most of the use-cases. For details + have a look at xref:operations/pod_disruptions.adoc[]. +3. Reduce impact of pod disruption: Many HA capable products offer a way to gracefully shut down the service running + within the Pod. The flow is as follows: Kubernetes wants to shut down the Pod and calls a hook into the Pod, which in turn + interacts with the product, telling it to gracefully shut down. The final deletion of the Pod is then blocked until + the product has successfully migrated running workloads away from the Pod that is to be shut down. Details covering the graceful shutdown mechanism are described in the actual operator documentation. ++ +WARNING: Graceful shutdown is not implemented for all products yet. Please check the documentation specific to the product operator to see if it is supported (such as e.g. 
xref:trino:usage_guide/operations/graceful-shutdown.adoc[the documentation for Trino]). + +4. Spread workload across multiple Kubernetes nodes, racks, datacenter rooms or datacenters to guarantee availability + in the case of e.g. power outages or fire in parts of the datacenter. All of this is supported by + configuring an https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/[antiAffinity] as documented in + xref:operations/pod_placement.adoc[]. + +== Maintenance actions + +Sometimes you want to quickly shut down a product or update the Stackable operators without all the managed products +restarting at the same time. You can achieve this using the following methods: + +1. Quickly stop and start a whole product using `stopped` as described in xref:operations/cluster_operations.adoc[]. +2. Prevent any changes to your deployed product using `reconciliationPaused` as described in xref:operations/cluster_operations.adoc[]. + +== Performance + +1. You can configure the available resources every product has using xref:concepts:resources.adoc[]. The defaults are + very restrained, as you should be able to spin up multiple products running on your laptop. +2. You can not only use xref:operations/pod_placement.adoc[] to achieve more resilience, but also to co-locate products + that communicate frequently with each other. One example is placing HBase regionservers on the same Kubernetes node + as the HDFS datanodes. Our operators already take this into account and co-locate connected services. However, if + you are not satisfied with the automatically created affinities you can use xref:operations/pod_placement.adoc[] to + configure your own. +3. If you want to have certain services running on dedicated nodes you can also use xref:operations/pod_placement.adoc[] + to force the Pods to be scheduled on certain nodes. This is especially helpful if you e.g.
have Kubernetes nodes with + 16 cores and 64 GB, as you could allocate nearly 100% of these node resources to your Spark executors or Trino workers. + In this case it is important that you https://kubernetes.io/docs/concepts/scheduling-eviction/taint-and-toleration/[taint] + your Kubernetes nodes and use xref:overrides.adoc#pod-overrides[podOverrides] to add a `toleration` for the taint. diff --git a/modules/concepts/pages/operations/pod_disruptions.adoc b/modules/concepts/pages/operations/pod_disruptions.adoc new file mode 100644 index 000000000..89a71bf7f --- /dev/null +++ b/modules/concepts/pages/operations/pod_disruptions.adoc @@ -0,0 +1,94 @@ += Allowed Pod disruptions + +Any downtime of our products is generally considered to be bad. +Although downtime can't be prevented 100% of the time - especially if the product does not support High Availability - we can try to do our best to reduce it to an absolute minimum. + +Kubernetes has mechanisms to ensure minimal *planned* downtime. +Please keep in mind, that this only affects planned (voluntary) downtime of Pods - unplanned Kubernetes node crashes can always occur. + +Our product operator will always deploy so-called https://kubernetes.io/docs/tasks/run-application/configure-pdb/[PodDisruptionBudget (PDB)] resources alongside the products. +For every role that you specify (e.g. HDFS namenodes or Trino workers) a PDB is created. + +== Default values +The defaults depend on the individual product and can be found below the "Operations" usage guide. + +They are based on our knowledge of each product's fault tolerance. +In some cases they may be a little pessimistic, but they can be adjusted as documented in the following sections. + +== Influencing and disabling PDBs + +You can configure + +1. Whether PDBs are written at all +2. The `maxUnavailable` replicas for this role PDB + +The following example + +1. Sets `maxUnavailable` for NameNodes to `1` +2. 
Sets `maxUnavailable` for DataNodes to `10`, which allows downtime of 10% of the total DataNodes. +3. Disables PDBs for JournalNodes + +[source,yaml] +---- +apiVersion: hdfs.stackable.tech/v1alpha1 +kind: HdfsCluster +metadata: + name: hdfs +spec: + nameNodes: + roleConfig: # optional, only supported on role level, *not* on rolegroup + podDisruptionBudget: # optional + enabled: true # optional, defaults to true + maxUnavailable: 1 # optional, defaults to our "smart" calculation + roleGroups: + default: + replicas: 3 + dataNodes: + roleConfig: + podDisruptionBudget: + maxUnavailable: 10 + roleGroups: + default: + replicas: 100 + journalNodes: + roleConfig: + podDisruptionBudget: + enabled: false + roleGroups: + default: + replicas: 3 +---- + +== Using your own custom PDBs +In case you are not satisfied with the PDBs that are written by the operators, you can deploy your own. + +WARNING: In case you write custom PDBs, it is your responsibility to take care of the availability of the products. + +IMPORTANT: It is important to disable the PDBs created by the Stackable operators as described above before creating your own PDBs, as this is a https://github.com/kubernetes/kubernetes/issues/75957[limitation of Kubernetes]. + +*After disabling the Stackable PDBs*, you can deploy your own PDB such as + +[source,yaml] +---- +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: hdfs-journalnode-and-namenode +spec: + maxUnavailable: 1 + selector: + matchLabels: + app.kubernetes.io/name: hdfs + app.kubernetes.io/instance: hdfs + matchExpressions: + - key: app.kubernetes.io/component + operator: In + values: + - journalnode + - namenode +---- + +This PDB allows only one Pod out of all the Namenodes and Journalnodes to be down at one time. + +== Details +Have a look at <<< TODO: link ADR on Pod Disruptions once merged >>> for the implementation details.
diff --git a/modules/concepts/pages/pod_placement.adoc b/modules/concepts/pages/operations/pod_placement.adoc similarity index 97% rename from modules/concepts/pages/pod_placement.adoc rename to modules/concepts/pages/operations/pod_placement.adoc index 4da9970ca..f5aa7c4f6 100644 --- a/modules/concepts/pages/pod_placement.adoc +++ b/modules/concepts/pages/operations/pod_placement.adoc @@ -1,4 +1,5 @@ -= Pod Placement += Pod placement +:page-aliases: ../pod_placement.adoc Several operators of the Stackable Data Platform permit the configuration of pod affinity as described in the Kubernetes https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/[documentation]. If no affinity is defined in the product's custom resource, the operators apply reasonable defaults that make use of the `preferred_during_scheduling_ignored_during_execution` property. Refer to the operator documentation for details. diff --git a/modules/concepts/pages/overrides.adoc b/modules/concepts/pages/overrides.adoc index 1b4ef2d1c..b670a5b60 100644 --- a/modules/concepts/pages/overrides.adoc +++ b/modules/concepts/pages/overrides.adoc @@ -10,6 +10,7 @@ WARNING: Overriding certain configuration properties can lead to faulty clusters The cluster definitions also supports overriding configuration aspects, either per xref:roles-and-role-groups.adoc[role or per role group], where the more specific override (role group) has precedence over the less specific one (role). +[#config-overrides] == Config overrides For a xref:roles-and-role-groups.adoc[role or role group], at the same level of `config`, you can specify `configOverrides` for any of the configuration files the product uses. @@ -44,6 +45,7 @@ The properties will be formatted and escaped correctly into the file format used You can also set the property to an empty string (`my.property: ""`), which effectively disables the property the operator would write out normally. 
In case of a `.properties` file, this will show up as `my.property=` in the `.properties` file. +[#env-overrides] == Environment variable overrides For a xref:roles-and-role-groups.adoc[role or role group], at the same level of `config`, you can specify `envOverrides` for any env variable @@ -75,6 +77,7 @@ spec: You can set any environment variable, but every specific product does support a different set of environment variables. All override property values must be strings. +[#pod-overrides] == Pod overrides For a xref:roles-and-role-groups.adoc[role or role group], at the same level of `config`, you can specify `podOverrides` for any of the attributes you can configure on a Pod. diff --git a/modules/concepts/pages/product_image_selection.adoc b/modules/concepts/pages/product_image_selection.adoc index 1607185a3..34e5f7ed0 100644 --- a/modules/concepts/pages/product_image_selection.adoc +++ b/modules/concepts/pages/product_image_selection.adoc @@ -129,7 +129,7 @@ When deriving images from official Stackable images this will mean updating the * It is not possible to update the Stackable Platform to a new version without changing the deployed cluster definitions when using custom images. 
The recommended process here is: -** Tag clusters as "do not reconcile" (see xref:cluster_operations.adoc[]) +** Tag clusters as "do not reconcile" (see xref:operations/cluster_operations.adoc[]) ** Update Stackable plattform ** Change custom images in cluster specifications ** Remove "do not reconcile flag" diff --git a/modules/contributor/pages/adr/ADR021-stackablectl_stacks_initial_version.adoc b/modules/contributor/pages/adr/ADR021-stackablectl_stacks_initial_version.adoc index 9bc600b67..467ff443b 100644 --- a/modules/contributor/pages/adr/ADR021-stackablectl_stacks_initial_version.adoc +++ b/modules/contributor/pages/adr/ADR021-stackablectl_stacks_initial_version.adoc @@ -58,7 +58,7 @@ By defining our own, thin, abstraction layer we can isolate our users from the c We could do nothing right now and instead use the _create_test_cluster.py_ script as our entry point for the initial release. * Good, this gives us time to design a fully thought out solution before implementing something -* Bad, _create_test_cluster.py_ doesn't feel very polished and we do want to introducet _stackablectl_ to the world as our tool of choice +* Bad, _create_test_cluster.py_ doesn't feel very polished and we do want to introduce _stackablectl_ to the world as our tool of choice === Implement basic definition of stacks diff --git a/modules/contributor/pages/index.adoc b/modules/contributor/pages/index.adoc index a93a5863f..0a7162f16 100644 --- a/modules/contributor/pages/index.adoc +++ b/modules/contributor/pages/index.adoc @@ -2,32 +2,45 @@ Welcome to Stackable! -This document will show you the steps needed to contribute to the Stackable Platform. Contributions can come in many shapes and forms but in this document we'll focus on code and documentation improvements. We'll assume you already know what the Stackable Platform is and have a specific issue that you want solved. +This document will show you the steps needed to contribute to the Stackable Platform. 
Contributions can come in many +shapes and forms but in this document we'll focus on code and documentation improvements. We'll assume you already know +what the Stackable Platform is and have a specific issue that you want solved. This document aims at getting your contribution posted as soon as possible. == Contribution Workflow -The development of the Stackable Platform takes place on https://github.com/stackabletech[GitHub]. On GitHub, each repository has its own issue tracker and since Stackable is a platform, there are several repositories where issues and pull-requests can be created. If you already have a good idea where the issue or pull-request belongs, then you can skip reading to the contribution steps below. Otherwise, you might want to a have a look at the xref:development_dashboard.adoc[] for hints. +The development of the Stackable Platform takes place on https://github.com/stackabletech[GitHub]. On GitHub, each +repository has its own issue tracker and since Stackable is a platform, there are several repositories where issues and +pull-requests can be created. If you already have a good idea where the issue or pull-request belongs, then you can skip +reading to the contribution steps below. Otherwise, you might want to have a look at the +xref:development_dashboard.adoc[] for hints. Contribution steps: -1. Lookup if an existing issue already covers your problem -2. Submit a new issue. If it is only a trivial change like fixing a typo then an issue is not necessary and a pull-request can be created directly. -3. Submit a pull-request if you want. The necessary steps to successfully create an approvable pull-request are detailed in xref:steps.adoc[]. -4. Respond to questions or feedback -5. Improve your submission if necessary -6. Closing your contribution +. Look up if an existing issue already covers your problem +. Submit a new issue.
If it is only a trivial change like fixing a typo then an issue is not necessary and a + pull-request can be created directly. +. Submit a pull-request if you want. The necessary steps to successfully create an approvable pull-request are detailed + in xref:steps.adoc[]. +. Respond to questions or feedback +. Improve your submission if necessary +. Closing your contribution -Before submitting a new issue or pull-request, it's always a good idea to check if the issue has already been discussed in the past and if there is a resolution for it. If that is the case, consider contributing to the existing issue by either adding new feedback or code. +Before submitting a new issue or pull-request, it's always a good idea to check if the issue has already been discussed +in the past and if there is a resolution for it. If that is the case, consider contributing to the existing issue by +either adding new feedback or code. -When creating a new issue please provide as much information as you consider relevant. Issues can be bug reports, feature requests and so on. The Stackable repositories provide templates to make it easier to submit high-quality issues. +When creating a new issue please provide as much information as you consider relevant. Issues can be bug reports, +feature requests and so on. The Stackable repositories provide templates to make it easier to submit high-quality +issues. When submitting a pull-request, you might be interested in knowing about the necessary <>. == Repo Walkthrough -Please see the xref:development_dashboard.adoc[] page to get an overview of the most important Git repositories used by us. +Please see the xref:development_dashboard.adoc[] page to get an overview of the most important Git repositories used by +us. === Operator Repositories @@ -67,11 +80,11 @@ Of course you are free to use whatever works for you best. 
No editor is perfect For `VisualStudio Code` we also recommend the following extensions: -* Better Toml -* CodeLLDB (for debugging) -* Error Lens (inline error messages) -* AsciiDoc -* GitHub Pull requests and Issues -* GitLens -* Python -* Docker +* https://marketplace.visualstudio.com/items?itemName=tamasfe.even-better-toml[Even Better TOML] +* https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb[CodeLLDB] (for debugging) +* https://marketplace.visualstudio.com/items?itemName=usernamehw.errorlens[Error Lens] (inline error messages) +* https://marketplace.visualstudio.com/items?itemName=asciidoctor.asciidoctor-vscode[AsciiDoc] +* https://marketplace.visualstudio.com/items?itemName=GitHub.vscode-pull-request-github[GitHub Pull requests and Issues] +* https://marketplace.visualstudio.com/items?itemName=eamodio.gitlens[GitLens] +* https://marketplace.visualstudio.com/items?itemName=ms-python.python[Python] +* https://marketplace.visualstudio.com/items?itemName=ms-azuretools.vscode-docker[Docker] diff --git a/modules/contributor/pages/steps.adoc b/modules/contributor/pages/steps.adoc index 3e5a22cdc..a5d2fa790 100644 --- a/modules/contributor/pages/steps.adoc +++ b/modules/contributor/pages/steps.adoc @@ -1,34 +1,58 @@ = Steps to contribute +:docs-readme: https://github.com/stackabletech/documentation/blob/main/README.adoc +:templating-repo: https://github.com/stackabletech/operator-templating +:operator-repo: https://github.com/stackabletech/operator-rs +:docker-repo: https://github.com/stackabletech/docker-images +:docs-repo: https://github.com/stackabletech/documentation + This guide covers the steps for internal and external contributors. Differences will be outlined. -Depending on the project, not all steps make sense. For instance, if the documentation was changed then the integration tests need not to be adapted. Please skip the steps which are not applicable. +Depending on the project, not all steps make sense. 
For instance, if the documentation was changed then the integration +tests need not to be adapted. Please skip the steps which are not applicable. == Preparation -1. As an external contributor, please fork the according repository on GitHub. -2. Clone the repository to the local machine or if it is already cloned then make sure to pull the latest changes. Backports to previous releases are not covered in this guide. -3. Create a feature branch. As for now, there is no naming convention. +. As an external contributor, please fork the according repository on GitHub. +. Clone the repository to the local machine or if it is already cloned then make sure to pull the latest changes. + Backports to previous releases are not covered in this guide. +. Create a feature branch. As for now, there is no naming convention. == Changes in Rust projects -1. Make your desired changes in the according repository and test them manually. Ensure that the code compiles without warnings (`cargo clippy --all-targets`) and that the code is formatted with `cargo fmt`. -2. If code was added or adapted then please create or adapt the unit tests in the same file as well as the integration tests in the `tests` directory. Ensure that all unit tests run successfully (`cargo test`) and all integration tests run successfully (`./scripts/run_tests.sh`). See also <<_changes_in_the_integration_tests>>. -3. Comment your code and check with `cargo doc --document-private-items` that there are no syntax errors. -4. The YAML schemas of the custom resource definitions (CRDs) are rebuilt when the project is compiled (see `rust/operator-binary/build.rs` if changing an operator). These CRDs as well as the product configuration are also required in the Helm chart and the Kubernetes manifest. To ensure that everything is in a consistent state, please execute `make regenerate-charts`. -5. If it is useful for the users of the project to know about the change then it must be added to the changelog. 
For instance, if only the dependencies in an operator are upgraded but nothing changes for the user then the upgrade should not be added to the changelog. Conversely, if the dependencies in the https://github.com/stackabletech/operator-rs[operator framework] are upgraded then changes are probably required in the operators (which are the clients of the framework) and therefore the upgrade must be mentioned in the changelog. The changelog must be formatted according to https://keepachangelog.com/en/1.1.0/[keep a changelog]. +. Make your desired changes in the according repository and test them manually. Ensure that the code compiles without + warnings (`cargo clippy --all-targets`) and that the code is formatted with `cargo fmt`. +. If code was added or adapted then please create or adapt the unit tests in the same file as well as the integration + tests in the `tests` directory. Ensure that all unit tests run successfully (`cargo test`) and all integration tests + run successfully (`./scripts/run_tests.sh`). See also <<_changes_in_the_integration_tests>>. +. Comment your code and check with `cargo doc --document-private-items` that there are no syntax errors. +. The YAML schemas of the custom resource definitions (CRDs) are rebuilt when the project is compiled (see + `rust/operator-binary/build.rs` if changing an operator). These CRDs as well as the product configuration are also + required in the Helm chart and the Kubernetes manifest. To ensure that everything is in a consistent state, please + execute `make regenerate-charts`. +. If it is useful for the users of the project to know about the change then it must be added to the changelog. For + instance, if only the dependencies in an operator are upgraded but nothing changes for the user then the upgrade + should not be added to the changelog.
Conversely, if the dependencies in the {operator-repo}[operator framework] are + upgraded then changes are probably required in the operators (which are the clients of the framework) and therefore + the upgrade must be mentioned in the changelog. The changelog must be formatted according to + https://keepachangelog.com/en/1.1.0/[keep a changelog]. == Changes in the product images -1. The code for building the product images can be found in the https://github.com/stackabletech/docker-images[docker-images repository]. Please follow the steps in <>. -2. Make the desired changes. -3. Add an entry to the product image changelog and use the pattern `-stackable` to reference the next image version. The `` follows semantic versioning and is independent of the upstream version. -4. If a new version of a product was added then the following tasks must be performed: - * Add the new version to the supported ones in the documentation of the operators (see `docs/modules/\{product name\}/partials/supported-versions.adoc` in the operator repositories). - * Update the operator to support the new version if necessary. - * Update the examples in the operator to use the new version. - * Update the integration tests. The tests should cover the latest patch version of each supported versions. -5. Run the integration tests with the new product image. The image can be built and uploaded to the kind cluster with the following commands: +. The code for building the product images can be found in the {docker-repo}[docker-images repository]. Please follow + the steps in <>. +. Make the desired changes. +. Add an entry to the product image changelog and use the pattern `-stackable` to + reference the next image version. The `` follows semantic versioning and is independent of + the upstream version. +. 
If a new version of a product was added then the following tasks must be performed: + * Add the new version to the supported ones in the documentation of the operators (see + `docs/modules/\{product name\}/partials/supported-versions.adoc` in the operator repositories). + * Update the operator to support the new version if necessary. + * Update the examples in the operator to use the new version. + * Update the integration tests. The tests should cover the latest patch version of each supported versions. +. Run the integration tests with the new product image. The image can be built and uploaded to the kind cluster with the + following commands: + [source,bash] ---- @@ -40,10 +64,18 @@ See the output of `build_product_images.py` to retrieve the image tag for `> for the integration tests as well. -2. Create or adapt the tests. Try to mimic the style of the other tests. They are written with https://kuttl.dev/[KUTTL] and using a jinja2 templating mechanism to test multiple product versions at once. -3. Start a test cluster using https://kind.sigs.k8s.io/[kind]. You can also use xref:stackablectl::commands/operator.adoc#_install_operator[stackablectl] to install a kind cluster. -4. If changes in an operator are tested then the according version of the operator must be started. The operator can be started outside the cluster with `cargo run -- crd | kubectl apply -f - && cargo run -- run`. This approach allows a fast test develop cycle but has the downside that the RBAC rules are not tested. So a proper Helm installation should be tested before creating a pull request. First a Docker image of the operator must be built locally and uploaded to the kind cluster and then the Helm chart must be installed. This can be achieved in the operator directory with the following commands: +. Most code changes should also be tested with integration tests. The tests for every operator can be found in the + operator repository in the `tests` directory. 
Follow the steps in <> for the integration tests as well. +. Create or adapt the tests. Try to mimic the style of the other tests. They are written with https://kuttl.dev/[KUTTL] + and using a jinja2 templating mechanism to test multiple product versions at once. +. Start a test cluster using https://kind.sigs.k8s.io/[kind]. You can also use + xref:management:stackablectl:commands/operator.adoc#_install_operator[stackablectl] to install a kind cluster. +. If changes in an operator are tested then the according version of the operator must be started. The operator can be + started outside the cluster with `cargo run -- crd | kubectl apply -f - && cargo run -- run`. This approach allows a + fast test develop cycle but has the downside that the RBAC rules are not tested. So a proper Helm installation should + be tested before creating a pull request. First a Docker image of the operator must be built locally and uploaded to + the kind cluster and then the Helm chart must be installed. This can be achieved in the operator directory with the + following commands: + [source,bash] ---- @@ -52,28 +84,49 @@ kind load docker-image docker.stackable.tech/stackable/:-dev helm install deploy/helm// ---- -5. Run the tests from the repository root with `./scripts/run_tests.sh`. +. Run the tests from the repository root with `./scripts/run_tests.sh`. == Changes in the documentation -1. The Stackable Platform documentation can be found at https://docs.stackable.tech/. The documentation is built with Antora from the sources in the https://github.com/stackabletech/documentation[documentation repository] and the `docs` directories in the operator repositories. Follow the steps in <> to be able to change the documentation. -2. Make your changes. -3. Build the documentation locally to ensure that the formatting is fine and all links are specified correctly. 
See the https://github.com/stackabletech/documentation/blob/main/README.adoc[`README.adoc`] file for further details and the xref:style_guide.adoc[] for style and formatting guidelines. +. The Stackable Platform documentation can be found at https://docs.stackable.tech/. The documentation is built with + Antora from the sources in the {docs-repo}[documentation repository] and the `docs` directories in the operator + repositories. Follow the steps in <> to be able to change the documentation. +. Make your changes. +. Build the documentation locally to ensure that the formatting is fine and all links are specified correctly. See the + {docs-readme}[`README.adoc`] file for further details and the xref:style_guide.adoc[] for style and formatting + guidelines. == Changes in the operator-templating -1. Files which are common in all operators are handled and rolled out by the https://github.com/stackabletech/operator-templating[operator-templating]. Follow the steps in <> to check out the repository. -2. Make your changes. -3. Test the changes locally. Create the directory `work`, clone all operators into this directory, and run the `test.sh` script. The changes can be examined with `git status`. When the pull request is later merged into the `main` branch then pull requests with these changes will be created automatically. Depending on the change, it makes sense to run the integration tests for all changed operators. If the tests are not run in this stage and if there is even just one integration test failing in the subsequentially generated pull requests then the operator-templating must be adapted which creates again pull requests for all operators. Changes in the GitHub workflow actions cannot be tested until finally merged. +. Files which are common in all operators are handled and rolled out by the {templating-repo}[operator-templating]. + Follow the steps in <> to check out the repository. +. Make your changes. +. Test the changes locally. 
Create the directory `work`, clone all operators into this directory, and run the `test.sh` + script. The changes can be examined with `git status`. When the pull request is later merged into the `main` branch + then pull requests with these changes will be created automatically. Depending on the change, it makes sense to run + the integration tests for all changed operators. If the tests are not run in this stage and if there is even just one + integration test failing in the subsequently generated pull requests then the operator-templating must be adapted + which creates again pull requests for all operators. Changes in the GitHub workflow actions cannot be tested until + finally merged. == Create pull requests -1. Finally, pull requests must be created for all adapted repositories. - * Have a look at the review checklist and ensure that all applicable points are fulfilled. - * Create a comprehensive description of the changes. - * Link the according issue to the pull request by using a keyword like "Closes". - * Add references to other pull requests, like the pull request in the integration-tests repository which contains test cases for the change in the operator pull request. - * Select a reviewer. Usually "stackabletech/developers" is a good choice. - * If you are an internal contributor then assign yourself to the issue. -2. All pull requests must pass a quality gate before they can be merged. This gate consists of required and not strictly required checks which are performed by automated GitHub checks, as well as the mentioned checklist which is checked manually in the review. The number of checks seems to be overwhelming but in practice they can be quite easily fulfilled if following this guide. A properly set-up development environment (see xref:index.adoc#_development_environment[Development Environment]) makes it even easier because the most critical steps are performed automatically like showing Clippy warnings while developing and formatting the code.
Have a look at the status of the checks after they are processed and fix them. The `reviewdog` checks are not mandatory and can be ignored if the according change was intentionally. For instance, if a Kubernetes secret was added to the examples of an operator then the `detect-secrets` steps could fail which is okay in this case. -3. After the pull request is approved, it can be merged. Internal contributors merge them on their own. Pull request from external contributors are merged by the approver. +. Finally, pull requests must be created for all adapted repositories. + * Have a look at the review checklist and ensure that all applicable points are fulfilled. + * Create a comprehensive description of the changes. + * Link the according issue to the pull request by using a keyword like "Closes". + * Add references to other pull requests, like the pull request in the integration-tests repository which contains test + cases for the change in the operator pull request. + * Select a reviewer. Usually "stackabletech/developers" is a good choice. + * If you are an internal contributor then assign yourself to the issue. +. All pull requests must pass a quality gate before they can be merged. This gate consists of required and not strictly + required checks which are performed by automated GitHub checks, as well as the mentioned checklist which is checked + manually in the review. The number of checks seems to be overwhelming but in practice they can be quite easily + fulfilled if following this guide. A properly set-up development environment (see + xref:index.adoc#_development_environment[Development Environment]) makes it even easier because the most critical + steps are performed automatically like showing Clippy warnings while developing and formatting the code. Have a look + at the status of the checks after they are processed and fix them. The `reviewdog` checks are not mandatory and can be + ignored if the according change was intentionally. 
For instance, if a Kubernetes secret was added to the examples of + an operator then the `detect-secrets` steps could fail which is okay in this case. +. After the pull request is approved, it can be merged. Internal contributors merge them on their own. Pull request from + external contributors are merged by the approver. diff --git a/modules/demos/nav.adoc b/modules/demos/nav.adoc new file mode 100644 index 000000000..7b7bc8720 --- /dev/null +++ b/modules/demos/nav.adoc @@ -0,0 +1,5 @@ +* xref:demos:index.adoc[Demos] ++ +-- +include::demos:partial$demos.adoc[] +-- \ No newline at end of file diff --git a/modules/tutorials/pages/authentication_with_openldap.adoc b/modules/tutorials/pages/authentication_with_openldap.adoc index 60cd13c59..bd3928321 100644 --- a/modules/tutorials/pages/authentication_with_openldap.adoc +++ b/modules/tutorials/pages/authentication_with_openldap.adoc @@ -1,40 +1,54 @@ = Authentication with OpenLDAP -The Stackable platform supports user authentication with LDAP in multiple products. This page guides you through setting up the configuration necessary to use an exising LDAP installation with Stackable supported products. You can learn more about authentication in the Stackable Platform on the xref:concepts:authentication.adoc[concepts page]. +The Stackable platform supports user authentication with LDAP in multiple products. This page guides you through setting +up the configuration necessary to use an existing LDAP installation with Stackable supported products. You can learn +more about authentication in the Stackable Platform on the xref:concepts:authentication.adoc[concepts page]. Prerequisites: * a k8s cluster available, or https://kind.sigs.k8s.io/[kind] installed -* xref:stackablectl::index.adoc[] installed -* basic knowledge of how to create resources in Kubernetes (i.e. 
`kubectl apply -f .yaml`) and inspect them (`kubectl get` or a tool like https://k9scli.io/[k9s]) +* xref:management:stackablectl:index.adoc[] installed +* basic knowledge of how to create resources in Kubernetes (e.g. `kubectl apply -f .yaml`) and inspect them + (`kubectl get` or a tool like https://k9scli.io/[k9s]) == Setup -Before configuring LDAP you need to set up some services to configure, as well as an LDAP server to use for authentication. This tutorial is about learning how to configure LDAP, so there won't be detailed instructions on how to set up all of this, instead the tutorial uses xref:stackablectl::commands/stack.adoc[stackablectl Stacks]. +Before configuring LDAP you need to set up some services to configure, as well as an LDAP server to use for +authentication. This tutorial is about learning how to configure LDAP, so there won't be detailed instructions on how to +set up all of this, instead the tutorial uses the xref:management:stackablectl:commands/stack.adoc[`stackablectl stacks`] +command. === Superset and Trino Stack -This command will install the latest Stackable release for you, and set up the `trino-superset-s3` Stack. It contains a Superset instance connected to a Trino instance, and all their dependencies (Minio S3, PostgreSQL). Later in this guide, a Superset and Trino instance will be configured to use LDAP. +This command will install the latest Stackable release for you, and set up the `trino-superset-s3` Stack. It contains a +Superset instance connected to a Trino instance, and all their dependencies (Minio S3, PostgreSQL). Later in this guide, +a Superset and Trino instance will be configured to use LDAP. -If you do not have a Kubernetes cluster already, add the `--kind-cluster` (or `-k`) flag to let stackablectl deploy one for you. +If you do not have a Kubernetes cluster already, add the `--cluster kind` (or `-c kind`) flag to let stackablectl deploy +one for you.
[source,bash] include::example$ldap-auth/10-install-base.sh[tag=stackablectl-install] This command will take a few minutes to complete. -NOTE: The stack installed here is used in the xref:stackablectl::demos/trino-taxi-data.adoc[`trino-taxi-data` demo]. Click the link to learn more. +NOTE: The stack installed here is used in the xref:demos:trino-taxi-data.adoc[`trino-taxi-data` demo]. Click the link +to learn more. [#setup_login] ==== Inspect -Before starting to add configuration to your Stackable cluster, inspect what the command above has set up in your Kubernetes cluster. +Before starting to add configuration to your Stackable cluster, inspect what the command above has set up in your +Kubernetes cluster. -Use `stackablectl svc list` to find the endpoints of Superset and Trino and open their web interfaces in the Browser. +Use `stackablectl stacklets list` to find the endpoints of Superset and Trino and open their web interfaces in the +browser. -You can log into Superset with user _admin_ and password _adminadmin_, and into Trino with user _admin_ and password _adminadmin_. +You can log into Superset with user _admin_ and password _adminadmin_, and into Trino with user _admin_ and password +_adminadmin_. -These are the current users defined in Superset's and Trino's internal user management. Later you will see that these users cannot be used for authentication anymore after LDAP authentication has been enabled. +These are the current users defined in Superset's and Trino's internal user management. Later you will see that these +users cannot be used for authentication anymore after LDAP authentication has been enabled. === OpenLDAP Stack @@ -116,7 +130,7 @@ Remember the name of the AuthenticationClass (`openldap`), you will use it in th [#superset] === Add LDAP authentication to Superset -To make Superset use your new LDAP AuthenticationClass, you have to update the SupersetCluster definition. 
A SupersetCluster named `superset` is already installed by the stack. +To make Superset use your new LDAP AuthenticationClass, you have to update the SupersetCluster definition. A SupersetCluster named `superset` is already installed by the stack. Fetch the existing SupersetCluster defintion from the Kubernetes API server and save it into a `superset.yaml` file: diff --git a/modules/tutorials/pages/end-to-end_data_pipeline_example.adoc b/modules/tutorials/pages/end-to-end_data_pipeline_example.adoc index f0191db09..6f14beefb 100644 --- a/modules/tutorials/pages/end-to-end_data_pipeline_example.adoc +++ b/modules/tutorials/pages/end-to-end_data_pipeline_example.adoc @@ -1,11 +1,15 @@ = Setting up an end-to-end data pipeline -In this tutorial you will set up a data pipeline, from raw data to visualization. You read data from S3 using NiFi, send it to Kafka, from there it is ingested into Druid, and lastly you visualize the data using Superset. +In this tutorial you will set up a data pipeline, from raw data to visualization. You read data from S3 using NiFi, send +it to Kafka, from there it is ingested into Druid, and lastly you visualize the data using Superset. == About this tutorial -The purpose of this tutorial is a deeper exploration of the Stackable platform and its features. It is not a guide to building a robust data pipeline. +The purpose of this tutorial is a deeper exploration of the Stackable platform and its features. It is not a guide to +building a robust data pipeline. -This tutorial is intended to run in a private network or lab; it does not enable many security features such as authentication or encryption and should not be directly connected to the Internet. Be careful if you are deploying in the cloud as your instances may default to using public IPs. +This tutorial is intended to run in a private network or lab; it does not enable many security features such as +authentication or encryption and should not be directly connected to the Internet. 
Be careful if you are deploying in +the cloud as your instances may default to using public IPs. == Before you begin @@ -14,7 +18,7 @@ You should make sure that you have everything you need: * A running Kubernetes cluster * https://kubernetes.io/docs/tasks/tools/#kubectl[kubectl] to interact with the cluster * https://helm.sh/[Helm] to deploy third-party dependencies -* xref:stackablectl::installation.adoc[stackablectl] to install and interact with Stackable operators +* xref:management:stackablectl:installation.adoc[stackablectl] to install and interact with Stackable operators + [NOTE] ==== @@ -32,16 +36,19 @@ Instructions for installing via Helm are also provided throughout the tutorial. == Nifi and Kafka -This section shows how to instantiate the first part of the entire processing chain, which will ingest CSV files from an S3 bucket, split the files into individual records and send these records to a Kafka topic. +This section shows how to instantiate the first part of the entire processing chain, which will ingest CSV files from an +S3 bucket, split the files into individual records and send these records to a Kafka topic. === Deploy the Operators -The resource definitions rolled out in this section need their respective Operators to be installed in the K8s cluster. I.e. to run a Kafka instance, the Kafka Operator needs to be installed. +The resource definitions rolled out in this section need their respective Operators to be installed in the K8s cluster. +For example, running a Kafka instance requires the Kafka Operator. ==== Secret Operator -The xref:secret-operator::index.adoc[Secret Operator] is needed by the Stackable Operator for Apache NiFi, as NiFi requires the UI to be served via HTTPS. -The necessary certificates and keys for this are provided by the Secret Operator to the NiFi Pods. +The xref:home:secret-operator:index.adoc[Secret Operator] is needed by the Stackable Operator for Apache NiFi, as NiFi +requires the UI to be served via HTTPS. 
The necessary certificates and keys for this are provided by the Secret Operator +to the NiFi Pods. [source,bash] stackablectl operator install secret @@ -57,8 +64,10 @@ helm install secret-operator stackable-stable/secret-operator ==== ZooKeeper Operator -Apache NiFi and Apache Kafka both use Apache ZooKeeper as backing config storage, so the xref:zookeeper::index.adoc[Stackable Operator for Apache ZooKeeper] has to be installed in order to make sure that a ZooKeeper cluster can be rolled out. -There is no need to install multiple ZooKeeper clusters, as NiFi, Kafka and Druid can share the same cluster via provisioning a ZNode per backed service. +Apache NiFi and Apache Kafka both use Apache ZooKeeper as backing config storage, so the +xref:home:zookeeper:index.adoc[Stackable Operator for Apache ZooKeeper] has to be installed in order to make sure that a +ZooKeeper cluster can be rolled out. There is no need to install multiple ZooKeeper clusters, as NiFi, Kafka and Druid +can share the same cluster via provisioning a ZNode per backed service. [source,bash] stackablectl operator install zookeeper @@ -90,8 +99,8 @@ helm install kafka-operator stackable-stable/kafka-operator ==== NiFi Operator -NiFi is an ETL tool which will be used to model the dataflow of downloading and splitting files from S3. -It will also be used to convert the file content from CSV to JSON. +NiFi is an ETL tool which will be used to model the dataflow of downloading and splitting files from S3. It will also be +used to convert the file content from CSV to JSON. [source,bash] stackablectl operator install nifi @@ -127,7 +136,8 @@ EOF === Deploying Kafka and NiFi -To deploy Kafka and NiFi you can now apply the cluster configuration. Run the following command in the console to deploy and configure all three services. +To deploy Kafka and NiFi you can now apply the cluster configuration. Run the following command in the console to deploy +and configure all three services. 
[source,bash] kubectl apply -f - < This is the version of Superset used for this instance. You can find the Superset versions supported by Stackable in the xref:superset::index.adoc[Superset Operator documentation]. +<1> This is the version of Superset used for this instance. You can find the Superset versions supported by Stackable in + the xref:home:superset:index.adoc[Superset Operator documentation]. <2> This is the reference to the Secret you created earlier. On the first deployment of the Superset cluster, the Operator will also initialize the database. Once the database is initialized, you can connect to the cluster. diff --git a/supplemental-ui/partials/navbar.hbs b/supplemental-ui/partials/navbar.hbs index ad7ce7c44..393cd4c4b 100644 --- a/supplemental-ui/partials/navbar.hbs +++ b/supplemental-ui/partials/navbar.hbs @@ -1,8 +1,14 @@ Home Getting Started Concepts -Tutorials -stackablectl +Demos +