diff --git a/assets/xks/operator-guide/blue-green.drawio b/assets/xks/operator-guide/blue-green.drawio new file mode 100644 index 00000000000..9ba8f073cd1 --- /dev/null +++ b/assets/xks/operator-guide/blue-green.drawio @@ -0,0 +1 @@ +7ZfJbtswEIafxscYWr0c460B0rQFDDRALgZDjSXGFCmQlJc8fUcSZVlVY7SAU7doL7b4c8YU/4+ekXr+NN1/UCRLHmQEvOc50b7nz3qe53pugF+FcqiUcDiohFixyAY1wpK9ghUdq+YsAt0KNFJyw7K2SKUQQE1LI0rJXTtsLXl71YzE0BGWlPCu+sgik1h1EAbNxB2wOKmXdgfjaiYldbTdik5IJHcnkj/v+VMlpamu0v0UeOFebUyVt3hj9nhnCoT5mYSXz/dz31/e6clgpR5AfQ2eHm8sjS3hud3xLTVsC/aWzaE2AiL0xQ6lMomMpSB83qgTJXMRQbGYg6Mm5qOUGYouii9gzMFCJrmRKCUm5Xa2WrNY6M0dWknLXFE4s636qBAVgzkT5x854AkGmYJRB8xTwEnpQus42aMUH+Mas/HC+v0L3g873n8BETERX8P8iOikzL8SieCaJNwOCdiTNOPQpzLt0CA6q0rNmu0Ly05tzCQTpry9cNILZ6gQzmKBAkXrQKHA0rLmTNZSGIvD9Rp9xlI8AAvOnvGTvOYKis0JMDupNuXhWMw+LVdPUoDu622MmVtQhmHNurVLmYK5hVjMwf48xq7tNmEYhFVKXb1HXjXeNbVwYCthclIFa+3ioPwOqM1I1/hOEOkNGJrYf8Nv5JVqSrB2Lu7zZ1DI7IjoAjAGjtOGMXY6MEKnCyMYvROM4IcwvH8CxtgJ/ywYbreGTRUQc5VOjm3stnj4wqHAOlUpC1bsqPxJje3A1BGUE60ZrWUbduE2NPob2tCog3BFaAo3NCGcg4ih/78tVS8H4Xel8P36Eg6bR/Ry7uRNx59/Aw== \ No newline at end of file diff --git a/assets/xks/operator-guide/deployment-structure.drawio b/assets/xks/operator-guide/deployment-structure.drawio new file mode 100644 index 00000000000..026f8d6344d --- /dev/null +++ b/assets/xks/operator-guide/deployment-structure.drawio @@ -0,0 +1 @@ 
+7Zlbj6IwFMc/jcnugxtoAfFx1suYzbqTrMnOuG8VKrAWakq9zaffokVAlDiZ0aKZF+VcCvT3h9PT0ICdcP3I0NwfUheTBtDcdQN2GwDosG2Iv8Sz2XnaLW3n8FjgyqTMMQpesXSmaYvAxXEhkVNKeDAvOh0aRdjhBR9ijK6KaVNKiledIw+XHCMHkbL3OXC5L72WaWSBAQ48P720brV3kRCl2XIqsY9cusq5YK8BO4xSvjsK1x1MEnopmN24/ono/s4Yjvg5A/6CQfQ0jMfh71Z/FjyD0WC0bMppxHyTzhi7AoA0KeM+9WiESC/zfmd0Ebk4OasmrCznJ6Vz4dSF8x/mfCPVRAtOhcvnIZFRccNs8yLHb41xYnwzU7O7zge7G2mVZywhxHTBHFwxzfTRQczDvCIP7PISBrkLSJ6PmIZY3I9IYJggHiyLDwmSz5q3z8vUEAdSkDeIYyoRZx3wl0wOYY1zkUyZxNjkZaqloLBWgsrzLhFZyCt5dIlZhCIx2UOti0qu/IDj0RxtqaxEsS2qNg0I6VBC2XYsdE1su4bwx5zRGc5FbDCBllXFXtwQx+tKWjIKbFnaZHGHhrRXuVKZlj8/VyUt7UKAQQXgpkfoBJEP5Ty1Hew4xzhPbNMwK5/x8znDA86Gcs6wijPDXpCUnDsgbSonresljHe5QttnFvR2rQq6XXoPHMpusZSboG6lvH15tNepKRaoW/XW1TSW128SwZlFRVfWJg7nvyaTV9B3XCcE5vThz/LHU9rF3Hvff0D70hobKiWFn5K+S9KjUNXu5cp7DTSLxalw8ntzDUDLqlsDoJc3GXvAt7uXs63adQPGac5foq93wPiqu7jTleok4qa6zrYE9Qj6szlbyjm3bq+xfeuSeXTexpkrpqmyB7I/e6APF9RSKWh54fAXEzW9z7vqmK5pCpuf0y/qRdFeZ4nQdU1hv3P6lSmuxa/JZ1wxoTvge8leR5jZ5+ptLPfVH/b+Aw== \ No newline at end of file diff --git a/assets/xks/operator-guide/ingress-networking.drawio b/assets/xks/operator-guide/ingress-networking.drawio new file mode 100644 index 00000000000..a56d84abbae --- /dev/null +++ b/assets/xks/operator-guide/ingress-networking.drawio @@ -0,0 +1 @@ 
+7VpbV+I6FP41PNLVC709CniZ44zH0fGM48us0KYl2jacNGjx10/SprQlLYIIOmvpg5CdS5vv+/ZOskPPGMXZKQGz6Tfsw6inq37WM8Y9XdcGps0+uGVRWCxXKwwhQb5oVBmu0TMURlVY58iHaaMhxTiiaNY0ejhJoEcbNkAIfmo2C3DUfOoMhFAyXHsgkq0/kU+nwmqZg6riDKJwWj5as9yiJgZlazGVdAp8/FQzGcc9Y0QwpsW3OBvBiKNXAlP0O+moXb4ZgQndpIM1+LHIRv98ubi5Pz0/usn+vz0/62tiHo8gmospi7elixKDkOD5rGcMHyGhiEFzFKEwYVUUc2sEJjC6xCmiCHOrx14HklrzrysNJphSHLMGQIyz7JEPNQTeA39i4o9whJl57MMAzCM2x6F4VzYwzNroB5PynVUZHW2JOVMrxDGkZMGaiIFMQ3QRQjVsUX6qaNctYZvWGS+fBYTUwuXYFRnsi+BjG24MiZvTCE9AJDGUwwX5WCpD6WmKKLyeAY/XPjG/ZLYpjdmzxxr76oN0mrctC5eAMvyT3KKr3BqgKCrR7+lGEAS65zF7Sgl+gLUa35pYptWijSXJnZTVqVkjTZmwgxCSBb/Pjk4Hd98HM+3k7kw/wj9gGyEBxsoEEIkRkM6KcBSgjINdJ2CGUULzdzOHPXPc5gkozuPSMMAJFUFR0yv7GMUhm0GEJuw/eJ4TyGeWQPqEyQNKeN344vr3HU5gqqSP4WvctPDuTl47fDVhj1yyviHFLfLodFPdaHip7khOag1kSZS2t1eELikCZiCeRVDxGEyfqjiMKjT73WTROoMyAtVkcQVDhuABYvd6CraP7FtLJEa+zycn63e3RWK9BAYr67fpyOu3ZbQsFwN1b8vFy1urinxtS/I78NnGawaW3oTMNSXITMuRITMG5u6QXfxn3g9J/1t6Aq++2944cE5PNoEsfYDUmwp3OWBAjVMPQPZ5Pp8wh4N0GUPfgglzRbyuLF6zTbvOngKYK/HwE3DUV8mAPjstiSImdIpDHuOOK+uwGd6qNl8xX0lyou4hpQsBP5hT3KQRZoje8u6KKUq/ajXjTIycFxZlIWEg3NYLv6oReLHqlpfKfsX8+KTWRyGGAZ4TD65xfqtoRwEJ4brFzm4XBoERoOix+R5tNIuul1zslaAsrSko23UVt/a3opviLcUYlXSOCAGLWjPhUZ1PNcuzr3iqY5srSixGrHS5nPEOa60cM0YEAsp81coPjRO2O7dC/u0Kepj46QtL8Gs0utSbopt1yWkfT3DFIaYzFKmK5q7Q2BfbpNdqsmyCgyCFtLcam7bTwLq5N3bhfFsEor6fvMT3C6vuG4R3U3UUswGpZcg71OW5tbFF3VtyQT65/KUhXml4nPqCx+WlS0gQg5HvAd7aDe1N3bBDMhs72W4HFJn99wya9nsFzY3Zejlo6k0P1z96zLQlBXgsoPVjkLC9r5zQOnTMtBzro0VM+VD/d0ZMTVFVrdfcGA/sd46bzoaeuOt+eacTvSMpACUhgWnaT0KUZO/uNI5lfLiNhoyZBFNxwdSVA9r9bsfStAYojtFyt2O3gOLuCxQ5+O6mne3vdHwAnaA182d5DpwEO13dOO107AHumxSSfyf3PAmkq3mqsuhKYUoVKRW/Pl2n1t5akNiVd2qDfklT2qRpBXh+Kj7W24AP8j8pczVUWbxmXj1SFcvgHzyZpY/yCi036ytWt9WaD7Ha0u0Y2M57s/qWQbQVG8+2N9rmybZ0CmYchTgL+U8DlIcqb4Y8nigezsiX/MuY300c4Hp5G/FuE3UNV7GbUbc8XL19+o4Va5LfJhktp0km4Fnpur/8VPun2jvu2pw8yXUYuW+4M29JAXK8AoYYP9J+avtQ2vaI37kBeUNttzvPRvuV9eK29eZO0Tb3dhHDitVvv4pDffUTOuP4Dw== \ No newline at end of file diff --git 
a/assets/xks/operator-guide/multi-region.drawio b/assets/xks/operator-guide/multi-region.drawio new file mode 100644 index 00000000000..e786e605fb1 --- /dev/null +++ b/assets/xks/operator-guide/multi-region.drawio @@ -0,0 +1 @@ +5VhRc6IwEP41PN6NEEB8vKp3fbh7qQ9tHyOsQBtZJgYBf/0lEgRK69gZb5Arzmj2201Ivi9uFgwy3xa/OE2jPxgAM6xJUBhkYViWSWa2/FFIWSGz6aQCQh4HOqgBVvEBNFiHZXEAu06gQGQiTrugj0kCvuhglHPMu2EbZN27pjSEHrDyKeujj3EgIo26jt047iEOo/rWpjurPFtaR+ul7CIaYN6CyNIgc44oqta2mANT7NXEVP1+fuA9zYxDIi7p4IPnpRk+5PeHPR6W64cf68k3PcqeskyvOGS4pkxPWZQ1ERyzJAA11MQgd3kUC1il1FfeXEovsUhsmbRM2dzEjM2RIT/2JYEDXmBLfCc4vkLL41lr4rrSo6cBXEDx4frME2tyvwFuQfBShugOpCZabzWntvOWcDUWtTRzNUb1XglPQzdsyoYm9BPkWn1yUS4xoYnkbfwE24MT7PZYhED+fbWJXEQYYkLZskHvujw3Mb8RU83uCwhR6lxEM4Fd7iVbvHzS/Y/GszK+O7W5KNrORamtaq5qguf5l+vBjPtwZt1EZ0LKQxBn4pz39eTAqIj33XlcXRxvEHGKWDw1ckjrueVplFFG2ZZpHIJOhxSU9NLZI+yERJYZx/S6GW3jqI/CMREtvLrey3Tu8bpOpnNuLtM5Pe595GM8RNybO6WnPWrp606OBOp79Py6g/M76/EbZesRMvu2/Bme2foh6cvVP6Z14XlpkiEPTNMcRJ9xlkCXa2oPqmn/oW6VQwCJxOaKnCs/Ow9bCE3dWyuEzH4VOtJKyHNvrRIy7f+qFHpL8L88sKXZvMk7+lovRMnyLw== \ No newline at end of file diff --git a/assets/xks/operator-guide/xkf-overview.drawio b/assets/xks/operator-guide/xkf-overview.drawio new file mode 100644 index 00000000000..02a70dd8ee7 --- /dev/null +++ b/assets/xks/operator-guide/xkf-overview.drawio @@ -0,0 +1 @@ 
+7Vpdk5o6GP41XtYhCUS4XHW77bQ9Z3vsTNvLABEyIqEQv/rrT4JB+YhdO6ur7qwXEp68CcnzvnnyTqCHRvP1Q06y+AsPadKDVrjuoXEPQug4QF4UstkiwLLcLRLlLNTYHpiw37Qy1OiChbRoGArOE8GyJhjwNKWBaGAkz/mqaTblSfOpGYloB5gEJOmi31koYo1ix95XfKAsiqtHA+xta+akstZTKWIS8lUNQvc9NMo5F9vSfD2iiaKvImbb7v2B2t3IcpqKYxp8/3fjJzP8c+M+isnXX7YTP35+h7e9LEmy0DP+j0aMp0S1HNOlHrrYVITkfJGGVHVp9dBwFTNBJxkJVO1KxoDEYjFP5B2QxSXNBZNk3iUsSiUmuDLoDlzPRZnTdQ3SE3mgfE5FvpEmutbGmtRN6361d9IAaSyu+Qd5GiQ6MKJd33vqZEGz9xdM2h0mn8fdCVhyQJMlx+2yhB0DS9A5F0voaZaKGRVBrEkiRbZd1lO2VszVGco4S0U5QGfYc8bKWsdZIDmjuQTYvFzfwylPhRYXAPf4mM0jOY2E+WoyRUCovH5a+DRPqaBFv1hGJ4pXt+UJp+sJkyNs90x+AOBpR0QyXjOTD44nZCfMxK96tf5IFHZRkyjP7hAFoGMIWYD7GJ+LLdhh6xtNiZw9tIbPW+ZTliQjnvC8bItCQt1pIPFC5HxGazU4cKk/lTUhKeKyd7NK/NndR0dsxXG1cSt2MXCQa2//B123WIYABp7X97xdKwzO5qK/VZaaD3aM7vxVNP3VchJ0B849NDlpWv462jS0+pYlCR1ZfYzURckVHJUVoIRhC/WMaNlF29I70PGgbC3rDZ2AFgbdlm0ppzJjyRQL83Wk0rv+bK+MTK5rNc38Y1kYp8XpI7EWWzZQ4Vf72aatXY7fHWAPIqv8P1Okwa50juT2QlgqtxxoqfypqJ5zKlmQQQUDoyyE2McOPs0u5VmtXcrq7lIAGlY5Ple6ALvC+5mTUMkuSUgaKMZvjmZg2Rfk+ePYDwaZXM4/hvlsPf32KL56756fDRy99RvIO5zAwhZRL50NGMm68mTA4Ik644f9f4u5gHE2b6nANaQCJ4zDy2cC5tEeEWiXkM3dirwq2QTdE5Lb1030qoQTOG/KeavKeSASr1U6uwfPVyGdwGqfmV6Hdg5eoXbi16Wd7pt23qp2HojEy2un8VCkqwUPCfev/I0drt6L6vWMDGceqJK5xgJG5zpc6i7Y8T+Tk4rpy5wnuTbsN8USGV4vvfCBktch9+73Iqc9iBP57KGfy1KkSneBYEuqYpflcu/mlzo5fUI4nohu7yVdIG/3nw2UdbXPL9D9/w== \ No newline at end of file diff --git a/docs/xks/operator-guide/architecture.md b/docs/xks/operator-guide/architecture.md new file mode 100644 index 00000000000..ab59c0a92ba --- /dev/null +++ b/docs/xks/operator-guide/architecture.md @@ -0,0 +1,87 @@ +--- +id: architecture +title: Architecture +--- + +import useBaseUrl from '@docusaurus/useBaseUrl'; + +XKF is a opinionated method to deploy and run Kubernetes clusters. Currently XKF supports running on both AKS in Azure and on EKS in AWS. 
Its core consists of a set of Terraform Modules which configure everything from developer governance to the actual Kubernetes infrastructure and applications running inside of the cluster. XKF's access model is tenant based which allows for multiple development teams to have access to a part of the same Kubernetes cluster in which they can deploy application workloads. + +XKF Overview + +XKF aims to enable Kubernetes cluster flexibility, allowing for quick replacement of a Kubernetes cluster with limited developer involvement. This has influenced the choice of continuous deployment solution and other tools. Another choice is that XKF is built on the idea that all Kubernetes clusters are stateless. This means that any state like databases, object store, etc. has to be located outside of the Kubernetes cluster. XKF does however provide a golden path for how tenants can connect their applications to external services like databases or other cloud services. + +## Tenants + +Tenants in XKF are used to separate access between teams or products. There is no real requirement for how tenants should be organized or who should belong to which tenant. It should instead reflect the organization structure of the end user. When deploying XKF in Azure a tenant is given a resource group to create cloud resources in, while in AWS the counterpart is a separate account. Currently there is a 1:1 relationship between tenants and Kubernetes namespaces, meaning that each tenant will only receive a single namespace. This is subject for change in the future but is a strict limitation currently. + +## Environments + +XKF is designed to be deployed as multiple identical environments. The environments will run separately from each other with no access in between them. Every environment will contain a single Kubernetes cluster, which is where all workloads should be running. Having multiple environments in the same Kubernetes cluster is not supported, and will most likely not work. 
The goal should be to keep each environment deployment as similar as possible, with the main difference being the resource scaling as certain environments may have more load than others. There is no real requirement on environment names or ordering but the recommended environments to use are `dev`, `qa` and `prod` in that order. Having more or fewer environments is possible, but remember that running all environments in a single deployment is not possible. There should be a determined ordering of the environments in which changes are applied, this should reflect the order in which applications are deployed. + +## Components + +### Kubernetes + +### DNS + +### Container Registry + +## Deployment Structure + +XKF is built with the help of [Terraform](https://www.terraform.io/) to create and manage cloud resources. XKF consists of multiple Terraform modules from the GitHub repository [xenitab/terraform-modules](https://github.com/xenitAB/terraform-modules). These modules together contain all configuration required to set up XKF. For instructions on how to set up XKF refer to the [getting started guide](./getting-started). XKF is designed to be split up into separate Terraform states. This is partly to keep the blast radius limited in a state corruption event but also to enable multiple deployments of XKF in different regions. Some states will only contain a single module while others can contain multiple modules in the same state. The setup may be different depending on the cloud provider in which XKF is deployed but they will also share some modules. + +:::info +Currently XKF does not support running multiple cloud providers. It is not possible to run different cloud providers per environment, nor is it possible to run two Kubernetes clusters from different cloud providers in the same environment. This is subject for change in the future. +::: + +The diagram below gives an overview of the different Terraform states and the modules which are used within each. 
The green boxes represent the individual Terraform states while the red boxes represent the modules. + +Deployment Structure + + + +### Governance + +The governance state configures an Azure AD landing zone, which XKF bases its tenant model on. The governance state consists of two Terraform modules, a [global](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/governance-global) and a [regional](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/governance-regional) one. The global module creates all Azure AD resources which do not belong to a specific region, while the regional module will set up Azure AD resources for a specific region. Azure AD resources are global within a tenant, so any resources created in the regional module have to contain the region name in the identifier to not conflict with the same resource created by other regional modules in the same tenant. + +### Core + +The core state contains the Azure [core](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/core) or AWS [core](https://github.com/XenitAB/terraform-modules/tree/main/modules/aws/eks-core) module which configures all networking resources required by XKF. This includes resources such as VNET/VPC, subnets, and network peering configuration. + +### EKS/AKS + +This is most likely the largest state in XKF. It contains three separate modules which create and configure an AKS or EKS cluster. The [aks-global](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/aks-global) or [eks-global](https://github.com/XenitAB/terraform-modules/tree/main/modules/aws/eks-global) module creates resources which are shared across [blue/green clusters](./blue-green) and which should not be removed during cluster replacement, these resources are things like IAM/MSI used by containers to authenticate towards the cloud provider. 
The [aks](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/aks) or [eks](https://github.com/XenitAB/terraform-modules/tree/main/modules/aws/eks) module creates the actual Kubernetes cluster in the cloud provider together with the nodepools. The [aks-core](https://github.com/XenitAB/terraform-modules/tree/main/modules/kubernetes/aks-core) or [eks-core](https://github.com/XenitAB/terraform-modules/tree/main/modules/kubernetes/eks-core) module configures resources inside of the Kubernetes cluster. + +#### Blue/Green Clusters + +Only a single instance of the global module has to be configured but the other two modules are configured in pairs to handle blue/green deployments of clusters. This deployment strategy allows for changes to Kubernetes clusters to be applied to a new cluster, with the old cluster still running. To differentiate between the clusters they are suffixed with either a `1` or a `2` in their name. The new cluster will deploy the exact same application manifests as the old cluster. It will however provision new certificates as they are ephemeral. The new cluster will also attempt to create DNS records for all Ingress resources in the cluster, but will not be able to as it will not overwrite the older clusters existing records. The DNS record overwrite can be enabled manually when traffic should be shifted from the old to new cluster. Refer to the [blue/green documentation](./blue-green) for detailed information about this process. While traffic is being shifted across clusters there will be two clusters for the same environment. + +Blue Green + +### Hub + +The hub state can contain the modules [hub](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/hub), [azure-pipelines-agent-vmss](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/azure-pipelines-agent-vmss), and [github-runner](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/github-runner). 
The hub state sets up non-application workload resources like CI runners. + +## Cloud Regions + +By default XKF should be deployed in a single region, there are however use cases which require multiple regions. Most cloud providers have a concept of regions. A region tends to be a collection of data centers located geographically close to each other. Cloud services can be separated into those which are global and regional. Regional services live in a specific region and tend to be limited in their data access to other services in the same region, while global services are not region specific and can be used by multiple regions. These constraints add some additional constraints to the XKF design as there may be requirements to run XKF in multiple regions in the same cloud account. Multi region as a concept can be very broad and have different definitions for different people, XKF supports the following multi region use cases. + +* Separate Markets - Separate deployments to comply with GDPR or other legal restrictions. +* Parallel - An environment contains multiple clusters located in different regions. Ingress traffic is distributed between the different regions. +* Failover - Workloads run only in a single region, but traffic can be shifted over to a new region in a full region loss situation. + +The simplest multi region deployment is serving multiple different markets. This is generally required when laws do not allow data transfer to other countries. In these situations the recommended method is to set up two totally separate XKF deployments. They will for all intents and purposes be considered two separate customers with a single region each. Doing this gives the highest level of separation between the two market deployments reducing the risk of accidentally transferring data between regions or accidentally exposing information. 
Managing ingress traffic has to be solved separately, either through the use of a CDN or serving DNS records based on geographic location. There are also options for how to manage application deployments. The different clusters can still reconcile from the same Git repository, enabling simpler deployment workflows where the same update can be applied to two different markets at the same time. This is however all up to the end users who should configure this in their tenant's GitOps repository. + +The other option is to deploy XKF in multiple parallel regions. The method in which these regions are used can be very different but the deployment method will still be the same. XKF has an opinionated method to deal with multi region deployments with Terraform. There are resources linked to bootstrapping which have to be located in a specific region. This includes things like Terraform state and encryption keys, which are not possible to replicate across multiple Azure regions. Additionally a resource group has to exist within a specific region where metadata is stored. For this reason it is required to have a "home" region from which globally shared resources will be created. The regional components of XKF should all be located in their own separate regions. This means placing the Terraform state for the regional deployments into separate regions, doing this is important as placing all Terraform state into the same region would create a bottleneck. + +:::info +A detail that will not be discussed in this architecture is how to deal with stateful cloud services in multiple regions. This is a separate topic which requires architecture specific to business logic, so there is no one size fits all design. Keep in mind that while XKF may support running in multiple regions at the same time, this may not be the case for applications that depend on existing state. 
+::: + +A core design principal of all XKF Terraform modules is that every resource has to be able to be deployed in a multi region setting. There are some exceptions to this like the global governance resources which are shared across all regions. In Azure individual resource groups are created with the region name in the resource group while in AWS the same account can be used for multiple regions. Some cloud resources, while located in a resource group or region, may have unique naming restrictions. These restrictions can be enforced across all regions in the same account or globally in all of Azure or AWS. Additionally resources like Azure AD are global per tenant, meaning there may occur conflicts between XKF deployments in different regions. For this reason all resources created in XKF should include both the environment name and the region name where possible, to avoid any potential conflicts between XKF deployments in different regions. + +When running XKF with multiple regions the Terraform states have to be organized into global states and regional directories. The global states will only have to exist in a single instance, and represent the resources which are shared between resources. While the Kubernetes cluster resources have multiple regional replicas which represent each region deployment. + +Mulit Region + diff --git a/docs/xks/operator-guide/index.md b/docs/xks/operator-guide/index.md deleted file mode 100644 index 6b416a3f1f7..00000000000 --- a/docs/xks/operator-guide/index.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -id: index -title: Overview ---- - -import useBaseUrl from '@docusaurus/useBaseUrl'; - -[Xenit Kubernetes Service](https://xenit.se/it-tjanster/kubernetes-service/) (XKS) is an opinionated Kubernetes deployment on a public cloud provider's managed Kubernetes service. It combines a cloud provider's managed Kubernetes offering and ancillary services with additional configuration and services that run on top of Kubernetes. 
- -## Architecture - -XKS is set up from a set of Terraform modules that when combined creates the full XKS service. There are multiple individual states that all fulfill their own purpose and build -upon each other in a hierarchical manner. The first setup requires applying the Terraform in the correct order, but after that ordering should not matter. Separate states are used -as it allows for a more flexible architecture that could be changed in parallel. -XKS Overview - -## Network diagram - -Looking at a cluster, the simple network diagram looks like this: - -XKS Overview - -## Terraform modules - -The following Terraform modules are used in XKS. - -### Governance - -Governance is split into [global](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/governance-global) and [regional](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/governance-regional), it handles the creation and delegation of Azure Resource Groups, Azure KeyVaults, Azure AD groups, Service Principals and resources like that. - -### Core - -[Core](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/core) sets up the main network for an environment. - -### Hub - -[Hub](https://github.com/XenitAB/terraform-modules/tree/main/modules/azure/hub) is setup in the production subscription and is used for things like Azure Pipelines agents. - -### AKS - -The AKS Terraform contains three modules that are used to setup a Kubernetes cluster. To allow for blue/green deployments of AKS clusters resources have to be split up into -global resources that can be shared between the clusters, and cluster specific resources. - -The aks-global module contains the global resources like ACR, DNS and Azure AD configuration. - -The aks and aks-core modules create a AKS cluster and configures it. This cluster will have a suffix, normally a number to allow for temporarily creating multiple clusters -when performing a blue/green deployment of the clusters. 
Namespaces will be created in the cluster for each of the configured tenants. Each namespaces is linked to a resource -group in Azure where namespace resources are expected to be created. -AKS Resource Groups diff --git a/docs/xks/operator-guide/networking.md b/docs/xks/operator-guide/networking.md index 4ac0d5fe3ba..3606ce82eef 100644 --- a/docs/xks/operator-guide/networking.md +++ b/docs/xks/operator-guide/networking.md @@ -9,7 +9,15 @@ debugging networking issues. ## Kubernetes -TBD +### Ingress + +Ingress is all traffic that originates from outside the Kubernetes cluster with a destination inside the cluster. The majority of ingress traffic is HTTP but TCP ingress traffic is also possible. There are multiple components which make ingress work. Load balancers, DNS records, TLS certificates, and Ingress Controllers are all required to achieve a production ready ingress solution in Kubernetes. The diagram below shows an overview of all the components involved in ingress networking. + +Ingress Networking + +An important note is that the DNS zone in which the records are created does not belong to a specific cluster or region. It is instead global to the specific environment. This is the case regardless of the cloud or DNS provider used. Additionally XKF supports multiple DNS zones in the same cluster. This is useful if for example different applications should be exposed with totally different DNS records. + +The ingress controller is the application where all HTTP traffic will first reach inside of the Kubernetes cluster. It will forward requests to the correct destination based on request parameters. The ingress controller has an accompanying load balancer with an IP which is routable from outside of the Kubernetes cluster. Requests to this IP will reach the ingress controller. It is possible to run multiple ingress controllers in Kubernetes but XKF only has a single [NGINX ingress controller](https://github.com/kubernetes/ingress-nginx). 
DNS records are managed by [external-dns](https://github.com/kubernetes-sigs/external-dns) in the Kubernetes clusters. It looks at Ingress resources in the Kubernetes cluster and creates DNS records in the correct zone. The IP which the DNS record points towards is the load balancer which directs traffic to the ingress controller. Lastly certificates have to be provisioned. This is done with the help of [cert-manager](https://github.com/cert-manager/cert-manager). It runs in each cluster and provisions new certificates through [Let's Encrypt](https://letsencrypt.org/). The certificate provisioning process includes a validation process to verify ownership of the DNS record for which the certificate is being created. XKF uses [DNS01 challenges](https://letsencrypt.org/docs/challenge-types/#dns-01-challenge) in favor of [HTTP01 challenges](https://letsencrypt.org/docs/challenge-types/#http-01-challenge) to accomplish this for two specific reasons. The first being that HTTP01 challenges do not allow creation of wildcard certificates. The second being that the validation process would not work with how XKF implements blue/green cluster upgrades, as the requests to `http://<domain>/.well-known/acme-challenge/<token>` would have to be routed to the new cluster. The DNS01 challenge works by creating a TXT record at `_acme-challenge.<domain>` with the value of the TXT record being a specific token value. The record only has to exist during the certificate provisioning and can be removed after it is complete. 
### Node Local DNS diff --git a/sidebars.js b/sidebars.js index dab0a9e5156..864e9f0ac99 100755 --- a/sidebars.js +++ b/sidebars.js @@ -47,8 +47,8 @@ module.exports = { "type": "category", "label": "Operator Guide", "items": [ - "xks/operator-guide/index", "xks/operator-guide/getting-started", + "xks/operator-guide/architecture", "xks/operator-guide/agents", "xks/operator-guide/networking", "xks/operator-guide/blast-radius", diff --git a/static/img/assets/xks/operator-guide/blue-green.jpg b/static/img/assets/xks/operator-guide/blue-green.jpg new file mode 100644 index 00000000000..6f2c531e290 Binary files /dev/null and b/static/img/assets/xks/operator-guide/blue-green.jpg differ diff --git a/static/img/assets/xks/operator-guide/deployment-structure.jpg b/static/img/assets/xks/operator-guide/deployment-structure.jpg new file mode 100644 index 00000000000..2da3267b1d0 Binary files /dev/null and b/static/img/assets/xks/operator-guide/deployment-structure.jpg differ diff --git a/static/img/assets/xks/operator-guide/ingress-networking.jpg b/static/img/assets/xks/operator-guide/ingress-networking.jpg new file mode 100644 index 00000000000..35be66570ad Binary files /dev/null and b/static/img/assets/xks/operator-guide/ingress-networking.jpg differ diff --git a/static/img/assets/xks/operator-guide/multi-region.jpg b/static/img/assets/xks/operator-guide/multi-region.jpg new file mode 100644 index 00000000000..d8552cd520a Binary files /dev/null and b/static/img/assets/xks/operator-guide/multi-region.jpg differ diff --git a/static/img/assets/xks/operator-guide/xkf-overview.jpg b/static/img/assets/xks/operator-guide/xkf-overview.jpg new file mode 100644 index 00000000000..61b63e8903f Binary files /dev/null and b/static/img/assets/xks/operator-guide/xkf-overview.jpg differ