diff --git a/.flake8 b/.flake8 new file mode 100644 index 0000000000..63e7cad58a --- /dev/null +++ b/.flake8 @@ -0,0 +1,5 @@ +[flake8] +max-line-length = 79 +# E203: whitespace before :, flake8 disagrees with PEP-8 +# W503: line break after binary operator, flake8 disagrees with PEP-8 +ignore = E203, W503 diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 25289bcb46..392431d196 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -6,40 +6,50 @@ name: CI - "master" jobs: - yamllint: + lint: runs-on: ubuntu-latest steps: - - name: Checkout - uses: actions/checkout@v2 + - uses: actions/checkout@v3 + + - name: Set up go + uses: actions/setup-go@v3 + + - name: Install helm-docs + run: go install github.com/norwoodj/helm-docs/cmd/helm-docs@latest + env: + GOBIN: /usr/local/bin/ - - name: yaml-lint - uses: ibiqlik/action-yamllint@master + - name: Set up Python + uses: actions/setup-python@v4 with: - config_file: ".yamllint.yml" + python-version: "3.10" + + - name: Run pre-commit + uses: pre-commit/action@v3.0.0 helm: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.9 - name: Install test dependencies - run: pip install -r tests/requirements.txt + run: pip install . 
- name: Expand modified charts - run: tests/expand-services + run: expand-charts - name: Set up chart-testing - uses: helm/chart-testing-action@v2.2.0 + uses: helm/chart-testing-action@v2.3.1 - name: Run chart-testing (lint) run: ct lint --all --config ct.yaml @@ -52,10 +62,11 @@ jobs: minikube: name: Test deploy runs-on: ubuntu-latest + needs: [helm] steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Filter paths uses: dorny/paths-filter@v2 @@ -70,14 +81,15 @@ jobs: - "science-platform/values-minikube.yaml" - "services/*/Chart.yaml" - "services/*/templates/**" + - "services/*/values.yaml" - "services/*/values-minikube.yaml" - name: Setup Minikube if: steps.filter.outputs.minikube == 'true' - uses: manusa/actions-setup-minikube@v2.4.3 + uses: manusa/actions-setup-minikube@v2.7.2 with: - minikube version: 'v1.24.0' - kubernetes version: 'v1.22.5' + minikube version: 'v1.28.0' + kubernetes version: 'v1.25.2' - name: Test interaction with the cluster if: steps.filter.outputs.minikube == 'true' @@ -86,16 +98,17 @@ jobs: - name: Download installer dependencies if: steps.filter.outputs.minikube == 'true' run: | - curl -sSL -o /tmp/vault.zip https://releases.hashicorp.com/vault/1.9.1/vault_1.9.1_linux_amd64.zip + curl -sSL -o /tmp/vault.zip https://releases.hashicorp.com/vault/1.12.1/vault_1.12.1_linux_amd64.zip unzip /tmp/vault.zip sudo mv vault /usr/local/bin/vault sudo chmod +x /usr/local/bin/vault - sudo curl -sSL -o /usr/local/bin/argocd https://github.com/argoproj/argo-cd/releases/download/v2.1.7/argocd-linux-amd64 + sudo curl -sSL -o /usr/local/bin/argocd https://github.com/argoproj/argo-cd/releases/download/v2.5.1/argocd-linux-amd64 sudo chmod +x /usr/local/bin/argocd sudo apt-get install socat sudo pip install -r installer/requirements.txt - name: Run installer + timeout-minutes: 30 if: steps.filter.outputs.minikube == 'true' run: | cd installer diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index 
f1e4a44686..8ecb1ec9e3 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -4,6 +4,8 @@ name: Docs pull_request: paths: - "docs/**" + - "services/*/Chart.yaml" + - "services/*/values.yaml" push: branches-ignore: # These should always correspond to pull requests, so ignore them for @@ -18,32 +20,35 @@ name: Docs - "*" paths: - "docs/**" + - "services/*/Chart.yaml" + - "services/*/values.yaml" jobs: docs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: - python-version: 3.9 + python-version: "3.10" - name: Python install run: | - python -m pip install --upgrade pip - python -m pip install -r docs/requirements.txt + python -m pip install --upgrade pip tox + python -m pip install -e ".[dev]" python -m pip install ltd-conveyor - name: Install graphviz run: sudo apt-get install graphviz - - name: Build - run: | - cd docs - make html + - name: Run tox + uses: lsst-sqre/run-tox@v1 + with: + python-version: "3.10" + tox-envs: "docs" # Only attempt documentation uploads for long-lived branches, tagged # releases, and pull requests from ticket branches. This avoids version diff --git a/.gitignore b/.gitignore index 80d17d262b..5b7ee1e96e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,167 @@ /installer/docker-creds /services/*/charts/*.tgz /services-expanded/ -.DS_Store **/Chart.lock + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +docs/api/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +.idea/workspace.xml + +.idea/inspectionProfiles/Project_Default.xml + +services/alert-stream-broker/charts/.DS_Store + +services/.DS_Store + +services/alert-stream-broker/.DS_Store + +.idea/modules.xml + +.idea/phalanx.iml + +.idea/vcs.xml + +.DS_Store + +.idea/inspectionProfiles/profiles_settings.xml + +services/alert-stream-broker/.idea/alert-stream-broker.iml + +services/alert-stream-broker/.idea/vcs.xml + +services/alert-stream-broker/.idea/inspectionProfiles/profiles_settings.xml + +services/alert-stream-broker/.idea/inspectionProfiles/Project_Default.xml + +services/alert-stream-broker/.idea/modules.xml + +services/alert-stream-broker/.idea/misc.xml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000000..8452563f31 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,45 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.4.0 + hooks: + - id: trailing-whitespace + - id: check-toml + + - repo: https://github.com/adrienverge/yamllint.git + rev: v1.29.0 + hooks: + - id: yamllint + args: + - "-c=.yamllint.yml" + + - repo: https://github.com/norwoodj/helm-docs + rev: v1.11.0 + hooks: + - id: helm-docs + args: + - "--chart-search-root=." 
+ # The `./` makes it relative to the chart-search-root set above + - "--template-files=./helm-docs.md.gotmpl" + + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + additional_dependencies: + - toml + + - repo: https://github.com/psf/black + rev: 23.1a1 + hooks: + - id: black + + - repo: https://github.com/asottile/blacken-docs + rev: 1.13.0 + hooks: + - id: blacken-docs + additional_dependencies: [black==23.1a1] + + - repo: https://github.com/PyCQA/flake8 + rev: 6.0.0 + hooks: + - id: flake8 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000000..6b5e25a46c --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019-2022 Association of Universities for Research in Astronomy, Inc. (AURA) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000000..8fd55e5a25 --- /dev/null +++ b/Makefile @@ -0,0 +1,16 @@ +.PHONY: +help: + @echo "Make targets for Phalanx:" + @echo "make init - Set up dev environment (install pre-commit hooks)" + +.PHONY: +init: + pip install --upgrade pre-commit tox + pre-commit install + pip install -e ".[dev]" + rm -rf .tox + +.PHONY: +clean: + rm -rf .tox + make -C docs clean diff --git a/README.rst b/README.rst index d53d0ad30a..31218daa26 100644 --- a/README.rst +++ b/README.rst @@ -22,11 +22,6 @@ IDF: * `data-int.lsst.cloud `__ (idfint) * `data.lsst.cloud `__ (idfprod) -NCSA: - -* `lsst-lsp-int.ncsa.illinois.edu `__ (int) -* `lsst-lsp-stable.ncsa.illinois.edu `__ (stable) - Telescope and Site: * `tucson-teststand.lsst.codes `__ (tucson-teststand) diff --git a/docs/.gitignore b/docs/.gitignore index f1d176e896..61fb47c494 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1,2 +1,3 @@ /_build /_static/*.png +.venv diff --git a/docs/Makefile b/docs/Makefile index 4ec685ce45..02d05fdc68 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -1,52 +1,11 @@ -# Makefile for Sphinx documentation -# - -# You can set these variables from the command line. -SPHINXOPTS = -SPHINXBUILD = sphinx-build -PAPER = -BUILDDIR = _build - -# Internal variables. -IMAGES = _static/notebook-tap.png _static/portal-tap.png -PAPEROPT_a4 = -D latex_paper_size=a4 -PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -# the i18n builder cannot share the environment and doctrees with the others -I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . +# Makefile for Sphinx documentation. +# Use tox -e docs,docs-linkcheck to build the docs. 
.PHONY: help help: @echo "Please use \`make ' where is one of" - @echo " html to make standalone HTML files" - @echo " linkcheck to check all external links for integrity" - @echo " dummy to check syntax errors of document sources" + @echo " clean delete builds" .PHONY: clean clean: - rm -rf $(BUILDDIR)/* - -_static/notebook-tap.png: - python _static/notebook-tap.py - -_static/portal-tap.png: - python _static/portal-tap.py - -.PHONY: html -html: $(IMAGES) - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html - @echo - @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." - -.PHONY: linkcheck -linkcheck: $(IMAGES) - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck - @echo - @echo "Link check complete; look for any errors in the above output " \ - "or in $(BUILDDIR)/linkcheck/output.txt." - -.PHONY: dummy -dummy: - $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy - @echo - @echo "Build finished. Dummy builder generates no files." + rm -rf _build/* diff --git a/docs/_rst_epilog.rst b/docs/_rst_epilog.rst new file mode 100644 index 0000000000..42dd4962ea --- /dev/null +++ b/docs/_rst_epilog.rst @@ -0,0 +1,46 @@ +.. _1Password: https://1password.com/ +.. _Apache Avro: https://avro.apache.org/ +.. _Apache Kafka: https://kafka.apache.org/ +.. _Argo CD: https://argoproj.github.io/argo-cd/ +.. _CILogon: https://www.cilogon.org/home +.. _ConfigMap: https://kubernetes.io/docs/concepts/configuration/configmap/ +.. _Confluent Schema Registry: https://docs.confluent.io/current/schema-registry/index.html +.. _DataLink: https://www.ivoa.net/documents/DataLink/ +.. _Data Management workflow guide: https://developer.lsst.io/work/flow.html +.. _Deployments: +.. _Deployment: https://kubernetes.io/docs/concepts/workloads/controllers/deployment/ +.. _Docker: https://www.docker.com/ +.. _Documentation Style Guide: https://developer.lsst.io/user-docs/index.html +.. _FastAPI: https://fastapi.tiangolo.com/ +.. _Felis: https://felis.lsst.io/ +.. 
_Google Documentation Style Guide: https://developers.google.com/style/ +.. _Google Filestore: https://cloud.google.com/filestore +.. _Helm: https://helm.sh +.. _helm-docs: https://github.com/norwoodj/helm-docs +.. _Ingress: https://kubernetes.io/docs/concepts/services-networking/ingress/ +.. _InfluxDB: https://www.influxdata.com/ +.. _IVOA: https://ivoa.net/documents/ +.. _Kubernetes: https://kubernetes.io/ +.. _LSST Vault Utilites: https://github.com/lsst-sqre/lsstvaultutils/ +.. _Namespace: https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/ +.. _`lsst-sqre/phalanx`: +.. _ObsTAP: https://www.ivoa.net/documents/ObsCore/ +.. _Phalanx repository: https://github.com/lsst-sqre/phalanx +.. _Pods: +.. _Pod: https://kubernetes.io/docs/concepts/workloads/pods/ +.. _pre-commit: https://pre-commit.com +.. _Roundtable: https://roundtable.lsst.io/ +.. _Safir: https://safir.lsst.io/ +.. _Secret: https://kubernetes.io/docs/concepts/configuration/secret/ +.. _semantic versioning: https://semver.org/ +.. _Services: +.. _Service: https://kubernetes.io/docs/concepts/services-networking/service/ +.. _SODA: https://ivoa.net/documents/SODA/ +.. _Sphinx: https://www.sphinx-doc.org/en/master/ +.. _TAP: https://www.ivoa.net/documents/TAP/ +.. _Telegraf: https://www.influxdata.com/time-series-platform/telegraf/ +.. _tox: https://tox.wiki/en/latest/ +.. _UWS: https://www.ivoa.net/documents/UWS/ +.. _Vault: https://www.vaultproject.io/ +.. _Vault Secrets Operator: https://github.com/ricoberger/vault-secrets-operator +.. 
_venv: https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#creating-a-virtual-environment diff --git a/docs/about/contributing-docs.rst b/docs/about/contributing-docs.rst new file mode 100644 index 0000000000..1ace4686d7 --- /dev/null +++ b/docs/about/contributing-docs.rst @@ -0,0 +1,76 @@ +################################# +Contributing to the documentation +################################# + +This documentation is a Sphinx_ project hosted out of the ``docs`` directory of the `phalanx repository`_ on GitHub. +You can contribute to this documentation by editing the source files in a clone of this repository and submitting a pull request on GitHub. +This page provides the basic steps. + +Set up for documentation development +==================================== + +Cloning phalanx +--------------- + +Start by cloning Phalanx into your own editing environment. +Members of the `lsst-sqre/phalanx`_ repository on GitHub can clone the repository directly and create a ticket branch, per the `Data Management workflow guide`_. +Otherwise, fork lsst-sqre/phalanx `following GitHub's guide `__. + +Set up pre-commit +----------------- + +Phalanx uses Pre-commit_ to lint files and, in some cases, automatically reformat files. +Follow the instructions in :doc:`precommit-and-helm-docs`. + +Initialize the development environment +-------------------------------------- + +From the ``phalanx`` directory, initialize your environment: + +.. code-block:: bash + + make init + +This steps installs tox_, the tooling for builds with isolated Python environments, and pre-commit_, a tool for linting and formatting files (see :doc:`precommit-and-helm-docs`). + +Compiling the documentation +=========================== + +Use the tox_ ``docs`` environment for compiling the documentation: + +.. code-block:: bash + + tox -e docs + +The built documentation is located in the ``docs/_build/html`` directory. 
+ +Sphinx caches build products and in some cases you may need to delete the build to get a consistent result: + +.. code-block:: bash + + make clean + +Checking links +============== + +Links in the documentation are validated in the GitHub Actions workflow, but you can also run this validation on your local clone: + +.. code-block:: bash + + tox -e docs-linkcheck + +Submitting a pull request and sharing documentation drafts +========================================================== + +Members of the `lsst-sqre/phalanx`_ repository should submit pull requests following the `Data Management workflow guide`_. +Note that GitHub Actions builds the documentation and uploads a draft edition of the documentation to the web. +You can find your branch's development edition at https://phalanx.lsst.io/v. + +If you are submitting a GitHub pull request from a fork, the documentation will build as a check, however the draft won't upload for public staging. + +More information on writing documentation +========================================= + +When writing documentation for Rubin Observatory, refer to our `Documentation Style Guide`_, based on the `Google Documentation Style Guide`_, for guidelines on writing effective documentation content. + +For technical tips on writing Sphinx documentation, see the `reStructuredText Style Guide `__ and `Documenteer's documentation for User guides `__. diff --git a/docs/about/index.rst b/docs/about/index.rst new file mode 100644 index 0000000000..4457544e45 --- /dev/null +++ b/docs/about/index.rst @@ -0,0 +1,22 @@ +##### +About +##### + +This section helps you understand the crucial concepts behind Phalanx, and how to work with and contribute to the `phalanx repository`_. + +After you have reviewed this documentation, see the :doc:`/developers/index` section to develop and deploy applications, or the :doc:`/admin/index` section to operate a Kubernetes cluster with Phalanx applications. + +.. 
toctree:: + :maxdepth: 1 + :caption: Design + + introduction + repository + secrets + +.. toctree:: + :maxdepth: 1 + :caption: Contributing + + precommit-and-helm-docs + contributing-docs diff --git a/docs/about/introduction.rst b/docs/about/introduction.rst new file mode 100644 index 0000000000..7c4ec0d857 --- /dev/null +++ b/docs/about/introduction.rst @@ -0,0 +1,120 @@ +######################################### +Overview of the Phalanx platform concepts +######################################### + +Rubin Observatory's application deployments, like the Rubin Science Platform, run in Kubernetes_ clusters. +Phalanx is how these application deployments are defined — both generally, and specifically for each Kubernetes cluster. +In a nutshell, Phalanx is a Git repository containing Helm charts for individual applications (like websites and web APIs) that are configured for multiple environments (like different data access centers and production/development versions of each). +`Argo CD`_ instances synchronize these application deployment manifests into the Kubernetes cluster of each environment. + +Expanding on that, this page briefly introduces the Phalanx's key features, terminology, and technology ecosystem. + +Kubernetes and Docker containers +================================ + +Phalanx deploys applications on Kubernetes_ clusters — where "cluster" refers to one or more compute nodes that provide CPU, storage, and networking. + +Kubernetes_ is a *container orchestration* system. +These Docker_ containers are isolated environments where instances of an application (such as a web API or website) run. +Containers are instances of Docker *images* and those images are the built products of individual application codebases. + +Kubernetes layers upon Docker by running multiple containers according to configuration, while also managing the networking and storage needs of those containers. 
+For application developers, the main interface for defining how an application runs is through resources that are commonly represented as YAML files. + +.. sidebar:: Common Kubernetes resources + + A Deployment_ resource defines a set of Pods_ that run simultaneously, and those Pods in turn define one or more containers that run together. + Deployments and their pods can be configured with ConfigMap_ and Secret_ resources. + Deployments are made available to the network by defining a Service_. + An Ingress_ resource publishes that Service to the internet and defines what authentication and authorization is needed. + + You can `learn more about Kubernetes from its documentation `_, and also in Phalanx's :doc:`documentation on creating applications `. + +Environments are specific Kubernetes clusters +--------------------------------------------- + +Phalanx treats specific Kubernetes clusters as separate environments. +Each environment is configured to run specific sets of applications with specific configurations, although all environments running Phalanx benefit from a base of shared applications and Kubernetes-based infrastructure. + +Infrastructure agnostic +----------------------- + +Although Phalanx *uses* Kubernetes, this platform is agnostic about how Kubernetes itself is deployed for a specific environment. +Phalanx has been deployed on both public clouds (the public Rubin Science Platform runs on the Google Kubernetes Engine) and on-premises Kubernetes clusters (US Data Facility and most international data access centers [IDACs]). +Running on a public cloud versus on-premises generally impacts the specifics of how individual applications are configured. + +Helm +==== + +Helm_ is a tool for packaging applications for deployment in Kubernetes. +Helm *charts* are templates for Kubernetes resources. +By supplying values (i.e., through "values.yaml" files), Helm renders templates for specific Kubernetes environments. 
+ +Phalanx takes practical advantage of Helm charts in two ways. +First, each application has a values file for each environment. +This is the key mechanism for how Phalanx supports application deployments across multiple diverse environments. + +Second, Helm enables us to deploy existing Helm charts for external open source software. +In some cases, Phalanx application charts are shells around an external Helm chart, such as ingress-nginx. +In other cases, external Helm charts are composed as sub-charts within Phalanx's first-party application — like a Redis cluster within a Rubin API application. + +Applications are Helm charts in Phalanx +--------------------------------------- + +In Phalanx, the word *application* specifically refers to a Helm chart located in the :file:`services` directory of the `phalanx repository`_. +That Helm chart directory includes the Kubernetes templates and Docker image references to deploy the application, as well as values files to configure the application for each environment. + +Argo CD +======= + +`Argo CD`_ manages the Kubernetes deployments of each application's Helm chart from the Phalanx repository. +Each environment runs its own instance of Argo CD (as Argo CD is itself an application in Phalanx). + +Argo CD provides a web UI that shows resources in the Kubernetes cluster, provides lightweight access to logs, and most importantly provides controls for syncing and restarting applications to match the current definitions in the Phalanx GitHub repository. + +In development environments, Argo CD's UI makes it possible to temporarily edit Kubernetes resources for testing configurations outside from the Git-based process. +Argo CD replaces most need for the standard Kubernetes command-line client, ``kubectl``. +In fact, most developers for individual applications only have Argo CD access in most environments. + +Vault and secrets management +============================ + +Phalanx adopts Vault_ as its secret store. 
Since the `phalanx repository`_ is public, secrets cannot be included directly — instead, secrets are referenced from a Vault secret store.
+ +Start working with Phalanx: + +- If you are a developer looking to integrate your application into Phalanx, see the :doc:`/developers/index` section to get started. +- If you are an administrator looking to create a new environment or operate an existing one, see the :doc:`/admin/index` section. diff --git a/docs/about/precommit-and-helm-docs.rst b/docs/about/precommit-and-helm-docs.rst new file mode 100644 index 0000000000..272ccbbb42 --- /dev/null +++ b/docs/about/precommit-and-helm-docs.rst @@ -0,0 +1,66 @@ +.. _pre-commit-howto: + +###################################################### +Setting up pre-commit linting and helm-docs generation +###################################################### + +The Phalanx repository uses `pre-commit`_ to lint source files and generate Helm chart documentation with `helm-docs`_. +If you're contributing to Phalanx, you should enable pre-commit locally to ensure your work is clean and Helm chart docs are up to date. + +.. important:: + + Pre-commit also runs in GitHub Actions to ensure that contributions conform to the linters. + If your pull request's "lint" step fails, it's likely because pre-commit wasn't enabled locally. + This page shows you how to fix that. + + +.. _pre-commit-install: + +Install pre-commit and helm-docs locally +======================================== + +In your clone of Phalanx, run: + +.. code-block:: sh + + make init + +This command uses Python to install pre-commit and enable it in your Phalanx clone. + +**You will also need to install helm-docs separately.** +See the `helm-docs installation guide `__ for details. + +What to expect when developing in Phalanx with pre-commit +========================================================= + +Once installed, your Git commits in Phalanx are checked by the linters. +If a linter "fails" the commit, you'll need to make the necessary changes and re-try the Git commit. + +Many linters make the required changes when "failing." 
+For example, helm-docs updates the README files for Helm charts and black reformats Python files. +For these cases, you only need to ``git add`` the updated files for ``git commit`` to be successful. + +Other linters, such as flake8, only point out issues. +You'll need to manually resolve those issues before re-adding and committing. + +Running all files +================= + +Pre-commit normally runs only on changed files. +To check all files (similar to how we run pre-commit in GitHub Actions): + +.. code-block:: sh + + pre-commit run --all-files + +By-passing pre-commit +===================== + +In an emergency situation, it's possible to by-pass pre-commit when making git commits: + +.. code-block:: sh + + git commit --no-verify + +Keep in mind that the pre-commit linters always run on GitHub Actions. +Merging to Phalanx's default branch while the linters "fail" the repo needs a repository admin's action. diff --git a/docs/about/repository.rst b/docs/about/repository.rst new file mode 100644 index 0000000000..b79836979c --- /dev/null +++ b/docs/about/repository.rst @@ -0,0 +1,119 @@ +################################ +Phalanx Git repository structure +################################ + +Phalanx is an open source Git repository hosted at https://github.com/lsst-sqre/phalanx. +This page provides an overview of this repository's structure, for both application developers and environment administrators alike. +For background on Phalanx and its technologies, see :doc:`introduction` first. + +Key directories +=============== + +services directory +------------------ + +:bdg-link-primary-line:`Browse /services/ on GitHub ` + +Every Phalanx application has its own sub-directory within ``services`` named after the application itself (commonly the name is also used as a Kubernetes Namespace_). +A Phalanx application is itself a Helm_ chart. 
+Helm charts define Kubernetes templates for the application deployment, values for the templates, and references to any sub-charts from external repositories to include as a sub-chart. +See the `Helm documentation for details on the structure of Helm charts. `__ + +Per-environment Helm values +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Phalanx Helm charts in Phalanx include the per-environment configuration, in addition to a common set of defaults. +A chart's defaults are located in its main ``values.yaml`` file. +The per-environment values files, named ``values-.yaml``, override those default values for the application's deployment in the corresponding environments. + +Applications based on third-party charts +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Note that some applications are based entirely (or primarily) on third-party open source charts. +In this case, the application's Helm chart includes that external chart as a *dependency* through its ``Chart.yaml``. +See the `Helm documentation on chart dependencies. `__ + +science-platform directory +-------------------------- + +:bdg-link-primary-line:`Browse /science-platform/ on GitHub ` + +The ``science-platform`` directory is where environments are defined (an environment is a distinct Kubernetes cluster). + +The ``/science-platform/templates`` directory contains a Helm template per application, like this one for the ``noteburst`` application: + +.. literalinclude:: ../../science-platform/templates/noteburst-application.yaml + :caption: /science-platform/templates/noteburst-application.yaml + +The template defines a Kubernetes Namespace_ and an Argo CD ``Application`` for each Phalanx application. +``Application`` resources direct Argo CD to deploy and synchronize the corresponding application Helm chart from the Phalanx ``services`` directory. + +Notice that these templates are wrapped in a conditional, which controls whether an application is deployed in a given environment. 
+The ``values.yaml`` file in the ``science-platform`` directory defines boolean variables for each application. +Then in corresponding values files for each environment, named ``values-.yaml``, applications are enabled, or not, for the specific environment. + +installer directory +------------------- + +:bdg-link-primary-line:`Browse /installer/ on GitHub ` + +This directory contains a script named `install.sh `__. +The arguments to this are the name of the environment, the FQDN, and the read key for Vault (see :ref:`secrets` for more details on Vault). +This installer script is the entry point for setting up a new environment. +It can also be run on an existing environment to update it. +See the :ref:`environment bootstrapping documentation ` for details. + +docs directory +-------------- + +:bdg-link-primary-line:`Browse /docs/ on GitHub ` + +This directory contains the Sphinx_ documentation that you are reading now. +See :doc:`contributing-docs`. + +starters directory +------------------ + +:bdg-link-primary-line:`Browse /starters/ on GitHub ` + +This directory contains templates for contributing new applications to Phalanx. +See :doc:`/developers/add-application`. + +Branches +======== + +The default branch is ``master`` [#1]_. +This default branch is considered the source of truth for fully synchronized Phalanx environments. + +.. [#1] This branch will be renamed to ``main`` in the near future. + +Updates to Phalanx are introduced as pull requests on GitHub. +Repository members create branches directly on the https://github.com/lsst-sqre/phalanx origin (see the `Data Management workflow guide`_), while external collaborators should fork Phalanx and provide pull requests. + +It is possible (particularly in non-production environments) to deploy from branches of Phalanx, which is useful for debugging new and updated applications before updating the ``master`` branch. +You can learn how to do this in :doc:`/developers/deploy-from-a-branch`.
+ +Test and formatting infrastructure +================================== + +The Phalanx repository uses two levels of testing and continuous integration. + +`Pre-commit`_ performs file formatting and linting, both on your local editing environment (when configured) and verified in GitHub Actions. +In one check, Pre-commit regenerates Helm chart documentation for applications with helm-docs_. +See the `.pre-commit-config.yaml `__ file for configuration details. +Learn how to set up Pre-commit in your local editing environment in :doc:`precommit-and-helm-docs`. + +Second, GitHub Actions runs a CI workflow (`.github/workflows/ci.yaml `__). +This workflow has three key jobs: + +- Linting with Pre-commit_, mirroring the local editing environment. +- Static validation of Helm charts with the `helm/chart-testing-action `__ GitHub action. +- An integration test of a Phalanx environment in a minikube. + +Next steps +========== + +Start working with Phalanx: + +- If you are a developer looking to integrate your application into Phalanx, see the :doc:`/developers/index` section to get started. +- If you are an administrator looking to create a new environment or operate an existing one, see the :doc:`/admin/index` section. diff --git a/docs/arch/secrets.rst b/docs/about/secrets.rst similarity index 57% rename from docs/arch/secrets.rst rename to docs/about/secrets.rst index 045db983f0..879b841dd8 100644 --- a/docs/arch/secrets.rst +++ b/docs/about/secrets.rst @@ -1,46 +1,43 @@ .. _secrets: -####### -Secrets -####### +########################### +Secrets management overview +########################### + +Phalanx is a public repository on GitHub, nevertheless application configurations generally require some secrets such as random numbers, certificates, or passwords. +This page explains how secrets are managed in Phalanx with Vault, 1Password, and Vault Secrets Operator. 
Vault ===== -Argo CD allows all service configuration to be checked into Git and deployed from that repository. -However, many service configurations require some secrets such as random numbers, certificates, or passwords. +Argo CD allows all application configurations to be checked into Git and deployed from that repository. +However, many application configurations require some secrets such as random numbers, certificates, or passwords. These obviously cannot be committed to a public repository. -We instead use `Vault`_ to store secrets and then materialize them in Kubernetes using :ref:`vault-secrets-operator`. +We instead use `Vault`_ to store secrets and then materialize them in Kubernetes using :px-app:`vault-secrets-operator`. .. _Vault: https://www.vaultproject.io/ -Charts that need secrets use ``VaultSecret`` resources with the name matching the ``Secret`` resource to create. +Helm charts that need secrets use ``VaultSecret`` resources with the name matching the Secret_ resource to create. Those ``VaultSecret`` resources are configured with the path in Vault to the secret. -That path, in turn, is configured in the Helm per-environment values files for those services. - -Most Rubin Science Platform installations use the Vault server at vault.lsst.codes, which is managed using `Roundtable`_. +That path, in turn, is configured in the Helm per-environment values files for those applications. -.. _Roundtable: https://roundtable.lsst.io/ +Most Rubin Science Platform installations use the Vault server at ``vault.lsst.codes``, which is managed using Roundtable_. Each installation environment has its own root path in that Vault server. -The path is ``k8s_operator/`` where ```` is the domain name of that environment. +The path is formatted as ``k8s_operator/`` where ```` is the domain name of that environment. When the environment is bootstrapped, it is given a Kubernetes secret with the Vault token required to read that path of Vault. 
-See `DMTN-112`_ for more information about that Vault instance and its naming conventions. - -.. _DMTN-112: https://dmtn-112.lsst.io/ +See :dmtn:`112` for more information about that Vault instance and its naming conventions. 1Password ========= While Kubernetes and Argo CD do not look beyond Vault, Vault is not the source of truth for persistent secrets for Rubin Science Platform environments maintained by SQuaRE. -Secrets for external services or which for whatever reason cannot be randomly regenerated when the environment is reinstalled are stored in `1Password`_. - -.. _1Password: https://1password.com/ +Secrets for external applications or which for whatever reason cannot be randomly regenerated when the environment is reinstalled are stored in `1Password`_. -Inside 1Password, there is a vault named RSP-Vault that contains all of the persistent secrets. +Inside 1Password, there is a vault named ``RSP-Vault`` that contains all of the persistent secrets. Each secret is stored in either a Login or a Secure Note object. Inside that object, there must be a key named ``generate_secrets_key`` whose value is two words separated by a space. -The first word is the service and the second is the name of that secret among the secrets for that service. +The first word is the application's name and the second is the name of that secret among the secrets for that application. There may also be one or more keys named ``environment``. Its values are the domain names of the environments to which that specific secret applies. If ``environment`` is missing, that 1Password object provides a default for the given ``generate_secrets_key`` key, which will be used if there is no other object with the same key and a matching environment. @@ -49,5 +46,5 @@ These 1Password objects are used by the `generate_secrets.py script `__ uses the ``onepassword_uuid`` setting in `/science-platform/values.yaml `__ to locate the appropriate 1Password vault.
-For a step-by-step guide on adding a 1Password-based secret, see :doc:`/service-guide/add-a-onepassword-secret`. -For updating an existing 1Password-based secret, see :doc:`/service-guide/update-a-onepassword-secret`. +For a step-by-step guide on adding a 1Password-based secret, see :doc:`/developers/add-a-onepassword-secret`. +For updating an existing 1Password-based secret, see :doc:`/developers/update-a-onepassword-secret`. diff --git a/docs/admin/bootstrapping.rst b/docs/admin/bootstrapping.rst new file mode 100644 index 0000000000..51ee579c2c --- /dev/null +++ b/docs/admin/bootstrapping.rst @@ -0,0 +1,101 @@ +############################### +Bootstrapping a new environment +############################### + +This is (somewhat incomplete) documentation on how to create a new Rubin Science Platform environment. + +Requirements +============ + +* The installer assumes Git 2.22 or later. + +* We presume that you are using Vault_ coupled with `Vault Secrets Operator`_ to manage your Kubernetes secrets, and that all of the secrets for your environment will be stored under a single common prefix. + See the `LSST Vault Utilities documentation `__ for the naming convention that we usually use. + We strongly recommend using the `LSST Vault Utilities`_ to create multiple enclaves (one per instance), so that the compromise of one instance doesn't expose all your secrets for all instances. + +* Rubin Science Platform applications expect the public hostname of the Science Platform to have a TLS certificate that can be verified using standard CA roots. + Using a self-signed certificate or an institutional CA that is not in the normal list of CAs shipped with Docker base images will probably not work. + See :ref:`hostnames` for more information. + +Checklist +========= + +.. rst-class:: open + +#. Fork the `Phalanx repository`_ if this work is separate from the SQuaRE-managed environments. + +#.
Create a virtual environment with the tools you will need from the installer's `requirements.txt `__. + +#. Create a new ``values-.yaml`` file in `/science-platform `__. + Start with a template copied from an existing environment that's similar to the new environment. + Edit it so that ``environment``, ``fqdn``, and ``vault_path_prefix`` at the top match your new environment. + Choose which applications to enable or leave disabled. + +#. Decide on your approach to TLS certificates. + See :ref:`hostnames` for more details. + This may require DNS configuration in Route 53 if this is the first deployment in a new domain and you are using Let's Encrypt for certificates. + +#. Do what DNS setup you can. + If you already know the IP address where your instance will reside, create the DNS records (A or possibly CNAME) for that instance. + If you are using a cloud provider or something like minikube where the IP address is not yet known, then you will need to create that record once the top-level ingress is created and has an external IP address. + +#. Decide on your approach to user home directory storage. + The Notebook Aspect requires a POSIX file system. + The most frequently used method of providing that file system is NFS mounts, but you may instead want to use a different file system that's mounted on the Kubernetes cluster nodes and exposed to pods via ``hostPath``. + Either way, you will need to configure appropriate mount points in :px-app:`nublado2` and :px-app:`moneypenny` when you configure each application in the next step. + +#. For each enabled application, create a corresponding ``values-.yaml`` file in the relevant directory under `/services `__. + Customization will vary from application to application. 
+ The following applications have special bootstrapping considerations: + + - :px-app-bootstrap:`argo-cd` + - :px-app-bootstrap:`cachemachine` + - :px-app-bootstrap:`gafaelfawr` + - :px-app-bootstrap:`nublado2` + - :px-app-bootstrap:`portal` + - :px-app-bootstrap:`squareone` + +#. Generate the secrets for the new environment and store them in Vault with `/installer/update_secrets.sh `__. + You will need the write key for the Vault enclave you are using for this environment. + If you are using 1Password as a source of secrets, you will also need the access token for the 1Password Connect server. + (For SQuaRE-managed deployments, this is in the ``SQuaRE Integration Access Token: Argo`` 1Password item in the SQuaRE vault.) + +#. Run the installer script at `/installer/install.sh `__. + Debug any problems. + The most common source of problems are errors or missing configuration in the ``values-.yaml`` files you created for each application. + +#. If the installation is using a dynamically-assigned IP address, while the installer is running, wait until the ingress-nginx-controller Service_ comes up and has an external IP address; then go set the A record for your endpoint to that address (or set an A record with that IP address for the ingress and a CNAME from the endpoint to the A record). + For installations that are intended to be long-lived, it is worth capturing the IP address at this point and modifying your configuration to use it statically should you ever need to reinstall the instance. + +.. _hostnames: + +Hostnames and TLS +================= + +The Science Platform is designed to run under a single hostname. +``Ingress`` resources for all applications use different routes on the same external hostname. +That hostname, in turn, is served by an NGINX proxy web server, configured via the ``ingress-nginx`` Helm chart. +An NGINX ingress controller is required since its ``auth_request`` mechanism is used for authentication. 
+ +The external hostname must have a valid TLS certificate that is trusted by the stock configuration of standard CentOS, Debian, and Alpine containers. +There are two supported mechanisms to configure that TLS certificate: + +#. Purchase a commercial certificate and configure it as the ingress-nginx default certificate. + For more information, see :doc:`/applications/ingress-nginx/certificates`. + Do not add TLS configuration to any of the application ``Ingress`` resources. + With this approach, the certificate will have to be manually renewed and replaced at whatever frequency the commercial certificate provider requires. + Usually this is once per year. + +#. Configure Let's Encrypt to obtain a certificate via the DNS solver. + Once this is configured, TLS will be handled automatically without further human intervention. + However, this approach is far more complex to set up and has some significant prerequisites. + For more information, see :px-app-bootstrap:`cert-manager`. + +To use the second approach, you must have the following: + +* An :abbr:`AWS (Amazon Web Services)` account in which you can create two Route 53 hosted domains. + You must use this domain for the hostname of the Science Platform installation. +* The ability to delegate to that Route 53 hosted domain from some public DNS domain. + This means either registering a domain via Amazon, registering a domain elsewhere and pointing it to Amazon's Route 53 DNS servers, or creating a subdomain of an existing public domain by adding ``NS`` records to that domain for a subdomain hosted on Route 53. + +If neither of those requirements sounds familiar, you almost certainly want to use the first option and purchase a commercial certificate.
diff --git a/docs/admin/index.rst b/docs/admin/index.rst new file mode 100644 index 0000000000..f47b71e6a7 --- /dev/null +++ b/docs/admin/index.rst @@ -0,0 +1,31 @@ +############## +Administrators +############## + +Administrators operate infrastructure, bootstrap infrastructure, and are involved in the deployment, configuration, and Argo CD synchronization of applications. + +.. toctree:: + :caption: Bootstrapping + :maxdepth: 1 + :name: bootstrapping-toc + + bootstrapping + +.. toctree:: + :caption: Procedures + :maxdepth: 1 + + upgrade-windows + sync-argo-cd + update-pull-secret + +.. toctree:: + :caption: Troubleshooting + + troubleshooting + +.. toctree:: + :caption: Infrastructure + :maxdepth: 2 + + infrastructure/filestore/index diff --git a/docs/admin/infrastructure/filestore/index.rst b/docs/admin/infrastructure/filestore/index.rst new file mode 100644 index 0000000000..1500de52c5 --- /dev/null +++ b/docs/admin/infrastructure/filestore/index.rst @@ -0,0 +1,18 @@ +######### +Filestore +######### + +Filestore is not an RSP application, nor does it (generally) run in Kubernetes. +All current filestore implementations are simply implementations of NFS that are mounted into RSP Pods_ (both JupyterLab user Pods_ and application Pods_) by ``Volume`` and ``VolumeMount`` definitions. + +.. note:: + + There is nothing in the filestore that mandates NFS. + What is required is simply something that can present some storage to user and application Pods_ as a POSIX filesystem. + To this point, NFS has been the most convenient way to accomplish that, but it is certainly not fundamental to the concept. + +.. 
toctree:: + :caption: Guides + :titlesonly: + + privileged-access diff --git a/docs/admin/infrastructure/filestore/privileged-access.rst b/docs/admin/infrastructure/filestore/privileged-access.rst new file mode 100644 index 0000000000..a5f6a21a40 --- /dev/null +++ b/docs/admin/infrastructure/filestore/privileged-access.rst @@ -0,0 +1,75 @@ +################################## +Privileged access to the filestore +################################## + +Currently, we do not have any way to make containers with privileged filesystem access available from JupyterHub. + +In order to get privileged access to the filestore, you will need access to ``kubectl`` with admin privileges to Kubernetes cluster you want to work on. + +Procedure +========= + +Save the following file as ``copier.yaml``. +You may need to edit it to point to the correct filestore. +If you need multiple filestores present (for instance, for copying data between environments), then you will need to create multiple ``Volume``\ /``VolumeMount`` pairs so multiple filestores are present within the container. + +.. code-block:: yaml + :caption: copier.yaml + + apiVersion: v1 + kind: Pod + metadata: + name: copier + namespace: copier + spec: + containers: + - name: main + image: ubuntu:latest + args: [ "tail", "-f", "/dev/null" ] + volumeMounts: + - mountPath: /mnt + name: share + volumes: + - name: share + nfs: + path: /share1 + server: 10.13.105.122 + # 10.87.86.26 is IDF dev + # 10.22.240.130 is IDF int + # 10.13.105.122 is IDF prod + +Spin up this Pod_ and log into its shell: + +.. code-block:: bash + + kubectl create ns copier + kubectl apply -f copier.yaml + kubectl exec -it -n copier copier -- /bin/bash -l + +Once you do that, you have a root prompt and the instance filestore is mounted at ``/mnt``. +*With great power comes great responsibility.* + +When you're done, delete the namespace. +This will also destroy the privileged pod: + +.. 
code-block:: bash + + kubectl delete ns copier + +Examples +======== + +- Get usage data by username, sorted by usage, largest at the bottom: + + .. code-block:: bash + + du -s -BM /mnt/home/* \ + | sed -e 's/\s\+/,/' \ + | sed -e 's|/mnt/home/||' \ + | sort -nr + +- Make an archival copy of user ``foo``\ ’s previous ``.local`` file for analysis: + + .. code-block:: bash + + tar cvpfz /tmp/foo-local.tgz /mnt/home/foo/.local.20210804223021 diff --git a/docs/service-guide/sync-argo-cd.rst b/docs/admin/sync-argo-cd.rst similarity index 62% rename from docs/service-guide/sync-argo-cd.rst rename to docs/admin/sync-argo-cd.rst index 2c1995fc5e..e32860ae9e 100644 --- a/docs/service-guide/sync-argo-cd.rst +++ b/docs/admin/sync-argo-cd.rst @@ -1,31 +1,23 @@ -############### -Syncing Argo CD -############### +################################# +Syncing Argo CD in an environment +################################# -Go to Argo CD for the environment -================================= +Phalanx enables environment administrators to roll out new and updated applications by synchronizing deployments in Kubernetes to the current HEAD of the `phalanx repository`_ using `Argo CD`_. +This page explains the key steps in this process for environment administrators. -To access the Argo CD UI, go to the ``/argo-cd`` URL under the domain name of that deployment of the Rubin Science Platform. -See `the Phalanx README `__ for the names of all Phalanx environments and direct links to their Argo CD pages. - -Depending on the environment, you will need to authenticate with either GitHub or with Google OAuth. -You can use the ``admin`` account and password, stored in 1Password for deployments managed by SQuaRE, in case of an emergency. +.. important:: -When deploying an update, it should normally follow this sequence (skipping environments that aren't relevant to that update).
+ Keep in mind that environments have specific upgrade windows and that application updates should be rolled out to environments in sequence to development and integration environments before production environments. + See :doc:`upgrade-windows` for details. -* data-dev.lsst.cloud -* data-int.lsst.cloud -* lsst-lsp-int.ncsa.illinois.edu -* tucson-teststand.lsst.codes -* data.lsst.cloud -* lsst-lsp-stable.ncsa.illinois.edu -* base-lsp.lsst.codes -* summit-lsp.lsst.codes +Log into Argo CD for the environment +==================================== -Some of these environments have maintenance windows, in which case, in the absence of an emergency, updates should only be synced during the maintenance window. -See `SQR-056`_ for more information. +To access the Argo CD UI, go to the ``/argo-cd`` URL under the domain name of that deployment of the Rubin Science Platform. +See :doc:`/environments/index` for a list of Phalanx environments and direct links to their Argo CD pages. -.. _SQR-056: https://sqr-056.lsst.io/ +Depending on the environment, you will need to authenticate with either GitHub, Google OAuth, CILogon, or another OAuth provider as relevant. +You can use the ``admin`` account and password, stored in 1Password for deployments managed by SQuaRE, in case of an emergency. Sync the application ==================== diff --git a/docs/ops/troubleshooting.rst b/docs/admin/troubleshooting.rst similarity index 62% rename from docs/ops/troubleshooting.rst rename to docs/admin/troubleshooting.rst index d55446aa2a..103f2d9cb5 100644 --- a/docs/ops/troubleshooting.rst +++ b/docs/admin/troubleshooting.rst @@ -2,28 +2,28 @@ Troubleshooting the Rubin Science Platform ########################################## -Intended audience: Anyone who is administering an installation of the Rubin Science Platform. +Intended audience: Anyone who is administering a Rubin Science Platform environment. 
Sometimes things break, and we are assembling the most common failure scenarios, and their fixes, in this document. PostgreSQL cannot mount its persistent volume ============================================= -**Symptoms:** When restarted, the ``postgres`` service pod fails to start because it cannot mount its persistent volume. +**Symptoms:** When restarted, the ``postgres`` application pod fails to start because it cannot mount its persistent volume. If the pod is already running, it gets I/O errors from its database, hangs, or otherwise shows signs of storage problems. **Cause:** The ``postgres`` deployment requests a ``PersistentVolume`` via a ``PersistentVolumeClaim``. If the backing store is corrupt or has been deleted or otherwise is disrupted, sometimes the ``PersistentVolume`` will become unavailable, but the ``PersistentVolumeClaim`` will hang on to it and keep trying to futilely mount it. When this happens, you may need to recreate the persistent volume. -**Solution:** :doc:`postgres/recreate-pvc` +**Solution:** :ref:`recreate-postgres-pvc` Spawner menu missing images, cachemachine stuck pulling the same image ====================================================================== **Symptoms:** When a user goes to the spawner page for the Notebook Aspect, the expected menu of images is not available. Instead, the menu is either empty or missing the right number of images of different classes. -The cachemachine service is continuously creating a ``DaemonSet`` for the same image without apparent forward progress. +The cachemachine application is continuously creating a ``DaemonSet`` for the same image without apparent forward progress. Querying the cachemachine ``/available`` API shows either nothing in ``images`` or not everything that was expected. **Cause:** Cachemachine is responsible for generating the menu used for spawning new JupyterLab instances. @@ -35,7 +35,7 @@ The most common cause of this problem is a Kubernetes limitation. 
By default, the Kubernetes list node API only returns the "first" (which usually means oldest) 50 cached images. If more than 50 images are cached, images may go missing from that list even though they are cached, leading cachemachine to think they aren't cached and omitting them from the spawner menu. -**Solution:** :doc:`cachemachine/pruning` +**Solution:** :doc:`/applications/cachemachine/pruning` If this doesn't work, another possibility is that there is a node that cachemachine thinks is available for JupyterLab images but which is not eligible for its ``DaemonSet``. This would be a bug in cachemachine, which should ignore cordoned nodes, but it's possible there is a new iteration of node state or a new rule for where ``DaemonSets`` are allowed to run that it does not know about. @@ -60,22 +60,22 @@ Spawning a notebook fails with a pending error In this case, JupyterHub may not recover without assistance. You may need to delete the record for the affected user, and also make sure the user's lab namespace (visible in Argo CD under the ``nublado-users`` application) has been deleted. -**Solution:** :doc:`nublado2/database` +**Solution:** :ref:`nublado2-clear-session-database` -User gets permission denied from services -========================================= +User gets permission denied from applications +============================================= -**Symptoms:** A user is able to authenticate to the Rubin Science Platform (prompted by going to the first authenticated URL, such as the Notebook Aspect spawner page), but then gets permission denied from other services. +**Symptoms:** A user is able to authenticate to the Rubin Science Platform (prompted by going to the first authenticated URL, such as the Notebook Aspect spawner page), but then gets permission denied from other applications. -**Causes:** Authentication and authorization to the Rubin Science Platform is done via a service called Gafaelfawr (see :doc:`./gafaelfawr/index`).
+**Causes:** Authentication and authorization to the Rubin Science Platform is done via an application called :doc:`Gafaelfawr `. After the user authenticates, Gafaelfawr asks their authentication provider for the user's group memberships and then translates that to a list of scopes. The mapping of group memberships to scopes is defined in the ``values.yaml`` file for Gafaelfawr for the relevant environment, in the ``gafaelfawr.config.groupMapping`` configuration option. -The most likely cause of this problem is that the user is not a member of a group that grants them access to that service. -Gafaelfawr will prevent the user from logging in at all if they are not a member of any group that grants access to a service. -If they are a member of at least one group, they'll be able to log in but may get permission denied errors from other services. +The most likely cause of this problem is that the user is not a member of a group that grants them access to that application. +Gafaelfawr will prevent the user from logging in at all if they are not a member of any group that grants access to an application. +If they are a member of at least one group, they'll be able to log in but may get permission denied errors from other applications. -**Solution:** :doc:`gafaelfawr/debugging` +**Solution:** :ref:`gafaelfawr-no-access` You need privileged access to the filestore =========================================== @@ -93,6 +93,36 @@ User pods don't spawn, reporting "permission denied" from Moneypenny **Symptoms:** A user pod fails to spawn, and the error message says that Moneypenny did not have permission to execute. **Cause:** The ``gafaelfawr-token`` VaultSecret in the ``nublado2`` namespace is out of date. -This happened because the ``gafaelfawr-redis`` pod restarted and either it lacked persistent storage (at the T&S sites, as of October 2021), or because that storage had been lost.
+This happened because the ``gafaelfawr-redis`` pod restarted and either it lacked persistent storage (at the T&S sites, as of July 2022), or because that storage had been lost. + +**Solution:** :doc:`/applications/gafaelfawr/recreate-token` + +Login fails with "bad verification code" error +============================================== + +**Symptoms:** When attempting to authenticate to a Science Platform deployment using GitHub, the user gets the error message ``Authentication provider failed: bad_verification_code: The code passed is incorrect or expired.`` + +**Cause:** GitHub login failed after the OAuth 2.0 interaction with GitHub was successfully completed, and then the user reloaded the failed login page (or reloaded the page while Gafaelfawr was attempting to complete the authentication). +Usually this happens because Gafaelfawr was unable to write to its storage, either Redis or PostgreSQL. +If the storage underlying the deployment is broken, this can happen without producing obvious error messages, since the applications can go into disk wait and just time out. +Restarting the in-cluster ``postgresql`` pod, if PostgreSQL is running inside the Kubernetes deployment, will generally make this problem obvious because PostgreSQL will be unable to start. + +**Solution:** Check the underlying storage for Redis and Gafaelfawr. +For in-cluster PostgreSQL, if this is happening for all users, try restarting the ``postgresql`` pod, which will not fix the problem but will make it obvious if it is indeed storage. +If the problem is storage, this will need to be escalated to whoever runs the storage for that Gafaelfawr deployment. + +Note that reloading a failed login page from Gafaelfawr will never work and will always produce this error, so it can also be caused by user impatience. +In that case, the solution is to just wait or to return to the front page and try logging in again, rather than reloading the page. 
+ +User keeps logging in through the wrong identity provider +========================================================= + +**Symptoms:** When attempting to use a different identity provider for authentication, such as when linking a different identity to the same account, the CILogon screen to select an identity provider doesn't appear. +Instead, the user is automatically sent to the last identity provider they used. + +**Cause:** The CILogon identity provider selection screen supports remembering your selection, in which case it's stored in a browser cookie or local storage and you are not prompted again. +This happens even when you want to be prompted. -**Solution:** :doc:`gafaelfawr/recreate-token` +**Solution:** Have the user go to `https://cilogon.org/me `__ and choose "Delete ALL". +This will clear their remembered selection. +They can then retry whatever operation they were attempting.
+ +If you need to update the pull secret manually for an environment, here +are the important things to know: + +You will first set the necessary environment variables: + +* ``VAULT_ADDR`` must be set to ``https://vault.lsst.codes`` +* ``VAULT_TOKEN`` must be set to the appropriate write token for the RSP + instance. + +Then you will construct the updated secret. Just create a legal JSON +object. The trick is, this value must be represented to Vault as a +*string*. The easiest way to do this is: + +#. Ensure the secret doesn't, itself, have any single quotes in it. If + it does, replace each single quote with ``'\''`` +#. Copy the secret you've created into your paste buffer +#. Type ``vault kv patch secret/k8s_operator//pull-secret + .dockerconfigjson='`` (*nota bene*: that ends with a single quote) +#. Paste the secret into the command line +#. Type ``'`` and press Enter. + +That will avoid the pain and hassle of multiple layers of quoting in +JSON objects, by handing the secret value as a (possibly multi-line) +string literal to Vault. + +Then restart the ``vault-secrets-operator`` deployment and watch the pod +logs to make sure that pull-secret was correctly updated. + +If you mess up, remember that our vault secrets are versioned, and you +can pull earlier versions of the secret with ``vault kv get secret + -version ``; this (and the above technique) should let +you get back to a less-broken state. diff --git a/docs/admin/upgrade-windows.rst b/docs/admin/upgrade-windows.rst new file mode 100644 index 0000000000..32b16f45b8 --- /dev/null +++ b/docs/admin/upgrade-windows.rst @@ -0,0 +1,27 @@ +############################ +Upgrade windows and sequence +############################ + +Phalanx provides configurations for multiple environments. +Many of these are production environments that service different user groups. +Other environments are intended for development and integration. 
+ +In general, new and updated services should be rolled out to development and integration environments before production environments. + +Production environments also generally have specific maintenance windows when upgrades can occur. + +SQuaRE environments +=================== + +In the case of environments managed by SQuaRE, the process for gated updates to environments is canonically defined in :sqr:`056`, but also summarized here. + +The sequence for rolling out updates is: + +* ``data-dev.lsst.cloud`` +* ``data-int.lsst.cloud`` +* ``tucson-teststand.lsst.codes`` +* ``data.lsst.cloud`` +* ``base-lsp.lsst.codes`` +* ``summit-lsp.lsst.codes`` + +See :sqr:`056` for the change coordination and upgrade windows (as relevant) for each environment. diff --git a/docs/applications/_summary.rst.jinja b/docs/applications/_summary.rst.jinja new file mode 100644 index 0000000000..ac7e756d30 --- /dev/null +++ b/docs/applications/_summary.rst.jinja @@ -0,0 +1,53 @@ +.. list-table:: + + * - View on GitHub + - :bdg-link-primary-line:`/services/{{ app.name }} ` + :bdg-link-primary-line:`Application template ` + {% if app.homepage_url %} + * - Homepage + - {{ app.homepage_url }} + {% endif %} + {% if app.source_urls %} + * - Source + {% if app.source_urls|length == 1 %} + - {{ app.source_urls[0] }} + {% else %} + - - {{ app.source_urls[0] }} + {% endif %} + {% if app.source_urls|length > 1 %} + {% for source_url in app.source_urls[1:] %} + - {{ source_url }} + {% endfor %} + {% endif %} + {% endif %} + {% if app.doc_links %} + * - Related docs + {% if app.doc_links|length == 1 %} + - {{ app.doc_links[0] }} + {% else %} + - - {{ app.doc_links[0] }} + {% endif %} + {% if app.doc_links|length > 1 %} + {% for doc_link in app.doc_links[1:] %} + - {{ doc_link }} + {% endfor %} + {% endif %} + {% endif %} + * - Type + - Helm_ + * - Namespace + - {{ app.namespace }} + {% if app.active_environments %} + * - Environments + - .. 
list-table:: + + {% for env_name in app.active_environments %} + * - :px-env:`{{ env_name }}` + - `values `__ + {% if envs[env_name].argocd_url != "N/A" %} + - `Argo CD <{{ envs[env_name].argocd_url }}/applications/{{ app.name }}>`__ + {% else %} + - + {% endif %} + {% endfor %} + {% endif %} diff --git a/docs/applications/alert-stream-broker/index.rst b/docs/applications/alert-stream-broker/index.rst new file mode 100644 index 0000000000..85c49f98d0 --- /dev/null +++ b/docs/applications/alert-stream-broker/index.rst @@ -0,0 +1,20 @@ +.. px-app:: alert-stream-broker + +################################################### +alert-stream-broker — Alert transmission to brokers +################################################### + +The Alert Stream Broker is responsible for rapid dissemination of alerts (from observatory operations) to community alert brokers. +It is built on top of `Apache Kafka`_ and uses `Apache Avro`_ as the schema for alerts. + +For testing during construction, the alert-stream-broker application includes an alert stream simulator, which periodically posts a static set of alerts to allow testing the alert pipeline. +During normal observatory operations, the alerts will instead come from the Alert Production pipelines. + +.. jinja:: alert-stream-broker + :file: applications/_summary.rst.jinja + +.. Guides +.. ====== +.. +.. .. toctree:: +.. :maxdepth: 1 diff --git a/docs/ops/argo-cd/authentication.rst b/docs/applications/argo-cd/authentication.rst similarity index 82% rename from docs/ops/argo-cd/authentication.rst rename to docs/applications/argo-cd/authentication.rst index e4dd6af967..0fe7e45f30 100644 --- a/docs/ops/argo-cd/authentication.rst +++ b/docs/applications/argo-cd/authentication.rst @@ -20,9 +20,9 @@ Configuring Google SSO To set up Google SSO authentication to Argo CD in a new cluster, take the following steps as a user with the ``roles/oauthconfig.editor`` role: -#. 
On the GCP console, go to "OAuth consent screen" under "APIs & Services." +#. On the GCP console, go to :guilabel:`OAuth consent screen` under :guilabel:`APIs & Services`. -#. Select "Internal" and click Create. +#. Select :guilabel:`Internal` and click :guilabel:`Create`. #. Enter the environment information. For example (adjust for the environment): @@ -32,24 +32,24 @@ To set up Google SSO authentication to Argo CD in a new cluster, take the follow - Authorized domains: lsst.cloud - Developer contact information email addresses: Work email address -#. Click "Save and Continue." +#. Click :guilabel:`Save and Continue`. -#. Add the ``openid`` scope and click "Save and Continue." +#. Add the ``openid`` scope and click :guilabel:`Save and Continue`. -#. Click "Back to Dashboard." +#. Click :guilabel:`Back to Dashboard`. -#. Go to "Credentials" still under "APIs & Services." +#. Go to :guilabel:`Credentials` still under :guilabel:`APIs & Services`. -#. Click "Create Credentials" and choose "OAuth client ID." +#. Click :guilabel:`Create Credentials` and choose :guilabel:`OAuth client ID`. -#. Choose "Web application" as the application type. +#. Choose :guilabel:`Web application` as the application type. #. Enter "Argo CD" as the name. #. Add the ``/argo-cd/api/dex/callback`` route under "Authorized redirect URIs." - For example: https://data-int.lsst.cloud/argo-cd/api/dex/callback + For example: ``https://data-int.lsst.cloud/argo-cd/api/dex/callback`` -#. Click on create. +#. Click on :guilabel:`Create`. This will pop up a dialog with the client ID and secret for the newly-created OAuth client. #. For SQuaRE-run enviroments, go to the RSP-Vault 1Password vault and create a new Login item with a name like "Argo CD Google OAuth - data-int.lsst.cloud" (replacing the last part with the FQDN of the environment). 
@@ -102,7 +102,7 @@ To set up Google SSO authentication to Argo CD in a new cluster, take the follow Change the list of users to the email addresses of the users who should have admin access to this environment. -#. Create a PR with the above changes, merge it, and then sync Argo CD. +#. If the environment already exists, create a PR with the above changes, merge it, and then sync Argo CD. Ensure that both the ``argocd-server`` and ``argocd-dex-server`` deployments are restarted (in case the Argo CD Helm chart doesn't ensure this). #. Go to the ``/argo-cd`` route on the environment. @@ -116,27 +116,27 @@ Configuring GitHub SSO To set up Google SSO authentication to Argo CD in a new cluster, take the following steps: -#. From the GitHub page of the organization in which you want to create the OAuth application (such as https://github.com/lsst-sqre), go to Settings → Developer Settings → OAuth Apps. +#. From the GitHub page of the organization in which you want to create the OAuth application (such as `lsst-sqre `__), go to :guilabel:`Settings → Developer Settings → OAuth Apps`. -#. Click New OAuth App. +#. Click :guilabel:`New OAuth App`. #. Enter the following information (adjust for the environment): - - Application name: RSP Argo CD (NCSA int) - - Homepage URL: https://lsst-lsp-int.ncsa.illinois.edu/argo-cd - - Authorization callback URL: https://lsst-lsp-int.ncsa.illinois.edu/argo-cd/api/dex/callback + - Application name: ``RSP Argo CD (IDF-int)`` + - Homepage URL: ``https://data-int.lsst.cloud/argo-cd`` + - Authorization callback URL: ``https://data-int.lsst.cloud/argo-cd/api/dex/callback`` -#. Click "Register Application". +#. Click :guilabel:`Register Application`. -#. Click "Generate a new client secret". +#. Click :guilabel:`Generate a new client secret`. -#. 
For SQuaRE-run enviroments, go to the RSP-Vault 1Password vault and create a new Login item with a name like "Argo CD GitHub OAuth - lsst-lsp-int.ncsa.illinois.edu" (replacing the last part with the FQDN of the environment). +#. For SQuaRE-run enviroments, go to the RSP-Vault 1Password vault and create a new Login item with a name like "Argo CD GitHub OAuth - data-int.lsst.cloud" (replacing the last part with the FQDN of the environment). In this secret, put the client ID in the username field. Put the client secret in the password field. Create a field labeled ``generate_secrets_key`` with value ``argocd dex.clientSecret``. - Create a field labeled ``environment`` with value ``lsst-lsp-int.ncsa.illinois.edu`` (replace with the FQDN of the environment). + Create a field labeled ``environment`` with value ``data-int.lsst.cloud`` (replace with the FQDN of the environment). Save this 1Password secret. -#. If the environment already exists, get a Vault write token for the environment (or the Vault admin token) and set the ``dex.clientSecret`` key in the ``argocd`` secret in the Vault path for that environment (something like ``secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu``, replacing the last part with the FQDN of the environment). +#. If the environment already exists, get a Vault write token for the environment (or the Vault admin token) and set the ``dex.clientSecret`` key in the ``argocd`` secret in the Vault path for that environment (something like ``secret/k8s_operator/data-int.lsst.cloud``, replacing the last part with the FQDN of the environment). Be sure to use ``vault kv patch`` to add the key to the existing secret. This will add the value to the Argo CD secret once vault-secrets-operator notices the change. You can delete ``argocd-secret`` to immediately recreate it to speed up the propagation. @@ -146,7 +146,7 @@ To set up Google SSO authentication to Argo CD in a new cluster, take the follow .. 
code-block:: yaml - url: https://lsst-lsp-int.ncsa.illinois.edu/argo-cd + url: https://data-int.lsst.cloud/argo-cd dex.config: | connectors: # Auth using GitHub. @@ -175,7 +175,7 @@ To set up Google SSO authentication to Argo CD in a new cluster, take the follow Add lines for additional GitHub teams as needed for that environment. Be aware that this uses the human-readable name of the team (with capital letters and spaces if applicable), not the slug. -#. Create a PR with the above changes, merge it, and then sync Argo CD. +#. If the environment already exists, create a PR with the above changes, merge it, and then sync Argo CD. Ensure that both the ``argocd-server`` and ``argocd-dex-server`` deployments are restarted (in case the Argo CD Helm chart doesn't ensure this). #. Go to the ``/argo-cd`` route on the environment. diff --git a/docs/applications/argo-cd/bootstrap.rst b/docs/applications/argo-cd/bootstrap.rst new file mode 100644 index 0000000000..9ae414e241 --- /dev/null +++ b/docs/applications/argo-cd/bootstrap.rst @@ -0,0 +1,17 @@ +.. px-app-bootstrap:: argo-cd + +##################### +Bootstrapping Argo CD +##################### + +Initial installation of the Rubin Science Platform is done using Argo CD and a static password for the ``admin`` account. +You can then log on to the ``admin`` account using that password to manage the resulting environment. +No special bootstrapping is required. + +That said, using the ``admin`` account for longer than necessary is not recommended. +Instead, you should configure single sign-on for Argo CD as soon as possible and prefer that for day-to-day operations to minimize the chances of leaking the ``admin`` password. + +To do that, follow the instructions in :doc:`authentication`. + +You may want to do this during your initial bootstrapping process, or very shortly afterwards. 
+The execution of the installer script itself will use the ``admin`` account and password regardless, but if Argo CD SSO is set up in advance, you can then immediately switch to using it for management of the environment. diff --git a/docs/applications/argo-cd/index.rst b/docs/applications/argo-cd/index.rst new file mode 100644 index 0000000000..92ed9114c9 --- /dev/null +++ b/docs/applications/argo-cd/index.rst @@ -0,0 +1,23 @@ +.. px-app:: argocd + +####################################### +argocd — Kubernetes application manager +####################################### + +`Argo CD`_ is the software that manages all Kubernetes resources in a deployment of the Rubin Science Platform. +It is itself a set of Kubernetes resources and running pods managed with Helm_. + +.. jinja:: argocd + :file: applications/_summary.rst.jinja + :debug: + +Guides +====== + +.. toctree:: + + notes + bootstrap + authentication + upgrade + values diff --git a/docs/applications/argo-cd/notes.rst b/docs/applications/argo-cd/notes.rst new file mode 100644 index 0000000000..a3ca9efdd7 --- /dev/null +++ b/docs/applications/argo-cd/notes.rst @@ -0,0 +1,24 @@ +.. px-app-notes:: argocd + +############################## +Argo CD architecture and notes +############################## + +Argo CD is installed and bootstrapped as part of the cluster creation process. +The UI is exposed on the ``/argo-cd`` route for the Science Platform. + +Unlike other resources on the Science Platform, it is not protected by Gafaelfawr. +See :doc:`authentication` + +Namespace for Application resources +=================================== + +Everything related to Argo CD that can be namespaced must be in the ``argocd`` namespace. + +.. warning:: + + ``Application`` resources must be in the ``argocd`` namespace, not in the namespace of the application. + + If you accidentally create an ``Application`` resource outside of the ``argocd`` namespace, Argo CD will display it in the UI but will not be able to sync it. 
+ You also won't be able to easily delete it if it defines the normal Argo CD finalizer because that finalizer will not run outside the ``argocd`` namespace. + To delete the stray ``Application`` resource, edit it with ``kubectl edit`` and delete the finalizer, and then delete it with ``kubectl delete``. diff --git a/docs/applications/argo-cd/upgrade.rst b/docs/applications/argo-cd/upgrade.rst new file mode 100644 index 0000000000..8427264eee --- /dev/null +++ b/docs/applications/argo-cd/upgrade.rst @@ -0,0 +1,123 @@ +.. px-app-upgrade:: argocd + +################# +Upgrading Argo CD +################# + +This page provides upgrade procedures for the :px-app:`argocd` application. + +.. warning:: + + Do not use the `documented Argo CD upgrade method `__ that uses ``kubectl apply``. + This will not work properly when Argo CD is installed via Helm, as it is in Phalanx. + +Automatic upgrades +================== + +Normally, you can let Argo CD upgrade itself (see `Manage Argo CD Using Argo CD `__). +The upgrade will appear to proceed up to a point and then will apparently stall when the frontend pod is restarted. +When that happens, wait a minute or two and reload the page. +You should be presented with the login screen, can authenticate with GitHub or Google, and then will see the completed upgrade. + +In some cases after an upgrade, Argo CD will claim that syncing itself failed. +This is usually a spurious failure caused by the controller restarting due to the upgrade. +Simply sync Argo CD again to resolve the error state. + +If the upgrade results in a non-working Argo CD, often you can get it back to a working state by selectively downgrading the failed component using ``kubectl edit`` on the relevant ``Deployment`` resource. +This is particularly true if Dex failed (which will cause errors when logging in), since it is largely independent of the rest of Argo CD. 
+ +Manual upgrade process +====================== + +Only use this process if the automatic upgrade failed or if there are documented serious problems with automatic upgrades. + +#. Determine the current version of Argo CD. + + The easiest way to do this is to go to the ``/argo-cd`` route and look at the version number in the top left sidebar. + Ignore the hash after the ``+`` sign; the part before that is the version number. + +#. Ensure your default ``kubectl`` context is the cluster you want to upgrade. + Check your current context with ``kubectl config current-context`` and switch as necessary with ``kubectl config use-context``. + +#. Back up the Argo CD configuration: + + .. code-block:: sh + + chmod 644 ~/.kube/config + docker run -v ~/.kube:/home/argocd/.kube --rm \ + argoproj/argocd:$VERSION argocd-util export -n argocd > backup.yaml + chmod 600 ~/.kube/config + + You have to temporarily make your ``kubectl`` configuration file world-readable so that the Argo CD Docker image can use your credentials. + Do this on a private system with no other users. + Replace ``$VERSION`` with the version of Argo CD as discovered above. + The version will begin with a ``v``. + + This is taken from the `Argo CD disaster recovery documentation `__ with the addition of the namespace flag. + + The backup will not be needed if all goes well. + +#. Determine the new version of the Argo CD Helm chart (**not** Argo CD itself) to which you will be upgrading: + + .. code-block:: sh + + helm repo add argo https://argoproj.github.io/argo-helm + helm repo update + helm search repo argo-cd + + Note the chart version for ``argo/argo-cd``. + +#. Upgrade Argo CD using Helm. + Check out the `Phalanx repository`_ first. + + .. 
code-block:: sh + + cd phalanx/installer + helm upgrade --install argocd argo/argo-cd --version $VERSION \ + --values argo-cd-values.yaml --namespace argocd --wait --timeout 900s + + Replace ``$VERSION`` with the Helm chart version (**not** the Argo CD application version) that you want to install. + +If all goes well, you can now view the UI at ``/argo-cd`` and confirm that everything still looks correct. + +Recovering from a botched upgrade +--------------------------------- + +If everything goes horribly wrong, you can remove Argo CD entirely and then restore it from the backup that you took. +To do this, first drop the Argo CD namespace: + +.. code-block:: sh + + kubectl delete namespace argocd + +You will then need to manually remove the finalizers for all the Argo CD application resources in order for the namespace deletion to succeed. +The following instructions are taken from `kubernetes/kubernetes#77086 `__: + +.. code-block:: sh + + kubectl api-resources --verbs=list --namespaced -o name \ + | xargs -n 1 kubectl get --show-kind --ignore-not-found -n argocd + +This will show all resources that need manual attention. +It should only be Argo CD ``Application`` and ``AppProject`` resources. +For each resource, edit it with ``kubectl edit -n argocd`` and delete the finalizer. +As you save each resource, its deletion should succeed. +By the end, the namespace should successfully finish deletion. +You can then recreate the namespace, reinstall Argo CD, and restore the backup: + +.. 
code-block:: sh + + kubectl create namespace argocd + cd phalanx/installer + helm upgrade --install argocd argo/argo-cd --version $HELM_VERSION \ + --values argo-cd-values.yaml --namespace argocd --wait --timeout 900s + chmod 644 ~/.kube/config + docker run -i -v ~/.kube:/home/argocd/.kube --rm \ + argoproj/argocd:$VERSION argocd-util import -n argocd - < backup.yaml + chmod 600 ~/.kube/config + +Replace ``$HELM_VERSION`` with the version of the Helm chart you want to use and ``$VERSION`` with the corresponding Argo CD version (as shown via ``helm search repo``). + +This should hopefully restore Argo CD to a working state. +If it doesn't, you'll need to reinstall it using the more extended process used by the cluster installer. +See `installer/install.sh `__ for the commands to run. diff --git a/docs/applications/argo-cd/values.md b/docs/applications/argo-cd/values.md new file mode 100644 index 0000000000..d6c99f881b --- /dev/null +++ b/docs/applications/argo-cd/values.md @@ -0,0 +1,12 @@ +```{px-app-values} argocd +``` + +# Argo CD Helm values reference + +Helm values reference table for the {px-app}`argocd` application. + +```{include} ../../../services/argocd/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/cachemachine/bootstrap.rst b/docs/applications/cachemachine/bootstrap.rst new file mode 100644 index 0000000000..c34ce36628 --- /dev/null +++ b/docs/applications/cachemachine/bootstrap.rst @@ -0,0 +1,40 @@ +.. px-app-bootstrap:: cachemachine + +########################## +Bootstrapping cachemachine +########################## + +By default, cachemachine doesn't do any prepulling and doesn't provide a useful menu for Notebook Aspect spawning. +As part of bootstrapping a new environment, you will want to configure it to prepull appropriate images. + +For deployments on Google Kubernetes Engine, you will want to use Google Artifact Repository (GAR) as the source of images. 
+See :doc:`gar` for basic information and instructions on how to configure workload identity. +A good starting point for the cachemachine configuration is the `configuration from the IDF environment `__, which sets up GAR as the image source and prepulls a reasonable number of images. + +For Telescope and Site deployments that need special images and image cycle configuration, start from the `summit configuration `__. +Consult with Telescope and Site to determine the correct recommended tag and cycle number. + +For other deployments that use the normal Rubin Notebook Aspect images, a reasonable starting configuration for cachemachine is: + +.. code-block:: yaml + + autostart: + jupyter: | + { + "name": "jupyter", + "labels": {}, + "repomen": [ + { + "type": "RubinRepoMan", + "registry_url": "registry.hub.docker.com", + "repo": "lsstsqre/sciplat-lab", + "recommended_tag": "recommended", + "num_releases": 1, + "num_weeklies": 2, + "num_dailies": 3 + } + ] + } + +This prepulls the latest release, the latest two weeklies, and the latest three dailies, as well as the image tagged ``recommended``. +However, also see :ref:`prepull-recommended` for information on how to ensure cachemachine knows the correct tag and description for the recommended image. diff --git a/docs/applications/cachemachine/gar.rst b/docs/applications/cachemachine/gar.rst new file mode 100644 index 0000000000..5ba3ad269c --- /dev/null +++ b/docs/applications/cachemachine/gar.rst @@ -0,0 +1,62 @@ +################################################ +Google Cloud Artifact Registry (GAR) integration +################################################ + +Cachemachine optionally supports using the Google Cloud Artifact Registry (GAR) API to list images rather than the Docker API. + +This allows workload identity credentials to be used instead of Docker credentials when the images are stored in GAR. 
+Docker client authentication with GAR is cumbersome because a JSON token is used for authentication, and that token contains special characters that make it difficult to pass between multiple secret engine layers. + +Using the GAR API directly also avoids the need to build a cache of hashes to resolve tags to images. +The Docker API returns a list of images with a single tag, which requires constructing a cache of known hashes to determine which tags are alternate names for images that have already been seen. +The GAR API returns a list of images with all tags for that image, avoiding this problem. + +Container Image Streaming +========================= + +`Container Image Streaming `__ is used by cachemachine to decrease the image pull time. +It's also used when an image isn't cached, which makes it practical to use uncached images. +With normal Docker image retrieval, using an uncached image can result in a five-minute wait and an almost-certain timeout. + +The ``sciplatlab`` images are 4GB. +Image pull time for those images decreased from 4 minutes to 30 seconds using image streaming. + +Image streaming is enabled per project by enabling the ``containerfilesystem.googleapis.com`` API. +This was enabled via Terraform for the Interim Data Facility environments. + +Workload Identity +================= + +`Workload Identity `__ is used by Cachemachine to authenticate to the GAR API. +Workload Identity allows Kubernetes service accounts to impersonate Google Cloud Platform (GCP) Service Accounts to authenticate to GCP services. +Workload Identity is enabled on all of the Rubin Science Platform (RSP) Google Kubernetes Engine (GKE) Clusters. + +The binding between the Kubernetes and the GCP service account is done through IAM permissions deployed via Terraform. +The following Kubernetes annotation must be added to the Kubernetes ``ServiceAccount`` object as deployed via Phalanx to bind that service account to the GCP service account. + +.. 
code-block:: yaml + + serviceAccount: + annotations: { + iam.gke.io/gcp-service-account: cachemachine-wi@science-platform-dev-7696.iam.gserviceaccount.com + } + +To troubleshoot or validate Workload Identity, a test pod can be provisioned using `these instructions `__. + +Validating operations +===================== + +To validate cachemachine is running, check the status page at ``https://data-dev.lsst.cloud/cachemachine/jupyter``. +(Replace ``data-dev`` with the appropriate environment.) +Check the ``common_cache`` key for cached images, and see if ``images_to_cache`` is blank or only showing new images that are in the process of being downloaded. + +Future work +=========== + +- Cachemachine and Nublado both default to configuring an image pull secret when spawning pods. + This value is not used by GAR. + In GKE, the nodes default to using the built-in service account to pull images. + This means we can drop the ``pull-secret`` secret and its configuration when GAR is in use. + +- Image streaming is currently a per-region setting. + If GKE clusters are deployed outside of ``us-central1`` in the future, a GAR repository should be created for that region to stream images. diff --git a/docs/applications/cachemachine/index.rst b/docs/applications/cachemachine/index.rst new file mode 100644 index 0000000000..065cf6cfc6 --- /dev/null +++ b/docs/applications/cachemachine/index.rst @@ -0,0 +1,25 @@ +.. px-app:: cachemachine + +######################################### +cachemachine — JupyterLab image prepuller +######################################### + +The Docker images used for lab pods run by the Notebook Aspect are quite large, since they contain the full Rubin Observatory software stack. +If the image is not already cached on a Kubernetes node, starting a lab pod can take as long as five minutes and may exceed the timeout allowed by JupyterHub. 
+ +Cachemachine is an image prepulling service designed to avoid this problem by ensuring every node in the Science Platform Kubernetes cluster has the most frequently used lab images cached. +It is also responsible for reporting the available images to :doc:`Nublado <../nublado2/index>`, used to generate the menu of images when the user creates a new lab pod. + +.. jinja:: cachemachine + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + + bootstrap + pruning + updating-recommended + gar + values diff --git a/docs/applications/cachemachine/pruning.rst b/docs/applications/cachemachine/pruning.rst new file mode 100644 index 0000000000..b7f2829f81 --- /dev/null +++ b/docs/applications/cachemachine/pruning.rst @@ -0,0 +1,18 @@ +############# +Image pruning +############# + +If the list of cached images on nodes gets excessively long, Kubernetes may stop updating its list of cached images. +The usual symptom is that the Notebook Aspect spawner menu of available images will be empty or missing expected images. + +This is a limitation of the Kubernetes node API. +By default, `only 50 images on a node will be shown `__. +You can work around this, if you control the Kubernetes installation, by adding ``--node-status-max-images=-1`` on the kubelet command line, or by setting ``nodeStatusMaxImages`` to ``-1`` in the kubelet configuration file. + +If you cannot change that setting, you will need to trim the node image cache so that the total number of images is under 50. + +#. Download `purge `__. + +#. Run it on each node, using an account allowed to use the Docker socket (thus, probably in group ``docker``). + You may want to run it with ``-x`` first to see what it's going to do. + If you want output during the actual run, run it with ``-v``. 
diff --git a/docs/applications/cachemachine/updating-recommended.rst b/docs/applications/cachemachine/updating-recommended.rst new file mode 100644 index 0000000000..1292ca0588 --- /dev/null +++ b/docs/applications/cachemachine/updating-recommended.rst @@ -0,0 +1,68 @@ +############################################## +Updating the recommended Notebook Aspect image +############################################## + +The ``recommended`` tag for JupyterLab images is usually a recent weekly image. +The image tagged ``recommended`` is guaranteed by SQuaRE to be compatible with other services and materials, such as tutorial or system testing notebooks, that we make available on RSP deployments. + +Because this process requires quite a bit of checking and sign-off from multiple stakeholders, it is possible that approving a new recommended version may take more than the two weeks (for most deployments) it takes for a weekly image to roll off the default list of images to pull. +This can cause the RSP JupyterHub options form to display empty parentheses rather than the correct target version when a user requests a lab container. + +This document explains the process for moving the ``recommended`` tag, and how to circumvent that display bug by changing cachemachine's ``values-.yaml`` for the appropriate instance when moving the ``recommended`` tag. + +Tagging a new container version +-------------------------------- + +When a new version is to be approved (after passing through its prior QA and sign-off gates), the ``recommended`` tag must be updated to point to the new version. + +To do this, run the GitHub retag workflow for the `sciplat-lab `__ repository, as follows: + +#. Go to `the retag workflow page `__. +#. Click on :guilabel:`Run workflow`. +#. Enter the tag of the image to promote to recommended under :guilabel:`Docker tag of input container`. + This will be a tag like ``w_2022_40``. +#. Enter ``recommended`` under :guilabel:`Additional value to tag container with`. 
+#. Click on the :guilabel:`Run workflow` submit button. + +Don't change the URIs. + +.. _prepull-recommended: + +Ensure the recommended image is pre-pulled +------------------------------------------ + +In most environments, cachemachine only prepulls the latest two weekly images. +It is common for more than two weeks to go by before approving a new version of recommended. +While the recommended tag is always prepulled, cachemachine cannot resolve that tag to a regular image tag unless the corresponding image tag is also prepulled. +The result is a display bug where recommended is not resolved to a particular tag, and therefore is missing the information in parentheses after the :guilabel:`Recommended` menu option in the spawner form. + +To avoid this, we therefore explicitly prepull the weekly tag corresponding to the ``recommended`` tag. +This ensures that cachemachine can map the ``recommended`` tag to a weekly tag. +This doesn't consume any additional cache space on the nodes, since Kubernetes, when cachemachine tells it to cache that weekly tag, will realize that it already has it cached under another name. + +We add this configuration to the IDF environments. +Other Phalanx environments handle recommended images differently and don't need this configuration. + +In cachemachine's ``values-.yaml`` file for the affected environment, go towards the bottom and look in ``repomen``. +The first entry will always be of type ``RubinRepoMan``, and will contain the definitions of how many daily, weekly, and release images to prepull. +Beneath the ``RubinRepoMan`` entry, you should find an entry that looks like: + +.. code-block:: json + + { + "type": "SimpleRepoMan", + "images": [ + { + "image_url": "registry.hub.docker.com/lsstsqre/sciplat-lab:w_2021_33", + "name": "Weekly 2021_33" + } + ] + } + +Replace the tag and name with the weekly tag and corresponding name for the weekly image that is also tagged ``recommended``. 
+ +Once this change is merged, sync cachemachine (using Argo CD) in the affected environments. +You do not have to wait for a maintenance window to do this, since the change is low risk, although it will result in a very brief outage for Notebook Aspect lab spawning while cachemachine is restarted. + +cachemachine will then spawn a ``DaemonSet`` that pulls the weekly tag to every node, which as mentioned above will be fairly quick since Kubernetes will realize it already has the image cached under another name. +Once cachemachine rechecks the cached images on each node, it will have enough information to build the menu correctly, and the spawner menu in the Notebook Aspect should be correct. diff --git a/docs/applications/cachemachine/values.md b/docs/applications/cachemachine/values.md new file mode 100644 index 0000000000..b21aea8ffb --- /dev/null +++ b/docs/applications/cachemachine/values.md @@ -0,0 +1,12 @@ +```{px-app-values} cachemachine +``` + +# Cachemachine Helm values reference + +Helm values reference table for the {px-app}`cachemachine` application. + +```{include} ../../../services/cachemachine/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/ops/cert-issuer/bootstrapping.rst b/docs/applications/cert-manager/bootstrap.rst similarity index 70% rename from docs/ops/cert-issuer/bootstrapping.rst rename to docs/applications/cert-manager/bootstrap.rst index c639253908..d7dd489a21 100644 --- a/docs/ops/cert-issuer/bootstrapping.rst +++ b/docs/applications/cert-manager/bootstrap.rst @@ -1,15 +1,17 @@ -######################### -Bootstrapping cert-issuer -######################### +.. px-app-bootstrap:: cert-manager -The issuer defined in the ``cert-issuer`` service uses the DNS solver. +########################## +Bootstrapping cert-manager +########################## + +The issuer defined in the :px-app:`cert-manager` application uses the DNS solver. 
The advantage of the DNS solver is that it works behind firewalls and can provision certificates for environments not exposed to the Internet, such as the Tucson teststand. The DNS solver uses an AWS service user with write access to Route 53 to answer Let's Encrypt challenges. -In order to use ``cert-issuer``, you must be hosting the DNS for the external hostname of the Science Platform installation in AWS Route 53. +In order to use ``cert-manager``, you must be hosting the DNS for the external hostname of the Science Platform installation in AWS Route 53. See :ref:`hostnames` for more information. -First, ensure that ``cert-issuer`` is set up for the domain in which the cluster will be hosted. +First, ensure that ``cert-manager`` is set up for the domain in which the cluster will be hosted. If this is a new domain, follow the instructions in :doc:`route53-setup`. Then, in Route 53, create a CNAME from ``_acme-challenge.`` to ``_acme-challenge.tls.`` where ```` is the domain in which the cluster is located (such as ``lsst.codes`` or ``lsst.cloud``). @@ -23,13 +25,10 @@ Add the following to the ``values-*.yaml`` file for an environment: .. code-block:: yaml - solver: + config: route53: - aws-access-key-id: - hosted-zone: - vault-secret-path: "secret/k8s_operator//cert-manager" - -replacing ```` with the FQDN of the cluster, corresponding to the root of the Vault secrets for that cluster (see :doc:`../vault-secrets-operator/index`). + awsAccessKeyId: "" + hostedZone: "" ```` and ```` must correspond to the domain under which the cluster is hosted. The values for the two most common Rubin Science Platform domains are: @@ -37,11 +36,11 @@ The values for the two most common Rubin Science Platform domains are: .. 
code-block:: yaml lsst.codes: - aws-access-key-id: AKIAQSJOS2SFLUEVXZDB - hosted-zone: Z06873202D7WVTZUFOQ42 + awsAccessKeyId: "AKIAQSJOS2SFLUEVXZDB" + hostedZone: "Z06873202D7WVTZUFOQ42" lsst.cloud: - aws-access-key-id: AKIAQSJOS2SFKQBMDRGR - hosted-zone: Z0567328105IEHEMIXLCO + awsAccessKeyId: "AKIAQSJOS2SFKQBMDRGR" + hostedZone: "Z0567328105IEHEMIXLCO" This key ID is for an AWS service user that has write access to the ``tls`` subdomain of the domain in which the cluster is hosted, and therefore can answer challenges. @@ -51,8 +50,12 @@ The Vault secret should look something like this: .. code-block:: yaml data: - aws-access-key-id: - aws-secret-access-key: + aws-access-key-id: "" + aws-secret-access-key: "" The secrets for the SQuaRE-maintained Rubin Science Platform domains are stored in 1Password (search for ``cert-manager-lsst-codes`` or ``cert-manager-lsst-cloud``). If this cluster is in the same domain as another, working cluster, you can copy the secret from that cluster into the appropriate path for the new cluster. + +.. seealso:: + + `cert-manager documentation for Route 53 `__. diff --git a/docs/applications/cert-manager/index.rst b/docs/applications/cert-manager/index.rst new file mode 100644 index 0000000000..6bffb9f54b --- /dev/null +++ b/docs/applications/cert-manager/index.rst @@ -0,0 +1,24 @@ +.. px-app:: cert-manager + +###################################### +cert-manager — TLS certificate manager +###################################### + +cert-manager manages TLS certificates internal to the Science Platform Kubernetes cluster. +It may also manage the external TLS certificate for the cluster ingresses if the `Let's Encrypt `__ approach to certificate management was chosen. + +See :ref:`hostnames` for more details on the supported approaches for managing the external TLS certificate. + +.. jinja:: cert-manager + :file: applications/_summary.rst.jinja + +Guides +====== + +.. 
toctree:: + + notes + bootstrap + route53-setup + upgrade + values diff --git a/docs/applications/cert-manager/notes.rst b/docs/applications/cert-manager/notes.rst new file mode 100644 index 0000000000..95417dd23e --- /dev/null +++ b/docs/applications/cert-manager/notes.rst @@ -0,0 +1,27 @@ +.. px-app-notes:: cert-manager + +################################### +Cert-manager architecture and notes +################################### + +The :px-app:`cert-manager` service is an installation of `cert-manager `__ from its `Helm chart repository `__. +It creates cluster-internal private TLS certificates for applications that need them (such as for admission webhooks). +It may also create TLS certificates via `Let's Encrypt `__ and automatically renew them if the environment uses Let's Encrypt certificates. + +``cert-manager`` optionally creates a cluster issuer that uses the DNS solver and Route 53 for DNS. +Set ``config.createIssuer`` to ``false`` for environments where cert-manager should be installed but not use a Route 53 cluster issuer. + +For more information on the options for TLS certificate management, see :ref:`hostnames`. + +Using cert-manager +================== + +To configure an Ingress_ to use certificates issued by it, add a ``tls`` configuration to the ingress and the annotation: + +.. code-block:: yaml + + cert-manager.io/cluster-issuer: "letsencrypt-dns" + +Typically, this should be done on one and only one Ingress_ for an environment using ``cert-manager``. +The RSP conventionally uses the :px-app:`squareone` application. +(There are some special exceptions that have their own ingresses or otherwise need valid CA-issued certificates, such as :px-app:`alert-stream-broker` and :px-app:`sasquatch`.) 
diff --git a/docs/ops/cert-issuer/route53-setup.rst b/docs/applications/cert-manager/route53-setup.rst similarity index 61% rename from docs/ops/cert-issuer/route53-setup.rst rename to docs/applications/cert-manager/route53-setup.rst index 3743b9d5b8..cfde31924e 100644 --- a/docs/ops/cert-issuer/route53-setup.rst +++ b/docs/applications/cert-manager/route53-setup.rst @@ -1,30 +1,29 @@ -################################### -Setting up Route 53 for cert-issuer -################################### +#################################### +Setting up Route 53 for cert-manager +#################################### -Each domain under which ``cert-issuer`` needs to issue certificates must be configured in AWS. +Each domain under which ``cert-manager`` needs to issue certificates must be configured in AWS. This involves creating a new hosted zone for the DNS challenges for that domain, creating an AWS service user with an appropriate IAM policy, and creating an access key for that user which will be used by ``cert-manager``. Normally, DNS challenges work by writing a text record to the ``_acme-challenge.`` record for the hostname for which one is obtaining a certificate. However, Route 53 IAM policies are only granular to the level of a hosted zone. -To give ``cert-manager`` write access to the whole hosted zone would be exessive, since it could then modify any other records. +To give ``cert-manager`` write access to the whole hosted zone would be excessive, since it could then modify any other records. Therefore, we use a strategy documented in the `cert-manager documentation for Route 53 `__ to delegate only the relevant records. To do this for a new zone, do the following. -In these instructions, the new zone is shown as ``new.zone``. +In these instructions, the new zone is shown as :samp:`new.zone`. In practice this will be a zone like ``lsst.codes`` or ``lsst.cloud``. This must be a public domain served from normal Internet domain servers. 
It cannot be a private domain present only in Route 53. -#. Create a new hosted zone named ``tls.new.zone`` in Route 53. +#. Create a new hosted zone named :samp:`tls.{new.zone}` in Route 53. Make a note of its zone ID. -#. Add the NS glue record for ``tls.new.zone`` to ``new.zone`` in Route 53. - See `the Amazon documentation `__ for more details. +#. Add the NS glue record for :samp:`tls.{new.zone}` to :samp:`{new.zone}` in Route 53. + See `the Route 53 documentation `__ for more details. -#. Create a new IAM user named ``cert-manager-new-zone``. - (Don't forget to replace ``new-zone`` with the name of your zone.) - Attach an inline IAM policy for that user that gives it access to the new ``tls.new.zone`` hosted zone. +#. Create a new IAM user named :samp:`cert-manager-{new-zone}` (replace ``new-zone`` with the name of your zone). + Attach an inline IAM policy for that user that gives it access to the new :samp:`tls.{new.zone}` hosted zone. .. code-block:: json @@ -49,13 +48,13 @@ It cannot be a private domain present only in Route 53. ] } - replacing ```` with the ID of the hosted zone. - (This will be a string similar to ``Z0567328105IEHEMIXLCO``.) + Replace :samp:`{}` with the ID of the hosted zone + (which is a string looking like ``Z0567328105IEHEMIXLCO``.) #. Create an access key for that user. - Store the access key and secret key pair in 1Password as ``cert-manager-new-zone``. + Store the access key and secret key pair in 1Password as :samp:`cert-manager-{new-zone}`. -You can now follow the instructions in :doc:`bootstrapping` to set up the new cluster. +You can now follow the instructions in :px-app-bootstrap:`cert-manager` to set up the new cluster. The above instructions only have to be done once per domain. -After that, any new clusters in the same domain will only need the addition of a CNAME and some Vault and Argo CD configuration, as described in :doc:`bootstrapping`. 
+After that, any new clusters in the same domain will only need the addition of a CNAME and some Vault and Argo CD configuration, as described in :px-app-bootstrap:`cert-manager`. diff --git a/docs/applications/cert-manager/upgrade.rst b/docs/applications/cert-manager/upgrade.rst new file mode 100644 index 0000000000..496badca15 --- /dev/null +++ b/docs/applications/cert-manager/upgrade.rst @@ -0,0 +1,17 @@ +.. px-app-upgrade:: cert-manager + +###################### +Upgrading cert-manager +###################### + +Upgrading :px-app:`cert-manager` is generally painless. +The only custom configuration that we use, beyond installing a cluster issuer, is to tell the Helm chart to install the Custom Resource Definitions. + +Normally, it's not necessary to explicitly test :px-app:`cert-manager` after a routine upgrade. +We will notice if the certificates expire. +However, if you want to be sure that cert-manager is still working after an upgrade, delete the TLS secret and ``Certificate`` resource in the ``squareone`` namespace. +It should be recreated by cert-manager. + +.. warning:: + + This may cause an outage for the Science Platform since it is using this certificate, so you may want to be prepared to port-forward to get to the Argo CD UI in case something goes wrong. diff --git a/docs/applications/cert-manager/values.md b/docs/applications/cert-manager/values.md new file mode 100644 index 0000000000..3d82099205 --- /dev/null +++ b/docs/applications/cert-manager/values.md @@ -0,0 +1,12 @@ +```{px-app-values} cert-manager +``` + +# Cert-manager Helm values reference + +Helm values reference table for the {px-app}`cert-manager` application. + +```{include} ../../../services/cert-manager/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/datalinker/index.rst b/docs/applications/datalinker/index.rst new file mode 100644 index 0000000000..a47312fa62 --- /dev/null +++ b/docs/applications/datalinker/index.rst @@ -0,0 +1,24 @@ +.. 
px-app:: datalinker + +################################## +datalinker — IVOA DataLink service +################################## + +datalinker provides various facilities for discovering and referring to data products and services within the Rubin Science Platform. +It is primarily based on the IVOA DataLink standard, but also provides some related service discovery facilities beyond the scope of that standard. + +Most significantly, datalinker is used to retrieve images referenced in the results of an ObsTAP search. +It does this by returning a DataLink response for the image that includes a signed URL, allowing direct image download from the underlying data store. + +It also provides the HiPS list service, which collects the property files of HiPS data sets served by :px-app:`hips` and returns them with appropriate URLs, and implements a variety of "microservice" endpoints that rewrite simple service-descriptor-friendly APIs into redirects to other RSP services. + +.. jinja:: datalinker + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/datalinker/values.md b/docs/applications/datalinker/values.md new file mode 100644 index 0000000000..8964d8098d --- /dev/null +++ b/docs/applications/datalinker/values.md @@ -0,0 +1,12 @@ +```{px-app-values} datalinker +``` + +# Datalinker Helm values reference + +Helm values reference table for the {px-app}`datalinker` application. + +```{include} ../../../services/datalinker/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/exposurelog/index.rst b/docs/applications/exposurelog/index.rst new file mode 100644 index 0000000000..bc70c9fb45 --- /dev/null +++ b/docs/applications/exposurelog/index.rst @@ -0,0 +1,18 @@ +.. 
px-app:: exposurelog + +################################## +exposurelog — Exposure message log +################################## + +Exposure log is a REST web service to create and manage log messages that are associated with a particular exposure. + +.. jinja:: exposurelog + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/exposurelog/values.md b/docs/applications/exposurelog/values.md new file mode 100644 index 0000000000..67c1045b96 --- /dev/null +++ b/docs/applications/exposurelog/values.md @@ -0,0 +1,12 @@ +```{px-app-values} exposurelog +``` + +# Exposure log Helm values reference + +Helm values reference table for the {px-app}`exposurelog` application. + +```{include} ../../../services/exposurelog/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/gafaelfawr/bootstrap.rst b/docs/applications/gafaelfawr/bootstrap.rst new file mode 100644 index 0000000000..e958d145d7 --- /dev/null +++ b/docs/applications/gafaelfawr/bootstrap.rst @@ -0,0 +1,37 @@ +.. px-app-bootstrap:: gafaelfawr + +######################## +Bootstrapping Gafaelfawr +######################## + +The primary documentation for configuring Gafaelfawr for a new environment is the `Gafaelfawr user guide `__. +That guide should provide most of the information required to write the ``values-.yaml`` file for Gafaelfawr for a new environment. + +For a new environment, it's worth reading all of the user guide. +There are a lot of configuration decisions you will need to make. + +If you run into authentication problems after installing your new environment, see :doc:`troubleshoot`. + +Choose an identity provider +=========================== + +As described there, the primary configuration you will need to do is to choose between GitHub, CILogon, and a local OpenID Connect identity provider as a source of authentication. 
+If you choose an identity provider other than GitHub, you will then also have to decide how to retrieve user identity information such as full name, email address, UID, GID, and group membership. + +:dmtn:`225` is a useful reference for user identity information sources for current Science Platform environments. +It may be helpful as a model for deciding policy for new environments. + +If you choose GitHub as the identity provider, you may need to configure the privacy settings of organizations used for user groups. +See :doc:`github-organizations` for more details. + +Assign scopes and admins +======================== + +You will also need to assign scopes to users based on either their group membership (for CILogon and local identity providers) or their GitHub team membership. +This is done with the ``config.groupMapping`` setting in ``values-.yaml``. + +See :dmtn:`235` for a list of scopes used by the Science Platform. +You will need to assign all of them except ``admin:token`` and ``user:token``, which are handled internally by Gafaelfawr. + +For ``admin:token``, ensure that the list of usernames in ``config.initialAdmins`` is correct before you start Gafaelfawr for the first time. +Otherwise, you will need to add admins later via the Gafaelfawr API. 
diff --git a/docs/applications/gafaelfawr/github-oauth.png b/docs/applications/gafaelfawr/github-oauth.png new file mode 100644 index 0000000000..9b26608c2c Binary files /dev/null and b/docs/applications/gafaelfawr/github-oauth.png differ diff --git a/docs/applications/gafaelfawr/github-organizations.rst b/docs/applications/gafaelfawr/github-organizations.rst new file mode 100644 index 0000000000..17f0022ac0 --- /dev/null +++ b/docs/applications/gafaelfawr/github-organizations.rst @@ -0,0 +1,22 @@ +################################## +Releasing GitHub organization data +################################## + +This applies only to Science Platform environments that use GitHub for authentication, not to ones that use CILogon or a local identity provider. + +When the user is sent to GitHub to perform an OAuth 2.0 authentication, they are told what information about their account the application is requesting, and are prompted for which organizational information to release. +Since we're using GitHub for group information, all organizations that should contribute to group information (via team membership) must have their data released. +GitHub supports two ways of doing this: make the organization membership public, or grant the OAuth App access to that organization's data explicitly. + +GitHub allows the user to do the latter in the authorization screen during OAuth 2.0 authentication. + +.. figure:: github-oauth.png + :name: GitHub OAuth authorization screen + + The authorization screen shown by GitHub during an OAuth App authentication. + The organizations with green checkmarks either have public membership or that OAuth App was already authorized to get organization data from them. + The "InterNetNews" organization does not share organization membership but allows any member to authorize new OAuth Apps with the :guilabel:`Grant`. 
+ The "cracklib" organization does not share organization membership and requires any new authorizations be approved by administrators, which can be requested with :guilabel:`Request`. + +This UI is not very obvious for users, and for security reasons we may not wish users who are not organization administrators to be able to release organization information to any OAuth App that asks. +Therefore, either organization membership should be set to public for all organizations used to control access to Science Platform deployments protected by GitHub, or someone authorized to approve OAuth Apps for each organization that will be used for group information should authenticate to the Science Platform deployment and use the :guilabel:`Grant` button to grant access to that organization's data. diff --git a/docs/applications/gafaelfawr/index.rst b/docs/applications/gafaelfawr/index.rst new file mode 100644 index 0000000000..13ee26b32c --- /dev/null +++ b/docs/applications/gafaelfawr/index.rst @@ -0,0 +1,28 @@ +.. px-app:: gafaelfawr + +###################################### +gafaelfawr — Authentication & identity +###################################### + +Gafaelfawr provides authentication and identity management services for the Rubin Science Platform. +It is primarily used as an NGINX ``auth_request`` handler configured via annotations on the ``Ingress`` resources of Science Platform services. +In that role, it requires a user have the required access scope to use that service, rejects users who do not have that scope, and redirects users who are not authenticated to the authentication process. + +Gafaelfawr supports authentication via either OpenID Connect (often through CILogon_) or GitHub. + +Gafaelfawr also provides a token management API and (currently) UI for users of the Science Platform. + +.. jinja:: gafaelfawr + :file: applications/_summary.rst.jinja + +Guides +====== + +..
toctree:: + :maxdepth: 2 + + bootstrap + recreate-token + github-organizations + troubleshoot + values diff --git a/docs/ops/gafaelfawr/recreate-token.rst b/docs/applications/gafaelfawr/recreate-token.rst similarity index 53% rename from docs/ops/gafaelfawr/recreate-token.rst rename to docs/applications/gafaelfawr/recreate-token.rst index a6b9ac46c7..21da258661 100644 --- a/docs/ops/gafaelfawr/recreate-token.rst +++ b/docs/applications/gafaelfawr/recreate-token.rst @@ -3,13 +3,14 @@ Recreating Gafaelfawr service tokens #################################### Where possible, we use persistent storage for Gafaelfawr's Redis database so that its tokens survive restarts and upgrades. -However, persistent storage isn't enabled for some clusters, such as (at the time of this writing) the yagan cluster at the summit. -On those clusters, if the ``gafaelfawr-redis`` service is restarted, its storage is cleared, and therefore all tokens will be invalidated. +However, if that persistent storage is deleted for some reason, or if Gafaelfawr is not configured to use persistent storage, all tokens will be invalidated. -When this happens, depending on the order of restart, the ``gafaelfawr-tokens`` pod that is responsible for maintaining service tokens in the cluster may not realize those tokens are no longer valid. -This will primarily affect the Notebook Aspect, which will be unable to authenticate to ``moneypenny`` and thus will not be able to spawn pods. +When this happens, depending on the order of restart, the ``gafaelfawr-tokens`` pod that is responsible for maintaining service tokens in the cluster may take up to 30 minutes to realize those tokens are no longer valid. +This will primarily affect the Notebook Aspect, which will be unable to authenticate to moneypenny and thus will not be able to spawn pods. The result will be a "permission denied" error from moneypenny. 
+Gafaelfawr will automatically fix this problem after 30 minutes, but unfortunately the JupyterHub component of ``nublado2`` currently loads its token on startup and doesn't pick up changes. + +The easiest way to fix this problem is to force revalidation of all of the Gafaelfawr service tokens. +To do that: @@ -21,3 +22,7 @@ To do that: Be aware that when the Redis storage is wiped, all user tokens will also be invalidated. Users will be prompted to log in again the next time they go to the Science Platform. + +Invalidating the Redis storage will also result in inconsistencies between Redis and SQL that will produce nightly alerts if Gafaelfawr is configured to send Slack alerts. +To fix the inconsistencies, run ``gafaelfawr audit --fix`` inside the Gafaelfawr pod using ``kubectl exec``. +This will locate all the tokens that are no longer valid and mark them as expired in the database as well. diff --git a/docs/applications/gafaelfawr/troubleshoot.rst b/docs/applications/gafaelfawr/troubleshoot.rst new file mode 100644 index 0000000000..0428201946 --- /dev/null +++ b/docs/applications/gafaelfawr/troubleshoot.rst @@ -0,0 +1,53 @@ +.. px-app-troubleshooting:: gafaelfawr + +############### +Troubleshooting +############### + +.. _gafaelfawr-no-access: + +User has no access to services +============================== + +If a user successfully authenticates through the Gafaelfawr ``/login`` route but then cannot access an application such as the Notebook or Portal, or if Gafaelfawr tells them that they are not a member of any authorized groups, start by determining what groups the user is a member of. + +Have the user go to ``/auth/api/v1/user-info``, which will provide a JSON dump of their authentication information. +There is nothing secret in this information, so they can safely cut and paste it into a help ticket, Slack, etc. + +The important information is in the ``groups`` portion of the JSON document. +This shows the group membership as seen by Gafaelfawr.
+Scopes are then assigned based on the ``config.groupMapping`` configuration in the ``values-*.yaml`` file for that environment. +Chances are good that the user is not a member of a group that conveys the appropriate scopes. + +From there, the next step is usually to determine why the user is not a member of the appropriate group. +Usually this means they weren't added or (in the case of groups from GitHub teams) didn't accept the invitation. + +For a new GitHub configuration, it's possible that the organizational membership is private and the user didn't release it. +See :doc:`github-organizations` for more details about that problem. + +COmanage enrollment fails after prompting for attributes +======================================================== + +If all attempts to enroll new users in COmanage fail after the user enters their name and email address with the error "Please recheck the highlighted fields," the issue is probably with the enrollment attribute configuration. +If there is a problem with the configuration of a hidden field, the error message may be very confusing and non-specific. + +Double-check the configuration of the "Self Signup With Approval" enrollment flow against :sqr:`055`. +Pay careful attention to the enrollment attributes, particularly the "Users group" configuration, which has a hidden value. +There is currently a bug in COmanage that causes it to not display the default values for attributes properly, so you may need to edit the enrollment attribute and set the default value again to be certain it's correct. + +Viewing logs +============ + +For other issues, looking at the pod logs for the ``gafaelfawr`` deployment in the ``gafaelfawr`` namespace is the best next step. +The best way to look at current logs is via Argo CD, which will group together the logs from all pods managed by that deployment and optionally add timestamps. 
+ +Find the ``Deployment`` resource named ``gafaelfawr`` (not the Redis or tokens deployment) and choose :guilabel:`Logs` from the menu. +Then, use the :guilabel:`Containers` button (it looks like three horizontal lines with the middle one offset) at the top and select the ``gafaelfawr`` container. +That will show a merged view of the logs of all of the pods, and you can look for error messages. + +You can also add timestamps to the start of each line and download the logs with other buttons at the top. +Downloading logs will give you somewhat older logs, although usually only about a half-hour's worth since Gafaelfawr generates a lot of logs. + +The logs from Gafaelfawr are in JSON format. +The best way to search older logs (and arguably the best way to look at current logs) is to use a JSON-aware log view and search tool if available for the environment that you're debugging. +For the IDF environments, use `Google Log Explorer `__. diff --git a/docs/applications/gafaelfawr/values.md b/docs/applications/gafaelfawr/values.md new file mode 100644 index 0000000000..1693601a81 --- /dev/null +++ b/docs/applications/gafaelfawr/values.md @@ -0,0 +1,12 @@ +```{px-app-values} gafaelfawr +``` + +# Gafaelfawr Helm values reference + +Helm values reference table for the {px-app}`gafaelfawr` application. + +```{include} ../../../services/gafaelfawr/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/hips/index.rst b/docs/applications/hips/index.rst new file mode 100644 index 0000000000..cb5578b875 --- /dev/null +++ b/docs/applications/hips/index.rst @@ -0,0 +1,25 @@ +.. px-app:: hips + +####################### +hips — HiPS tile server +####################### + +Serves HiPS_ tiles from an object store backed by Google Cloud Storage. +This is an interim approach that will eventually be replaced by serving the tiles directly from Google Cloud Storage with special code to handle authentication. + +.. 
_HiPS: https://www.ivoa.net/documents/HiPS/ + +It is a replacement for the normal static file server approach to serving HiPS file trees, used because Rubin Observatory prefers object storage for all data products. + +The HiPS list, which catalogues all available HiPS file trees, is generated and served by :px-app:`datalinker` instead of this application. + +.. jinja:: hips + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/hips/values.md b/docs/applications/hips/values.md new file mode 100644 index 0000000000..30ebaf14a9 --- /dev/null +++ b/docs/applications/hips/values.md @@ -0,0 +1,12 @@ +```{px-app-values} hips +``` + +# hips Helm values reference + +Helm values reference table for the {px-app}`hips` application. + +```{include} ../../../services/hips/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/index.rst b/docs/applications/index.rst new file mode 100644 index 0000000000..c1130ba7af --- /dev/null +++ b/docs/applications/index.rst @@ -0,0 +1,57 @@ +############ +Applications +############ + +Applications are individual *atomic* services that are configured and deployed through Phalanx. +Each environment can opt whether to deploy an application, and also customize the configuration of the application. +This section of the documentation describes each Phalanx application. + +To learn how to develop applications for Phalanx, see the :doc:`/developers/index` section. + +.. toctree:: + :maxdepth: 1 + :caption: Cluster infrastructure + + argo-cd/index + cert-manager/index + ingress-nginx/index + gafaelfawr/index + postgres/index + vault-secrets-operator/index + +.. 
toctree:: + :maxdepth: 1 + :caption: Rubin Science Platform + + cachemachine/index + datalinker/index + hips/index + linters/index + mobu/index + moneypenny/index + noteburst/index + nublado2/index + portal/index + semaphore/index + sherlock/index + sqlproxy-cross-project/index + squareone/index + tap/index + tap-schema/index + times-square/index + vo-cutouts/index + +.. toctree:: + :maxdepth: 1 + :caption: RSP+ + + alert-stream-broker/index + exposurelog/index + narrativelog/index + plot-navigator/index + production-tools/index + sasquatch/index + strimzi/index + strimzi-registry-operator/index + telegraf/index + telegraf-ds/index diff --git a/docs/ops/ingress-nginx/certificates.rst b/docs/applications/ingress-nginx/certificates.rst similarity index 60% rename from docs/ops/ingress-nginx/certificates.rst rename to docs/applications/ingress-nginx/certificates.rst index f6d62034f6..785326622f 100644 --- a/docs/ops/ingress-nginx/certificates.rst +++ b/docs/applications/ingress-nginx/certificates.rst @@ -5,8 +5,8 @@ TLS certificates The entire Science Platform uses the same external hostname and relies on NGINX merging all the ingresses into a single virtual host with a single TLS configuration. As discussed in :ref:`hostnames`, TLS for the Science Platform can be configured with either a default certificate in ``ingress-nginx`` or through Let's Encrypt with the DNS solver. -If an installation is using Let's Encrypt with the DNS solver, no further configuration of the NGINX ingresss is required. -See :doc:`../cert-issuer/bootstrapping` for setup information. +If an installation is using Let's Encrypt with the DNS solver, no further configuration of the NGINX ingress is required. +See :px-app-bootstrap:`cert-manager` for setup information. When using a commercial certificate, that certificate should be configured in the ``values-*.yaml`` for ``ingress-nginx`` for that environment. 
Specifically, add the following under ``ingress-nginx.controller``: @@ -14,20 +14,18 @@ Specifically, add the following under ``ingress-nginx.controller``: .. code-block:: yaml extraArgs: - default-ssl-certificate: ingress-nginx/ingress-certificate + default-ssl-certificate: "ingress-nginx/ingress-certificate" -and add, at the top level: +And at the top level, add: .. code-block:: yaml - vault_certificate: + vaultCertificate: enabled: true - path: secret/k8s_operator//ingress-nginx -replacing ```` with the hostname of the environment. -Then, in the Vault key named by that path, store the commercial certificate. -The Vault secret should have two keys: ``tls.crt`` and ``tls.key``. -The first should contain the full public certificate chain. -The second should contain the private key (without a passphrase). +Then, in the Vault key named ``ingress-nginx`` in the Vault enclave for that environment, store the commercial certificate. +The Vault secret must have two keys: ``tls.crt`` and ``tls.key``. +The first must contain the full public certificate chain. +The second must contain the private key (without a passphrase). For an example of an environment configured this way, see `/services/ingress-nginx/values-minikube.yaml `__ diff --git a/docs/applications/ingress-nginx/index.rst b/docs/applications/ingress-nginx/index.rst new file mode 100644 index 0000000000..a08dbe76b4 --- /dev/null +++ b/docs/applications/ingress-nginx/index.rst @@ -0,0 +1,23 @@ +.. px-app:: ingress-nginx + +################################## +ingress-nginx — Ingress controller +################################## + +The ``ingress-nginx`` application is an installation of `ingress-nginx `__ from its `Helm chart `__. +It is used as the ingress controller for all Science Platform applications. 
+ +We use ingress-nginx, rather than any native ingress controller, in all Rubin Science Platform environments because we use the NGINX ``auth_request`` feature to do authentication and authorization with :px-app:`gafaelfawr`. +We also apply custom configuration required for correct operation of the Portal Aspect, to support our ``NetworkPolicy`` rules, and to ensure `mostly-correct logging of client IP addresses `__. + +.. jinja:: ingress-nginx + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 2 + + certificates + values diff --git a/docs/applications/ingress-nginx/values.md b/docs/applications/ingress-nginx/values.md new file mode 100644 index 0000000000..b006a12d1c --- /dev/null +++ b/docs/applications/ingress-nginx/values.md @@ -0,0 +1,12 @@ +```{px-app-values} ingress-nginx +``` + +# Ingress-nginx Helm values reference + +Helm values reference table for the {px-app}`ingress-nginx` application. + +```{include} ../../../services/ingress-nginx/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/linters/index.rst b/docs/applications/linters/index.rst new file mode 100644 index 0000000000..e91f97b9c2 --- /dev/null +++ b/docs/applications/linters/index.rst @@ -0,0 +1,21 @@ +.. px-app:: linters + +###################################### +linters — automated checking of DNS +###################################### + +Linters provides a way to automatically and repeatedly check things in ops, such as whether DNS entries +are pointing to IP addresses that we are using, or whether they are dangling. We use the route53 API +as well as the Google API to cross-reference these configuration details and alert on things that +don't look right. + +.. jinja:: linters + :file: applications/_summary.rst.jinja + +Guides +====== + +.. 
toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/linters/values.md b/docs/applications/linters/values.md new file mode 100644 index 0000000000..bd47e34e7a --- /dev/null +++ b/docs/applications/linters/values.md @@ -0,0 +1,12 @@ +```{px-app-values} linters +``` + +# Linters Helm values reference + +Helm values reference table for the {px-app}`linters` application. + +```{include} ../../../services/linters/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/mobu/configuring.rst b/docs/applications/mobu/configuring.rst new file mode 100644 index 0000000000..017f2fe685 --- /dev/null +++ b/docs/applications/mobu/configuring.rst @@ -0,0 +1,139 @@ +################ +Configuring mobu +################ + +Configuring mobu consists primarily of defining the flocks of monkeys that it should run. +This is done by setting the ``autostart`` key in the ``values-*.yaml`` file for that deployment to a list of flock definitions. +The definition of a flock must follow the same schema as a ``PUT`` to the ``/mobu/flocks`` route to create a new flock via the API. +Complete documentation is therefore available at the ``/mobu/redoc`` route on a given deployment. +This is just an overview of the most common configurations. + +Simple configuration +==================== + +Here is a simple configuration with a single flock that tests the Notebook Aspect by spawning a pod, running some Python, and then destroying the pod again: + +.. code-block:: yaml + + autostart: + - name: "python" + count: 1 + users: + - username: "bot-mobu-user" + scopes: ["exec:notebook"] + business: "JupyterPythonLoop" + options: + jupyter: + image_size: "Small" + restart: true + +Important points to note here: + +* The ``autostart`` key takes a list of flocks of monkeys. + Each one must have a ``name`` (which controls the URL for that flock under ``/mobu/flocks`` once it has been created) and a ``count`` key specifying how many monkeys will be performing this test. 
+ +* Users must be defined for each monkey. + There are two ways to do this: specifying a list of users equal to the number of monkeys being run, or providing a specification for users that is used to programmatically generate usernames, UIDs, and GIDs. + An example of the latter will be given below. + Here, this specifies a single user with the name ``bot-mobu-user``. + Usernames must begin with ``bot-``. + Neither a UID nor a GID is specified, which means that Gafaelfawr has to be able to generate UIDs and GIDs on the fly. + This configuration will therefore only work if this deployment enables Firestore for UID and GID generation, and enables synthesizing user private groups. + +* If the monkey user will need additional scopes, they must be specified. + Here, the required scope is ``exec:notebook``, which allows spawning Notebooks. + More scopes would be needed if the monkey were running notebooks that interacted with other applications. + +* The ``business`` key specifies the type of test to perform. + Here, ``JupyterPythonLoop`` just runs a small bit of Python through the Jupyter lab API after spawning a lab pod. + ``options.jupyter`` specifies additional options for the chosen business, and these are business-specific. + See the full mobu documentation for more details. + +* ``restart: true`` tells mobu to shut down and respawn the pod if there is any failure. + The default is to attempt to keep using the same pod despite the failure. + +Testing with notebooks +====================== + +Here is a more complex example that runs a set of notebooks as a test: + +.. 
code-block:: yaml + + autostart: + - name: "firefighter" + count: 1 + users: + - username: "bot-mobu-recommended" + uidnumber: 74768 + gidnumber: 74768 + scopes: + - "exec:notebook" + - "exec:portal" + - "read:image" + - "read:tap" + business: "NotebookRunner" + options: + repo_url: "https://github.com/lsst-sqre/system-test.git" + repo_branch: "prod" + max_executions: 1 + restart: true + +Here, note that the UID and primary GID for the user are specified, so this example will work in deployments that do not use Firestore and synthesized user private groups. + +This uses the business ``NotebookRunner`` instead, which checks out a Git repository and runs all notebooks at the top level of that repository. +The repository URL and branch are configured in ``options``. +``options.max_executions: 1`` tells mobu to shut down and respawn the pod after each notebook. +This exercises pod spawning more frequently, but does not test the lab's ability to run a long series of notebooks. +One may wish to run multiple flocks in a given environment with different configurations for ``max_executions``. +These notebooks need more scopes, so those scopes are specified. + +Here is a different example that runs multiple monkeys in a flock: + +.. code-block:: yaml + + autostart: + - name: "firefighter" + count: 5 + user_spec: + username_prefix: "bot-mobu-recommended" + uid_start: 74768 + gid_start: 74768 + scopes: + - "exec:notebook" + - "exec:portal" + - "read:image" + - "read:tap" + business: "NotebookRunner" + options: + repo_url: "https://github.com/lsst-sqre/system-test.git" + repo_branch: "prod" + max_executions: 1 + restart: true + +This is almost identical except that it specifies five monkeys and provides a specification for creating the users instead of specifying each user. +The users will be assigned consecutive UIDs and GIDs starting with the specified ``uid_start`` and ``gid_start``. 
+The usernames will be formed by adding consecutive digits to the end of the ``username_prefix``. + +Testing TAP +=========== + +Here is an example of testing the TAP application: + +.. code-block:: yaml + + autostart: + - name: "tap" + count: 1 + users: + - username: "bot-mobu-tap" + uidnumber: 74775 + gidnumber: 74775 + scopes: ["read:tap"] + business: "TAPQueryRunner" + restart: true + options: + tap_sync: true + tap_query_set: "dp0.2" + +Note that ``business`` is set to ``TAPQueryRunner`` instead. +``options.tap_sync`` chooses between sync and async queries, and ``options.tap_query_set`` can be used to specify the query set to run. diff --git a/docs/applications/mobu/index.rst b/docs/applications/mobu/index.rst new file mode 100644 index 0000000000..e047dc36b4 --- /dev/null +++ b/docs/applications/mobu/index.rst @@ -0,0 +1,23 @@ +.. px-app:: mobu + +########################## +mobu — Integration testing +########################## + +mobu is the continuous integration testing framework for the Rubin Science Platform. +It runs some number of "monkeys" that simulate a random user of the Science Platform. +Those monkeys are organized into "flocks" that share a single configuration across all of the monkeys. +Failures are reported to Slack using a Slack incoming webhook. + +.. jinja:: mobu + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 2 + + configuring + manage-flocks + values diff --git a/docs/service-guide/mobu-manage-flocks.rst b/docs/applications/mobu/manage-flocks.rst similarity index 76% rename from docs/service-guide/mobu-manage-flocks.rst rename to docs/applications/mobu/manage-flocks.rst index 535beebdc3..8bb81a58ef 100644 --- a/docs/service-guide/mobu-manage-flocks.rst +++ b/docs/applications/mobu/manage-flocks.rst @@ -1,8 +1,6 @@ +#################### Managing mobu flocks -==================== - -mobu is our monitoring system for the Science Platform. 
-It exercises JupyterHub and labs, and tests other services within the Science Platform by running notebooks on those labs. +#################### mobu calls each test runner a "monkey" and organizes them into groups called "flocks." You can get a list of flocks from the mobu API. diff --git a/docs/applications/mobu/values.md b/docs/applications/mobu/values.md new file mode 100644 index 0000000000..29c2e394b8 --- /dev/null +++ b/docs/applications/mobu/values.md @@ -0,0 +1,12 @@ +```{px-app-values} mobu +``` + +# Mobu Helm values reference + +Helm values reference table for the {px-app}`mobu` application. + +```{include} ../../../services/mobu/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/moneypenny/index.rst b/docs/applications/moneypenny/index.rst new file mode 100644 index 0000000000..fcc7b01682 --- /dev/null +++ b/docs/applications/moneypenny/index.rst @@ -0,0 +1,22 @@ +.. px-app:: moneypenny + +############################## +moneypenny — User provisioning +############################## + +Moneypenny is responsible for provisioning new users of the Notebook Aspect of a Science Platform installation. +It is invoked by :px-app:`nublado2` whenever a user pod is spawned and decides whether provisioning is required. +If so, it does so before the lab spawn, usually by spawning a privileged pod. + +A typical example of the type of provisioning it does is creating the user's home directory, with appropriate ownership and permissions, in an NFS file store. + +.. jinja:: moneypenny + :file: applications/_summary.rst.jinja + +Guides +====== + +.. 
toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/moneypenny/values.md b/docs/applications/moneypenny/values.md new file mode 100644 index 0000000000..58a6586b0c --- /dev/null +++ b/docs/applications/moneypenny/values.md @@ -0,0 +1,12 @@ +```{px-app-values} moneypenny +``` + +# moneypenny Helm values reference + +Helm values reference table for the {px-app}`moneypenny` application. + +```{include} ../../../services/moneypenny/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/narrativelog/index.rst b/docs/applications/narrativelog/index.rst new file mode 100644 index 0000000000..fa553d9371 --- /dev/null +++ b/docs/applications/narrativelog/index.rst @@ -0,0 +1,19 @@ +.. px-app:: narrativelog + +######################################## +narrativelog — Narrative observatory log +######################################## + +Narrative log provides an API for telescope operators to create and manage observatory log messages. +See :px-app:`exposurelog` for a similar service for log messages related to a specific exposure. + +.. jinja:: narrativelog + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/narrativelog/values.md b/docs/applications/narrativelog/values.md new file mode 100644 index 0000000000..64f214b837 --- /dev/null +++ b/docs/applications/narrativelog/values.md @@ -0,0 +1,12 @@ +```{px-app-values} narrativelog +``` + +# narrativelog Helm values reference + +Helm values reference table for the {px-app}`narrativelog` application. + +```{include} ../../../services/narrativelog/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/noteburst/index.rst b/docs/applications/noteburst/index.rst new file mode 100644 index 0000000000..dcd707021d --- /dev/null +++ b/docs/applications/noteburst/index.rst @@ -0,0 +1,18 @@ +.. 
px-app:: noteburst + +########################################### +noteburst — Notebook execution-as-a-service +########################################### + +Noteburst is a notebook execution service for the Rubin Science Platform. + +.. jinja:: noteburst + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/noteburst/values.md b/docs/applications/noteburst/values.md new file mode 100644 index 0000000000..75cc07fd25 --- /dev/null +++ b/docs/applications/noteburst/values.md @@ -0,0 +1,12 @@ +```{px-app-values} noteburst +``` + +# noteburst Helm values reference + +Helm values reference table for the {px-app}`noteburst` application. + +```{include} ../../../services/noteburst/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/nublado2/bootstrap.rst b/docs/applications/nublado2/bootstrap.rst new file mode 100644 index 0000000000..0401e7d390 --- /dev/null +++ b/docs/applications/nublado2/bootstrap.rst @@ -0,0 +1,32 @@ +.. px-app-bootstrap:: nublado2 + +##################### +Bootstrapping Nublado +##################### + +Nublado and :px-app:`moneypenny` need to know where the NFS server that provides user home space is. +Nublado also requires other persistent storage space. +Ensure the correct definitions are in place in their configuration. + +Telescope and Site deployments +============================== + +For Telescope and Site deployments that require instrument control, make sure you have any Multus network definitions you need in the ``values-.yaml``. +This will look something like: + +.. code-block:: yaml + + singleuser: + extraAnnotations: + k8s.v1.cni.cncf.io/networks: "kube-system/macvlan-conf" + initContainers: + - name: "multus-init" + image: "lsstit/ddsnet4u:latest" + securityContext: + privileged: true + +It's possible to list multiple Multus network names separated by commas in the annotation string. 
+Experimentally, it appears that the interfaces will appear in the order specified. + +The ``initContainers`` entry should be inserted verbatim. +It creates a privileged container that bridges user pods to the specified networks before releasing control to the user's lab. diff --git a/docs/applications/nublado2/index.rst b/docs/applications/nublado2/index.rst new file mode 100644 index 0000000000..88a29f0b52 --- /dev/null +++ b/docs/applications/nublado2/index.rst @@ -0,0 +1,22 @@ +.. px-app:: nublado2 + +############################# +nublado2 — JupyterHub for RSP +############################# + +The ``nublado2`` service is an installation of a Rubin Observatory flavor of `Zero to JupyterHub `__ with some additional resources. +It provides the Notebook Aspect of the Rubin Science Platform. + +.. jinja:: nublado2 + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 2 + + bootstrap + upgrade + troubleshoot + values diff --git a/docs/ops/nublado2/database.rst b/docs/applications/nublado2/troubleshoot.rst similarity index 79% rename from docs/ops/nublado2/database.rst rename to docs/applications/nublado2/troubleshoot.rst index 9946d41909..c5223be9c1 100644 --- a/docs/ops/nublado2/database.rst +++ b/docs/applications/nublado2/troubleshoot.rst @@ -1,6 +1,13 @@ -############################ +.. px-app-troubleshooting:: nublado2 + +######################## +Troubleshooting nublado2 +######################## + +.. _nublado2-clear-session-database: + Clear session database entry -############################ +============================ Sometimes JupyterHub and its session database will get into an inconsistent state where it thinks a pod is already running but cannot shut it down. The typical symptom of this is that spawns for that user fail with an error saying that the user's lab is already pending spawn or pending deletion, but the user cannot connect to their pod. 
@@ -8,16 +15,16 @@ The typical symptom of this is that spawns for that user fail with an error sayi Recovery may require manually clearing the user's entry in the session database as follows: #. Remove the user's lab namespace, if it exists. - + #. Remove the user from the session database. - Connect to the database with: - + First, connect to the database: + .. code-block:: shell pod=$(kubectl get pods -n postgres | grep postgres | awk '{print $1}') kubectl exec -it -n postgres ${pod} -- psql -U jovyan jupyterhub - - and then, at the PostgreSQL prompt, run: + + Then, at the PostgreSQL prompt: .. code-block:: sql diff --git a/docs/applications/nublado2/upgrade.rst b/docs/applications/nublado2/upgrade.rst new file mode 100644 index 0000000000..9d24ba3bce --- /dev/null +++ b/docs/applications/nublado2/upgrade.rst @@ -0,0 +1,23 @@ +.. px-app-upgrade:: nublado2 + +################# +Upgrading Nublado +################# + +Most of the time, upgrading Nublado can be done simply by syncing the application in Argo CD. +There will be a brief outage for spawning new pods, but users with existing pods should be able to continue working. + +Occasionally, new versions of JupyterHub will require a schema update. +We do not routinely enable automatic schema updates currently, so JupyterHub will refuse to start if a database schema update is required. +To enable schema updates, add: + +.. code-block:: yaml + + jupyterhub: + hub: + db: + upgrade: true + +(The ``jupyterhub`` and ``hub`` keys probably already exist in the ``values-.yaml`` file, so just add the ``db.upgrade`` setting in the correct spot.) +Then, JupyterHub will automatically upgrade its database when the new version starts. +You can then remove this configuration again if you're worried about automatic updates misbehaving later. 
diff --git a/docs/applications/nublado2/values.md b/docs/applications/nublado2/values.md new file mode 100644 index 0000000000..3be8c0fcf2 --- /dev/null +++ b/docs/applications/nublado2/values.md @@ -0,0 +1,12 @@ +```{px-app-values} nublado2 +``` + +# nublado2 Helm values reference + +Helm values reference table for the {px-app}`nublado2` application. + +```{include} ../../../services/nublado2/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/plot-navigator/index.rst b/docs/applications/plot-navigator/index.rst new file mode 100644 index 0000000000..1b241aa5ef --- /dev/null +++ b/docs/applications/plot-navigator/index.rst @@ -0,0 +1,18 @@ +.. px-app:: plot-navigator + +############################################ +plot-navigator — Data production plot viewer +############################################ + +Panel-based plot viewer. + +.. jinja:: plot-navigator + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/plot-navigator/values.md b/docs/applications/plot-navigator/values.md new file mode 100644 index 0000000000..068567fa77 --- /dev/null +++ b/docs/applications/plot-navigator/values.md @@ -0,0 +1,12 @@ +```{px-app-values} plot-navigator +``` + +# plot-navigator Helm values reference + +Helm values reference table for the {px-app}`plot-navigator` application. + +```{include} ../../../services/plot-navigator/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/portal/bootstrap.rst b/docs/applications/portal/bootstrap.rst new file mode 100644 index 0000000000..a7d33530ad --- /dev/null +++ b/docs/applications/portal/bootstrap.rst @@ -0,0 +1,16 @@ +.. 
px-app-bootstrap:: portal + +#################### +Bootstrapping Portal +#################### + +If the Portal Aspect is configured with a ``replicaCount`` greater than one (recommended for production installations), ``config.volumes.workareaHostPath`` or ``config.volumes.workareaNfs`` must be set and point to an underlying filesystem that supports shared multiple-write. +This is not supported by most Kubernetes persistent volume backends, which is why only a host path or an NFS mount are supported. + +The IDF environments use `Google Filestore`_ via NFS. + +The provisioning of this underlying backing store is manual, so make sure you either have created it or gotten a system administrator with appropriate permissions for your site to do so. + +Ensure that it is writable by the Portal pods. +The default UID for the Portal pods is 91. +If this needs to be changed, you'll need to add a new ``values.yaml`` parameter and plumb it through to the ``Deployment`` configuration. diff --git a/docs/applications/portal/index.rst b/docs/applications/portal/index.rst new file mode 100644 index 0000000000..31e2c7214b --- /dev/null +++ b/docs/applications/portal/index.rst @@ -0,0 +1,20 @@ +.. px-app:: portal + +################################# +portal — Firefly-based RSP Portal +################################# + +The Portal Aspect of the Rubin Science Platform, powered by Firefly. +This provides a graphical user interface for astronomical data exploration and also provides a data viewer that can be used within the Notebook Aspect (:px-app:`nublado2`). + +.. jinja:: portal + :file: applications/_summary.rst.jinja + +Guides +====== + +.. 
toctree:: + :maxdepth: 1 + + bootstrap + values diff --git a/docs/applications/portal/values.md b/docs/applications/portal/values.md new file mode 100644 index 0000000000..15328e3b51 --- /dev/null +++ b/docs/applications/portal/values.md @@ -0,0 +1,12 @@ +```{px-app-values} portal +``` + +# portal Helm values reference + +Helm values reference table for the {px-app}`portal` application. + +```{include} ../../../services/portal/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/postgres/add-database.rst b/docs/applications/postgres/add-database.rst new file mode 100644 index 0000000000..0b49b39963 --- /dev/null +++ b/docs/applications/postgres/add-database.rst @@ -0,0 +1,104 @@ +##################### +Adding a new database +##################### + +From time to time, you may need to add a new database to the internal PostgreSQL instance. + +Before you do, ask yourself how valuable the data is. +The internal PostgreSQL service is not intended to be highly available or extremely reliable. +It's designed for persistent storage for low-value data such as the JupyterHub session database, where the worst thing that happens after data loss is that users lose running sessions and may have to reauthenticate. + +Assuming that the internal PostgreSQL is indeed the right choice for your needs, there are several steps. + +Decide on a database name +========================= + +The service requires a database name, a username, and a password. +Usually the database name and user should be identical and should match the application that will consume the database (for example, ``gafaelfawr`` or ``exposurelog``). +We will use ``exposurelog`` as the model for the remainder of this document. + +Add the database to the deployment +================================== + +Go to the ``/services/postgres/templates`` directory and edit ``deployment.yaml`` to add an entry for the new database. 
+You should copy an existing entry to get the syntax correct, and then change the names. +The result should look like this: + +.. code-block:: yaml + + {{- with .Values.exposurelog_db }} + - name: VRO_DB_EXPOSURELOG_USER + value: {{ .user }} + - name: VRO_DB_EXPOSURELOG_DB + value: {{ .db }} + - name: VRO_DB_EXPOSURELOG_PASSWORD + valueFrom: + secretKeyRef: + name: "postgres" + key: "exposurelog_password" + {{- end }} + +Add the database to Phalanx installer +===================================== + +Add a password entry to Phalanx's installer, so the next time a new cluster is deployed or an extant cluster is redeployed, the password will be created. +This belongs in ``installer/generate_secrets.py`` in the ``_postgres()`` method. + +Typically, we use passwords that are ASCII representations of random 32-byte hexadecimal sequences. +The passwords for all the non-root PostgreSQL users already look like that, so copying an existing line and changing the name to reflect your application is usually correct: + +.. code-block:: python + :caption: /installer/generate_secrets.py + + self._set_generated("postgres", "exposurelog_password", secrets.token_hex(32)) + +Finally, edit the ``postgres`` ``values-.yaml`` files for the environments that need this database and add a section for your new database with appropriate ``user`` and ``db`` entries: + +.. code-block:: yaml + :caption: /services/postgres/values-.yaml + + exposurelog_db: + user: "exposurelog" + db: "exposurelog" + +Now start the PR and review process. + +Manually add the secret to Vault +================================ + +Since you have already added generation of the password to the installer, you could just generate new secrets for each environment and push them into Vault. +That, however, would require that you restart everything with randomly-generated passwords, and that's a fairly disruptive operation, so you probably are better off manually injecting just your new password. + +.. rst-class:: open + +#. 
Consult 1Password and retrieve the appropriate vault write token for the instance you're working with from ``vault_keys.json``. + +#. Set up your environment: + + .. code-block:: bash + + export VAULT_ADDR=vault.lsst.codes + export VAULT_TOKEN= + +#. Generate and store a new random password: + + .. code-block:: bash + + vault kv patch secret/k8s_operator//postgres \ + _password=$(openssl rand -hex 32) + +#. Delete the ``postgres`` ``Secret`` from the ``postgres`` namespace to force Vault Secrets Operator to recreate it. + +#. Repeat for each environment where you need the new database. + +Restart with new values +======================= + +Now it's finally time to synchronize PostgreSQL in each environment. +All you should need to do is sync the application in Argo CD. + +This will cause a brief service interruption in the cluster while the deployment is recreated with additional environment variables and PostgreSQL restarts. +You may therefore want to wait for a maintenance window. + +Once PostgreSQL restarts, the new database will be present, with the user and password set. +At that point it is ready for use by your new application. diff --git a/docs/applications/postgres/index.rst b/docs/applications/postgres/index.rst new file mode 100644 index 0000000000..7c2f8b845c --- /dev/null +++ b/docs/applications/postgres/index.rst @@ -0,0 +1,33 @@ +.. px-app:: postgres + +############################### +postgres — In-cluster SQL store +############################### + +The ``postgres`` service is a very small PostgreSQL installation to provide relational storage for applications and environments where data loss is acceptable. +Two intended purposes for this service are: + +- The JupyterHub user session database +- Backing store for Gafaelfawr's authentication tokens + +It may also be used by other applications, such as :px-app:`exposurelog` and :px-app:`narrativelog`. + +.. important:: + + Do not use this service for important data. 
+ Use a managed relational database, such as Google CloudSQL, instead. + + Production instances of the Science Platform use CloudSQL or a local external PostgreSQL server for the Gafaelfawr token database instead of this service. + +.. jinja:: postgres + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 2 + + add-database + troubleshoot + values diff --git a/docs/applications/postgres/troubleshoot.rst b/docs/applications/postgres/troubleshoot.rst new file mode 100644 index 0000000000..b3469f21c1 --- /dev/null +++ b/docs/applications/postgres/troubleshoot.rst @@ -0,0 +1,18 @@ +.. px-app-troubleshooting:: postgres + +######################## +Troubleshooting postgres +######################## + +.. _recreate-postgres-pvc: + +Recreating postgres PV/PVC +========================== + +If you get into a state where the cluster has completely crashed, perhaps due to hardware problems, and the backing store for persistent volumes has been lost, Postgres may refuse to start. +The reason for this is that if you are using an autoprovisioned storage class (such as GKE and Rook provide), the PVC will reference a volume that no longer exists. +This loss is acceptable; the :px-app:`postgres` database is intended to hold only fairly low-value data. +If your cluster has crashed that hard, the authentication Redis cache and JupyterHub session database are unlikely to still be relevant. + +To recover, you need to delete the PVC, recreate it (which will re-allocate the persistent storage), and restart the deployment. +This is most easily accomplished with Argo CD, although ``kubectl`` works as well. diff --git a/docs/applications/postgres/values.md b/docs/applications/postgres/values.md new file mode 100644 index 0000000000..80c963a1d4 --- /dev/null +++ b/docs/applications/postgres/values.md @@ -0,0 +1,12 @@ +```{px-app-values} postgres +``` + +# postgres Helm values reference + +Helm values reference table for the {px-app}`postgres` application. 
+ +```{include} ../../../services/postgres/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/production-tools/index.rst b/docs/applications/production-tools/index.rst new file mode 100644 index 0000000000..4656e9e501 --- /dev/null +++ b/docs/applications/production-tools/index.rst @@ -0,0 +1,18 @@ +.. px-app:: production-tools + +############################################# +production-tools — Data Production monitoring +############################################# + +Production Tools provides a collection of utility pages for monitoring data processing. + +.. jinja:: production-tools + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/production-tools/values.md b/docs/applications/production-tools/values.md new file mode 100644 index 0000000000..86f3a62b08 --- /dev/null +++ b/docs/applications/production-tools/values.md @@ -0,0 +1,12 @@ +```{px-app-values} production-tools +``` + +# production-tools Helm values reference + +Helm values reference table for the {px-app}`production-tools` application. + +```{include} ../../../services/production-tools/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/sasquatch/index.rst b/docs/applications/sasquatch/index.rst new file mode 100644 index 0000000000..7be5eef342 --- /dev/null +++ b/docs/applications/sasquatch/index.rst @@ -0,0 +1,18 @@ +.. px-app:: sasquatch + +################################# +sasquatch — Observatory telemetry +################################# + +Rubin Observatory's telemetry service. + +.. jinja:: sasquatch + :file: applications/_summary.rst.jinja + +Guides +====== + +.. 
toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/sasquatch/values.md b/docs/applications/sasquatch/values.md new file mode 100644 index 0000000000..10e996925f --- /dev/null +++ b/docs/applications/sasquatch/values.md @@ -0,0 +1,12 @@ +```{px-app-values} sasquatch +``` + +# sasquatch Helm values reference + +Helm values reference table for the {px-app}`sasquatch` application. + +```{include} ../../../services/sasquatch/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/semaphore/index.rst b/docs/applications/semaphore/index.rst new file mode 100644 index 0000000000..438a37e47a --- /dev/null +++ b/docs/applications/semaphore/index.rst @@ -0,0 +1,21 @@ +.. px-app:: semaphore + +############################# +semaphore — User notification +############################# + +Semaphore is the user notification and messaging service for the Rubin Science Platform. +UI applications like :px-app:`squareone` can display messages from Semaphore's API. + +Edit broadcast messages for SQuaRE-managed environments at `lsst-sqre/rsp_broadcast `__. + +.. jinja:: semaphore + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/semaphore/values.md b/docs/applications/semaphore/values.md new file mode 100644 index 0000000000..94defb1928 --- /dev/null +++ b/docs/applications/semaphore/values.md @@ -0,0 +1,12 @@ +```{px-app-values} semaphore +``` + +# semaphore Helm values reference + +Helm values reference table for the {px-app}`semaphore` application. + +```{include} ../../../services/semaphore/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/sherlock/index.rst b/docs/applications/sherlock/index.rst new file mode 100644 index 0000000000..45d08b943b --- /dev/null +++ b/docs/applications/sherlock/index.rst @@ -0,0 +1,18 @@ +.. 
px-app:: sherlock + +######################################### +sherlock — App ingress status and metrics +######################################### + +Sherlock collects service status and metrics from :px-app:`ingress-nginx` logs and can aggregate them across environments. + +.. jinja:: sherlock + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/sherlock/values.md b/docs/applications/sherlock/values.md new file mode 100644 index 0000000000..772943ae80 --- /dev/null +++ b/docs/applications/sherlock/values.md @@ -0,0 +1,12 @@ +```{px-app-values} sherlock +``` + +# sherlock Helm values reference + +Helm values reference table for the {px-app}`sherlock` application. + +```{include} ../../../services/sherlock/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/sqlproxy-cross-project/index.rst b/docs/applications/sqlproxy-cross-project/index.rst new file mode 100644 index 0000000000..7e63ea2af7 --- /dev/null +++ b/docs/applications/sqlproxy-cross-project/index.rst @@ -0,0 +1,21 @@ +.. px-app:: sqlproxy-cross-project + +################################################# +sqlproxy-cross-project — External Cloud SQL proxy +################################################# + +Sometimes, we want to allow arbitrary pods in one Google Kubernetes Engine cluster access Cloud SQL services in a different project. +For example, the IDF dev environment needs to be able to access the Cloud SQL Butler registry in the IDF int environment for testing purposes. + +This application enables that type of cross-environment Cloud SQL connection by running a general-use instance of the `Google Cloud SQL Auth Proxy `__. + +.. jinja:: sqlproxy-cross-project + :file: applications/_summary.rst.jinja + +Guides +====== + +.. 
toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/sqlproxy-cross-project/values.md b/docs/applications/sqlproxy-cross-project/values.md new file mode 100644 index 0000000000..5a4cc17c79 --- /dev/null +++ b/docs/applications/sqlproxy-cross-project/values.md @@ -0,0 +1,12 @@ +```{px-app-values} sqlproxy-cross-project +``` + +# sqlproxy-cross-project Helm values reference + +Helm values reference table for the {px-app}`sqlproxy-cross-project` application. + +```{include} ../../../services/sqlproxy-cross-project/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/squareone/bootstrap.rst b/docs/applications/squareone/bootstrap.rst new file mode 100644 index 0000000000..eae9ab4118 --- /dev/null +++ b/docs/applications/squareone/bootstrap.rst @@ -0,0 +1,17 @@ +.. px-app-bootstrap:: squareone + +####################### +Bootstrapping Squareone +####################### + +By default, Squareone manages the TLS configuration for the entirety of the Science Platform. +This assumes the Let's Encrypt approach to obtaining TLS certificates, and the default TLS configuration requires the cert-manager cluster issuer be set up. +See :doc:`/applications/cert-manager/notes` for more information. + +If you instead are using a commercial certificate and configuring ingress-nginx to use it, you need to disable the TLS configuration for Squareone. +Do that with the following in ``values-.yaml`` in `/services/squareone `__: + +.. code-block:: yaml + + ingress: + tls: false diff --git a/docs/applications/squareone/index.rst b/docs/applications/squareone/index.rst new file mode 100644 index 0000000000..b7dd11c0e1 --- /dev/null +++ b/docs/applications/squareone/index.rst @@ -0,0 +1,20 @@ +.. px-app:: squareone + +######################## +squareone — RSP homepage +######################## + +Squareone is the Rubin Science Platform's homepage and general-purpose UI. + +.. 
jinja:: squareone + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + notes + bootstrap + values diff --git a/docs/applications/squareone/notes.rst b/docs/applications/squareone/notes.rst new file mode 100644 index 0000000000..02ffa82281 --- /dev/null +++ b/docs/applications/squareone/notes.rst @@ -0,0 +1,19 @@ +.. px-app-notes:: squareone + +################################ +Squareone architecture and notes +################################ + +TLS configuration merging +========================= + +This applies only to environments that use Let's Encrypt for certificate management. + +Because all application ingresses share the same external hostname, the way the ingress configuration is structured in Phalanx is somewhat unusual. +Nearly all application create an ingress without adding TLS configuration. +Instead, they all use the same hostname, without a TLS stanza. +The Squareone ingress is the one designated ingress with a TLS configuration to request creation of certificates. +Because each ingress uses the same hostname, the NGINX ingress will merge all of those ingresses into one virtual host and will set up TLS if TLS is defined on any of them. + +Were TLS defined on more than one ingress, only one of those TLS configurations would be used, but which one is chosen is somewhat random. +Therefore, we designate Squareone as the single application to hold the configuration to avoid any confusion from unused configurations. diff --git a/docs/applications/squareone/values.md b/docs/applications/squareone/values.md new file mode 100644 index 0000000000..75385c192a --- /dev/null +++ b/docs/applications/squareone/values.md @@ -0,0 +1,12 @@ +```{px-app-values} squareone +``` + +# Squareone Helm values reference + +Helm values reference table for the {px-app}`squareone` application. 
+ +```{include} ../../../services/squareone/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/strimzi-registry-operator/index.rst b/docs/applications/strimzi-registry-operator/index.rst new file mode 100644 index 0000000000..6e0cd951c4 --- /dev/null +++ b/docs/applications/strimzi-registry-operator/index.rst @@ -0,0 +1,19 @@ +.. px-app:: strimzi-registry-operator + +############################################################ +strimzi-registry-operator — Schema registry for Alert Broker +############################################################ + +:px-app:`alert-stream-broker` uses `Apache Kafka`_ as the mechanism for publishing alerts. +The `Confluent Schema Registry`_ for that Kafka cluster is created and managed by this installation of the Strimzi Registry Operator. + +Note that :px-app:`sasquatch` includes a separate installation of the Strimzi Registry Operator to manage its Confluent Schema Registry. + +.. jinja:: strimzi-registry-operator + :file: applications/_summary.rst.jinja + +.. Guides +.. ====== +.. +.. .. toctree:: +.. :maxdepth: 1 diff --git a/docs/applications/strimzi/index.rst b/docs/applications/strimzi/index.rst new file mode 100644 index 0000000000..0b3fdec5c9 --- /dev/null +++ b/docs/applications/strimzi/index.rst @@ -0,0 +1,17 @@ +.. px-app:: strimzi + +############################### +strimzi — Kafka cluster manager +############################### + +The ``strimzi`` application is an installation of the `Strimzi Kafka Operator `__, used to manage `Apache Kafka`_ installations in the Rubin Science Platform. +It is used by both :px-app:`alert-stream-broker` and :px-app:`sasquatch` to create their respective Kafka clusters. + +.. jinja:: strimzi + :file: applications/_summary.rst.jinja + +.. Guides +.. ====== +.. +.. .. toctree:: +.. 
:maxdepth: 1 diff --git a/docs/applications/tap-schema/index.rst b/docs/applications/tap-schema/index.rst new file mode 100644 index 0000000000..ee558bd129 --- /dev/null +++ b/docs/applications/tap-schema/index.rst @@ -0,0 +1,22 @@ +.. px-app:: tap-schema + +######################## +tap-schema — TAP schemas +######################## + +The TAP schema database, for the :px-app:`tap` application. +This database is generated directly from the canonical Felis_ description of the project database schemas. + +.. jinja:: tap-schema + :file: applications/_summary.rst.jinja + + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + notes + upgrade + values diff --git a/docs/applications/tap-schema/notes.rst b/docs/applications/tap-schema/notes.rst new file mode 100644 index 0000000000..ec9350303e --- /dev/null +++ b/docs/applications/tap-schema/notes.rst @@ -0,0 +1,13 @@ +.. px-app-notes:: tap-schema + +################################# +tap-schema architecture and notes +################################# + +The TAP schema may vary by environment, depending on the tables and data available in that environment. +This is controlled by the `build-all script in the lsst/sdm_schemas repository `__. + +Each variation of the schema is represented by a different Docker image, which is a MySQL server with the appropriate data preloaded. +Whenever a new version of `lsst/sdm_schemas `__ is tagged, GitHub Actions builds and pushes all of those Docker images. + +Each Science Platform environment then selects the schema to deploy by configuring which Docker image to use in its ``values-.yaml`` file. diff --git a/docs/applications/tap-schema/upgrade.rst b/docs/applications/tap-schema/upgrade.rst new file mode 100644 index 0000000000..d7fb02f361 --- /dev/null +++ b/docs/applications/tap-schema/upgrade.rst @@ -0,0 +1,25 @@ +.. 
px-app-upgrade:: tap-schema + +#################### +Upgrading tap-schema +#################### + +Upgrading the tap-schema Argo CD application itself requires no special steps. +Syncing the Argo CD application is all that's required. +The new schema will automatically be picked up by the TAP service. + +Releasing a new schema version +============================== + +When a new version of the project schema is ready for deployment, use the following procedure: + +#. Ensure all PRs to `lsst/sdm_schemas `__ that should go into the new release have been merged. + +#. Make a new GitHub release of sdm_schemas with a new `semantic versioning`_ version number (such as ``1.1.5``). + (Ignore the other tags in the repository, such as ``w.2022.45``, created by other Rubin release processes.) + This will create a tag and run the publishing pipeline GitHub Action. + That, in turn, will run Felis_ against the YAML schema files in the ``yml`` directory and build the Docker images for the different supported environments. + +#. Update the ``appVersion`` field to the version of the new release in `/services/tap-schema/Chart.yaml `__. + +#. Sync the tap-schema Argo CD application on affected environments as normal. diff --git a/docs/applications/tap-schema/values.md b/docs/applications/tap-schema/values.md new file mode 100644 index 0000000000..5b84e377ec --- /dev/null +++ b/docs/applications/tap-schema/values.md @@ -0,0 +1,12 @@ +```{px-app-values} tap-schema +``` + +# tap-schema Helm values reference + +Helm values reference table for the {px-app}`tap-schema` application. + +```{include} ../../../services/tap-schema/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/tap/index.rst b/docs/applications/tap/index.rst new file mode 100644 index 0000000000..56bedb616b --- /dev/null +++ b/docs/applications/tap/index.rst @@ -0,0 +1,26 @@ +.. 
px-app:: tap + +################################ +tap — IVOA Table Access Protocol +################################ + +TAP_ (Table Access Protocol) is an IVOA_ service that provides access to general table data, including astronomical catalogs. +On the Rubin Science Platform, it is provided by `lsst-tap-service `__, which is derived from the `CADC TAP service `__. +The same service provides both TAP and ObsTAP_ schemas. + +The TAP data itself, apart from schema queries, comes from Qserv. +The TAP schema is provided by the separate :px-app:`tap-schema` application. + +See :px-app-upgrade:`tap-schema` for information on how to update the TAP schema. + +.. jinja:: tap + :file: applications/_summary.rst.jinja + + +Guides +====== + +.. toctree:: + + notes + values diff --git a/docs/_static/notebook-tap.py b/docs/applications/tap/notebook-tap.py similarity index 81% rename from docs/_static/notebook-tap.py rename to docs/applications/tap/notebook-tap.py index 71604f4242..a37f422adc 100644 --- a/docs/_static/notebook-tap.py +++ b/docs/applications/tap/notebook-tap.py @@ -1,19 +1,11 @@ -import os - -from diagrams import Cluster, Diagram, Edge +from diagrams import Cluster, Edge from diagrams.gcp.compute import KubernetesEngine -from diagrams.gcp.database import Datastore, Memorystore, SQL +from diagrams.gcp.database import SQL, Datastore, Memorystore from diagrams.gcp.network import LoadBalancing from diagrams.onprem.client import User +from sphinx_diagrams import SphinxDiagram -os.chdir(os.path.dirname(__file__)) - -with Diagram( - "Notebook to TAP", - show=False, - filename="notebook-tap", - outformat="png", -): +with SphinxDiagram(title="Notebook to TAP"): user = User("End User") with Cluster("Kubernetes"): diff --git a/docs/applications/tap/notes.rst b/docs/applications/tap/notes.rst new file mode 100644 index 0000000000..002e46f01f --- /dev/null +++ b/docs/applications/tap/notes.rst @@ -0,0 +1,11 @@ +.. 
px-app-notes:: tap + +########################## +TAP architecture and notes +########################## + +The ``tap`` application consists of the TAP Java web application, a PostgreSQL database used to track user job submissions (the backing store for the UWS_ protocol), and (on development deployments) a mock version of Qserv. + +.. diagrams:: notebook-tap.py + +.. diagrams:: portal-tap.py diff --git a/docs/_static/portal-tap.py b/docs/applications/tap/portal-tap.py similarity index 82% rename from docs/_static/portal-tap.py rename to docs/applications/tap/portal-tap.py index 0afbf5f454..0a1fce548f 100644 --- a/docs/_static/portal-tap.py +++ b/docs/applications/tap/portal-tap.py @@ -1,19 +1,11 @@ -import os - -from diagrams import Cluster, Diagram, Edge +from diagrams import Cluster, Edge from diagrams.gcp.compute import KubernetesEngine -from diagrams.gcp.database import Datastore, Memorystore, SQL +from diagrams.gcp.database import SQL, Datastore, Memorystore from diagrams.gcp.network import LoadBalancing from diagrams.onprem.client import User +from sphinx_diagrams import SphinxDiagram -os.chdir(os.path.dirname(__file__)) - -with Diagram( - "Portal to TAP", - show=False, - filename="portal-tap", - outformat="png", -): +with SphinxDiagram(title="Portal to TAP"): user = User("End User") with Cluster("Kubernetes"): diff --git a/docs/applications/tap/values.md b/docs/applications/tap/values.md new file mode 100644 index 0000000000..cf4f0c7f22 --- /dev/null +++ b/docs/applications/tap/values.md @@ -0,0 +1,12 @@ +```{px-app-values} tap +``` + +# tap Helm values reference + +Helm values reference table for the {px-app}`tap` application. + +```{include} ../../../services/tap/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/telegraf-ds/index.rst b/docs/applications/telegraf-ds/index.rst new file mode 100644 index 0000000000..d4b963b769 --- /dev/null +++ b/docs/applications/telegraf-ds/index.rst @@ -0,0 +1,21 @@ +.. 
px-app:: telegraf-ds + +########################################### +telegraf-ds — Per-node telemetry collection +########################################### + +Telegraf_ is used to gather system metrics about the services running on the Science Platform and send them to a central InfluxDB_ service, where they can be used for dashboards and alerting. + +This application deploys a Kubernetes ``DaemonSet`` to gather metrics from every node on the cluster. +For application-level metrics gathering, see the :px-app:`telegraf` application. + +.. jinja:: telegraf-ds + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/telegraf-ds/values.md b/docs/applications/telegraf-ds/values.md new file mode 100644 index 0000000000..3cd67db5d4 --- /dev/null +++ b/docs/applications/telegraf-ds/values.md @@ -0,0 +1,12 @@ +```{px-app-values} telegraf-ds +``` + +# telegraf-ds Helm values reference + +Helm values reference table for the {px-app}`telegraf-ds` application. + +```{include} ../../../services/telegraf-ds/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/telegraf/index.rst b/docs/applications/telegraf/index.rst new file mode 100644 index 0000000000..f09a4f429f --- /dev/null +++ b/docs/applications/telegraf/index.rst @@ -0,0 +1,21 @@ +.. px-app:: telegraf + +########################################### +telegraf — Application telemetry collection +########################################### + +Telegraf_ is used to gather system metrics about the services running on the Science Platform and send them to a central InfluxDB_ service, where they can be used for dashboards and alerting. + +This application gathers application-level metrics. +For node-level metrics gathering, see the :px-app:`telegraf-ds` application. + +.. jinja:: telegraf + :file: applications/_summary.rst.jinja + +Guides +====== + +.. 
toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/telegraf/values.md b/docs/applications/telegraf/values.md new file mode 100644 index 0000000000..75ec2e5e27 --- /dev/null +++ b/docs/applications/telegraf/values.md @@ -0,0 +1,12 @@ +```{px-app-values} telegraf +``` + +# telegraf Helm values reference + +Helm values reference table for the {px-app}`telegraf` application. + +```{include} ../../../services/telegraf/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/times-square/index.rst b/docs/applications/times-square/index.rst new file mode 100644 index 0000000000..9545768591 --- /dev/null +++ b/docs/applications/times-square/index.rst @@ -0,0 +1,18 @@ +.. px-app:: times-square + +###################################### +times-square — Parameterized notebooks +###################################### + +An API service for managing and rendering parameterized Jupyter notebooks, integrated with :px-app:`squareone` (user interface) and :px-app:`noteburst` (notebook execution). + +.. jinja:: times-square + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/times-square/values.md b/docs/applications/times-square/values.md new file mode 100644 index 0000000000..a6e033e3be --- /dev/null +++ b/docs/applications/times-square/values.md @@ -0,0 +1,12 @@ +```{px-app-values} times-square +``` + +# times-square Helm values reference + +Helm values reference table for the {px-app}`times-square` application. + +```{include} ../../../services/times-square/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/vault-secrets-operator/bootstrap.rst b/docs/applications/vault-secrets-operator/bootstrap.rst new file mode 100644 index 0000000000..d8a5fa7c54 --- /dev/null +++ b/docs/applications/vault-secrets-operator/bootstrap.rst @@ -0,0 +1,27 @@ +.. 
px-app-bootstrap:: vault-secrets-operator + +#################################### +Bootstrapping vault-secrets-operator +#################################### + +Vault Secrets Operator is the only component of the Science Platform whose secret has to be manually created, so that it can create the secrets for all other applications. +This will be done automatically by the `install script `__. + +Its secret will look like this: + +.. code-block:: yaml + + apiVersion: v1 + kind: Secret + metadata: + name: vault-secrets-operator + namespace: vault-secrets-operator + type: Opaque + stringData: + VAULT_TOKEN: + VAULT_TOKEN_LEASE_DURATION: 86400 + +Replace ```` with the ``read`` Vault token for the path ``secret/k8s_operator/`` in Vault (or whatever Vault enclave you plan to use for this Phalanx environment). +The path must match the path configured in ``values-.yaml`` in `/science-platform `__. + +See :dmtn:`112` for more information. diff --git a/docs/applications/vault-secrets-operator/index.rst b/docs/applications/vault-secrets-operator/index.rst new file mode 100644 index 0000000000..24a1d0dbda --- /dev/null +++ b/docs/applications/vault-secrets-operator/index.rst @@ -0,0 +1,21 @@ +.. px-app:: vault-secrets-operator + +############################################ +vault-secrets-operator — Vault to Kubernetes +############################################ + +The ``vault-secrets-operator`` application is an installation of `Vault Secrets Operator`_ to retrieve necessary secrets from Vault and materialize them as Kubernetes secrets for the use of other applications. +It processes ``VaultSecret`` resources defined in the `Phalanx repository`_ and creates corresponding Kubernetes Secret_ resources. + +.. jinja:: vault-secrets-operator + :file: applications/_summary.rst.jinja + +Guides +====== + +.. 
toctree:: + :maxdepth: 1 + + bootstrap + upgrade + values diff --git a/docs/applications/vault-secrets-operator/upgrade.rst b/docs/applications/vault-secrets-operator/upgrade.rst new file mode 100644 index 0000000000..2cc6551c19 --- /dev/null +++ b/docs/applications/vault-secrets-operator/upgrade.rst @@ -0,0 +1,13 @@ +.. px-app-upgrade:: vault-secrets-operator + +################################ +Upgrading vault-secrets-operator +################################ + +Upgrading to newer upstream releases of the Helm chart is normally simple and straightforward. +We have no significant local customization. + +If you want to verify that an upgrade has been successful, or if at any point you want to verify that Vault Secrets Operator is still working, find a ``VaultSecret`` and ``Secret`` resource pair in the Argo CD dashboard and delete the ``Secret`` resource. +It should be nearly immediately re-created from the ``VaultSecret`` resource by Vault Secrets Operator. + +The Gafaelfawr secret is a good one to use for this purpose since it is only read during Gafaelfawr start-up, so deleting the ``Secret`` resource won't cause an outage. diff --git a/docs/applications/vault-secrets-operator/values.md b/docs/applications/vault-secrets-operator/values.md new file mode 100644 index 0000000000..4d134ce79d --- /dev/null +++ b/docs/applications/vault-secrets-operator/values.md @@ -0,0 +1,12 @@ +```{px-app-values} vault-secrets-operator +``` + +# vault-secrets-operator Helm values reference + +Helm values reference table for the {px-app}`vault-secrets-operator` application. + +```{include} ../../../services/vault-secrets-operator/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/applications/vo-cutouts/index.rst b/docs/applications/vo-cutouts/index.rst new file mode 100644 index 0000000000..f50cbbdfbc --- /dev/null +++ b/docs/applications/vo-cutouts/index.rst @@ -0,0 +1,20 @@ +.. 
px-app:: vo-cutouts + +#################################### +vo-cutouts — IVOA SODA image cutouts +#################################### + +``vo-cutouts`` provides image cutouts via an API complying with the IVOA_ SODA_ specification. +It is returned as part of the DataLink_ record for images found via TAP searches and is used by the Portal Aspect (see :px-app:`portal`) to obtain cutouts. +It can also be used directly by any other IVOA-compatible client. + +.. jinja:: vo-cutouts + :file: applications/_summary.rst.jinja + +Guides +====== + +.. toctree:: + :maxdepth: 1 + + values diff --git a/docs/applications/vo-cutouts/values.md b/docs/applications/vo-cutouts/values.md new file mode 100644 index 0000000000..fcd04ea69a --- /dev/null +++ b/docs/applications/vo-cutouts/values.md @@ -0,0 +1,12 @@ +```{px-app-values} vo-cutouts +``` + +# vo-cutouts Helm values reference + +Helm values reference table for the {px-app}`vo-cutouts` application. + +```{include} ../../../services/vo-cutouts/README.md +--- +start-after: "## Values" +--- +``` diff --git a/docs/arch/repository.rst b/docs/arch/repository.rst deleted file mode 100644 index 3aeabe9741..0000000000 --- a/docs/arch/repository.rst +++ /dev/null @@ -1,67 +0,0 @@ -#################### -Repository structure -#################### - -Layout -====== - -While ArgoCD can be used and configured in any number of ways, there is also a layer of convention to simplify and add some structure that works for us to deploy the science platform services. - -First, there is the `installer directory `__. -This directory contains a script named `install.sh `__. -The arguments to this are the name of the environment, the FQDN, and the read key for Vault (see :ref:`secrets` for more details on Vault). -This installer script is the entrypoint for setting up a new environment. -It can also be run on an existing environment to update it. - -Next, there is the `services directory `__. 
-Each sub-directory in services is one service installed in (at least some environments of) the Rubin Science Platform. -This directory contains Helm values files for each of the environments that use that service. -It also specifies which Helm chart is used to deploy that service. -Each of the values files are named ``values-.yaml``. - -Finally, there is the `science-platform directory `__. -This contains an Argo CD parent application that specifies which services an environment should use and creates the corresponding Argo CD applications in Argo CD. -The values files in this directory contain the service manifest and other top level configuration. - -Charts -====== - -Argo CD manages services in the Rubin Science Platform through a set of Helm charts. -Which Helm charts to deploy in a given environment is controlled by the ``values-.yaml`` files in `/science-platform `__. - -For nearly all charts, there are at least two layers of charts. -The upper layer of charts, the ones installed directly by Argo CD, are found in the `/services `__ directory. -These charts usually contain only dependencies and ``values-.yaml`` files to customize the service for each environment. -Sometimes they may contain a small set of resources that are very specific to the Science Platform. - -The real work of deploying an service is done by the next layer of charts, which are declared as dependencies (via the ``dependencies`` key in ``Chart.yaml``) of the top layer of charts. -By convention, the top-level chart has the same name as the underlying chart that it deploys. -This second layer of charts may be external third-party Helm charts provided by other projects, or may be Helm charts maintained by Rubin Observatory. -In the latter case, these charts are maintained in the `lsst-sqre/charts GitHub repository `__. - -.. 
_chart-versioning: - -Chart versioning -================ - -The top level of charts defined in the ``/services`` directory are used only by Argo CD and are never published as Helm charts. -Their versions are therefore irrelevant. -The version of each chart is set to ``1.0.0`` because ``version`` is a required field in ``Chart.yaml`` and then never changed. -Reverting to a previous configuration in this layer of charts is done via a manual revert in Argo CD or by reverting a change in the GitHub repository, not by pointing Argo CD to an older chart. - -The second layer of charts that are declared as dependencies are normal, published Helm charts that follow normal Helm semantic versioning conventions. -In the case of the ``lsst-sqre/charts`` repository, this is enforced by CI. -We can then constrain the version of the chart Argo CD will deploy by changing the ``dependencies`` configuration in the top-level chart. - -Best practice is for a release of a chart to deploy the latest version of the corresponding service, so that upgrading the chart also implies upgrading the service. -This allows automatic creation of pull requests to upgrade any services deployed by Argo CD (see `SQR-042 `__ for more details). -Charts maintained in lsst-sqre/charts follow this convention (for the most part). -Most upstream charts also follow this convention, but some require explicitly changing version numbers in ``values-*.yaml``. - -In general, we pin the version of the chart to deploy in the ``dependencies`` metadata of the top-level chart. -This ensures deterministic cluster configuration and avoids inadvertently upgrading services. -However, for services still under development, we sometimes use a floating dependency to reduce the number of pull requests required when iterating, and then switch to a pinned version once the service is stable. - -There is currently no mechanism to deploy different versions of a chart in different environments. 
-We will probably need a mechanism to do this eventually, and have considered possible implementation strategies, but have not yet started on this work. -In the meantime, we disable automatic deployment in Argo CD so there is a human check on whether a given chart is safe to deploy in a given environment. diff --git a/docs/conf.py b/docs/conf.py index d68e7ca5fb..9d4279b46a 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,145 +1,34 @@ -import os -import re -import sys +from typing import Dict +from pathlib import Path -import lsst_sphinx_bootstrap_theme +from documenteer.conf.guide import * # noqa: F401 F403 +from phalanx.docs.models import Phalanx as PhalanxModel -# Work around Sphinx bug related to large and highly-nested source files -sys.setrecursionlimit(2000) +phalanx_metadata = PhalanxModel.load_phalanx(Path(__file__).parent.parent) +jinja_contexts: Dict[str, Dict] = {} +for env in phalanx_metadata.environments: + jinja_contexts[env.name] = {"env": env} +for app in phalanx_metadata.apps: + jinja_contexts[app.name] = { + "app": app, + "envs": {env.name: env for env in phalanx_metadata.environments}, + } -# -- General configuration ------------------------------------------------ -# If your documentation needs a minimal Sphinx version, state it here. -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.ifconfig', - 'documenteer.sphinxext' -] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = 'Phalanx' -copyright = '2020, Association of Universities for Research in Astronomy, Inc. 
(AURA)' -author = 'LSST SQuaRE' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -github_ref = os.getenv('GITHUB_REF', default='refs/heads/master') -ref_match = re.match(r'refs/(heads|tags|pull)/(?P.+)', github_ref) -if ref_match is None: - version = 'Current' -elif ref_match['ref'] == 'master': - version = 'Current' -else: - version = ref_match['ref'] -release = version - -html_title = f"{project} ({version}) documentation" - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# today = '' -# Else, today_fmt is used as the format for a strftime call. -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = [ - '_build', - 'README.rst' -] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# The reST default role cross-links Python (used for this markup: `text`) -default_role = 'py:obj' - -# Intersphinx - -intersphinx_mapping = { - # 'python': ('https://docs.python.org/3/', None), +jinja_env_kwargs = { + "lstrip_blocks": True, } -rst_epilog = """ -.. _Argo CD: https://argoproj.github.io/argo-cd/ -.. _Helm: https://helm.sh -.. _IVOA: https://ivoa.net/documents/ -.. 
_semantic versioning: https://semver.org/ -""" - -# -- Options for linkcheck builder ---------------------------------------- - -linkcheck_retries = 2 -linkcheck_timeout = 5 # seconds -linkcheck_ignore = [ - r'^http://localhost', - r'^http(s)*://ls.st', +exclude_patterns.extend( # noqa: F405 + [ + "requirements.txt", + "environments/_summary.rst.jinja", + "applications/_summary.rst.jinja", + ] +) + +linkcheck_anchors = False +linkcheck_exclude_documents = [ + r"applications/.*/values", ] - -# -- Options for HTML output ---------------------------------------------- - -templates_path = [ - '_templates', - lsst_sphinx_bootstrap_theme.get_html_templates_path() -] - -html_theme = 'lsst_sphinx_bootstrap_theme' -html_theme_path = [lsst_sphinx_bootstrap_theme.get_html_theme_path()] - - -html_context = { - # Enable "Edit in GitHub" link - 'display_github': True, - # https://{{ github_host|default("github.com") }}/{{ github_user }}/ - # {{ github_repo }}/blob/ - # {{ github_version }}{{ conf_py_path }}{{ pagename }}{{ suffix }} - 'github_user': 'lsst-sqre', - 'github_repo': 'phalanx', - 'conf_py_path': 'docs/', - # TRAVIS_BRANCH is available in CI, but master is a safe default - 'github_version': os.getenv('TRAVIS_BRANCH', default='master') + '/' -} - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -html_theme_options = {'logotext': project} - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -# html_title = None - -# A shorter title for the navigation bar. Default is the same as html_title. -html_short_title = project - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". 
-html_static_path = [] - -# If true, links to the reST sources are added to the pages. -html_show_sourcelink = False diff --git a/docs/service-guide/add-a-onepassword-secret.rst b/docs/developers/add-a-onepassword-secret.rst similarity index 75% rename from docs/service-guide/add-a-onepassword-secret.rst rename to docs/developers/add-a-onepassword-secret.rst index f56c59a431..d0b69900e9 100644 --- a/docs/service-guide/add-a-onepassword-secret.rst +++ b/docs/developers/add-a-onepassword-secret.rst @@ -2,10 +2,10 @@ Add a secret with 1Password and VaultSecret ########################################### -Static secrets for services are stored in a 1Password vault before being automatically synced to the Vault service itself and ultimately to Kubernetes ``Secret`` resources via :ref:`vault-secrets-operator`. +Static secrets for applications are stored in a 1Password vault before being automatically synced to the Vault service itself and ultimately to Kubernetes Secret_ resources via :px-app:`vault-secrets-operator`. Such secrets are things for external cloud services where we don't automatically provision accounts and password. When we manually create such a secret, we store it in 1Password. -This page provides steps for adding a service secret through 1Password. +This page provides steps for adding an application secret through 1Password. .. note:: @@ -14,8 +14,8 @@ This page provides steps for adding a service secret through 1Password. .. note:: - This document only covers creating a 1Password-backed Secret for the first time for a service. - If you want to update a Secret, either by adding new 1Password secrets or by changing their secret values, you should follow the instructions in :doc:`/service-guide/update-a-onepassword-secret`. + This document only covers creating a 1Password-backed secret for the first time for an application. 
+ If you want to update a secret, either by adding new 1Password secrets or by changing their secret values, you should follow the instructions in :doc:`/developers/update-a-onepassword-secret`. Part 1. Open the 1Password vault ================================ @@ -33,7 +33,7 @@ Each item in a Kubernetes ``Secret`` corresponds to either the contents of a sec .. code-block:: text - {{service}} {{env}} {{description}} + {{application}} {{env}} {{description}} This format is a convention and isn't tied into the automation. The ``env`` can be omitted if the secret applies to all environments. @@ -47,9 +47,9 @@ Each item in a Kubernetes ``Secret`` corresponds to either the contents of a sec .. code-block:: text - {{service}} {{secret name}} + {{application}} {{secret name}} - This field provides part of a Vault path for the secret value, which in turn is used by :ref:`vault-secrets-operator` resources to create Kubernetes secrets. + This field provides part of a Vault path for the secret value, which in turn is used by :px-app:`vault-secrets-operator` resources to create Kubernetes secrets. - Add a metadata field labeled ``environment``. The value of that field should be the **hostname** of the RSP environment that this secret applies to (e.g. ``data.lsst.cloud``, not the Phalanx name ``idfprod``). @@ -60,7 +60,10 @@ Each item in a Kubernetes ``Secret`` corresponds to either the contents of a sec Part 3. Sync 1Password items into Vault ======================================= -Once a service's secrets are stored in 1Password, you need to sync them into Vault. +Once an application's secrets are stored in 1Password, you need to sync them into Vault. + +First, set the ``OP_CONNECT_TOKEN`` environment variable to the access token for the SQuaRE 1Password Connect service. +This is stored in the SQuaRE 1Password vault under the item named ``SQuaRE Integration Access Token: Argo``. 
Open Phalanx's ``installer/`` directory: @@ -95,10 +98,10 @@ To sync multiple environments at once: Next steps: connecting Vault to Kubernetes with VaultSecret =========================================================== -Once a secret is in Vault, you need to create or update a ``VaultSecret`` resource in your services deployment (typically in its Helm_ chart). -See :doc:`create-service` for more details about creating a Helm chart for a service. +Once a secret is in Vault, you need to create or update a ``VaultSecret`` resource in your application's deployment (typically in its Helm_ chart). +See :doc:`create-an-application` for more details about creating a Helm chart for an application. -A conventional ``VaultSecret`` Helm template looks like this (update ``myapp`` with your service's name): +A conventional ``VaultSecret`` Helm template looks like this (update ``myapp`` with your application's name): .. code-block:: yaml @@ -117,12 +120,12 @@ This Vault path is formatted as: .. code-block:: text - secret/k8s_operator/{{host}}/{{service}} + secret/k8s_operator/{{host}}/{{application}} The path components correspond to metadata in 1Password items: - ``{{host}}`` corresponds to the value of the ``environment`` metadata field -- ``{{service}}`` corresponds to the first part of the ``generate_secrets_key`` metadata field +- ``{{application}}`` corresponds to the first part of the ``generate_secrets_key`` metadata field Within Kubernetes, vault-secrets-operator acts on the ``VaultSecret`` to create a ``Secret`` resource. The ``Secret`` has the same name and namespace as the ``VaultSecret`` that you explicitly template in your Helm chart. 
diff --git a/docs/developers/add-application.rst b/docs/developers/add-application.rst new file mode 100644 index 0000000000..0910709388 --- /dev/null +++ b/docs/developers/add-application.rst @@ -0,0 +1,171 @@ +################################ +Add a new application to Phalanx +################################ + +This page provides the steps for integrating an application with Phalanx by adding the application's Helm chart. +For background on building an application, see the :ref:`dev-build-toc` documentation. + +Create the Helm chart +===================== + +To deploy your application with Phalanx, it must have either a Helm chart or a Kustomize configuration. +Currently, all applications use Helm charts. + +.. note:: + + Kustomize is theoretically supported but has not been used to date in the `Phalanx repository`_, and therefore isn't recommended. + +There does not yet exist a SQuaRE-produced template for the Helm chart; rather, we use the built-in Helm starter template. +Use ``helm create -p starters/web-service`` to create a new chart from that template. +**Be sure you are using Helm v3.** +Helm v2 is not supported. + +You will need to make at least the following changes to the default Helm chart template: + +- All secrets must come from ``VaultSecret`` resources, not Kubernetes ``Secret`` resources. + You should use a configuration option named ``vaultSecretsPath`` in your ``values.yaml`` to specify the path in Vault for your secret. + This option will be customized per environment when you add the application to Phalanx (see :ref:`add-argocd-application`). + See :doc:`add-a-onepassword-secret` for more information about secrets. + +- Applications providing a web API should be protected by Gafaelfawr and require an appropriate scope. + This is set up for you by the template using a ``GafaelfawrIngress`` resource in ``templates/ingress.yaml``, but you will need to customize the scope required for access, and may need to add additional configuration.
+ You will also need to customize the path under which your application should be served. + + See `the Gafaelfawr's documentation on Ingress configurations `__ for more information, and see :dmtn:`235` for a guide to what scopes to use to protect the application. + +- If your application exposes Prometheus endpoints, you will want to configure these in the `telegraf application's prometheus_config `__. + +Documentation +------------- + +Phalanx uses `helm-docs`_ to generate documentation for Helm charts. +This produces a nice Markdown README file that documents all the chart options, but it requires special formatting of the ``values.yaml`` file that is not present in the default Helm template. + +Publication +----------- + +Rubin-developed Helm charts for the Science Platform are stored as part of the `phalanx repository `__. They can be found in the `services directory `__. + +Examples +-------- + +Existing Helm charts that are good examples to read or copy are: + +- `hips `__ (fairly simple) +- `mobu `__ (also simple) +- `gafaelfawr `__ (complex, including CRDs and multiple pods) + +.. _add-argocd-application: + +Adding an Argo CD Application for your application +================================================== + +Once you have a chart and a Docker image and you have added your static application secrets to 1Password (see :doc:`add-a-onepassword-secret`), you need to integrate your application into Phalanx. +This is done by creating an Argo CD ``Application`` that manages your application. + +#. For each environment in which your application will run, create a ``values-.yaml`` file in your application's directory. + This should hold only the customization per Rubin Science Platform deployment. + Any shared configuration should go into the defaults of your chart (``values.yaml``). + + If it is a third-party application repackaged as a Phalanx chart, you will need to add its configuration a little differently. 
See :ref:`external-chart-config` for more discussion.) + +#. Most applications will need a base URL, which is the top-level externally-accessible URL (this is presented within the chart as a separate parameter, although as we will see it is derived from the hostname) for the ingress to the application, the hostname, and the base path within Vault for storage of secrets. + + In general these will be set within the application definition within the ``science-platform`` directory and carried through to application charts via global Argo CD variables. + You should generally simply need the boilerplate setting them to empty: + + .. code-block:: yaml + + # The following will be set by parameters injected by Argo CD and should not + # be set in the individual environment values files. + global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" + +#. Create the Argo CD application resource. + This is a new file in `/science-platform/templates `__ named ``-application.yaml`` where ```` must match the name of the directory created above. + The contents of this file should look like: + + .. 
code-block:: yaml + + {{- if .Values..enabled -}} + apiVersion: v1 + kind: Namespace + metadata: + name: + spec: + finalizers: + - kubernetes + --- + apiVersion: argoproj.io/v1alpha1 + kind: Application + metadata: + name: + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io + spec: + destination: + namespace: + server: https://kubernetes.default.svc + project: default + source: + path: services/ + repoURL: {{ .Values.repoURL }} + targetRevision: {{ .Values.revision }} + helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} + valueFiles: + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" + {{- end -}} + + Replace every instance of ```` with the name of your application. + This creates the namespace and Argo CD application for your application. + Note that this is where we derive baseURL from host. + + Both the ``fqdn`` and ``host`` must be defined in each RSP instance definition file (that is, ``/science-platform/values-.yaml`` files in the `phalanx repository`_). + Typically this is done at the top; should you at some point deploy an entirely new instance of the RSP, remember to do this in the base science-platform application definition for the new instance. + +#. If your application image resides at a Docker repository which requires authentication (either to pull the image at all or to raise + the pull rate limit), then you must tell any pods deployed by your application to use a pull secret named ``pull-secret``, and you must + configure that pull secret in the application's ``vault-secrets.yaml``. + If you are using the default Helm template, this will mean a block like: + + .. code-block:: yaml + + imagePullSecrets: + - name: "pull-secret" + + If you are using an external chart, see its documentation for how to configure pull secrets.
+ + Note that if your container image is built through GitHub actions and stored at ghcr.io, there is no rate limiting (as long as your container image is built from a public repository, which it should be). If it is stored at Docker Hub, you should use a pull secret, because we have been (and will no doubt continue to be) rate-limited at Docker Hub in the past. If it is pulled from a private repository, obviously you will need authentication, and if the container is stored within the Rubin Google Artifact Registry, there is likely to be some Google setup required to make pulls magically work from within a given cluster. + + In general, copying and pasting the basic setup from another application (``cachemachine`` or ``mobu`` recommended for simple applications) is a good way to save effort. + +#. Finally, edit ``values.yaml`` and each of the ``values-*.yaml`` files in `/science-platform `__ and add a stanza for your application. + The stanza in ``values.yaml`` should always say: + + .. code-block:: yaml + + : + enabled: false + + Replace ```` with the name of your application. + For the other environments, set ``enabled`` to ``true`` if your application should be deployed there. + You almost certainly want to start in a development or integration environment and enable your new application in production environments only after it has been smoke-tested in less critical environments. 
diff --git a/docs/service-guide/add-external-chart.rst b/docs/developers/add-external-chart.rst similarity index 65% rename from docs/service-guide/add-external-chart.rst rename to docs/developers/add-external-chart.rst index 99f70ccfa3..e3b013912e 100644 --- a/docs/service-guide/add-external-chart.rst +++ b/docs/developers/add-external-chart.rst @@ -2,13 +2,12 @@ Adding an external Helm chart ############################# -Sometimes, rather than deploying a new service we wrote ourselves (see :doc:`create-service`), we want to deploy an existing external service in the Rubin Science Platform with some customizations. +Sometimes, rather than deploying a new application written specifically for Rubin Observatory (see :doc:`create-an-application`), we want to deploy an existing third-party application in the Rubin Science Platform with some customizations. -If the service has an existing published Helm chart (and most major open source services do, albeit sometimes not from the upstream service maintainers), we should use that Helm chart. +If the application has an existing published Helm chart, we should use that Helm chart. Below are details on how to do that. -This guide is somewhat general since every external service will be different in new and exciting ways. -Expect to spend a lot of time reading the upstream Helm chart documentation and iterating on configuration approaches when adding an external Helm chart. +This guide is somewhat general since every external application will be different. Potential problems ================== @@ -16,22 +15,21 @@ Potential problems No existing Helm chart ---------------------- -If the service does not have an existing published Helm chart, you should consider that a red flag that prompts you to reconsider whether this service is the right choice for the Rubin Science Platform. -To deploy it, you will need to write and maintain a Helm chart and keep it up-to-date for new releases of the service. 
+If the application does not have an existing published Helm chart, you should consider that a red flag that prompts you to reconsider whether this application is the right choice for the Rubin Science Platform. +To deploy it, you will need to write and maintain a Helm chart and keep it up-to-date for new releases of the application. This can be a substantial amount of work. -For large and complex services, it can even be a full-time job. +For large and complex applications, it can even be a full-time job. -**We cannot accept services in the Rubin Science Platform that are not kept up-to-date.** -It is a hard requirement that every service keep up with new upstream development and releases so that we get continued security support. -You must be able to commit to doing this for the lifetime of the project before adding an external service to the Rubin Science Platform. +**We cannot accept applications in the Rubin Science Platform that are not kept up-to-date.** +It is a hard requirement that every application keep up with new upstream development and releases so that we get continued security support. +You must be able to commit to doing this for the lifetime of the project before adding an external application to the Rubin Science Platform. If the benefit to the Rubin Science Platform seems worth the ongoing effort to write and maintain a Helm chart, try to contribute that Helm chart to the upstream maintainers so that we can share the burden of maintaining it with other projects that use Kubernetes. No published Helm chart ----------------------- -If the service has an existing, maintained Helm chart, but it's not published in a Helm repository, this is also a red flag, albeit a lesser one. -This normally means the people maintaining the Helm chart don't entirely understand Helm or the conventions of the Kubernetes ecosystem. 
+If the application has an existing, maintained Helm chart, but it's not published in a Helm repository, this is also a red flag, albeit a lesser one. In exceptional circumstances we can import such an external Helm chart into the `charts repository `__, but we would prefer not to do this since keeping it up-to-date with upstream changes is very awkward. .. _external-chart-config: @@ -41,10 +39,10 @@ Configure the external chart Configuration mostly involves carefully reading the documentation of the upstream Helm chart and building a ``values.yaml`` file that configures it appropriately. You may also need to add additional resources not created by the upstream Helm chart, particularly ``VaultSecret`` objects to create any secrets that it needs. -(See :doc:`add-a-onepassword-secret` for more about secrets.) +See :doc:`add-a-onepassword-secret` for more about secrets. If the required configuration for the chart is simple enough, you can reference the chart directly from Phalanx and put its configuration in the per-environment Phalanx ``values-*.yaml`` files. -In this case, you can skip ahead to :doc:`add-service`, although still read the information below on what settings you may need to configure. +In this case, you can skip ahead to :doc:`add-application`, although still read the information below on what settings you may need to configure. If configuring the chart is sufficiently complex, if you want to provide additional Kubernetes resources that are not part of the upstream chart, or if there is substantial configuration that should be shared between all Rubin Science Platform environments, you may want to create a wrapper chart. This is a chart that lives in the `charts repository `__ and includes the upstream chart as a subchart. 
@@ -66,6 +64,6 @@ If it is not, you will need to add a stanza like: - url: https://kubernetes.github.io/ingress-nginx/ name: ingress-nginx -to that configuration key for the ``values-*.yaml`` file for every environment in Phalanx that will deploy this service. +to that configuration key for the ``values-*.yaml`` file for every environment in Phalanx that will deploy this application. (The example above is for the ``ingress-nginx`` chart; the URL and name will obviously vary.) -Do that as a pull request, probably as part of your pull request to add your Argo CD application (see :doc:`add-service`). +Do that as a pull request, probably as part of your pull request to add your Argo CD application (see :doc:`add-application`). diff --git a/docs/developers/application-edit-button.jpg b/docs/developers/application-edit-button.jpg new file mode 100644 index 0000000000..bce80da878 Binary files /dev/null and b/docs/developers/application-edit-button.jpg differ diff --git a/docs/developers/application-revision-edit.jpg b/docs/developers/application-revision-edit.jpg new file mode 100644 index 0000000000..399c22d652 Binary files /dev/null and b/docs/developers/application-revision-edit.jpg differ diff --git a/docs/developers/argocd-application.jpg b/docs/developers/argocd-application.jpg new file mode 100644 index 0000000000..79b71d911d Binary files /dev/null and b/docs/developers/argocd-application.jpg differ diff --git a/docs/developers/chart-overview.rst b/docs/developers/chart-overview.rst new file mode 100644 index 0000000000..50482d6904 --- /dev/null +++ b/docs/developers/chart-overview.rst @@ -0,0 +1,108 @@ +######################################## +Overview of Helm charts for applications +######################################## + +This page provides overall guidelines on how Phalanx uses Helm charts for applications. + +Charts +====== + +Argo CD manages applications in the Rubin Science Platform through a set of Helm charts. 
+Which Helm charts to deploy in a given environment is controlled by the ``values-.yaml`` files in `/science-platform `__. + +The `/services `__ directory defines templates in its ``templates`` directory and values to resolve those templates in ``values.yaml`` and ``values-.yaml`` files to customize the application for each environment. For first-party charts, the ``templates`` directory is generally richly populated. + +For third-party charts the ``templates`` directory might not exist or might have only a small set of resources specific to the Science Platform. +In that case, most of the work of deploying an application is done by charts declared as dependencies (via the ``dependencies`` key in ``Chart.yaml``) of the top-level chart. +By convention, the top-level chart has the same name as the underlying chart that it deploys. +Subcharts may be external third-party Helm charts provided by other projects, or, in rare instances, they may be Helm charts maintained by Rubin Observatory. +In the latter case, these charts are maintained in the `lsst-sqre/charts GitHub repository `__. + +.. _chart-versioning: + +Chart versioning +================ + +The top level of charts defined in the ``/services`` directory are used only by Argo CD and are never published as Helm charts. +Their versions are therefore irrelevant. +The version of each chart is set to ``1.0.0`` because ``version`` is a required field in ``Chart.yaml`` and then never changed. +It is instead the ``appVersion`` field that is used to point to a particular release of a first-party chart. Reverting to a previous configuration in this layer of charts is done via a manual revert in Argo CD or by reverting a change in the GitHub repository so that the ``appVersion`` points to an earlier release. It is **not** done by pointing Argo CD to an older chart. + +Third-party charts are declared as dependencies; they are normal, published Helm charts that follow normal Helm semantic versioning conventions.
+In the case of the ``lsst-sqre/charts`` repository, this is enforced by CI. +We can then constrain the version of the chart Argo CD will deploy by changing the ``dependencies`` configuration in the top-level chart. + +Best practice is for a release of a chart to deploy the latest version of the corresponding application, so that upgrading the chart also implies upgrading the application. +This allows automatic creation of pull requests to upgrade any applications deployed by Argo CD (see :sqr:`042`). +Charts maintained as first-party charts in Phalanx follow this convention (for the most part). +Most upstream charts also follow this convention, but some require explicitly changing version numbers in ``values-*.yaml``. + +In general, we pin the version of the chart to deploy in the ``dependencies`` metadata of the top-level chart. +This ensures deterministic cluster configuration and avoids inadvertently upgrading applications. +However, for applications still under development, we sometimes use a floating dependency to reduce the number of pull requests required when iterating, and then switch to a pinned version once the application is stable. + +There is currently no generic mechanism to deploy different versions of a chart in different environments, as appVersion is set in ``Chart.yaml``. + +That does not mean that rolling out a new version is all-or-nothing: you have a couple of different options for testing new versions. +The easiest is to modify the appVersion in ``Chart.yaml`` on your development branch and then use Argo CD to deploy the application from the branch, rather than ``master``, ``main``, or ``HEAD`` (as the case may be). +This will cause the application resource in the ``science-platform`` app to show as out of sync, which is indeed correct, and a helpful reminder that you may be running from a branch when you forget and subsequently rediscover that fact weeks later. 
+Additionally, many charts allow specification of a tag (usually some variable like ``image.tag`` in a values file), so that is a possibility as well. +If your chart doesn't have a way to control what image tag you're deploying from, consider adding the capability. +In any event, for RSP instances, we (as a matter of policy) disable automatic deployment in Argo CD so there is a human check on whether a given chart is safe to deploy in a given environment, and updates are deployed to production environments (barring extraordinary circumstances) during our specified maintenance windows. + +.. _chart-doc-links: + +Source and documentation links in Chart.yaml +============================================ + +You can add source and documentation links to an app's ``Chart.yaml`` and that information is included in the :doc:`app's homepage in the Phalanx docs `. + +home +---- + +Use the ``home`` field in ``Chart.yaml`` for the app's documentation site (if it has one). +For example: + +.. code-block:: yaml + :caption: Chart.yaml + + home: https://gafaelfawr.lsst.io/ + +Don't use the ``home`` field for links to documents (technotes) or source repositories. + +sources +------- + +Use ``sources`` to link to the Git repositories related to the application. +Note that ``sources`` is an array of URLs: + +.. code-block:: yaml + :caption: Chart.yaml + + sources: + - https://github.com/lsst-sqre/gafaelfawr + +phalanx.lsst.io/docs +-------------------- + +Use this custom annotation to link to documents (as opposed to the user guide, see ``home``). +Documents are technotes and change-controlled documents: + + +.. 
code-block:: yaml + :caption: Chart.yaml + + annotations: + phalanx.lsst.io/docs: | + - id: "SQR-065" + title: "Design of Noteburst, a programatic JupyterLab notebook execution service for the Rubin Science Platform" + url: "https://sqr-065.lsst.io/" + - id: "SQR-062" + title: "The Times Square service for publishing parameterized Jupyter Notebooks in the Rubin Science platform" + url: "https://sqr-062.lsst.io/" + +.. note:: + + Note how the value of ``phalanx.lsst.io/docs`` is a YAML-formatted string (hence the ``|`` symbol). + The ``id`` field is optional, but can be set to the document's handle. + The ``title`` and ``url`` fields are required. diff --git a/docs/developers/create-an-application.rst b/docs/developers/create-an-application.rst new file mode 100644 index 0000000000..c9159e804d --- /dev/null +++ b/docs/developers/create-an-application.rst @@ -0,0 +1,55 @@ +########################## +Building a new application +########################## + +This page provides general guidance for creating an application in Python that can be deployed through Phalanx. +If the goal is to instead deploy an existing third-party application with its own Helm chart in the Rubin Science Platform, see :doc:`add-external-chart`. + +To be deployed in the Rubin Science Platform, an application must come in the form of one or more Docker images and a Helm chart (or Kustomize configuration, although no application currently uses that approach) that deploys those images in Kubernetes. + +After you have finished the steps here, add any secrets you need for your application: :doc:`add-a-onepassword-secret`. +Once you have done that, add the application to Phalanx: :doc:`add-application`. + +Write the application +===================== + +Rubin-developed applications for the Rubin Science Platform should be written in Python unless there's some reason (such as using code developed elsewhere) that forces choice of a different language. 
+For the common case of a web application (one that exposes an API via HTTP), we recommend using the `FastAPI framework `__. + +The easiest way to start a new FastAPI_ application written in Python and intended for the Rubin Science Platform is to create a new project using sqrbot-jr. +On the LSSTC Slack, send the message ``create project`` to ``@sqrbot-jr``. +Select ``FastAPI application (Safir)`` from the list of project types. +This will create a new GitHub repository with the basic framework of a FastAPI_ application that will work well inside the Rubin Science Platform. +The template uses Safir_ to simplify and regularize many parts of your FastAPI_ application, from logger to database handling. + +Any Python application destined for the RSP should regularly update its dependencies to pick up any security fixes. +If your application follows the code layout of the FastAPI template, use `neophile `__ to automatically create PRs to update your dependencies. +To add your application to the list of repositories that neophile updates, submit a PR to add the repository owner and name to `neophile's configuration `__. + +Each release of your application must be tagged. +The tag should use `semantic versioning`_ (for example, ``1.3.2``). +Creating a GitHub release for the tag is optional but recommended, and we recommend setting the title of the release to the name of the tag. +If you are using the FastAPI template, tagging in this fashion is required since it triggers the GitHub Actions workflow to build and publish a Docker image with a tag matching the release version. + +Create the Docker image +======================= + +The Docker image can be stored in any container registry that is usable by Kubernetes, but for Rubin-developed applications using the FastAPI template, we usually push `GitHub Container Registry (ghcr.io) `__. +The Google Artifact Registry hosts the Science Platform images and may eventually be used more widely. 
+If your image must be stored in a private container registry, the credentials for that registry must be added to the pull secret. + +If you use the FastAPI application template, a ``Dockerfile`` is be created as part of the new repository template, and a GitHub Actions workflow is set up in the new repository to build and push Docker images for tagged releases. + +If you use ``ghcr.io`` as your repository (which is the FastAPI template default) you can use GitHub's built-in ``GITHUB_TOKEN``; you don't need +to create an additional secret. +If you are using Docker Hub you must create two secrets in your new GitHub repository, ``DOCKER_USERNAME`` and ``DOCKER_TOKEN``. +``DOCKER_USERNAME`` should be set to the Docker Hub username of the account that will be pushing the new Docker images. +``DOCKER_TOKEN`` should be set to a secret authentication token for that account. +We recommend creating a separate token for each GitHub repository for which you want to enable automatic image publication, even if they all use the same username. + +If using Docker Hub You may need to have a Docker Pro or similar paid Docker Hub account. +Alternately, you can contact SQuaRE to set up Docker image publication using our Docker account. + +The next step is to create secrets for your application: :doc:`add-a-onepassword-secret`. + +Finally, deploy your application by creating a Helm chart and an Argo CD Application in Phalanx: :doc:`add-application`. 
diff --git a/docs/developers/deploy-from-a-branch.rst b/docs/developers/deploy-from-a-branch.rst new file mode 100644 index 0000000000..2b41e1b2ab --- /dev/null +++ b/docs/developers/deploy-from-a-branch.rst @@ -0,0 +1,186 @@ +####################################### +Deploying from a branch for development +####################################### + +When developing applications and their :doc:`Helm charts `, it's useful to temporarily deploy from a branch of Phalanx on :doc:`designated development environments ` before merging to Phalanx's default branch. + +Some use cases include: + +- Testing that a new or updated Helm chart works in a higher-fidelity environment than the Minikube GitHub Actions CI cluster. +- Testing how a new or updated application interacts with other deployed applications and cluster infrastructure like databases. + +Through this process it is possible to develop an application in a fairly tight loop, though it's best to augment this practice with unit tests within the application's codebase. + +.. seealso:: + + This page focuses on using a development environment to iteratively develop and test changes to an application, ultimately yielding a applicatino upgrade in Phalanx. + You can achieve the same result, without the iterative deployment testing, following the steps in :doc:`upgrade`. + +.. _deploy-branch-prep: + +Preparing and pushing a branch +============================== + +Start by creating a branch of the `phalanx repository`_ and editing your appliation. + +You can make many types of edits to the application. +The most straightforward changes are updates to your application's Docker images or the Helm sub-charts the application depends on. +See :doc:`upgrade`. +You can also make changes to the Helm values by editing the application's defaults in its ``values.yaml`` file, or the values for the development environment in the corresponding ``values-.yaml`` file. 
+Finally, you can also make changes to the Helm templates for Kubernetes resources. + +Commit your changes and push your branch to GitHub. +Throughout this process, you can continue to commit changes and push updates to your branch to GitHub. + +.. tip:: + + In a development environment it's useful to force Kubernetes to pull the application's Docker images every time a Pod_ starts up. + This way you can push edits to the Docker images with a specific development tag [1]_ and then have your test deployment use those updated images. + This setting is controlled by the ``imagePullPolicy`` key in Deployment_ resources (and specifically their Pods_). + In typical application Helm charts the image pull policy is accessible from Helm values. + In the application's Helm values file for the development environment, set this pull policy to ``Always``: + + .. code-block:: yaml + :caption: services//values-.yaml + + image: + pullPolicy: Always + + Consult the Helm values documentation for your application for details. + + .. [1] SQuaRE Docker images are tagged with the Git branch or tag they are built from, with a typical branch build being tagged as ``tickets-DM-00000``. + +Switching the Argo CD Application to sync the branch +==================================================== + +By default, Argo CD syncs your application from the default branch (``master``) of the `phalanx repository`_. +Change the application in Argo CD to instead sync from the branch you've pushed to GitHub: + +1. Open your application's page in your environment's Argo CD UI. + Generally the URL path for this page, relative to the environment's domain, is ``/argo-cd/applications/``. + +2. Click on the resource of type ``Application``. + In the tree view this is the root node. + + .. image:: argocd-application.jpg + +3. Click on the :guilabel:`Edit` button in the :guilabel:`Summary` pane. + + .. image:: application-edit-button.jpg + +4. Edit the application to sync from your branch: + + 1. 
Edit the :guilabel:`Target revision` field and enter your branch's name. + 2. Finally, click on the :guilabel:`Save` button. + + .. image:: application-revision-edit.jpg + +5. In the application's page in Argo CD, click on the :guilabel:`Sync` button to redeploy the application from your branch. + + .. image:: sync-button.jpg + +Updating the application's Helm chart +===================================== + +While your application is in active development, you may need to update its Helm chart and corresponding Kubernetes resources. +There are two ways of approaching these updates. + +.. _updating-resources-in-argo-cd: + +Editing resources directly in Argo CD +------------------------------------- + +The fastest method for trying out changes to Kubernetes resources is to directly edit those resources in the Argo CD UI. +In your application's Argo CD page you can click on a specific resource (such as a ConfigMap_ or Deployment_) and click the :guilabel:`Edit` button on the live manifest. +Make your changes, then click :guilabel:`Save`. + +Your application should show as out of sync. +Click the :guilabel:`Sync` button to redeploy the resources to the Kubernetes cluster. + +Note that some changes won't affect a running deployment. +In some cases you many also need to restart Pods_ in Deployments_ to see changes take affect. +See :ref:`branch-deploy-restart`. + +.. important:: + + Edits to resources via the Argo CD UI are temporary. + To make permanent changes, you need to edit the application's Helm chart in the `phalanx repository`_. + +.. _updating-and-resyncing-from-branch: + +Updating and resyncing from the branch +-------------------------------------- + +When you have edited your application's Helm chart in your development branch of the `phalanx repository`_, you need to sync those changes to Kubernetes. + +Argo CD generally refreshes automatically. 
+If you have pushed your branch to GitHub and Argo CD doesn't show that your application is out-of-sync, you can click the :guilabel:`Refresh` button on your application's Argo CD page. + +When your application shows an out-of-sync status, you can click the :guilabel:`Sync` button on your application's Argo CD page. +When individual applications are synchronized their status changes from yellow to green. + +In some cases you many also need to restart Pods_ in Deployments_ to see changes take affect. +See :ref:`branch-deploy-restart`. + +Refreshing a deployment's Docker images +======================================= + +Besides developing the Helm chart, you can also test branch builds of your application's Docker images inside Deployment_ resources. + +To start, ensure that the Deployment_ is using development builds of your application's Docker images. +The best way to do this is to edit the application's Helm chart for the application in the development environment and to :ref:`sync those changes `. +For many applications you can set the ``appVersion`` in the field in the application's ``Chart.yaml`` file to the name of the development Docker tag (see also :doc:`upgrade`). + +You should also ensure that the Deployment_ is always pulling new images, rather than caching them, by setting the ``imagePullPolicy`` to ``Always``. +This is covered in :ref:`deploy-branch-prep`. + +When new Docker images for your application are available with the corresponding branch tag from a container repository, you will need to restart the deployments using those images. See :ref:`branch-deploy-restart`. + +.. _branch-deploy-restart: + +Restarting a Deployment +======================= + +Some changes won't affect a running Deployment_. +For example, many Deployments_ only read ConfigMap_ or Secret_ resources when Pods_ initially start up. +To realize an update, you'll see to restart the Pods_ in Deployments_. 
+ +To restart a Deployment_, find the Deployment_ resources in your application's Argo CD page, click on the three-vertical-dots icon, and select :guilabel:`Restart` from the menu. +New pods will appear while old pods will shut down. + +.. figure:: restart-deployment.png + :alt: Screenshot showing a Deployment in the Argo CD with its drop down menu, highlighting the Restart item. + + The Deployment drop-down menu for accessing + Click on the three-vertical-dots to open the drop-down menu for a Deployment resource. + Select the :guilabel:`Restart` item to restart the deployment. + +If the new pods fail to start up, they will show a "crash-loop backoff" status and the old pods will continue to operate. +You'll need to resolve the error with changes to the application's Docker image and/or Helm charts. +After making fixes, you may need to restart the Deployment again. + +Merging and switching the Argo CD Application to the default branch +=================================================================== + +Once development and testing is complete, you should submit the pull request for review following the `Data Management workflow guide`_. +Once your branch is merged, remember to reset your application's Argo CD ``Application`` resource to point back to the default branch (currently ``master``). + +1. Open your application's page in your environment's Argo CD UI. + Generally the URL path for this page, relative to the environment's domain, is ``argo-cd/applications/``. + +2. Click on the resource of type ``Application``. + In the tree view this is the root node. + +3. Click on the :guilabel:`Edit` button in the :guilabel:`Summary` pane: + + - Edit the :guilabel:`Target revision` field back to the default branch (``master``). + - Finally, click on the :guilabel:`Save` button. + +4. In the application's page in Argo CD, click on the :guilabel:`Sync` button to redeploy the application from the default branch. 
+ +Next steps +========== + +Follow this page, you have iterated on the development of your application and ultimately upgraded that application in a development environment. +The next step is to roll out this change to other environments. +This activity is normally done by the administrators for each environment, see :doc:`/admin/sync-argo-cd`. diff --git a/docs/developers/index.rst b/docs/developers/index.rst new file mode 100644 index 0000000000..6c1d2be848 --- /dev/null +++ b/docs/developers/index.rst @@ -0,0 +1,39 @@ +########## +Developers +########## + +Developers can deploy their applications on Rubin's Kubernetes environments, such as the Rubin Science Platform, by integrating into Phalanx. +In this section of the Phalanx documentation you can learn how to build and integrate your application, and how to test your application's deployment in development Phalanx environments. + +For background on Phalanx and how to contribute to the Phalanx repository itself, see the :doc:`/about/index` section. +Individual applications are documented in the :doc:`/applications/index` section. + +.. toctree:: + :maxdepth: 2 + :titlesonly: + :caption: Build + :name: dev-build-toc + + create-an-application + +.. toctree:: + :maxdepth: 2 + :titlesonly: + :caption: Integration + :name: dev-int-toc + + chart-overview + add-application + add-external-chart + add-a-onepassword-secret + update-a-onepassword-secret + +.. 
toctree:: + :maxdepth: 2 + :titlesonly: + :caption: Deploy & maintain + :name: dev-deploy-toc + + upgrade + deploy-from-a-branch + local-development diff --git a/docs/service-guide/local-development.rst b/docs/developers/local-development.rst similarity index 80% rename from docs/service-guide/local-development.rst rename to docs/developers/local-development.rst index b2f0e4e365..ae05cd59e5 100644 --- a/docs/service-guide/local-development.rst +++ b/docs/developers/local-development.rst @@ -2,17 +2,20 @@ Set up a local development environment with minikube #################################################### -Using `minikube `__ you can quickly set up a local Kubernetes cluster to help you adding a service to Phalanx (see :doc:`add-service`). +Using `minikube `__ you can quickly set up a local Kubernetes cluster to help you develop and test an application for Phalanx (see :doc:`add-application`). This page shows you how to run a Minikube cluster on macOS (amd64 or arm64) using the `docker driver `__. You may be able to deploy the entire Science Platform, provided that you have enough cpu and memory on your local machine. -If not, you can enable only the essential services to develop with minikube. +If not, you can enable only the essential applications to develop with minikube. -.. note:: +.. warning:: This procedure may not create a fully-operational auth system since the ingress is different from the production system. As well, this procedure does not create a TLS certificate. + Instead, the recommended pattern for developing an application in a Kubernetes cluster is to use a development environment. + See :doc:`deploy-from-a-branch` for details. + Start minikube ============== @@ -49,9 +52,9 @@ Requirements #. Install `Helm 3 `__. -#. Install `Vault `__. +#. Install `Vault `__. -#. Clone the `Phalanx repository `__. +#. Clone the `Phalanx repository`_. 
Open Phalanx's ``installer/`` directory: @@ -75,39 +78,36 @@ Lastly, set the environment variables for Vault access: The Vault read key for minikube is accessible from the ``vault_keys_json`` item in the LSST IT/RSP-Vault 1Password Vault. The key itself is under the ``k8s_operator/minikube.lsst.codes`` → ``read`` → ``id`` field. If you do not have Vault access, ask SQuaRE for the minikube Vault read key. -See also :doc:`../arch/secrets`. - -Enable essential services +See also :doc:`/about/secrets`. Set up a Phalanx branch for your local minikube deployment ---------------------------------------------------------- The ``install.sh`` uses the locally checked out branch of your Phalanx repository clone. -To conserve resources, you may want to deploy a subset of Phalanx services in your local minikube cluster. +To conserve resources, you may want to deploy a subset of Phalanx applications in your local minikube cluster. You can do this by editing the `/science-platform/values-minikube.yaml `_ file. -Set any service you do not want to deploy to ``enabled: false``. +Set any application you do not want to deploy to ``enabled: false``. Commit any changes with Git into a development branch of the Phalanx repository. -**You must also push this development branch to the GitHub origin,** https://github.com/lsst-sqre/phalanx.git. +**You must also push this development branch to the GitHub origin,** ``https://github.com/lsst-sqre/phalanx.git``. The ``install.sh`` script uses your locally-checked out branch of Phalanx, but also requires that the branch be accessible from GitHub. 
-**Services that must be disabled for local Minikube:** +**Application that must be disabled for local Minikube:** - ``ingress-nginx`` (conflicts with the minikube addon of Nginx Ingress Controller) -**Minimal set of services that should be enabled:** +**Minimal set of applications that should be enabled:** - ``vault_secrets_operator`` (for Vault secrets) - ``gafaelfawr`` (for authentication) -- ``postgreql`` (for gafaelfawr) +- ``postgresql`` (for gafaelfawr) Run the installer ------------------ Finally, run the installer for the minikube environment. - .. code-block:: sh ./install.sh minikube $VAULT_TOKEN @@ -122,7 +122,7 @@ Add the following line to ``/etc/hosts``. 127.0.0.1 minikube.lsst.codes -On a new terminal, use ``minikube tunnel`` to route traffic from the host to the services in minikube. +On a new terminal, use ``minikube tunnel`` to route traffic from the host to the application in minikube. .. code-block:: sh @@ -136,4 +136,4 @@ The minikube Argo CD admin password can be retrieved from Vault. VAULT_PATH_PREFIX=`yq -r .vault_path_prefix ../science-platform/values-minikube.yaml` vault kv get --field=argocd.admin.plaintext_password $VAULT_PATH_PREFIX/installer -With Argo CD you can sync your service (see :doc:`sync-argo-cd`). +With Argo CD you can sync your application (see :doc:`/admin/sync-argo-cd`). 
diff --git a/docs/developers/restart-deployment.png b/docs/developers/restart-deployment.png new file mode 100644 index 0000000000..ac172977d3 Binary files /dev/null and b/docs/developers/restart-deployment.png differ diff --git a/docs/developers/sync-button.jpg b/docs/developers/sync-button.jpg new file mode 100644 index 0000000000..83526cf63b Binary files /dev/null and b/docs/developers/sync-button.jpg differ diff --git a/docs/service-guide/update-a-onepassword-secret.rst b/docs/developers/update-a-onepassword-secret.rst similarity index 92% rename from docs/service-guide/update-a-onepassword-secret.rst rename to docs/developers/update-a-onepassword-secret.rst index cf6c0d186d..d116d0f0ca 100644 --- a/docs/service-guide/update-a-onepassword-secret.rst +++ b/docs/developers/update-a-onepassword-secret.rst @@ -5,7 +5,7 @@ Updating a secret stored in 1Password and VaultSecret Secrets that are stored in 1Password are synchronized into Vault using the `installer/generate_secrets.py `__ script. Once they are in Vault, they are accessible to the Vault Secrets Operator, which responds to creation of any ``VaultSecret`` resources in Kubernetes by grabbing the current value of the secret data in Vault. -The Vault Secrets Operator reconciles any changes as well by comparing Vault's state with that of any ``VaultSecret``s every 60 seconds. +The Vault Secrets Operator reconciles any changes as well by comparing Vault's state with that of any ``VaultSecret`` resources every 60 seconds. This reconciliation process can also take a bit of time; the net result is that you can expect changes to be reflected after a few minutes. .. note:: @@ -16,7 +16,7 @@ This reconciliation process can also take a bit of time; the net result is that So, if you want to make any changes to a ``VaultSecret``'s data, you'll need to: 1. Make the changes in 1Password -2. Run the `installer/update_secrets.sh `__ script, as described in :doc:`/service-guide/add-a-onepassword-secret`. +2. 
Run the `installer/update_secrets.sh `__ script, as described in :doc:`add-a-onepassword-secret`. 3. Wait a few minutes for automatic reconciliation diff --git a/docs/developers/upgrade.rst b/docs/developers/upgrade.rst new file mode 100644 index 0000000000..23c34d88da --- /dev/null +++ b/docs/developers/upgrade.rst @@ -0,0 +1,14 @@ +######################## +Upgrading an application +######################## + +#. Release a new version of the application by pushing an image with the new version tag to whichever Docker repository is used. + For more recent applications, this image should be built and pushed as a GitHub action upon release of a new version. + +#. There are multiple possibilities that depend on the sort of application you have. + - If it is a first-party application such as ``cachemachine``, with its chart directly in Phalanx, then it should use the recommended pattern of determining the default Docker tag via the ``appVersion`` chart metadata. This will only require updating ``appVersion`` in ``Chart.yaml``. + - If, like ``cert-manager``, it's a third-party application with some extra resources glued in, and you are updating to a newer version of the third-party Helm chart, you will need to update the ``version`` in the dependency. + - If it is a complex application such as ``sasquatch`` that bundles first- and third-party applications, you may need to do both, or indeed descend into the ``charts`` directory and update the ``appVersion`` of the subcharts therein. Tricky cases such as these may require some study before deciding on the best course of action. + +Once you have updated the application, Argo CD will that the change is pending, but no changes will be applied automatically. +To apply the changes in a given environment, see :doc:`/admin/sync-argo-cd`. 
diff --git a/docs/documenteer.toml b/docs/documenteer.toml new file mode 100644 index 0000000000..974db92ca8 --- /dev/null +++ b/docs/documenteer.toml @@ -0,0 +1,35 @@ +[project] +title = "Phalanx" +copyright = "2020-2022 Association of Universities for Research in Astronomy, Inc. (AURA)" +base_url = "https://phalanx.lsst.io" +github_url = "https://github.com/lsst-sqre/phalanx" +github_default_branch = "master" +version = "Current" + +[sphinx] +rst_epilog_file = "_rst_epilog.rst" +extensions = [ + "sphinx_diagrams", + "sphinx_jinja", + "phalanx.docs.crossref", +] + +[sphinx.linkcheck] +ignore = [ + '^http://localhost', + '^http(s)*://ls.st', + '^https://tucson-teststand.lsst.codes', + '^https://summit-lsp.lsst.codes', + '^https://minikube.lsst.codes', + '^https://base-lsp.lsst.codes', + '^https://data-dev.lsst.cloud', + '^https://data-int.lsst.cloud', + '^https://data.lsst.cloud', + '^https://data-dev.lsst.eu', + '^https://rsp.lsst.ac.uk', + '^https://github.com/lsst-sqre/phalanx/blob/master/services/strimzi/values.yaml', + '^https://github.com/orgs/', + # Temporary until merged. + "^https://github.com/lsst-sqre/phalanx/tree/master/services/sqlproxy-cross-project", + "^https://github.com/lsst-sqre/phalanx/blob/master/services/sqlproxy-cross-project", +] diff --git a/docs/environments/_summary.rst.jinja b/docs/environments/_summary.rst.jinja new file mode 100644 index 0000000000..ea7d8f9df9 --- /dev/null +++ b/docs/environments/_summary.rst.jinja @@ -0,0 +1,45 @@ +.. list-table:: + + * - Phalanx name + - ``{{ env.name }}`` + * - Root domain + - `{{ env.domain }} `__ + * - Argo CD + - {{ env.argocd_url }} + * - Applications + - .. list-table:: + + * - Documentation + - Environment values + - Defaults + {% for app in env.apps %} + * - :px-app:`{{ app.name }}` + - `values-{{ env.name }}.yaml `__ + - `values.yaml `__ + {% endfor %} + * - Identity provider + - {{ env.identity_provider }} + {% if env.gafaelfawr_roles %} + * - Gafaelfawr groups + - .. 
list-table:: + + * - Role + - Groups + {% for role_groups in env.gafaelfawr_roles %} + * - ``{{ role_groups[0] }}`` + - - {{ role_groups[1][0] }} + {% if role_groups[1]|length > 1 %} + {% for group in role_groups[1][1:] %} + - {{ group }} + {%- endfor %} + {%- endif %} + {%- endfor %} + {% endif %} + {% if env.argocd_rbac_csv %} + * - Argo CD RBAC + - .. csv-table:: + + {% for csvline in env.argocd_rbac_csv %} + {{ csvline }} + {%- endfor %} + {% endif %} diff --git a/docs/environments/base/index.rst b/docs/environments/base/index.rst new file mode 100644 index 0000000000..df3bec85ba --- /dev/null +++ b/docs/environments/base/index.rst @@ -0,0 +1,10 @@ +.. px-env:: base + +###################################### +base — base-lsp.lsst.codes (La Serena) +###################################### + +``base`` is the environment for the Rubin Science Platform at the Rubin Base facility in La Serena. + +.. jinja:: base + :file: environments/_summary.rst.jinja diff --git a/docs/environments/ccin2p3/index.rst b/docs/environments/ccin2p3/index.rst new file mode 100644 index 0000000000..733aa9b0ca --- /dev/null +++ b/docs/environments/ccin2p3/index.rst @@ -0,0 +1,10 @@ +.. px-env:: ccin2p3 + +################################################# +ccin2p3 — data-dev.lsst.eu (French Data Facility) +################################################# + +``ccin2p3`` is the environment for the Rubin Science Platform at the `CC-IN2P3 `__. + +.. jinja:: ccin2p3 + :file: environments/_summary.rst.jinja diff --git a/docs/environments/idfdev/index.rst b/docs/environments/idfdev/index.rst new file mode 100644 index 0000000000..bed3bed456 --- /dev/null +++ b/docs/environments/idfdev/index.rst @@ -0,0 +1,11 @@ +.. 
px-env:: idfdev + +################################################ +idfdev — data-dev.lsst.cloud (SQuaRE dev in GCP) +################################################ + +``idfdev`` is a development environment for the Rubin Science Platform at the Interim Data Facility (IDF) hosted on Google Cloud Platform. +The primary use of ``idfdev`` is for application development by the SQuaRE team. + +.. jinja:: idfdev + :file: environments/_summary.rst.jinja diff --git a/docs/environments/idfint/index.rst b/docs/environments/idfint/index.rst new file mode 100644 index 0000000000..d1c3a25cdb --- /dev/null +++ b/docs/environments/idfint/index.rst @@ -0,0 +1,11 @@ +.. px-env:: idfint + +##################################################### +idfint — data-int.lsst.cloud (RSP integration in GCP) +##################################################### + +``idfint`` is a development and integration environment for the Rubin Science Platform at the Interim Data Facility (IDF) hosted on Google Cloud Platform. +The primary use of ``idfint`` is for Rubin construction and operations teams to integrate applications into the Rubin Science Platform. + +.. jinja:: idfint + :file: environments/_summary.rst.jinja diff --git a/docs/environments/idfprod/index.rst b/docs/environments/idfprod/index.rst new file mode 100644 index 0000000000..44a4a826cf --- /dev/null +++ b/docs/environments/idfprod/index.rst @@ -0,0 +1,11 @@ +.. px-env:: idfprod + +################################################# +idfprod — data.lsst.cloud (Production RSP in GCP) +################################################# + +``idfprod`` is the production environment for the Rubin Science Platform at the Interim Data Facility (IDF) hosted on Google Cloud Platform. +``idfprod`` serves as the public Rubin Science Platform for the Data Previews. + +.. 
jinja:: idfprod + :file: environments/_summary.rst.jinja diff --git a/docs/environments/index.rst b/docs/environments/index.rst new file mode 100644 index 0000000000..dde7b92396 --- /dev/null +++ b/docs/environments/index.rst @@ -0,0 +1,23 @@ +############ +Environments +############ + +Environments are specific Kubernetes clusters deploying Phalanx services. +Each environment can deploy a specific collection of applications, and with specific configurations. + +To learn more about operating a Phalanx environment, see the :doc:`/admin/index` section. + +.. Add a table of environments, possibly linking to their own documentation sets. + +.. toctree:: + :maxdepth: 1 + + base/index + ccin2p3/index + idfdev/index + idfint/index + idfprod/index + minikube/index + roe/index + summit/index + tucson-teststand/index diff --git a/docs/environments/minikube/index.rst b/docs/environments/minikube/index.rst new file mode 100644 index 0000000000..500e76e690 --- /dev/null +++ b/docs/environments/minikube/index.rst @@ -0,0 +1,14 @@ +.. px-env:: minikube + +################################################## +minikube — minikube.lsst.codes (GitHub Actions CI) +################################################## + +``minikube`` is the Phalanx testing environment for the Rubin Science Platform. +minikube is stood up in the GitHub Actions CI workflow for testing pull requests to the Phalanx repository. + +``minikube`` can also be used locally as a development deployment of the Science Platform. +See :doc:`/developers/local-development` for more information. + +.. jinja:: minikube + :file: environments/_summary.rst.jinja diff --git a/docs/environments/roe/index.rst b/docs/environments/roe/index.rst new file mode 100644 index 0000000000..cc76a32978 --- /dev/null +++ b/docs/environments/roe/index.rst @@ -0,0 +1,10 @@ +.. 
px-env:: roe + +####################################### +roe — rsp.lsst.ac.uk (UK Data Facility) +####################################### + +``roe`` is the environment for the Rubin Science Platform hosted at the `Royal Observatory, Edinburgh `__. + +.. jinja:: roe + :file: environments/_summary.rst.jinja diff --git a/docs/environments/summit/index.rst b/docs/environments/summit/index.rst new file mode 100644 index 0000000000..49dca48169 --- /dev/null +++ b/docs/environments/summit/index.rst @@ -0,0 +1,11 @@ +.. px-env:: summit + +############################################# +summit — summit-lsp.lsst.codes (Rubin Summit) +############################################# + +``summit`` is the environment for the Rubin Science Platform at the Rubin summit. +The primary use of ``summit`` is for observatory operations at the summit site itself. + +.. jinja:: summit + :file: environments/_summary.rst.jinja diff --git a/docs/environments/tucson-teststand/index.rst b/docs/environments/tucson-teststand/index.rst new file mode 100644 index 0000000000..db03138c5a --- /dev/null +++ b/docs/environments/tucson-teststand/index.rst @@ -0,0 +1,10 @@ +.. px-env:: tucson-teststand + +########################################################### +tucson-teststand — tucson-teststand.lsst.codes (T&S/SITCom) +########################################################### + +``tucson-teststand`` is the development and integration environment for the Telescope & Site and Commissioning teams, hosted out of NOIRLab in Tucson, Arizona. + +.. 
jinja:: tucson-teststand + :file: environments/_summary.rst.jinja diff --git a/docs/index.rst b/docs/index.rst index 0126dd397c..06cb98feae 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,97 +1,58 @@ -########################### -Science Platform operations -########################### +################################################################ +Phalanx: Rubin Observatory Kubernetes Application Configurations +################################################################ -The Rubin Science Platform is described in `LDM-542 `__. -This document contains operational notes of interest to administrators of the Science Platform and maintainers of services deployed via the Science Platform, but not of interest to users. +Phalanx [#name]_ is a GitOps repository for Rubin Observatory's Kubernetes environments, notably including Rubin Science Platform deployments like https://data.lsst.cloud. +Using Helm_ and `Argo CD`_, Phalanx defines the configurations of applications in each environment. -For user documentation of the Notebook Aspect of the Rubin Science Platform, see `nb.lsst.io `__. +This documentation is for Rubin team members that are developing applications and administering Kubernetes clusters. +Astronomers and other end-users can visit the `Rubin Documentation Portal `__ to learn how to use Rubin Observatory's software, services, and datasets. -The Science Platform uses `Argo CD`_ to manage its Kubernetes resources. -The Argo CD configuration and this documentation are maintained on `GitHub `__. +Phalanx is on GitHub at https://github.com/lsst-sqre/phalanx. -A phalanx is a SQuaRE deployment (Science Quality and Reliability Engineering, the team responsible for the Rubin Science Platform). -Phalanx is how we ensure that all of our services work together as a unit. - -Overview -======== +.. [#name] A phalanx is a SQuaRE deployment (Science Quality and Reliability Engineering, the team responsible for the Rubin Science Platform). 
+ Phalanx is how we ensure that all of our applications work together as a unit. .. toctree:: - :maxdepth: 2 - - introduction - arch/repository - arch/secrets + :maxdepth: 1 + :hidden: -For service maintainers -======================= + about/index + developers/index + admin/index + applications/index + environments/index -General development and operations ----------------------------------- +.. grid:: 3 -.. toctree:: - :maxdepth: 2 + .. grid-item-card:: About + :link: about/index + :link-type: doc - service-guide/create-service - service-guide/add-a-onepassword-secret - service-guide/update-a-onepassword-secret - service-guide/add-service - service-guide/add-external-chart - service-guide/local-development - service-guide/sync-argo-cd - service-guide/upgrade - service-guide/chart-changes + Learn about Phalanx's design and how to contribute. -Specific tasks --------------- + .. grid-item-card:: Developers + :link: developers/index + :link-type: doc -.. toctree:: - :maxdepth: 2 + Learn how to develop applications that are deployed with Phalanx. - service-guide/update-tap-schema - service-guide/mobu-manage-flocks + .. grid-item-card:: Administrators + :link: admin/index + :link-type: doc -For science platform administrators -=================================== + Learn how to install and operate Phalanx applications, such as the Rubin Science Platform, in your data access center. -Services --------- +.. grid:: 2 -.. toctree:: - :maxdepth: 2 - - ops/argo-cd/index - ops/cachemachine/index - ops/cert-issuer/index - ops/cert-manager/index - ops/gafaelfawr/index - ops/ingress-nginx/index - ops/nublado2/index - ops/postgres/index - ops/squash-api/index - ops/tap/index - ops/vault-secrets-operator/index - -Bootstrapping -------------- - -.. toctree:: - :maxdepth: 3 + .. grid-item-card:: Applications + :link: applications/index + :link-type: doc - ops/bootstrapping + Learn about the individual applications that are configured to deploy with Phalanx. 
-Infrastructure --------------- - -.. toctree:: - :maxdepth: 2 - - ops/infrastructure/filestore/index - -Troubleshooting ---------------- - -.. toctree:: - :maxdepth: 2 + .. grid-item-card:: Environments + :link: environments/index + :link-type: doc - ops/troubleshooting + Learn about the Kubernetes clusters that are running Phalanx. diff --git a/docs/introduction.rst b/docs/introduction.rst deleted file mode 100644 index d5cfb5d660..0000000000 --- a/docs/introduction.rst +++ /dev/null @@ -1,34 +0,0 @@ -###################################### -Introduction to Kubernetes and Argo CD -###################################### - -The Rubin Science Platform runs on `Kubernetes`_ -Kubernetes provides a way to coordinate running services on multiple nodes. -Kubernetes runs a set of `Docker`_ containers and sets up the networking, storage, and configuration of those containers. - -.. _Kubernetes: https://kubernetes.io/ -.. _Docker: https://docker.com/ - -Git repositories for individual services typically have build pipelines resulting in new Docker container builds when code changes are merged. -For example, our Jenkins build system builds stack and JupyterLab containers, and the `lsst-tap-service repository `__ builds the TAP service containers. - -An service deployed on Kubernetes is made up of a number of resources, such as p -ods, deployments, and configmaps. -These resources must be configured to work together to form a logical service, such as the Portal or Notebook Aspects. -Each logical service is contained in a `Helm`_ chart that uses templates to create each resource with some configuration applied. -The configuration for a Helm chart is called a values file, and is a simple YAML document that contains inputs for the templating of the chart. - -Be aware that, confusingly, both "service" and "application" are also names of specific Kubernetes resources that are only one component of a logical service. 
-In the rest of this documentation, "service" refers to the logical service, not the Kubernetes resource. -Argo CD manages resources via an abstraction called an "application," which tells Argo CD what Helm chart to use to manage the resources. -In the rest of this documentation, "application" will refer to the Argo CD abstraction concept. -In general, each Argo CD application corresponds to a logical service. - -But Helm doesn't keep track of the service once it is deployed. -That is, it won't notice when the configuration changes and apply those changes. -`Argo CD`_ fills this need. -Argo CD watches its source repository for new Git commits and will keep track of those changes, either applying them automatically ("syncing" them), or waiting for an operator to press the sync button in the web UI. -Argo CD is the only layer in this stack that has a web UI that can be easily navigated, and it provides many useful features, such as deleting resources and resyncing services. - -The Rubin Science Platform stores its Argo CD configuration in the `phalanx repository `__. -This includes the Argo CD application resources, pointers to the Helm charts for all services that are installed as part of the Science Platform, and values files to configure those services. diff --git a/docs/ops/argo-cd/index.rst b/docs/ops/argo-cd/index.rst deleted file mode 100644 index 0e9fa56886..0000000000 --- a/docs/ops/argo-cd/index.rst +++ /dev/null @@ -1,50 +0,0 @@ -####### -Argo CD -####### - -.. list-table:: - :widths: 10,40 - - * - Type - - Helm_ - * - Namespace - - ``argocd`` - -.. rubric:: Overview - -`Argo CD`_ is the software that manages all Kubernetes resources in a deployment of the Rubin Science Platform. -It is itself a set of Kubernetes resources and running pods managed with `Helm`_. -Argo CD cannot manage and upgrade itself, so it periodically should be upgraded manually. - -Argo CD is installed and bootstrapped as part of the cluster creation process. 
-The UI is exposed on the ``/argo-cd`` route for the Science Platform. -Unlike other resources on the Science Platform, it is not protected by Gafaelfawr. -It instead uses username and password authentication. -The username and password are stored in the SQuaRE 1Password vault. - -.. rubric:: Warnings - -Argo CD is somewhat particular about how its resources are set up. -Everything related to Argo CD that can be namespaced must be in the ``argocd`` namespace. - -.. warning:: - - ``Application`` resources must be in the ``argocd`` namespace, not in the namespace of the application. - -If you accidentally create an ``Application`` resource outside of the ``argocd`` namespace, Argo CD will display it in the UI but will not be able to sync it. -You also won't be able to easily delete it if it defines the normal Argo CD finalizer because that finalizer will not run outside the ``argocd`` namespace. -To delete the stray ``Application`` resource, edit it with ``kubectl edit`` and delete the finalizer, and then delete it with ``kubectl delete``. - -.. warning:: - - Do not use the documented Argo CD upgrade method that uses ``kubectl apply``. - This will not work properly when Argo CD was installed via Helm, as it is on the Science Platform, and it will create a huge mess. - -Instead, follow the upgrade process described below. - -.. rubric:: Guides - -.. toctree:: - - upgrading - authentication diff --git a/docs/ops/argo-cd/upgrading.rst b/docs/ops/argo-cd/upgrading.rst deleted file mode 100644 index 2347b5b47d..0000000000 --- a/docs/ops/argo-cd/upgrading.rst +++ /dev/null @@ -1,147 +0,0 @@ -################# -Upgrading Argo CD -################# - -Automatic upgrades -================== - -Normally, you can let Argo CD upgrade itself. -According to the documentation, this is not necessarily safe. -The developers recommend the manual process documented below instead. -However, it's much more convenient to do the upgrade through Argo CD and we have had good luck with it. 
-Just be aware that it's not entirely supported. - -When performing the upgrade through Argo CD, it appears to be somewhat more reliable to use the following process rather than telling Argo CD to sync everything at once: - -#. Sync everything except the deployments by unchecking them in the sync dialog -#. Sync the argocd-redis deployment and wait for it to be green -#. Sync the remaining deployments one at a time in the following order (the exact order probably doesn't matter, but this is what we've done): - - ``argocd-application-controller`` - - ``argocd-server`` - - ``argocd-repo-server`` - - ``argocd-dex-server`` - -Manual upgrade process -====================== - -#. Determine the current version of Argo CD. - The easiest way to do this is to go to the ``/argo-cd`` route and look at the version number in the top left sidebar. - Ignore the hash after the ``+`` sign; the part before that is the version number. - -#. Ensure your default ``kubectl`` context is the cluster you want to upgrade. - Check your current context with ``kubectl config current-context`` and switch as necessary with ``kubectl config use-context``. - -#. Back up the Argo CD configuration. - - .. code-block:: console - - $ chmod 644 ~/.kube/config - $ docker run -v ~/.kube:/home/argocd/.kube --rm \ - argoproj/argocd:$VERSION argocd-util export -n argocd > backup.yaml - $ chmod 600 ~/.kube/config - - You have to temporarily make your ``kubectl`` configuration file world-readable so that the Argo CD Docker image can use your credentials. - Do this on a private system with no other users. - Replace ``$VERSION`` with the version of Argo CD as discovered above. - The version will begin with a ``v``. - - This is taken from the `Argo CD disaster recovery documentation `__ with the addition of the namespace flag. - - The backup will not be needed if all goes well. - -#. Determine the new version of the Argo CD Helm chart (**not** Argo CD itself) to which you will be upgrading. - - .. 
code-block:: console - - $ helm repo add argo https://argoproj.github.io/argo-helm - $ helm repo update - $ helm search repo argo-cd - - Note the chart version for ``argo/argo-cd``. - -#. Upgrade Argo CD using Helm. - Check out the `phalanx repository `_ first. - - .. code-block:: console - - $ cd phalanx/installer - $ helm upgrade --install argocd argo/argo-cd --version $VERSION \ - --values argo-cd-values.yaml --namespace argocd --wait --timeout 900s - - Replace ``$VERSION`` with the Helm chart version (**not** the Argo CD application version) that you want to install. - -If all goes well, you can now view the UI at ``/argo-cd`` and confirm that everything still looks correct. - -If the ``helm upgrade`` command returns an error like this: - - Error: rendered manifests contain a resource that already - exists. Unable to continue with install: Service - "argocd-application-controller" in namespace "argocd" exists and - cannot be imported into the current release: invalid ownership - metadata; label validation error: key "app.kubernetes.io/managed-by" - must equal "Helm": current value is "Tiller"; annotation validation - error: missing key "meta.helm.sh/release-name": must be set to - "argocd"; annotation validation error: missing key - "meta.helm.sh/release-namespace": must be set to "argocd" - -that means Argo CD was originally installed with Helm v2 and you're using Helm v3. -You can proceed with Helm v3, but you will need to fix all of the annotations and labels first. -For all namespaced resources, you can do this by running the following two commands for each resource type that ``helm upgrade`` warns about. - -.. 
code-block:: console - - $ kubectl -n argocd label --overwrite $RESOURCE \ - -l "app.kubernetes.io/managed-by=Tiller" \ - "app.kubernetes.io/managed-by=Helm" - $ kubectl -n argocd annotate $RESOURCE \ - -l "app.kubernetes.io/managed-by=Helm" \ - meta.helm.sh/release-name=argocd meta.helm.sh/release-namespace=argocd - -Replace ``$RESOURCE`` with the type of the resource. -You should not use this command for non-namespaced resources (specifically ``ClusterRole`` and ``ClusterRoleBinding``). -For those resources, instead of using the ``-l`` selector, find the resources that are part of Argo CD via the ``argocd-`` prefix and then run the ``label`` and ``annotate`` commands naming them explicitly. -If you fix those non-namespaced resources and then iterate for each namespaced resource, eventually the ``helm upgrade`` command will succeed. - -You should only have to do this once per cluster, and then subsequent upgrades with Helm v3 should work smoothly. - -Recovering from a botched upgrade -================================= - -If everything goes horribly wrong, you can remove Argo CD entirely and the restore it from the backup that you took. -To do this, first drop the Argo CD namespace: - -.. code-block:: console - - $ kubectl delete namespace argocd - -You will then need to manually remove the finalizers for all the Argo CD application resources in order for the namespace deletion to succeed. -The following instructions are taken from `an old Kubernetes issue `__. - -.. code-block:: console - - $ kubectl api-resources --verbs=list --namespaced -o name \ - | xargs -n 1 kubectl get --show-kind --ignore-not-found -n argocd - -This will show all resources that need manual attention. -It should only be Argo CD ``Application`` and ``AppProject`` resources. -For each resource, edit it with ``kubectl edit -n argocd`` and delete the finalizer. -As you save each resource, its deletion should succeed. -By the end, the namespace should successfully finish deletion. 
-You can then recreate the namespace, reinstall Argo CD, and restore the backup. - -.. code-block:: console - - $ kubectl create namespace argocd - $ cd phalanx/installer - $ helm upgrade --install argocd argo/argo-cd --version $HELM_VERSION \ - --values argo-cd-values.yaml --namespace argocd --wait --timeout 900s - $ chmod 644 ~/.kube/config - $ docker run -i -v ~/.kube:/home/argocd/.kube --rm \ - argoproj/argocd:$VERSION argocd-util import -n argocd - < backup.yaml - $ chmod 600 ~/.kube/config - -Replace ``$HELM_VERSION`` with the version of the Helm chart you want to use and ``$VERSION`` with the corresponding Argo CD version (as shown via ``helm search repo``). - -This should hopefully restore Argo CD to a working state. -If it doesn't, you'll need to reinstall it using the more extended process used by the cluster installer. -See `installer/install.sh `__ for the commands to run. diff --git a/docs/ops/bootstrapping.rst b/docs/ops/bootstrapping.rst deleted file mode 100644 index 50258ac671..0000000000 --- a/docs/ops/bootstrapping.rst +++ /dev/null @@ -1,186 +0,0 @@ -############################## -Bootstrapping a new deployment -############################## - -This is (somewhat incomplete) documentation on how to add a new Rubin Science Platform environment. - -Requirements -============ - -* The installer assumes Git 2.22 or later. - -* We presume that you are using `Vault `__ coupled with `Vault Secrets Operator `__ to manage your Kubernetes secrets, and further that you will use the same taxonomy that SQuaRE does as described in the `LSST Vault Utilities `__ documentation (essentially ``secret/k8s_operator/``). - We strongly recommend using the `LSST Vault Utilites `__ to create multiple enclaves (one per instance), so that then compromise of one instance doesn't expose all your secrets for all instances. 
- -* Rubin Science Platform applications expect the public hostname of the Science Platform to have a TLS certificate that can be verified using standard CA roots. - Using a self-signed certificate or an institutional CA that is not in the normal list of CAs shipped with Docker base images will probably not work. - See :ref:`hostnames` for more information. - -Checklist -========= - -#. Fork the `phalanx repository `__ if this work is separate from the SQuaRE-managed environments. - -#. Create a virtual environment with the tools you will need from the installer's `requirements.txt `__. - If you are not using 1password as your source of truth (which, if you are not in a SQuaRE-managed environment, you probably are not) then you may omit ``1password``. - In any event, note the write key for your Vault enclave. - -#. Create a new ``values-.yaml`` file in `/science-platform `__. - Start with a template copied from an existing environment that's similar to the new environment. - Edit it to change the environment name at the top to match ```` and choose which services to enable or disable. - -#. Decide on your approach to TLS certificates. - See :ref:`hostnames` for more details. - -#. Do what DNS setup you can. - If you already know the IP address where your instance will reside, create the DNS records (A or possibly CNAME) for that instance. - If you are using a cloud provider or something like minikube where the IP address is not yet known, then you will need to create that record once the top-level ingress is created and has an external IP address. - - The first time you set up the RSP for a given domain (note: *not* hostname, but *domain*, so if you were setting up ``dev.my-rsp.net`` and ``prod.my-rsp.net``, ``dev`` first, you would only need to do this when you created ``dev``), if you are using Let's Encrypt for certificate management (which we highly recommend), you will need to create glue records to enable Let's Encrypt to manage TLS for the domain. 
- See :doc:`cert-issuer/route53-setup` for more details. - -#. For each enabled service, create a corresponding ``values-.yaml`` file in the relevant directory under `/services `__. - Customization will vary from service to service, but the most common change required is to set the fully-qualified domain name of the environment to the one that will be used for your new deployment. - This will be needed in ingress hostnames, NGINX authentication annotations, and the paths to Vault secrets (the part after ``k8s_operator`` should be the same fully-qualified domain name). - - See :ref:`service-notes` for more details on special considerations for individual services. - -#. Generate the secrets for the new environment with `/installer/generate_secrets.py `__ and store them in Vault with `/installer/push_secrets.sh `__. - This is where you will need the write key for the Vault enclave. - -#. Run the installer script at `/installer/install.sh `__. - - If the installation is using a dynamically-assigned IP address, while the installer is running, wait until the ingress-nginx-controller service comes up and has an external IP address; then go set the A record for your endpoint to that address (or set an A record with that IP address for the ingress and a CNAME from the endpoint to the A record). - For installations that are intended to be long-lived, it is worth capturing the IP address at this point and modifying your configuration to use it statically should you ever need to reinstall the instance. - -.. _hostnames: - -Hostnames and TLS -================= - -The Science Platform is designed to run under a single hostname. -All ingresses for all services use different routes on the same external hostname. -That hostname, in turn, is served by an NGINX proxy web server, configured via the ``ingress-nginx`` Helm chart (normally installed with the Science Platform). -An NGINX ingress controller is required since its ``auth_request`` mechanism is used for authentication. 
- -The external hostname must have a valid TLS certificate that is trusted by the stock configuration of standard CentOS, Debian, and Alpine containers. -There are supported two mechanisms to configure that TLS certificate: - -#. Purchase a commercial certificate and configure it as the ingress-nginx default certificate. - Do not add TLS configuration to any of the service ingresses. - For more information, see :doc:`ingress-nginx/certificates`. - With this approach, the certificate will have to be manually renewed and replaced once per year. - -#. Configure Let's Encrypt to obtain a certificate via the DNS solver. - Once this is configured, TLS will be handled automatically without further human intervention. - However, this approach is far more complex to set up and has some significant prerequisites. - For more information, see :doc:`cert-issuer/bootstrapping`. - -To use the second approach, you must have the following: - -* An :abbr:`AWS (Amazon Web Services)` account in which you can create two Route 53 hosted domains. - You must use this domain for the hostname of the Science Platform installation. -* The ability to delegate to that Route 53 hosted domain from some public DNS domain. - This means either registering a domain via Amazon, registering a domain elsewhere and pointing it to Amazon's Route 53 DNS servers, or creating a subdomain of an existing public domain by adding ``NS`` records to that domain for a subdomain hosted on Route 53. - -If neither of those requirements sound familiar, you almost certainly want to use the first option and purchase a commercial certificate. - -.. _service-notes: - -Service notes -============= - -Gafaelfawr ----------- - -When creating the Gafaelfawr configuration for a new environment, in addition to choosing between OpenID Connect authentication and GitHub authentication, you will need to define a group mapping. 
-This specifies which scopes a user will receive based on which groups they are a member of in the upstream identity system. -The current default expects the NCSA groups, which will not be accurate unless you're using CILogon with NCSA LDAP as an attribute source. - -The most important scopes to configure are: - -* ``exec:admin``: provides access to administrative tools (users do not need this) -* ``exec:user``: allows users to create personal tokens -* ``exec:notebook``: allows users to use the Notebook Aspect -* ``exec:portal``: allows users to use the Portal Aspect -* ``read:tap``: allows users to make TAP queries - -If you are using OpenID Connect, the group values for each scope should be group names as shown in the ``isMemberOf`` claim. - -If you are using GitHub, group membership will be synthesized from all of the teams of which the user is a member. -These must be team memberships, not just organization memberships. -The corresponding group for Gafaelfawr purposes will be ``-`` where ```` is the team **slug**, not the team name. -That means the team name will be converted to lowercase and spaces will be replaced with dashes, and other transformations will be done for special characters. -For more information about how Gafaelfawr constructs groups from GitHub teams, see `the Gafaelfawr documentation `__. - -For an example of a ``group_mapping`` configuration for GitHub authentication, see `/services/gafaelfawr/values-idfdev.yaml `__. - -If you run into authentication problems, see :doc:`the Gafaelfawr operational documentation ` for debugging instructions. - -Nublado 2 ---------- - -Nublado (the ``nublado2`` service) and moneypenny need to know where the NFS server that provides user home space is. -Nublado also requires other persistent storage space. -Ensure the correct definitions are in place in their configuration. 
- -For T&S deployments that require instrument control, make sure you have any Multus network definitions you need in the ``nublado2`` ``values.yaml``. -This will look something like: - -.. code-block:: yaml - - singleuser: - extraAnnotations: - k8s.v1.cni.cncf.io/networks: "kube-system/auxtel-dds, kube-system/comcam-dds, kube-system/misc-dds" - initContainers: - - name: "multus-init" - image: "lsstit/ddsnet4u:latest" - securityContext: - privileged: true - -The Multus network names are given as an annotation string containing the networks, separated by commas. -Experimentally, it appears that the interfaces will appear in the order specified. - -The ``initContainers`` entry should be inserted verbatim. -It creates a privileged container that bridges user pods to the specified networks before releasing control to the user's lab. - -Portal ------- - -If the Portal Aspect is configured with a ``replicaCount`` greater than one (recommended for production installations), ``firefly_shared_workdir`` must be set and point to an underlying filesystem that supports shared multiple-write. -This is **not** supported by most Kubernetes persistent volume backends. - -At GKE, we use Filestore via NFS. -At NCSA, we use a ``hostPath`` mount of an underlying GPFS volume. - -Currently the provisioning of this underlying backing store is manual, so make sure you either have created it or gotten a system administrator with appropriate permissions for your site to do so. - -The default UID for the Portal Aspect is 91, although it is tunable in the deployment if need be. - -Squareone ---------- - -If you are using the Let's Encrypt approach to obtain TLS certificates, you must give the Squareone ingress with an appropriate TLS configuration. - -Because all service ingresses share the same external hostname, the way the ingress configuration is structured is somewhat unusual. -Nearly all of the services create an ingress without adding TLS configuration. 
-Instead, they all use the same hostname, without a TLS stanza. -The Squareone ingress is the one designated ingress with a TLS configuration to request creation of certificates. -Because each ingress uses the same hostname, the NGINX ingress will merge all of those ingresses into one virtual host and will set up TLS if TLS is defined on any of them. - -Were TLS defined on more than one ingress, only one of those TLS configurations would be used, but which one is chosen is somewhat random. -Therefore, we designate a single service to hold the configuration to avoid any confusion from unused configurations. - -This means adding something like the following to ``values-.yaml`` in `/services/squareone `__: - -.. code-block:: yaml - - squareone: - ingress: - host: "rsp.example.com" - annotations: - cert-manager.io/cluster-issuer: cert-issuer-letsencrypt-dns - tls: - - secretName: squareone-tls - hosts: - - "rsp.example.com" diff --git a/docs/ops/cachemachine/index.rst b/docs/ops/cachemachine/index.rst deleted file mode 100644 index bc860e4455..0000000000 --- a/docs/ops/cachemachine/index.rst +++ /dev/null @@ -1,27 +0,0 @@ -############ -cachemachine -############ - -.. list-table:: - :widths: 10,40 - - * - Edit on GitHub - - `/services/cachemachine `__ - * - Type - - Helm_ - * - Namespace - - ``cachemachine`` - -.. rubric:: Overview - -The ``cachemachine`` service is an installation of the RSP's image-prepulling service from its `Helm chart `__. - -Upgrading ``cachemachine`` is generally painless. -A simple Argo CD sync is sufficient. - -.. rubric:: Guides - -.. 
toctree:: - - pruning - updating-recommended diff --git a/docs/ops/cachemachine/pruning.rst b/docs/ops/cachemachine/pruning.rst deleted file mode 100644 index 54b0346214..0000000000 --- a/docs/ops/cachemachine/pruning.rst +++ /dev/null @@ -1,13 +0,0 @@ -############# -Image pruning -############# - -If the list of cached images on nodes gets excessively long (we've only seen this at NCSA, where there is lots of disk for images and the nodes have been around forever), K8s may stop updating its list of cached images. This will manifest as the spawner options form being devoid of prepulled images. - -This is a function of Kubernetes, by default, `only showing 50 images on a node `__. You can work around this, if you control the Kubernetes installation, with ``--node-status-max-images`` set to ``-1`` on the kubelet command line, or by setting ``nodeStatusMaxImages`` to ``-1`` in the kubelet configuration file. - -Should you encounter this problem, for each node, perform the following actions: - -#. Download `purge `__ -#. Run it using an account allowed to use the Docker socket (thus, probably in group ``docker``). You may want to run it with ``-x`` first to see what it's going to do. If you want output during the actual run, run it with ``-v``. - diff --git a/docs/ops/cachemachine/updating-recommended.rst b/docs/ops/cachemachine/updating-recommended.rst deleted file mode 100644 index 115aae37a6..0000000000 --- a/docs/ops/cachemachine/updating-recommended.rst +++ /dev/null @@ -1,83 +0,0 @@ -###################### -Updating "recommended" -###################### - -The "recommended" tag for JupyterLab images is usually a recent weekly image. -The image marked "recommended" is guaranteed by SQuaRE to be compatible with other services and materials--such as tutorial or system testing notebooks--that we make available on RSP deployments. 
-Because this process requires quite a bit of checking and sign-off from multiple stakeholders, it is possible that approving a new version for "recommended" may take more than the two weeks (for most deployments) it takes for a weekly image to roll off the default list of images to pull. -This can cause the RSP JupyterHub options form to display empty parentheses rather than the correct target version when a user requests a lab container. - -This document explains how to circumvent that display bug by changing cachemachine's ``values-.yaml`` for the appropriate instance when moving the "recommended" tag. - -Tagging a new container version --------------------------------- - -When a new version is to be approved (after passing through its prior QA and sign-off gates), the "recommended" tag must be updated to point to the new version. - -This really is as simple as pulling the new target version, tagging it as recommended, and pushing it again. -This is, sadly, necessary — there is no way to tag an image on Docker Hub without pulling and re-pushing it. -However, the push will be a no-op, since all the layers are, by definition, already there, so while the pull may be slow, the push will be fast. - -The procedure is as follows: - -.. code-block:: sh - - docker pull registry.hub.docker.com/lsstsqre/sciplat-lab:w_2021_33 # or whatever tag - docker tag registry.hub.docker.com/lsstsqre/sciplat-lab:w_2021_33 registry.hub.docker.com/lsstsqre/sciplat-lab:recommended - docker login # This may require interaction, depending on how you've set up your docker credentials - docker push registry.hub.docker.com/lsstsqre/sciplat-lab:recommended - -The DockerHub ``sqreadmin`` user could be used for this; however, when the process is not automated (it currently is not), using personal credentials is acceptible. -The ``sqreadmin`` DockerHub credentials are within the SQuaRE 1Password credential store. - -.. 
_prepull-recommended: - -Updating Phalanx to ensure the "recommended" target is pre-pulled ------------------------------------------------------------------ - -In most environments, cachemachine only ensures pulling of the latest two weekly images, and it is therefore not at all unusual for more than two weeks to go by before approving a new version. - -Usually this doesn't matter: the image cache on a node uses a Least Recently Used replacement strategy, and the great majority of users spawn "recommended," so it's not going to be purged. -However, there is a display bug in the Notebook Aspect spawner form can occur. -If a new node has come online after the recommended weekly has rolled out of the weekly list, then, although the new node will pre-pull "recommended", it will not pre-pull the corresponding weekly by the weekly tag -Cachemachine, and therefore the options form, will fail to resolve "recommended" to a particular weekly, which means the description in parentheses after the image name will be empty. - -Fortunately, this is easy to fix. - -In cachemachine's ``values-.yaml`` file for the affected environment, go towards the bottom and look in ``repomen``. -The first entry will always be of type ``RubinRepoMan``, and will contain the definitions of how many daily, weekly, and release images to prepull. - -There are currently only four environments in which we care about keeping the "recommended" target pre-pulled: - -#. IDF Production (``data.lsst.cloud``) -#. IDF Integration (``data-int.lsst.cloud``) -#. NCSA Stable (``lsst-lsp-stable.ncsa.illinois.edu``) -#. NCSA Integration (``lsst-lsp-int.ncsa.illinois.edu``) - -Beneath the ``RubinRepoMan`` entry, you should find an entry that looks like: - -.. code-block:: yaml - - { - "type": "SimpleRepoMan", - "images": [ - { - "image_url": "registry.hub.docker.com/lsstsqre/sciplat-lab:w_2021_33", - "name": "Weekly 2021_33" - } - ] - } - -Replace the tag and image name with the current approved versions. 
- -If you are adding these definitions to an instance that does not already ensure that the target image for "recommended" is always prepulled, add an entry to the ``repomen`` list that looks like the above, with current approved versions. - -Commit your changes to a git branch, and then create a GitHub pull request to ``services/cachemachine`` in `Phalanx `__ from that branch. -Request that someone review the PR, and then merge it. - -Then synchronize cachemachine (using Argo CD) in the correct environment. -It is not generally required to wait for a maintenance window to do this, since making this change is low-risk. -The cachemachine deployment will automatically restart, and that will kick off any required pulls. -Since these pulls will just be pulling "recommended" under a different name, the image will almost certainly already be cached, and therefore the pull will be near-instant. -Each pod that starts from the pulled image simply sleeps for one minute and then terminates. -After each pod has run and terminated, the Notebook Aspect options form will again show the correct data. diff --git a/docs/ops/cert-issuer/index.rst b/docs/ops/cert-issuer/index.rst deleted file mode 100644 index 5a1884575d..0000000000 --- a/docs/ops/cert-issuer/index.rst +++ /dev/null @@ -1,48 +0,0 @@ -########### -cert-issuer -########### - -.. list-table:: - :widths: 10,40 - - * - Edit on GitHub - - `/services/cert-issuer `__ - * - Type - - Helm_ - * - Namespace - - ``cert-issuer`` - -.. rubric:: Overview - -The ``cert-issuer`` service creates a cluster issuer for the use of the Rubin Science Platform. -It depends on `cert-manager `__. -The issuer is named ``cert-issuer-letsencrypt-dns``. - -On most clusters where the Rubin Science Platform manages certificates, this is also handled by the Rubin Science Platform Argo CD, but on the base and summit clusters, cert-manager is maintained by IT and installed outside of Argo CD. 
-NCSA clusters use NCSA certificates issued via an internal process. - -``cert-issuer`` should only be enabled in environments using Route 53 for DNS and using cert-manager with the DNS solver. -For more information, see :ref:`hostnames`. - -.. rubric:: Using cert-issuer - -To configure an ingress to use certificates issued by it, add a ``tls`` configuration to the ingress and the annotation: - -.. code-block:: yaml - - cert-manager.io/cluster-issuer: cert-issuer-letsencrypt-dns - -This should be done on one and only one ingress for a deployment using ``cert-issuer``. -The RSP conventionally uses the ``landing-page`` service. - -.. rubric:: Guides - -.. toctree:: - - route53-setup - bootstrapping - -.. seealso:: - - * :doc:`../cert-manager/index` - * `cert-manager documentation for Route 53 `__. diff --git a/docs/ops/cert-manager/index.rst b/docs/ops/cert-manager/index.rst deleted file mode 100644 index 532b5bf953..0000000000 --- a/docs/ops/cert-manager/index.rst +++ /dev/null @@ -1,35 +0,0 @@ -############ -cert-manager -############ - -.. list-table:: - :widths: 10,40 - - * - Edit on GitHub - - `/services/cert-manager `__ - * - Type - - Helm_ - * - Namespace - - ``cert-manager`` - -.. rubric:: Overview - -The ``cert-manager`` service is an installation of `cert-manager `__ from its `Helm chart repository `__. -It creates TLS certificates via `Let's Encrypt `__ and automatically renews them. - -See the :doc:`cert-issuer service <../cert-issuer/index>` for how ``cert-manager`` is used. - -This service is only deployed on clusters managed by SQuaRE. -NCSA clusters use NCSA certificates issued via an internal process. -IT manages the cert-manager installation on the base and summit Rubin Science Platform clusters. - -Upgrading cert-manager is generally painless. -The only custom configuration that we use is to tell the Helm chart to install the Custom Resource Definitions. 
-Watch for changes that require updating ``ClusterIssuer`` or ``Certificate`` resources; those will require corresponding changes to the resources defined in `/services/cert-issuer `__. - -Normally, it's not necessary to explicitly test cert-manager after a routine upgrade. -We will notice if the certificates expire, and have monitoring of the important ones. -However, if you want to be sure that cert-manager is still working after an upgrade, delete the TLS secret in the ``nublado`` namespace. -It should be recreated by cert-manager. -(You may have to also delete the ``Certificate`` resource of the same name and let Argo CD re-create it to trigger this.) -This may cause an outage for the Science Platform since it is using this certificate, so you may want to be prepared to port-forward to get to the Argo CD UI in case something goes wrong. diff --git a/docs/ops/gafaelfawr/debugging.rst b/docs/ops/gafaelfawr/debugging.rst deleted file mode 100644 index 39d277b1fa..0000000000 --- a/docs/ops/gafaelfawr/debugging.rst +++ /dev/null @@ -1,16 +0,0 @@ -############################### -Debugging authentication issues -############################### - -If a user successfully authenticates through the Gafaelfawr ``/login`` route but then cannot access a service such as the Notebook or Portal Aspects, a good initial debugging step is to determine what scopes the user was granted on the basis of their group membership. - -Have the user go to ``/auth/analyze``, which will provide a JSON dump of their authentication information. -The important information is in the ``token.data`` portion of the JSON document. -The key information to look at is the ``isMemberOf`` claim, which shows the groups of which Gafaelfawr thinks the user is a member, and the ``scope`` claim, which shows how those group memberships were translated into access scopes using the ``config.groupMappings`` configuration. -This is usually the best tool for uncovering problems with group mapping. 
- -For other issues, looking at the pod logs for the ``gafaelfawr`` pod in the ``gafaelfawr`` namespace is the best next step. -(The actual pod name will have a random string appended to ``gafaelfawr``. -The pod of interest is the one that is not the Redis pod.) -``kubectl logs -n gafaelfawr`` or the Argo CD pod logs screen will show you the messages from Gafaelfawr, including any errors. -The logs from Gafaelfawr are in JSON format. diff --git a/docs/ops/gafaelfawr/index.rst b/docs/ops/gafaelfawr/index.rst deleted file mode 100644 index 6380a3acf7..0000000000 --- a/docs/ops/gafaelfawr/index.rst +++ /dev/null @@ -1,33 +0,0 @@ -########## -Gafaelfawr -########## - -.. list-table:: - :widths: 10,40 - - * - Edit on GitHub - - `/services/gafaelfawr `__ - * - Type - - Helm_ - * - Namespace - - ``gafaelfawr`` - -.. rubric:: Overview - -Gafaelfawr provides authentication and identity management services for the Rubin Science Platform. -It is primarily used as an NGINX ``auth_request`` handler configured via annotations on the ``Ingress`` resources of Science Platform services. -In that role, it requires a user have the required access scope to use that service, rejects users who do not have that scope, and redirects users who are not authenticated to the authentication process. - -Gafaelfawr supports authentication via either OpenID Connect (generally through `CILogon `__) or GitHub. - -.. rubric:: Guides - -.. toctree:: - - debugging - storage - recreate-token - -.. seealso:: - - * `Gafaelfawr documentation `__ diff --git a/docs/ops/gafaelfawr/storage.rst b/docs/ops/gafaelfawr/storage.rst deleted file mode 100644 index b3f625ed1a..0000000000 --- a/docs/ops/gafaelfawr/storage.rst +++ /dev/null @@ -1,60 +0,0 @@ -################### -Configuring storage -################### - -Gafaelfawr uses Redis for persistent storage. -When deploying Gafaelfawr, you will need to choose between three possible storage configurations based on the needs of the environment. 
- -Ephemeral -========= - -For test environments, or for environments where no one is expected to use persistent user tokens, it may be acceptable to invalidate all tokens on each Gafaelfawr restart. -This is the simplest configuration, since it doesn't require persistent volumes. -To choose this method, put: - -.. code-block:: yaml - - redis: - persistence: - enabled: false - -in the ``values-*.yaml`` file for that environment under the ``gafaelfawr`` key. - -.. _dynamic-gafaelfawr: - -Dynamic provisioning -==================== - -The default Gafaelfawr behavior is to use `dynamic provisioning `__. -Gafaelfawr will request (via a ``StatefulSet``) a 1GiB volume using the default storage class with access mode ``ReadWriteOnce``. -These values can be overridden with ``redis.persistence.size``, ``redis.persistence.storageClass``, and ``redis.persistence.accessMode``. - -On GKE environments, the recommended configuration is to enable the Google Compute Engine Physical Disk CSI driver (this can be done via the GKE cluster configuration) and then use its storage class. -Do this by putting: - -.. code-block:: yaml - - redis: - persistence: - storageClass: "standard-rwo" - -in the ``values-*.yaml`` file for that environment under the ``gafaelfawr`` key. - -In this configuration, you may want to start Gafaelfawr so that the persistent volume claim and corresponding persistent volume has been created, locate that persistent volume, and then change its reclaim policy from the default (usually ``Delete``) to ``Retain``. -This provides some additional protection against wiping the storage in accidents or service redeployments that cause the ``StatefulSet`` and its ``PersistentVolumeClaim`` to be deleted. - -Existing ``PersistentVolumeClaim`` -================================== - -Finally, Gafaelfawr can be configured to use an existing ``PersistentVolumeClaim``. 
-This is the most flexible approach, since the ``PersistentVolumeClaim`` can be created outside of the Gafaelfawr chart with whatever parameters are desired. - -To use this method, add: - -.. code-block:: yaml - - redis: - persistence: - volumeClaimName: "" - -to ``values-*.yaml`` file for that environment under the ``gafaelfawr`` key, replacing ```` with the name of an existing ``PersistentVolumeClaim`` in the ``gafaelfawr`` namespace. diff --git a/docs/ops/infrastructure/filestore/index.rst b/docs/ops/infrastructure/filestore/index.rst deleted file mode 100644 index dd6c7bf8dd..0000000000 --- a/docs/ops/infrastructure/filestore/index.rst +++ /dev/null @@ -1,22 +0,0 @@ -######### -Filestore -######### - -The thing we're calling ``filestore`` is not an RSP service at all. -Nor does it (generally) run in Kubernetes. - -All current ``filestore`` implementations are simply implementations of -NFS that are mounted into RSP pods (both user and service) by Volume and -VolumeMount definitions. - -There is nothing in the filestore that mandates NFS. What is required -is simply something that can present some storage to user and service -pods as a POSIX filesystem. To this point, NFS has been the most -convenient way to accomplish that, but it is certainly not fundamental -to the concept. - -.. rubric:: Guides - -.. toctree:: - - privileged-access diff --git a/docs/ops/infrastructure/filestore/privileged-access.rst b/docs/ops/infrastructure/filestore/privileged-access.rst deleted file mode 100644 index 8050456134..0000000000 --- a/docs/ops/infrastructure/filestore/privileged-access.rst +++ /dev/null @@ -1,68 +0,0 @@ -################################## -Privileged access to the filestore -################################## - -Currently, we do not have any way to make containers with privileged -filesystem access available from JupyterHub. 
- -In order to get privileged access to the filestore, you will need access -to ``kubectl`` with admin privileges to the cluster you want to work on. - -Save the following file as ``copier.yaml``; you may need to edit it to -point to the correct filestore, and of course if you need multiple -filestores present (for instance, for copying data between environments) -then you will need to create multiple Volume/VolumeMount pairs so -multiple filestores are presented within the container. - -.. code-block:: yaml - - apiVersion: v1 - kind: Pod - metadata: - name: copier - namespace: copier - spec: - containers: - - name: main - image: ubuntu:latest - args: [ "tail", "-f", "/dev/null" ] - volumeMounts: - - mountPath: /mnt - name: share - volumes: - - name: share - nfs: - path: /share1 - server: 10.13.105.122 - # 10.87.86.26 is IDF dev - # 10.22.240.130 is IDF int - # 10.13.105.122 is IDF prod - -In order to spin up this pod, do the following: - - * ``kubectl create ns copier`` - * ``kubectl apply -f copier.yaml`` - * ``kubectl exec -it -n copier copier -- /bin/bash -l`` - -Once you do that, you have a root prompt and the instance filestore is -mounted at ``/mnt``. -With great power comes great responsibility. - -When you're done, delete the namespace. This will also destroy the -privileged pod: - - * ``kubectl delete ns copier`` - -**Examples:** - - * Get usage data by username, sorted by usage, largest at the bottom:: - - du -s -BM /mnt/home/* \ - | sed -e 's/\s\+/,/' \ - | sed -e 's|/mnt/home/||' \ - | sort -nr - - * Make archival copy of user ``foo``'s previous ``.local`` for analysis:: - - tar cvpfz /tmp/foo-local.tgz /mnt/home/foo/.local.20210804223021 - diff --git a/docs/ops/ingress-nginx/index.rst b/docs/ops/ingress-nginx/index.rst deleted file mode 100644 index b3778ab5de..0000000000 --- a/docs/ops/ingress-nginx/index.rst +++ /dev/null @@ -1,29 +0,0 @@ -############# -ingress-nginx -############# - -.. 
list-table:: - :widths: 10,40 - - * - Edit on GitHub - - `/services/ingress-nginx `__ - * - Type - - Helm_ - * - Namespace - - ``ingress-nginx`` - -.. rubric:: Overview - -The ``ingress-nginx`` service is an installation of `ingress-nginx `__ from its `Helm chart `__. -We use NGINX as the ingress controller for all Rubin Science Platform deployments rather than native ingress controllers because we use the NGINX ``auth_request`` feature to do authentication and authorization. - -NCSA clusters also use the same software, but the NGINX ingress is managed by NCSA rather than via Argo CD. - -Upgrading ``ingress-nginx`` is generally painless. -A simple Argo CD sync is sufficient. - -.. rubric:: Guides - -.. toctree:: - - certificates diff --git a/docs/ops/nublado2/index.rst b/docs/ops/nublado2/index.rst deleted file mode 100644 index dc011cf118..0000000000 --- a/docs/ops/nublado2/index.rst +++ /dev/null @@ -1,26 +0,0 @@ -######## -nublado2 -######## - -.. list-table:: - :widths: 10,40 - - * - Edit on GitHub - - `/services/nublado2 `__ - * - Type - - Helm_ - * - Namespace - - ``nublado2`` - -.. rubric:: Overview - -The ``nublado2`` service is an installation of JupyterHub from its `Helm chart `__. - -Upgrading ``nublado2`` is generally painless. -A simple Argo CD sync is sufficient. - -.. rubric:: Guides - -.. toctree:: - - database diff --git a/docs/ops/postgres/index.rst b/docs/ops/postgres/index.rst deleted file mode 100644 index 2c0379007e..0000000000 --- a/docs/ops/postgres/index.rst +++ /dev/null @@ -1,39 +0,0 @@ -######## -postgres -######## - -.. list-table:: - :widths: 10,40 - - * - Edit on GitHub - - `/services/postgres `__ - * - Type - - Helm_ - * - Namespace - - ``postgres`` - -.. rubric:: Overview - -The ``postgres`` service is a very small PostgreSQL installation. -It is intended to provide persistent relational storage for low-value databases that it isn't a tragedy to lose. - -Do not use this service for important data. 
-Use a managed relational database such as Google CloudSQL. -Two intended purposes for this service are: - -#. The JupyterHub user session database -#. Backing store for Gafaelfawr's authentication tokens - -If either of those is destroyed, then all current user sessions and authentication tokens are invalidated, work up to the last checkpoint (5 minutes in JupyterLab) may be lost, and users will have to log in again, restart their sessions, and recreate any authentication tokens. -While irritating, this is not the end of the world; hence "low-value databases." -(That said, production instances of the Science Platform use CloudSQL for the Gafaelfawr token database.) - -Upgrading ``postgres`` is generally painless. -A simple Argo CD sync is sufficient. - -.. rubric:: Guides - -.. toctree:: - - recreate-pvc - diff --git a/docs/ops/postgres/recreate-pvc.rst b/docs/ops/postgres/recreate-pvc.rst deleted file mode 100644 index 315558e0c3..0000000000 --- a/docs/ops/postgres/recreate-pvc.rst +++ /dev/null @@ -1,21 +0,0 @@ -########################## -Recreating postgres PV/PVC -########################## - -If you get into a state where the cluster has completely crashed, -perhaps due to hardware problems, and the backing store for persistent -volumes has been lost, Postgres may refuse to start. - -The reason for this is that if you are using an autoprovisioned storage -class (such as GKE and Rook provide), the PVC will reference a volume -that no longer exists. - -This, in and of itself, is not a tragedy. The Postgres database is -intended to hold only fairly low-value data. If your cluster has -crashed that hard, the authentication Redis cache and JupyterHub session -database are unlikely to still be relevant. - -All you need to do to recover is to delete the PVC, recreate it (which -will re-allocate the persistent storage), and restart the deployment. -This is most easily accomplished with ArgoCD, although ``kubectl`` works -as well. 
diff --git a/docs/ops/squash-api/index.rst b/docs/ops/squash-api/index.rst deleted file mode 100644 index fb44c8e1f2..0000000000 --- a/docs/ops/squash-api/index.rst +++ /dev/null @@ -1,95 +0,0 @@ -########## -squash-api -########## - -.. list-table:: - :widths: 10,40 - - * - Edit on GitHub - - `/services/squash-api `__ - * - Type - - Helm_ - * - Namespace - - ``squash-api`` - -.. rubric:: Overview - -The ``squash-api`` app deploys a REST API for managing Science Pipelines metrics. -You can learn more about SQuaSH in SQR-009_. - -.. _SQR-009: https://sqr-009.lsst.io/ - -Currently, the ``squash-api`` is deployed using the ``squash-sandbox`` and ``squash-prod`` environments along with other services: - -- argo-cd -- cert-issuer -- cert-manager -- chronograf -- gafaelfawr -- influxdb -- kapacitor -- ingress-nginx -- vault-secrets-operator - -You can reach the following services, for example, for the ``https://squash-sandbox.lsst.codes`` deployment: - -- https://squash-sandbox.lsst.codes (SQuaSH API) -- https://squash-sandbox.lsst.codes/argo-cd (Argo CD) -- https://squash-sandbox.lsst.codes/chronograf (Chronograf) -- https://squash-sandbox.lsst.codes/influxdb (InfluxDB) - -The Science Pipelines use lsst.verify_ to collect metrics and their measurements and produce verification jobs that are uploaded to the SQuaSH API. -An internal task in the SQuaSH API extracts metric values and metadata from the verification jobs and stores them in InfluxDB. - -.. _lsst.verify: https://sqr-019.lsst.io/ - -Chronograf is the UI for displaying measurements of the Science Pipeline metrics and it uses Gafaelfawr to authenticate users with the CILogon provider. - -.. rubric:: SQuaSH data migration - -Here we document the steps to migrate data from an existing SQuaSH instance to a new one. -To exemplify this, let's assume we want to migrate data from https://squash-prod.lsst.codes to https://squash-sandbox.lsst.codes to make a clone of the production instance. 
- -The SQuaSH API uses a MySQL instance, managed by CloudSQL, to store the Science Pipelines verification jobs. -The steps to clone the CloudSQL instance are: - -* Clone the ``squash-db-prod`` database in CloudSQL to a new instance, e.g. ``squash-db-sandbox-N``, where N is an incremental number. -* Update the database user credentials; they have to match the ``squash-db-user`` and ``squash-db-password`` keys in the ``squash-api`` secret for the new https://squash-sandbox.lsst.codes deployment. -* Update ``instanceConnectionName:`` in ``services/squash-api/values-squash-sandbox.yaml`` to the new value. -* Synchronize the ``squash-api`` app in https://squash-sandbox.lsst.codes/argo-cd to connect to the Cloud SQL instance clone. - -You can check if the connection was successful by inspecting the logs of the ``cloudsql-proxy`` container in the ``squash-api`` pod. - -To migrate InfluxDB databases use the ``dump.sh`` and ``restore.sh`` scripts in `squash-api/scripts/ `_. - -First, set the ``kubectl`` context of the source InfluxDB instance (https://squash-prod.lsst.codes) then run: - -.. code:: - - ./dump.sh influxdb squash-prod # this database stores measurements of the science pipelines metrics - ./dump.sh influxdb chronograf # this database stores chronograf data such as annotations and the alert history - -where ``influxdb`` is the namespace of the InfluxDB deployment, and the second argument is the name of the database to dump. - -Before running the ``restore.sh`` script, set the ``kubectl`` context of the destination InfluxDB instance (https://squash-sandbox.lsst.codes). -Then use the output directory from the ``dump.sh`` command as the input directory for the ``restore.sh`` command: - -.. code:: - - ./restore.sh influxdb squash-prod - ./restore.sh influxdb chronograf - -where ``influxdb`` is the namespace of the InfluxDB deployment, and the second argument is the name of the database to restore. 
- -In addition to the MySQL CloudSQL instance and the InfluxDB databases, there are two other context databases that need to be restored. -The Chronograf context database stores users, organizations, connection data to InfluxDB and Kapacitor, and dashboard data. -The Kapacitor database stores the alert rules and TICKscripts. - -To restore the Chronograf and Kapacitor databases, set the ``kubectl`` context of the source instance and copy the files: - -.. code:: - - kubectl cp chronograf/:var/lib/chronograf/chronograf-v1.db chronograf-v1.db - kubectl cp kapacitor/:var/lib/kapacitor/kapacitor.db kapacitor.db - -Set the ``kubectl`` context of the destination instance, copy the database files to the same location into the corresponding pods, and then restart the pods for that to take effect. diff --git a/docs/ops/tap/index.rst b/docs/ops/tap/index.rst deleted file mode 100644 index 0eb61764e1..0000000000 --- a/docs/ops/tap/index.rst +++ /dev/null @@ -1,35 +0,0 @@ -### -TAP -### - -.. list-table:: - :widths: 10,40 - - * - Edit on GitHub - - `/services/tap `__ - * - Type - - Helm_ - * - Namespace - - ``tap`` - -.. rubric:: Overview - -TAP (Table Access Protocol) is an IVOA_ service that provides access to general table data, including astronomical catalogs. -On the Rubin Science Platform, it is provided by `lsst-tap-service `__, which is derived from the `CADC TAP service `__. -The data itself, apart from schema queries, comes from qserv. - -The ``tap`` service consists of the TAP Java web service, a PostgreSQL database used to track user job submissions, and (on development deployments) a mock version of qserv. - -Upgrading ``tap`` normally only requires an Argo CD sync. - -.. rubric:: Architecture - -.. figure:: /_static/notebook-tap.png - :name: Flow for Notebook Aspect queries to TAP - - Flow for Notebook Aspect queries to TAP - -.. 
figure:: /_static/portal-tap.png - :name: Flow for Portal Aspect queries to TAP - - Flow for Portal Aspect queries to TAP diff --git a/docs/ops/vault-secrets-operator/index.rst b/docs/ops/vault-secrets-operator/index.rst deleted file mode 100644 index 94edfd81c7..0000000000 --- a/docs/ops/vault-secrets-operator/index.rst +++ /dev/null @@ -1,55 +0,0 @@ -.. _vault-secrets-operator: - -###################### -vault-secrets-operator -###################### - -.. list-table:: - :widths: 10,40 - - * - Edit on GitHub - - `/services/vault-secrets-operator `__ - * - Type - - Helm_ - * - Namespace - - ``vault-secrets-operator`` - -.. rubric:: Overview - -The ``vault-secrets-operator`` service is an installation of `Vault Secrets Operator`_ to retrieve necessary secrets from Vault and materialize them as Kubernetes secrets for the use of other services. -It processes ``VaultSecret`` resources defined in the `Science Platform repository `__ and creates corresponding Kubernetes ``Secret`` resources. - -.. _Vault Secrets Operator: https://github.com/ricoberger/vault-secrets-operator - -See `DMTN-112 `__ for the LSST Vault design. - -.. rubric:: Upgrading - -Upgrading to newer upstream releases of the Helm chart is normally simple and straightforward. -We have no significant local customization. - -After upgrading, check that Vault Secrets Operator is still working properly by finding a ``VaultSecret`` and ``Secret`` resource pair in the Argo CD dashboard and deleting the ``Secret`` resource. -It should be nearly immediately re-created from the ``VaultSecret`` resource by Vault Secrets Operator. -The Gafaelfawr secret is a good one to use for this purpose since it is only read during Gafaelfawr start-up. - -.. rubric:: Bootstrapping the service - -Vault Secrets Operator is the only component of the Science Platform whose secret has to be manually created, so that it can create the secrets for all other services. -This will be done automatically by the `install script `__. 
- -Its secret will look like this: - -.. code-block:: yaml - - apiVersion: v1 - kind: Secret - metadata: - name: vault-secrets-operator - namespace: vault-secrets-operator - type: Opaque - stringData: - VAULT_TOKEN: - VAULT_TOKEN_LEASE_DURATION: 86400 - -Replace ```` with the ``read`` Vault token for the path ``secret/k8s_operator/`` in Vault. -See `DMTN-112 `__ for more information. diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 7e21850f4a..0000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -diagrams -documenteer[pipelines] -ltd-conveyor diff --git a/docs/service-guide/add-service.rst b/docs/service-guide/add-service.rst deleted file mode 100644 index 030ec62f6b..0000000000 --- a/docs/service-guide/add-service.rst +++ /dev/null @@ -1,139 +0,0 @@ -############################ -Add a new service to Phalanx -############################ - -Once you have a chart and a Docker image (see :doc:`create-service`) and you have added your static service secrets to 1Password (see :doc:`add-a-onepassword-secret`), you need to integrate your service into Phalanx. -This is done by creating an Argo CD application that manages your service. -This consists of an ``Application`` resource that's used by Argo CD and a small wrapper chart in the `Phalanx repository `__ that holds the ``values-*.yaml`` files to configure your service for each environment in which it's deployed. - -Add the wrapper chart -===================== - -#. Create a directory in `/services `__ named for the service (which should almost always be the same as the name of its chart). - -#. Create a ``Chart.yaml`` file in that directory for the wrapper chart. - This should look something like this: - - .. 
code-block:: yaml - - apiVersion: v2 - name: example - version: 1.0.0 - dependencies: - - name: example - version: 1.3.2 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ - - The ``name`` field should be the same as the name of the directory, which again should be the same as the name of your chart. - The ``version`` field should always be ``1.0.0`` (see :ref:`chart-versioning` for an explanation). - The first entry in ``dependencies`` should point to your chart and pin its current version. - (Yes, this means you will need to make a PR against Phalanx for each new version of your chart.) - If you are directly referencing an external chart, the ``repository`` property may be different. - Finally, include the ``pull-secret`` dependency as-is. - This is used to configure a Docker pull secret that you will reference later. - -#. For each environment in which your service will run, create a ``values-.yaml`` file in this directory. - This should hold only the customization per Rubin Science Platform deployment. - Any shared configuration should go into the defaults of your chart. - (An exception is if you are using an external chart directly, in which case you will need to add all configuration required for that chart. - See :ref:`external-chart-config` for more discussion.) - - Some common things to need to configure per-environment: - - - The ingress hostname (usually ``ingress.host``) - - The ``vaultSecretsPath`` for a secret - - Always tell any pods deployed by your service to use a pull secret named ``pull-secret``. - If you are using the default Helm template, this will mean a block like: - - .. code-block:: yaml - - imagePullSecrets: - - name: "pull-secret" - - under the section for your chart. - If you are using an external chart, see its documentation for how to configure pull secrets. 
- Configuring a pull secret is important to avoid running into Docker pull rate limits, which could otherwise prevent a pod from starting. - - **All configuration for your chart must be under a key named for your chart.** - For example, for a service named ``example``, a typical configuration may look like: - - .. code-block:: yaml - - example: - imagePullSecrets: - - name: "pull-secret" - - ingress: - host: "data.lsst.cloud" - - vaultSecretsPath: "secret/k8s_operator/data.lsst.cloud/example" - - That ``example:`` on the top line and the indentation is important. - If you omit it, all of your configuration will be silently ignored. - - Finally, every ``values-*.yaml`` file (at least for now, until we find a better approach) must have, at the bottom, a stanza like: - - .. code-block:: yaml - - pull-secret: - enabled: true - path: "secret/k8s_operator//pull-secret" - - See all the other directories under `/services `__ for examples. - You may want to copy and paste the basic setup including the ``pull-secret`` configuration from another service to save effort. - -Add the Argo CD application -=========================== - -#. Create the Argo CD application resource. - This is a new file in `/science-platform/templates `__ named ``-application.yaml`` where ```` must match the name of the directory created above. 
- The contents of this file should look like:: - - {{- if .Values..enabled -}} - apiVersion: v1 - kind: Namespace - metadata: - name: - spec: - finalizers: - - kubernetes - --- - apiVersion: argoproj.io/v1alpha1 - kind: Application - metadata: - name: - namespace: argocd - finalizers: - - resources-finalizer.argocd.argoproj.io - spec: - destination: - namespace: - server: https://kubernetes.default.svc - project: default - source: - path: services/ - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} - helm: - valueFiles: - - values-{{ .Values.environment }}.yaml - {{- end -}} - - replacing every instance of ```` with the name of your service. - This creates the namespace and Argo CD application for your service. - -#. Finally, edit each of the ``values-*.yaml`` files in `/science-platform `__ and add a stanza for your service. - The stanza in ``values.yaml`` should always say: - - .. code-block:: yaml - - : - enabled: false - - replacing ```` with the name of your service. - For the other environments, set ``enabled`` to ``true`` if your service should be deployed there. - You may want to start in a dev or int environment and enable it in production environments later. diff --git a/docs/service-guide/chart-changes.rst b/docs/service-guide/chart-changes.rst deleted file mode 100644 index 3562b6555c..0000000000 --- a/docs/service-guide/chart-changes.rst +++ /dev/null @@ -1,26 +0,0 @@ -#################################### -Changing charts and phalanx together -#################################### - -Quite often when working on RSP services you will find that you need -simultaneous changes to both the `charts repository `__ and the `phalanx repository `__. - -You may not want to roll out the charts changes prior to the phalanx changes, but at the same time, the phalanx changes require the charts changes. 
- -If the charts changes are low-risk--perhaps they just add new objects or settings--then it's often OK to release a new charts version, and then point phalanx at the new version. Then you can just update in ArgoCD and it's all very easy. - -This section, however, is about the times when it's risky to do that. - -The bad news is, you can't do this via ArgoCD. The good news is, it's pretty easy to do anyway, but you do need ``kubectl`` access to whatever cluster you're working on. Ideally this is a local ``minikube`` cluster, but if you're, say, using an Apple Silicon Mac, or you need access to real data, maybe you're doing it in ``data-dev`` or ``data-int``. - -#. Make your changes to both charts and phalanx. - -#. Ensure that you're using ``kubectl`` with a kubeconfig file giving access to the cluster you want to use. - -#. Generate a new chart with ``helm package `` in the ``charts/charts`` directory. This will generate a .tgz package of the application. - -#. In the correct phalanx ``services`` directory, update ``Chart.yaml`` to the new (unreleased) Chart version. Then update phalanx with ``helm dependency build .`` . This will try to download the dependent charts and will fail, because the version hasn't been released. Create a ``charts`` directory if it doesn't already exist, and copy the tarball you created into the previous step into it. - -#. Finally, run ``helm install . --values=``. - -The running version will be out of sync in ArgoCD until you release the charts and phalanx changes, but it is testable in the cluster at this point. diff --git a/docs/service-guide/create-service.rst b/docs/service-guide/create-service.rst deleted file mode 100644 index ed5a10ef79..0000000000 --- a/docs/service-guide/create-service.rst +++ /dev/null @@ -1,102 +0,0 @@ -#################### -Create a new service -#################### - -This documentation is intended for service administrators who are writing a new service in Python. 
-If the goal is to instead deploy a third-party service with its own Helm chart in the Rubin Science Platform, see :doc:`add-external-chart`. - -To be deployed in the Rubin Science Platform, a service must come in the form of one or more Docker images and a Helm chart (or Kustomize configuration, although no one currently uses that approach) that deploys those images in Kubernetes. - -After you have finished the steps here, go to :doc:`add-service`. - -Write the service -================= - -Rubin-developed services for the Rubin Science Platform should be written in Python unless there's some reason (such as using code developed elsewhere) that forces choice of a different language. -For the common case of a web service (one that exposes an API via HTTP), we recommend using the `FastAPI framework `__. - -The easiest way to start a new FastAPI service written in Python and intended for the Rubin Science Platform is to create a new project using sqrbot-jr. -On the LSST Slack, send the message ``create project`` to ``@sqrbot-jr``. -Select ``FastAPI application (Safir)`` from the list of project types. -This will create a new GitHub repository with the basic framework of a FastAPI service that will work well inside the Rubin Science Platform. - -Any Python service destined for the RSP should regularly update its dependencies to pick up any security fixes. -If your service follows the code layout of the FastAPI service template, using `neophile `__ to automatically create PRs to update your dependencies is strongly recommended. -To add your service to the list of repositories that neophile updates, submit a PR to add the repository owner and name to `neophile's configuration `__. - -Each release of your service must be tagged. -The tag should use `semantic versioning`_ (for example, ``1.3.2``). -Creating a GitHub release for the tag is optional but recommended, and we recommend setting the title of the release to the name of the tag. 
-If you are using the FastAPI template, tagging in this fashion is required since it triggers the GitHub Actions workflow to build and publish a Docker image with a tag matching the release version. - -Create the Docker image -======================= - -The Docker image can be stored in any container registry that is usable by Kubernetes, but for Rubin-developed services, we normally use DockerHub. -(We may switch to the Google Container Registry later, but for now DockerHub is used for all images.) -If your image must be stored in a private container registery, the credentials for that registry must be added to the pull secret. - -If you use the FastAPI service template, a ``Dockerfile`` will be created as part of the new repository template, and GitHub Actions will be set up in the new repository to build and push new Docker images for tagged releases. -To enable this workflow, you must create two secrets in your new GitHub repository, ``DOCKER_USERNAME`` and ``DOCKER_TOKEN``. -``DOCKER_USERNAME`` should be set to the DockerHub username of the account that will be pushing the new Docker images. -``DOCKER_TOKEN`` should be set to a secret authentication token for that account. -We recommend creating a separate token for each GitHub repository for which you want to enable automatic image publication, even if they all use the same username. - -You may need to have a Docker Pro or similar paid DockerHub account. -Alternately, you can contact SQuaRE to set up Docker image publication using our Docker account. - -Create the Helm chart -===================== - -To deploy your service in the Rubin Science Platform, it must have either a Helm chart or a Kustomize configuration. -Currently, all services use Helm charts. -Kustomize is theoretically supported but there are no examples of how to make it work with multiple environments. 
-Using a Helm chart is recommended unless you are strongly motivated to work out the problems with using Kustomize and write new documentation. - -Unfortunately, unlike for the service itself, we do not (yet) have a template for the Helm chart. -However, Helm itself has a starter template that is not awful. -Use ``helm create`` to create a new chart from that template. -**Be sure you are using Helm v3.** -Helm v2 is not supported. - -You will need to make at least the following changes to the default Helm chart template: - -- All secrets must come from ``VaultSecret`` resources, not Kubernetes ``Secret`` resources. - You should use a configuration option named ``vaultSecretsPath`` in your ``values.yaml`` to specify the path in Vault for your secret. - This option will be customized per environment when you add the service to Phalanx (see :doc:`add-service`). - See :doc:`add-a-onepassword-secret` for more information about secrets. -- Services providing a web API should be protected by Gafaelfawr and require an appropriate scope. - This normally means adding annotations to the ``Ingress`` resource via ``values.yaml`` similar to: - - .. code-block:: yaml - - ingress: - annotations: - nginx.ingress.kubernetes.io/auth-method: "GET" - nginx.ingress.kubernetes.io/auth-url: "http://gafaelfawr.gafaelfawr.svc.cluster.local:8080/auth?scope=exec:admin" - - For user-facing services you will want a scope other than ``exec:admin``. - See `the Gafaelfawr documentation `__, specifically `protecting a service `__ for more information. - -Documentation -------------- - -We have begun using `helm-docs `__ to generate documentation for our Helm charts. -This produces a nice Markdown README file that documents all the chart options, but it requires special formatting of the ``values.yaml`` file that is not present in the default Helm template. -If you want to do the additional work, this will produce the most nicely-documented Helm chart, but using helm-docs is currently optional. 
- -Publication ------------ - -All Rubin-developed Helm charts for the Science Platform are stored in the `charts repository `__. -This repository automatically handles publication of the Helm chart when a new release is merged to the ``master`` branch, so you will not have to set up your own Helm chart repository. -You should create your new chart as a pull request in this repository, under the ``charts`` subdirectory. - -Examples --------- - -Existing Helm charts that are good examples to read or copy are: - -- `cachemachine `__ (fairly simple) -- `mobu `__ (also simple) -- `gafaelfawr `__ (complex, including CRDs and multiple pods) diff --git a/docs/service-guide/update-tap-schema.rst b/docs/service-guide/update-tap-schema.rst deleted file mode 100644 index 692f0139ab..0000000000 --- a/docs/service-guide/update-tap-schema.rst +++ /dev/null @@ -1,14 +0,0 @@ -Update the ``TAP_SCHEMA`` table -=============================== - -The ``TAP_SCHEMA`` table stores information about the tables available in a given installation of the Rubin Science Platform. -This table is kept in sync with the felis files using the following process: - -#. Make a PR to the `sdm_schemas repository `__ with a change to a felis YAML file. -#. After this is merged, make a GitHub release with a new version number. - This will create a tag and run a publishing pipeline GitHub Action. - That publishing pipeline will run the Python felis library against the YAML files in the ``yml`` directory and make different Docker images for the different supported environments. - It will then push the images to DockerHub. -#. Update the version of the `tap-schema chart `__ following the instructions in :doc:`upgrade`. - The ``appVersion`` in ``Chart.yaml`` should be updated to match the version of the new release, and the ``version`` of the chart increased following normal semver conventions. -#. 
Sync the ``tap-schema`` application in Argo CD in the relevant environment or environments (see :doc:`sync-argo-cd`). diff --git a/docs/service-guide/upgrade.rst b/docs/service-guide/upgrade.rst deleted file mode 100644 index c2d781a554..0000000000 --- a/docs/service-guide/upgrade.rst +++ /dev/null @@ -1,17 +0,0 @@ -################### -Upgrading a service -################### - -#. Release a new version of the service by pushing an image with the new version tag to Docker Hub (or whatever Docker repository is used). - -#. Update the chart in the `charts repository `__ to install the current version. - For charts using the recommended pattern of determining the default Docker tag via the ``appVersion`` chart metadata, this only requires updating ``appVersion`` in ``Chart.yaml``. - Some charts cannot (or do not) do this, in which case the version has to be changed elsewhere, normally in ``values.yaml``. - Also update the ``version`` of the chart in ``Chart.yaml`` (which follows `semantic versioning`_). - When this PR is merged, a new chart will automatically be published. - -#. Update the chart version in the Phalanx ``Chart.yaml`` file for the appropriate service under `/services `__. - If the chart is not pinned (if, in other words, it uses a version range constraint instead of a specific version), no Phalanx change is required. - -This will tell Argo CD that the change is pending, but no changes are applied automatically. -To apply the changes in a given environment, see :doc:`sync-argo-cd`. diff --git a/services/sasquatch/charts/kafka-connect-manager/README.md.gotmpl b/helm-docs.md.gotmpl similarity index 58% rename from services/sasquatch/charts/kafka-connect-manager/README.md.gotmpl rename to helm-docs.md.gotmpl index 0d310b45a2..c63e5b2103 100644 --- a/services/sasquatch/charts/kafka-connect-manager/README.md.gotmpl +++ b/helm-docs.md.gotmpl @@ -2,6 +2,8 @@ {{ template "chart.description" . }} -{{ template "chart.valuesSection" . 
}} +{{ template "chart.homepageLine" . }} + +{{ template "chart.sourcesSection" . }} -{{ template "helm-docs.versionFooter" . }} +{{ template "chart.valuesSection" . }} diff --git a/installer/generate_secrets.py b/installer/generate_secrets.py index 2508139475..9ce0360385 100755 --- a/installer/generate_secrets.py +++ b/installer/generate_secrets.py @@ -1,21 +1,20 @@ #!/usr/bin/env python3 import argparse import base64 -import bcrypt -from collections import defaultdict -from cryptography.fernet import Fernet -from cryptography.hazmat.primitives.asymmetric import rsa -from cryptography.hazmat.backends import default_backend -from cryptography.hazmat.primitives import serialization -from datetime import datetime, timezone import json import logging import os -from pathlib import Path import secrets -import yaml +from collections import defaultdict +from datetime import datetime, timezone +from pathlib import Path -from onepassword import OnePassword +import bcrypt +from cryptography.fernet import Fernet +from cryptography.hazmat.backends import default_backend +from cryptography.hazmat.primitives import serialization +from cryptography.hazmat.primitives.asymmetric import rsa +from onepasswordconnectsdk.client import new_client_from_environment class SecretGenerator: @@ -43,6 +42,7 @@ def generate(self): `regenerate` attribute is `True`. """ self._pull_secret() + self._rsp_alerts() self._butler_secret() self._postgres() self._tap() @@ -52,6 +52,8 @@ def generate(self): self._argocd() self._portal() self._vo_cutouts() + self._telegraf() + self._sherlock() self.input_field("cert-manager", "enabled", "Use cert-manager? 
(y/n):") use_cert_manager = self.secrets["cert-manager"]["enabled"] @@ -60,7 +62,9 @@ def generate(self): elif use_cert_manager == "n": self._ingress_nginx() else: - raise Exception(f"Invalid cert manager enabled value {use_cert_manager}") + raise Exception( + f"Invalid cert manager enabled value {use_cert_manager}" + ) def load(self): """Load the secrets files for each RSP component from the @@ -87,7 +91,9 @@ def save(self): def input_field(self, component, name, description): default = self.secrets[component].get(name, "") - prompt_string = f"[{component} {name}] ({description}): [current: {default}] " + prompt_string = ( + f"[{component} {name}] ({description}): [current: {default}] " + ) input_string = input(prompt_string) if input_string: @@ -97,7 +103,7 @@ def input_file(self, component, name, description): current = self.secrets.get(component, {}).get(name, "") print(f"[{component} {name}] ({description})") print(f"Current contents:\n{current}") - prompt_string = f"New filename with contents (empty to not change): " + prompt_string = "New filename with contents (empty to not change): " fname = input(prompt_string) if fname: @@ -120,7 +126,7 @@ def _set(self, component, name, new_value): self.secrets[component][name] = new_value def _exists(self, component, name): - return (component in self.secrets and name in self.secrets[component]) + return component in self.secrets and name in self.secrets[component] def _set_generated(self, component, name, new_value): if not self._exists(component, name) or self.regenerate: @@ -134,26 +140,42 @@ def _tap(self): ) def _postgres(self): - self._set_generated("postgres", "exposurelog_password", secrets.token_hex(32)) - self._set_generated("postgres", "gafaelfawr_password", secrets.token_hex(32)) - self._set_generated("postgres", "jupyterhub_password", secrets.token_hex(32)) + self._set_generated( + "postgres", "exposurelog_password", secrets.token_hex(32) + ) + self._set_generated( + "postgres", "gafaelfawr_password", 
secrets.token_hex(32) + ) + self._set_generated( + "postgres", "jupyterhub_password", secrets.token_hex(32) + ) self._set_generated("postgres", "root_password", secrets.token_hex(64)) - self._set_generated("postgres", "vo-cutouts_password", secrets.token_hex(32)) + self._set_generated( + "postgres", "vo_cutouts_password", secrets.token_hex(32) + ) + self._set_generated( + "postgres", "narrativelog_password", secrets.token_hex(32) + ) def _nublado2(self): crypto_key = secrets.token_hex(32) self._set_generated("nublado2", "crypto_key", crypto_key) self._set_generated("nublado2", "proxy_token", secrets.token_hex(32)) - self._set_generated("nublado2", "cryptkeeper_key", secrets.token_hex(32)) + self._set_generated( + "nublado2", "cryptkeeper_key", secrets.token_hex(32) + ) # Pluck the password out of the postgres portion. - self.secrets["nublado2"]["hub_db_password"] = self.secrets["postgres"]["jupyterhub_password"] + self.secrets["nublado2"]["hub_db_password"] = self.secrets["postgres"][ + "jupyterhub_password" + ] def _mobu(self): self.input_field( "mobu", "ALERT_HOOK", - "Slack webhook for reporting mobu alerts. Or use None for no alerting.", + "Slack webhook for reporting mobu alerts. " + "Or use None for no alerting.", ) def _cert_manager(self): @@ -177,7 +199,9 @@ def _gafaelfawr(self): self._set_generated( "gafaelfawr", "bootstrap-token", self._generate_gafaelfawr_token() ) - self._set_generated("gafaelfawr", "redis-password", os.urandom(32).hex()) + self._set_generated( + "gafaelfawr", "redis-password", os.urandom(32).hex() + ) self._set_generated( "gafaelfawr", "session-secret", Fernet.generate_key().decode() ) @@ -186,13 +210,22 @@ def _gafaelfawr(self): self.input_field("gafaelfawr", "cloudsql", "Use CloudSQL? 
(y/n):") use_cloudsql = self.secrets["gafaelfawr"]["cloudsql"] if use_cloudsql == "y": - self.input_field("gafaelfawr", "database-password", "Database password") + self.input_field( + "gafaelfawr", "database-password", "Database password" + ) elif use_cloudsql == "n": # Pluck the password out of the postgres portion. db_pass = self.secrets["postgres"]["gafaelfawr_password"] self._set("gafaelfawr", "database-password", db_pass) else: - raise Exception(f"Invalid gafaelfawr cloudsql value {use_cloudsql}") + raise Exception( + f"Invalid gafaelfawr cloudsql value {use_cloudsql}" + ) + + self.input_field("gafaelfawr", "ldap", "Use LDAP? (y/n):") + use_ldap = self.secrets["gafaelfawr"]["ldap"] + if use_ldap == "y": + self.input_field("gafaelfawr", "ldap-password", "LDAP password") self.input_field("gafaelfawr", "auth_type", "Use cilogon or github?") auth_type = self.secrets["gafaelfawr"]["auth_type"] @@ -200,44 +233,79 @@ def _gafaelfawr(self): self.input_field( "gafaelfawr", "cilogon-client-secret", "CILogon client secret" ) + use_ldap = self.secrets["gafaelfawr"]["ldap"] + if use_ldap == "y": + self.input_field( + "gafaelfawr", "ldap-secret", "LDAP simple bind password" + ) elif auth_type == "github": self.input_field( "gafaelfawr", "github-client-secret", "GitHub client secret" ) + elif auth_type == "oidc": + self.input_field( + "gafaelfawr", + "oidc-client-secret", + "OpenID Connect client secret", + ) + if use_ldap == "y": + self.input_field( + "gafaelfawr", "ldap-secret", "LDAP simple bind password" + ) else: raise Exception(f"Invalid auth provider {auth_type}") + slack_webhook = self._get_current("rsp-alerts", "slack-webhook") + if slack_webhook: + self._set("gafaelfawr", "slack-webhook", slack_webhook) + def _pull_secret(self): self.input_file( - "pull-secret", ".dockerconfigjson", ".docker/config.json to pull images" + "pull-secret", + ".dockerconfigjson", + ".docker/config.json to pull images", ) def _butler_secret(self): self.input_file( - "butler-secret", 
"aws-credentials.ini", "AWS credentials for butler" - ) + "butler-secret", + "aws-credentials.ini", + "AWS credentials for butler", + ) self.input_file( - "butler-secret", "butler-gcs-idf-creds.json", "Google credentials for butler" - ) + "butler-secret", + "butler-gcs-idf-creds.json", + "Google credentials for butler", + ) self.input_file( - "butler-secret", "postgres-credentials.txt", "Postgres credentials for butler" - ) + "butler-secret", + "postgres-credentials.txt", + "Postgres credentials for butler", + ) def _ingress_nginx(self): self.input_file("ingress-nginx", "tls.key", "Certificate private key") self.input_file("ingress-nginx", "tls.crt", "Certificate chain") def _argocd(self): - current_pw = self._get_current("installer", "argocd.admin.plaintext_password") + current_pw = self._get_current( + "installer", "argocd.admin.plaintext_password" + ) self.input_field( - "installer", "argocd.admin.plaintext_password", "Admin password for ArgoCD?" + "installer", + "argocd.admin.plaintext_password", + "Admin password for ArgoCD?", ) new_pw = self.secrets["installer"]["argocd.admin.plaintext_password"] if current_pw != new_pw or self.regenerate: - h = bcrypt.hashpw(new_pw.encode("ascii"), bcrypt.gensalt(rounds=15)).decode("ascii") - now_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") + h = bcrypt.hashpw( + new_pw.encode("ascii"), bcrypt.gensalt(rounds=15) + ).decode("ascii") + now_time = datetime.now(timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%SZ" + ) self._set("argocd", "admin.password", h) self._set("argocd", "admin.passwordMtime", now_time) @@ -245,28 +313,44 @@ def _argocd(self): self.input_field( "argocd", "dex.clientSecret", - "OAuth client secret for ArgoCD (either GitHub or Google)?" 
+ "OAuth client secret for ArgoCD (either GitHub or Google)?", + ) + + self._set_generated( + "argocd", "server.secretkey", secrets.token_hex(16) ) - self._set_generated("argocd", "server.secretkey", secrets.token_hex(16)) + def _telegraf(self): + self.input_field( + "telegraf", + "influx-token", + "Token for communicating with monitoring InfluxDB2 instance", + ) + self._set("telegraf", "org-id", "square") def _portal(self): pw = secrets.token_hex(32) self._set_generated("portal", "ADMIN_PASSWORD", pw) def _vo_cutouts(self): - self._set_generated("vo-cutouts", "redis-password", os.urandom(32).hex()) + self._set_generated( + "vo-cutouts", "redis-password", os.urandom(32).hex() + ) self.input_field("vo-cutouts", "cloudsql", "Use CloudSQL? (y/n):") use_cloudsql = self.secrets["vo-cutouts"]["cloudsql"] if use_cloudsql == "y": - self.input_field("vo-cutouts", "database-password", "Database password") + self.input_field( + "vo-cutouts", "database-password", "Database password" + ) elif use_cloudsql == "n": # Pluck the password out of the postgres portion. 
            db_pass = self.secrets["postgres"]["vo_cutouts_password"]
            self._set("vo-cutouts", "database-password", db_pass)
        else:
-            raise Exception(f"Invalid vo-cutouts cloudsql value {use_cloudsql}")
+            raise Exception(
+                f"Invalid vo-cutouts cloudsql value {use_cloudsql}"
+            )
 
         aws = self.secrets["butler-secret"]["aws-credentials.ini"]
         self._set("vo-cutouts", "aws-credentials", aws)
@@ -275,6 +359,27 @@ def _vo_cutouts(self):
         postgres = self.secrets["butler-secret"]["postgres-credentials.txt"]
         self._set("vo-cutouts", "postgres-credentials", postgres)
 
+    def _sherlock(self):
+        """This secret is for sherlock to push status to status.lsst.codes."""
+        publish_key = secrets.token_hex(32)
+        self._set_generated("sherlock", "publish_key", publish_key)
+
+    def _rsp_alerts(self):
+        """Shared secrets for alerting."""
+        self.input_field(
+            "rsp-alerts", "slack-webhook", "Slack webhook for alerts"
+        )
+
+    def _narrativelog(self):
+        """Give narrativelog its own secret for externalization."""
+        db_pass = self.secrets["postgres"]["narrativelog_password"]
+        self._set("narrativelog", "database-password", db_pass)
+
+    def _exposurelog(self):
+        """Give exposurelog its own secret for externalization."""
+        db_pass = self.secrets["postgres"]["exposurelog_password"]
+        self._set("exposurelog", "database-password", db_pass)
+
 
 class OnePasswordSecretGenerator(SecretGenerator):
     """A secret generator that syncs 1Password secrets into a secrets directory
@@ -293,7 +398,7 @@ class OnePasswordSecretGenerator(SecretGenerator):
     def __init__(self, environment, regenerate):
         super().__init__(environment, regenerate)
         self.op_secrets = {}
-        self.op = OnePassword()
+        self.op = new_client_from_environment()
         self.parse_vault()
 
     def parse_vault(self):
@@ -303,59 +408,51 @@ def parse_vault(self):
         This method is called automatically when initializing a
         `OnePasswordSecretGenerator`.
""" - items = self.op.list_items("RSP-Vault") + vault = self.op.get_vault_by_title("RSP-Vault") + items = self.op.get_items(vault.id) - for i in items: + for item_summary in items: key = None + secret_notes = None + secret_password = None environments = [] - uuid = i["uuid"] - doc = self.op.get_item(uuid=uuid) - - logging.debug(f"Looking at {uuid}") - logging.debug(f"{doc}") - - for section in doc["details"]["sections"]: - if "fields" not in section: - continue - - for field in section["fields"]: - if field["t"] == "generate_secrets_key": - if key is None: - key = field["v"] - else: - raise Exception("Found two generate_secrets_keys for {key}") - elif field["t"] == "environment": - environments.append(field["v"]) - - # If we don't find a generate_secrets_key somewhere, then we shouldn't - # bother with this document in the vault. + item = self.op.get_item(item_summary.id, vault.id) + + logging.debug(f"Looking at {item.id}") + + for field in item.fields: + if field.label == "generate_secrets_key": + if key is None: + key = field.value + else: + msg = "Found two generate_secrets_keys for {key}" + raise Exception(msg) + elif field.label == "environment": + environments.append(field.value) + elif field.label == "notesPlain": + secret_notes = field.value + elif field.purpose == "PASSWORD": + secret_password = field.value + if not key: - logging.debug( - f"Skipping because of no generate_secrets_key, %s", - uuid - ) continue - # The type of secret is either a note or a password login. - # First, check the notes. - secret_value = doc["details"]["notesPlain"] + secret_value = secret_notes or secret_password - # If we don't find anything, pull the password from a login item. 
if not secret_value: - for f in doc["details"]["fields"]: - if f["designation"] == "password": - secret_value = f["value"] + logging.error("No value found for %s", item.title) + continue - logging.debug("Environments are %s for %s", environments, uuid) + logging.debug("Environments are %s for %s", environments, item.id) if self.environment in environments: self.op_secrets[key] = secret_value - logging.debug("Storing %s (matching environment)", uuid) + logging.debug("Storing %s (matching environment)", item.id) elif not environments and key not in self.op_secrets: self.op_secrets[key] = secret_value - logging.debug("Storing %s (applicable to all envs)", uuid) + logging.debug("Storing %s (applicable to all envs)", item.id) else: - logging.debug("Ignoring %s", uuid) + logging.debug("Ignoring %s", item.id) def input_field(self, component, name, description): """Query for a secret's value from 1Password (`op_secrets` attribute). @@ -397,15 +494,29 @@ def generate(self): if item_component in {"ingress-nginx", "cert-manager"}: continue - logging.debug("Updating component: %s/%s", item_component, item_name) + logging.debug( + "Updating component: %s/%s", item_component, item_name + ) self.input_field(item_component, item_name, "") if __name__ == "__main__": parser = argparse.ArgumentParser(description="generate_secrets") - parser.add_argument("--op", default=False, action="store_true", help="Load secrets from 1Password") - parser.add_argument("--verbose", default=False, action="store_true", help="Verbose logging") - parser.add_argument("--regenerate", default=False, action="store_true", help="Regenerate random secrets") + parser.add_argument( + "--op", + default=False, + action="store_true", + help="Load secrets from 1Password", + ) + parser.add_argument( + "--verbose", default=False, action="store_true", help="Verbose logging" + ) + parser.add_argument( + "--regenerate", + default=False, + action="store_true", + help="Regenerate random secrets", + ) 
parser.add_argument("environment", help="Environment to generate") args = parser.parse_args() diff --git a/installer/install.sh b/installer/install.sh index 2d1fc0c504..127206a019 100755 --- a/installer/install.sh +++ b/installer/install.sh @@ -34,17 +34,20 @@ echo "Update / install vault-secrets-operator..." helm dependency update ../services/vault-secrets-operator helm upgrade vault-secrets-operator ../services/vault-secrets-operator \ --install \ + --values ../services/vault-secrets-operator/values.yaml \ --values ../services/vault-secrets-operator/values-$ENVIRONMENT.yaml \ --create-namespace \ --namespace vault-secrets-operator \ --timeout 15m \ --wait -echo "Update / install argocd using helm3..." +echo "Update / install argocd using helm..." helm dependency update ../services/argocd helm upgrade argocd ../services/argocd \ --install \ + --values ../services/argocd/values.yaml \ --values ../services/argocd/values-$ENVIRONMENT.yaml \ + --set global.vaultSecretsPath="$VAULT_PATH_PREFIX" \ --create-namespace \ --namespace argocd \ --timeout 15m \ @@ -95,14 +98,6 @@ then kubectl -n cert-manager rollout status deploy/cert-manager-webhook fi -if [ $(yq -r .cert_issuer.enabled ../science-platform/values-$ENVIRONMENT.yaml) == "true" ]; -then - echo "Syncing cert-issuer..." - argocd app sync cert-issuer \ - --port-forward \ - --port-forward-namespace argocd -fi - if [ $(yq -r .postgres.enabled ../science-platform/values-$ENVIRONMENT.yaml) == "true" ]; then echo "Syncing postgres..." 
diff --git a/installer/requirements.txt b/installer/requirements.txt index b1e85a2719..73e8efa191 100644 --- a/installer/requirements.txt +++ b/installer/requirements.txt @@ -1,5 +1,5 @@ bcrypt cryptography +onepasswordconnectsdk pyyaml yq -1password diff --git a/installer/update_all_secrets.sh b/installer/update_all_secrets.sh index 61a810aa60..6a6a0020bc 100755 --- a/installer/update_all_secrets.sh +++ b/installer/update_all_secrets.sh @@ -1,11 +1,9 @@ #!/bin/bash -ex ./update_secrets.sh minikube.lsst.codes -./update_secrets.sh lsst-lsp-int.ncsa.illinois.edu -./update_secrets.sh lsst-lsp-stable.ncsa.illinois.edu ./update_secrets.sh base-lsp.lsst.codes ./update_secrets.sh summit-lsp.lsst.codes ./update_secrets.sh tucson-teststand.lsst.codes -./update_secrets.sh red-five.lsst.codes ./update_secrets.sh data.lsst.cloud ./update_secrets.sh data-int.lsst.cloud ./update_secrets.sh data-dev.lsst.cloud +./update_secrets.sh roe diff --git a/installer/update_secrets.sh b/installer/update_secrets.sh index d55d051e2f..4c30c4b570 100755 --- a/installer/update_secrets.sh +++ b/installer/update_secrets.sh @@ -4,6 +4,12 @@ ENVIRONMENT=$1 export VAULT_DOC_UUID=`yq -r .onepassword_uuid ../science-platform/values.yaml` export VAULT_ADDR=https://vault.lsst.codes export VAULT_TOKEN=`./vault_key.py $ENVIRONMENT write` +export OP_CONNECT_HOST=https://roundtable.lsst.codes/1password + +if [ -z "$OP_CONNECT_TOKEN" ]; then + echo 'OP_CONNECT_TOKEN must be set to a 1Password Connect token' >&2 + exit 1 +fi echo "Clear out any existing secrets" rm -rf secrets @@ -11,7 +17,7 @@ rm -rf secrets echo "Reading current secrets from vault" ./read_secrets.sh $ENVIRONMENT -echo "Generating missing secrets with values from onepassword" +echo "Generating missing secrets with values from 1Password" ./generate_secrets.py $ENVIRONMENT --op echo "Writing secrets to vault" diff --git a/installer/vault_key.py b/installer/vault_key.py index 50c175ea96..f7f47b4bad 100755 --- a/installer/vault_key.py +++ 
b/installer/vault_key.py @@ -2,16 +2,20 @@ import argparse import json import os -import pprint -from onepassword import OnePassword +from onepasswordconnectsdk import new_client_from_environment class VaultKeyRetriever: def __init__(self): - self.op = OnePassword() - vault_keys_doc = self.op.get_item(uuid=os.environ["VAULT_DOC_UUID"]) - vault_keys_json = vault_keys_doc["details"]["notesPlain"] + self.op = new_client_from_environment() + vault_keys = self.op.get_item( + os.environ["VAULT_DOC_UUID"], "RSP-Vault" + ) + for field in vault_keys.fields: + if field.label == "notesPlain": + vault_keys_json = field.value + break self.vault_keys = json.loads(vault_keys_json) def retrieve_key(self, environment, key_type): @@ -22,9 +26,15 @@ def retrieve_key(self, environment, key_type): if __name__ == "__main__": - parser = argparse.ArgumentParser(description="fetch the vault key for an environment") - parser.add_argument("environment", help="Environment name to retrieve key for") - parser.add_argument("key_type", choices=["read", "write"], help="Which key to retrieve") + parser = argparse.ArgumentParser( + description="fetch the vault key for an environment" + ) + parser.add_argument( + "environment", help="Environment name to retrieve key for" + ) + parser.add_argument( + "key_type", choices=["read", "write"], help="Which key to retrieve" + ) args = parser.parse_args() vkr = VaultKeyRetriever() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000..55bca21138 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,102 @@ +[project] +# https://packaging.python.org/en/latest/specifications/declaring-project-metadata/ +name = "phalanx" +version = "1.0.0" +description = "Python support code for the Rubin Phalanx platform." 
+license = {file = "LICENSE"} +readme= "README.rst" +keywords = [ + "rubin", + "lsst", +] +# https://pypi.org/classifiers/ +classifiers = [ + "Development Status :: 5 - Production/Stable", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Intended Audience :: Developers", + "Operating System :: POSIX", +] +requires-python = ">=3.8" +dependencies = [ + "PyYAML", + "GitPython", +] + +[project.optional-dependencies] +dev = [ + # Testing + "coverage[toml]", + "pytest", + "pre-commit", + "mypy", + "types-PyYAML", + # Documentation + "documenteer[guide]>=0.7.0b4", + "sphinx-diagrams", + "sphinx-jinja", +] + +[project.scripts] +expand-charts = "phalanx.testing.expandcharts:main" + +[project.urls] +Homepage = "https://phalanx.lsst.io" +Source = "https://github.com/lsst-sqre/phalanx" + +[build-system] +requires = [ + "setuptools>=61", + "wheel", + "setuptools_scm[toml]>=6.2" +] +build-backend = "setuptools.build_meta" + +[tool.setuptools_scm] + +[tool.coverage.run] +parallel = true +branch = true +source = ["phalanx"] + +[tool.coverage.paths] +source = ["src", ".tox/*/site-packages"] + +[tool.coverage.report] +show_missing = true +exclude_lines = [ + "pragma: no cover", + "def __repr__", + "if self.debug:", + "if settings.DEBUG", + "raise AssertionError", + "raise NotImplementedError", + "if 0:", + "if __name__ == .__main__.:", + "if TYPE_CHECKING:" +] + +[tool.black] +line-length = 79 +target-version = ['py38'] +exclude = ''' +/( + \.eggs + | \.git + | \.mypy_cache + | \.tox + | \.venv + | _build + | build + | dist +)/ +''' +# Use single-quoted strings so TOML treats the string like a Python r-string +# Multi-line strings are implicitly treated by black as regular expressions + +[tool.isort] +include_trailing_comma = true +multi_line_output = 3 +known_first_party = [] +skip = ["docs/conf.py"] diff --git a/science-platform/README.md b/science-platform/README.md new file mode 100644 index 
0000000000..0050d72612 --- /dev/null +++ b/science-platform/README.md @@ -0,0 +1,42 @@ +# science-platform + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| alert_stream_broker.enabled | bool | `false` | | +| cachemachine.enabled | bool | `false` | | +| cert_manager.enabled | bool | `false` | | +| datalinker.enabled | bool | `false` | | +| exposurelog.enabled | bool | `false` | | +| gafaelfawr.enabled | bool | `false` | | +| hips.enabled | bool | `false` | | +| ingress_nginx.enabled | bool | `false` | | +| linters.enabled | bool | `false` | | +| mobu.enabled | bool | `false` | | +| moneypenny.enabled | bool | `false` | | +| narrativelog.enabled | bool | `false` | | +| noteburst.enabled | bool | `false` | | +| nublado2.enabled | bool | `false` | | +| onepassword_uuid | string | `"dg5afgiadsffeklfr6jykqymeu"` | | +| plot_navigator.enabled | bool | `false` | | +| portal.enabled | bool | `false` | | +| postgres.enabled | bool | `false` | | +| production_tools.enabled | bool | `false` | | +| repoURL | string | `"https://github.com/lsst-sqre/phalanx.git"` | | +| revision | string | `"HEAD"` | | +| sasquatch.enabled | bool | `false` | | +| semaphore.enabled | bool | `false` | | +| sherlock.enabled | bool | `false` | | +| sqlproxy_cross_project.enabled | bool | `false` | | +| squareone.enabled | bool | `false` | | +| squash_api.enabled | bool | `false` | | +| strimzi.enabled | bool | `false` | | +| strimzi_registry_operator.enabled | bool | `false` | | +| tap.enabled | bool | `false` | | +| tap_schema.enabled | bool | `false` | | +| telegraf-ds.enabled | bool | `false` | | +| telegraf.enabled | bool | `false` | | +| times_square.enabled | bool | `false` | | +| vault_secrets_operator.enabled | bool | `false` | | +| vo_cutouts.enabled | bool | `false` | | diff --git a/science-platform/templates/_helpers.tpl b/science-platform/templates/_helpers.tpl new file mode 100644 index 0000000000..565b7bf3e3 --- /dev/null +++ 
b/science-platform/templates/_helpers.tpl @@ -0,0 +1,12 @@ +{{/* vim: set filetype=mustache: */}} +{{- define "enabled_services" -}} +argocd + {{- range $okey, $oval := .Values }} + {{- $otype := typeOf $oval -}} + {{- if eq $otype "map[string]interface {}" }} + {{- if hasKey $oval "enabled" }} +{{- if $oval.enabled }}@{{- $okey }}{{- end }} + {{- end }} + {{- end }} + {{- end }} +{{- end }} diff --git a/science-platform/templates/argocd-application.yaml b/science-platform/templates/argocd-application.yaml index ea9379a8f9..a03884b778 100644 --- a/science-platform/templates/argocd-application.yaml +++ b/science-platform/templates/argocd-application.yaml @@ -1,19 +1,23 @@ apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: argocd - namespace: argocd + name: "argocd" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - namespace: argocd - server: https://kubernetes.default.svc - project: default + namespace: "argocd" + server: "https://kubernetes.default.svc" + project: "default" source: - path: services/argocd - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/argocd" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: + parameters: + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" diff --git a/science-platform/templates/cachemachine-application.yaml b/science-platform/templates/cachemachine-application.yaml index 0dc359a4de..3216ca518f 100644 --- a/science-platform/templates/cachemachine-application.yaml +++ b/science-platform/templates/cachemachine-application.yaml @@ -24,6 +24,14 @@ spec: repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | 
quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/cert-issuer-application.yaml b/science-platform/templates/cert-issuer-application.yaml deleted file mode 100644 index 40d20da92b..0000000000 --- a/science-platform/templates/cert-issuer-application.yaml +++ /dev/null @@ -1,21 +0,0 @@ -{{- if .Values.cert_issuer.enabled -}} -apiVersion: argoproj.io/v1alpha1 -kind: Application -metadata: - name: cert-issuer - namespace: argocd - finalizers: - - resources-finalizer.argocd.argoproj.io -spec: - destination: - namespace: cert-manager - server: https://kubernetes.default.svc - project: default - source: - path: services/cert-issuer - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} - helm: - valueFiles: - - values-{{ .Values.environment }}.yaml -{{- end -}} diff --git a/science-platform/templates/cert-manager-application.yaml b/science-platform/templates/cert-manager-application.yaml index 2069b76dd4..5bb27033f6 100644 --- a/science-platform/templates/cert-manager-application.yaml +++ b/science-platform/templates/cert-manager-application.yaml @@ -2,34 +2,41 @@ apiVersion: v1 kind: Namespace metadata: - name: cert-manager + name: "cert-manager" spec: finalizers: - - kubernetes + - "kubernetes" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: cert-manager - namespace: argocd + name: "cert-manager" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - namespace: cert-manager - server: https://kubernetes.default.svc - project: default + namespace: "cert-manager" + server: "https://kubernetes.default.svc" + project: "default" source: - path: services/cert-manager - 
repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/cert-manager" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} + helm: + parameters: + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} + valueFiles: + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" ignoreDifferences: - - group: admissionregistration.k8s.io - kind: MutatingWebhookConfiguration + - group: "admissionregistration.k8s.io" + kind: "MutatingWebhookConfiguration" jsonPointers: - "/webhooks/0/clientConfig/caBundle" - - group: admissionregistration.k8s.io - kind: ValidatingWebhookConfiguration + - group: "admissionregistration.k8s.io" + kind: "ValidatingWebhookConfiguration" jsonPointers: - "/webhooks/0/clientConfig/caBundle" {{- end -}} diff --git a/science-platform/templates/datalinker-application.yaml b/science-platform/templates/datalinker-application.yaml index f089e27bf7..c06c23762f 100644 --- a/science-platform/templates/datalinker-application.yaml +++ b/science-platform/templates/datalinker-application.yaml @@ -2,28 +2,38 @@ apiVersion: v1 kind: Namespace metadata: - name: datalinker + name: "datalinker" spec: finalizers: - - kubernetes + - "kubernetes" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: datalinker - namespace: argocd + name: "datalinker" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - namespace: datalinker - server: https://kubernetes.default.svc - project: default + namespace: "datalinker" + server: "https://kubernetes.default.svc" + project: "default" source: - path: services/datalinker - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/datalinker" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: + parameters: + - name: "global.baseUrl" + value: 
"https://{{ .Values.fqdn }}" + - name: "global.butlerRepositoryIndex" + value: {{ .Values.butlerRepositoryIndex | quote }} + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/exposurelog-application.yaml b/science-platform/templates/exposurelog-application.yaml index c3051b5546..3e06116112 100644 --- a/science-platform/templates/exposurelog-application.yaml +++ b/science-platform/templates/exposurelog-application.yaml @@ -21,6 +21,14 @@ spec: repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/gafaelfawr-application.yaml b/science-platform/templates/gafaelfawr-application.yaml index 257015fc6c..4eec7a8cb7 100644 --- a/science-platform/templates/gafaelfawr-application.yaml +++ b/science-platform/templates/gafaelfawr-application.yaml @@ -2,28 +2,36 @@ apiVersion: v1 kind: Namespace metadata: - name: gafaelfawr + name: "gafaelfawr" spec: finalizers: - - kubernetes + - "kubernetes" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: gafaelfawr - namespace: argocd + name: "gafaelfawr" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - namespace: gafaelfawr - server: https://kubernetes.default.svc - project: default + namespace: "gafaelfawr" + server: "https://kubernetes.default.svc" + project: 
"default" source: - path: services/gafaelfawr - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/gafaelfawr" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/hips-application.yaml b/science-platform/templates/hips-application.yaml new file mode 100644 index 0000000000..071d36dd01 --- /dev/null +++ b/science-platform/templates/hips-application.yaml @@ -0,0 +1,35 @@ +{{- if .Values.hips.enabled -}} +apiVersion: v1 +kind: Namespace +metadata: + name: "hips" +spec: + finalizers: + - "kubernetes" +--- +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: "hips" + namespace: "argocd" + finalizers: + - "resources-finalizer.argocd.argoproj.io" +spec: + destination: + namespace: "hips" + server: "https://kubernetes.default.svc" + project: "default" + source: + path: "services/hips" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} + helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + valueFiles: + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" +{{- end -}} diff --git a/science-platform/templates/ingress-nginx-application.yaml b/science-platform/templates/ingress-nginx-application.yaml index e720da8484..ee99864bcb 100644 --- a/science-platform/templates/ingress-nginx-application.yaml +++ b/science-platform/templates/ingress-nginx-application.yaml @@ -24,6 +24,10 @@ spec: repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} 
helm: + parameters: + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/linters-application.yaml b/science-platform/templates/linters-application.yaml new file mode 100644 index 0000000000..9a5e62c435 --- /dev/null +++ b/science-platform/templates/linters-application.yaml @@ -0,0 +1,37 @@ +{{- if .Values.linters.enabled -}} +apiVersion: v1 +kind: Namespace +metadata: + name: "linters" +spec: + finalizers: + - "kubernetes" +--- +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: "linters" + namespace: "argocd" + finalizers: + - "resources-finalizer.argocd.argoproj.io" +spec: + destination: + namespace: "linters" + server: "https://kubernetes.default.svc" + project: "default" + source: + path: "services/linters" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} + helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} + valueFiles: + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" +{{- end -}} diff --git a/science-platform/templates/mobu-application.yaml b/science-platform/templates/mobu-application.yaml index 83f169f52f..135dc420da 100644 --- a/science-platform/templates/mobu-application.yaml +++ b/science-platform/templates/mobu-application.yaml @@ -2,28 +2,36 @@ apiVersion: v1 kind: Namespace metadata: - name: mobu + name: "mobu" spec: finalizers: - - kubernetes + - "kubernetes" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: mobu - namespace: argocd + name: "mobu" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: 
destination: - namespace: mobu - server: https://kubernetes.default.svc - project: default + namespace: "mobu" + server: "https://kubernetes.default.svc" + project: "default" source: - path: services/mobu - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/mobu" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/moneypenny-application.yaml b/science-platform/templates/moneypenny-application.yaml index c6dea1081e..17d8518649 100644 --- a/science-platform/templates/moneypenny-application.yaml +++ b/science-platform/templates/moneypenny-application.yaml @@ -24,6 +24,14 @@ spec: repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/narrativelog-application.yaml b/science-platform/templates/narrativelog-application.yaml index d3d95cd80b..3b903ba92d 100644 --- a/science-platform/templates/narrativelog-application.yaml +++ b/science-platform/templates/narrativelog-application.yaml @@ -21,6 +21,14 @@ spec: repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: 
"https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/noteburst-application.yaml b/science-platform/templates/noteburst-application.yaml index 479fbd5a86..c80f8e1202 100644 --- a/science-platform/templates/noteburst-application.yaml +++ b/science-platform/templates/noteburst-application.yaml @@ -2,28 +2,34 @@ apiVersion: v1 kind: Namespace metadata: - name: noteburst + name: "noteburst" spec: finalizers: - - kubernetes + - "kubernetes" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: noteburst - namespace: argocd + name: "noteburst" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - namespace: noteburst - server: https://kubernetes.default.svc - project: default + namespace: "noteburst" + server: "https://kubernetes.default.svc" + project: "default" source: - path: services/noteburst - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/noteburst" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/nublado2-application.yaml b/science-platform/templates/nublado2-application.yaml index 2f7a6c7778..96e4deb8fc 100644 --- a/science-platform/templates/nublado2-application.yaml +++ b/science-platform/templates/nublado2-application.yaml @@ -2,28 +2,44 @@ apiVersion: v1 kind: Namespace metadata: - name: nublado2 + name: "nublado2" spec: finalizers: - - 
kubernetes + - "kubernetes" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: nublado2 - namespace: argocd + name: "nublado2" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - namespace: nublado2 - server: https://kubernetes.default.svc - project: default + namespace: "nublado2" + server: "https://kubernetes.default.svc" + project: "default" source: - path: services/nublado2 - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/nublado2" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" + parameters: + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} + ignoreDifferences: + - group: "" + kind: "Secret" + jsonPointers: + - "/data/hub.config.ConfigurableHTTPProxy.auth_token" + - "/data/hub.config.CryptKeeper.keys" + - "/data/hub.config.JupyterHub.cookie_secret" + - group: "apps" + kind: "Deployment" + jsonPointers: + - "/spec/template/metadata/annotations/checksum~1secret" + - "/spec/template/metadata/annotations/checksum~1auth-token" {{- end -}} diff --git a/science-platform/templates/plot-navigator-application.yaml b/science-platform/templates/plot-navigator-application.yaml index 25235b6de0..7277904dda 100644 --- a/science-platform/templates/plot-navigator-application.yaml +++ b/science-platform/templates/plot-navigator-application.yaml @@ -21,6 +21,14 @@ spec: repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - 
"values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/portal-application.yaml b/science-platform/templates/portal-application.yaml index 0ad1c88f50..87861b1ae4 100644 --- a/science-platform/templates/portal-application.yaml +++ b/science-platform/templates/portal-application.yaml @@ -2,25 +2,33 @@ apiVersion: v1 kind: Namespace metadata: - name: portal + name: "portal" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: portal - namespace: argocd + name: "portal" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - namespace: portal - server: https://kubernetes.default.svc - project: default + namespace: "portal" + server: "https://kubernetes.default.svc" + project: "default" source: - path: services/portal - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/portal" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/postgres-application.yaml b/science-platform/templates/postgres-application.yaml index 03d0b0c3d8..deff1baaf7 100644 --- a/science-platform/templates/postgres-application.yaml +++ b/science-platform/templates/postgres-application.yaml @@ -21,6 +21,10 @@ spec: repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ 
.Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/production-tools-application.yaml b/science-platform/templates/production-tools-application.yaml new file mode 100644 index 0000000000..096be8ce2d --- /dev/null +++ b/science-platform/templates/production-tools-application.yaml @@ -0,0 +1,34 @@ +{{- if .Values.production_tools.enabled -}} +apiVersion: v1 +kind: Namespace +metadata: + name: production-tools +--- +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: production-tools + namespace: argocd + finalizers: + - resources-finalizer.argocd.argoproj.io +spec: + destination: + namespace: production-tools + server: https://kubernetes.default.svc + project: default + source: + path: services/production-tools + repoURL: {{ .Values.repoURL }} + targetRevision: {{ .Values.revision }} + helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} + valueFiles: + - values.yaml + - values-{{ .Values.environment }}.yaml +{{- end -}} diff --git a/science-platform/templates/sasquatch-application.yaml b/science-platform/templates/sasquatch-application.yaml index da996432b0..976de21e7f 100644 --- a/science-platform/templates/sasquatch-application.yaml +++ b/science-platform/templates/sasquatch-application.yaml @@ -24,7 +24,14 @@ spec: repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values.yaml - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/semaphore-application.yaml 
b/science-platform/templates/semaphore-application.yaml index 4039b32c36..99a1e0a3f4 100644 --- a/science-platform/templates/semaphore-application.yaml +++ b/science-platform/templates/semaphore-application.yaml @@ -2,28 +2,36 @@ apiVersion: v1 kind: Namespace metadata: - name: semaphore + name: "semaphore" spec: finalizers: - - kubernetes + - "kubernetes" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: semaphore - namespace: argocd + name: "semaphore" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - namespace: semaphore - server: https://kubernetes.default.svc - project: default + namespace: "semaphore" + server: "https://kubernetes.default.svc" + project: "default" source: - path: services/semaphore - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/semaphore" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPathPrefix" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/sherlock-application.yaml b/science-platform/templates/sherlock-application.yaml index 73463b63f9..9c032cd1d5 100644 --- a/science-platform/templates/sherlock-application.yaml +++ b/science-platform/templates/sherlock-application.yaml @@ -24,6 +24,14 @@ spec: repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: 
- - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/sqlproxy-cross-project-application.yaml b/science-platform/templates/sqlproxy-cross-project-application.yaml new file mode 100644 index 0000000000..631378b41f --- /dev/null +++ b/science-platform/templates/sqlproxy-cross-project-application.yaml @@ -0,0 +1,30 @@ +{{- if .Values.sqlproxy_cross_project.enabled -}} +apiVersion: v1 +kind: Namespace +metadata: + name: "sqlproxy-cross-project" +spec: + finalizers: + - "kubernetes" +--- +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: "sqlproxy-cross-project" + namespace: "argocd" + finalizers: + - "resources-finalizer.argocd.argoproj.io" +spec: + destination: + namespace: "sqlproxy-cross-project" + server: "https://kubernetes.default.svc" + project: "default" + source: + path: "services/sqlproxy-cross-project" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} + helm: + valueFiles: + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" +{{- end -}} diff --git a/science-platform/templates/squareone-application.yaml b/science-platform/templates/squareone-application.yaml index 8fa37f1892..63dd926704 100644 --- a/science-platform/templates/squareone-application.yaml +++ b/science-platform/templates/squareone-application.yaml @@ -2,28 +2,36 @@ apiVersion: v1 kind: Namespace metadata: - name: squareone + name: "squareone" spec: finalizers: - - kubernetes + - "kubernetes" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: squareone - namespace: argocd + name: "squareone" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - namespace: squareone - server: https://kubernetes.default.svc - project: default + namespace: "squareone" + server: "https://kubernetes.default.svc" + project: "default" 
source: - path: services/squareone - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/squareone" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPathPrefix" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/tap-application.yaml b/science-platform/templates/tap-application.yaml index 5f570f0717..c3419dca62 100644 --- a/science-platform/templates/tap-application.yaml +++ b/science-platform/templates/tap-application.yaml @@ -24,6 +24,14 @@ spec: repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/tap-schema-application.yaml b/science-platform/templates/tap-schema-application.yaml index debd3f4f86..78e9ab9cc1 100644 --- a/science-platform/templates/tap-schema-application.yaml +++ b/science-platform/templates/tap-schema-application.yaml @@ -2,28 +2,32 @@ apiVersion: v1 kind: Namespace metadata: - name: tap-schema + name: "tap-schema" spec: finalizers: - - kubernetes + - "kubernetes" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: tap-schema - namespace: argocd + name: "tap-schema" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - 
namespace: tap-schema - server: https://kubernetes.default.svc - project: default + namespace: "tap-schema" + server: "https://kubernetes.default.svc" + project: "default" source: - path: services/tap-schema - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/tap-schema" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: + parameters: + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/obstap-application.yaml b/science-platform/templates/telegraf-application.yaml similarity index 52% rename from science-platform/templates/obstap-application.yaml rename to science-platform/templates/telegraf-application.yaml index e1a5656445..d973ed5c84 100644 --- a/science-platform/templates/obstap-application.yaml +++ b/science-platform/templates/telegraf-application.yaml @@ -1,8 +1,9 @@ -{{- if .Values.obstap.enabled -}} + +{{- if .Values.telegraf.enabled -}} apiVersion: v1 kind: Namespace metadata: - name: obstap + name: telegraf spec: finalizers: - kubernetes @@ -10,20 +11,29 @@ spec: apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: obstap + name: telegraf namespace: argocd finalizers: - resources-finalizer.argocd.argoproj.io spec: destination: - namespace: obstap + namespace: telegraf server: https://kubernetes.default.svc project: default source: - path: services/obstap + path: services/telegraf repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.enabled_services" + value: {{ include "enabled_services" . 
| quote }} + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: + - values.yaml - values-{{ .Values.environment }}.yaml {{- end -}} + diff --git a/science-platform/templates/squash-api-application.yaml b/science-platform/templates/telegraf-ds-application.yaml similarity index 51% rename from science-platform/templates/squash-api-application.yaml rename to science-platform/templates/telegraf-ds-application.yaml index 4c326dc1e5..330e461e38 100644 --- a/science-platform/templates/squash-api-application.yaml +++ b/science-platform/templates/telegraf-ds-application.yaml @@ -1,8 +1,8 @@ -{{- if .Values.squash_api.enabled -}} +{{- if .Values.telegraf.enabled -}} apiVersion: v1 kind: Namespace metadata: - name: squash-api + name: telegraf-ds spec: finalizers: - kubernetes @@ -10,20 +10,28 @@ spec: apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: squash-api + name: telegraf-ds namespace: argocd finalizers: - resources-finalizer.argocd.argoproj.io spec: destination: - namespace: squash-api + namespace: telegraf-ds server: https://kubernetes.default.svc project: default source: - path: services/squash-api + path: services/telegraf-ds repoURL: {{ .Values.repoURL }} targetRevision: {{ .Values.revision }} helm: + parameters: + - name: "global.enabled_services" + value: {{ include "enabled_services" . 
| quote }} + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: + - values.yaml - values-{{ .Values.environment }}.yaml {{- end -}} diff --git a/science-platform/templates/times-square-application.yaml b/science-platform/templates/times-square-application.yaml new file mode 100644 index 0000000000..f3056cf055 --- /dev/null +++ b/science-platform/templates/times-square-application.yaml @@ -0,0 +1,37 @@ +{{- if .Values.times_square.enabled -}} +apiVersion: v1 +kind: Namespace +metadata: + name: "times-square" +spec: + finalizers: + - "kubernetes" +--- +apiVersion: argoproj.io/v1alpha1 +kind: Application +metadata: + name: "times-square" + namespace: "argocd" + finalizers: + - "resources-finalizer.argocd.argoproj.io" +spec: + destination: + namespace: "times-square" + server: "https://kubernetes.default.svc" + project: "default" + source: + path: "services/times-square" + repoURL: {{ .Values.repoURL |quote }} + targetRevision: {{ .Values.revision | quote }} + helm: + parameters: + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.vaultSecretsPathPrefix" + value: {{ .Values.vault_path_prefix | quote }} + valueFiles: + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" +{{- end -}} diff --git a/science-platform/templates/vault-secrets-operator-application.yaml b/science-platform/templates/vault-secrets-operator-application.yaml index 3eb50571bb..76d98bd16e 100644 --- a/science-platform/templates/vault-secrets-operator-application.yaml +++ b/science-platform/templates/vault-secrets-operator-application.yaml @@ -17,5 +17,6 @@ spec: targetRevision: {{ .Values.revision }} helm: valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/templates/vo-cutouts-application.yaml 
b/science-platform/templates/vo-cutouts-application.yaml index ed2195b1c5..ae5692b74f 100644 --- a/science-platform/templates/vo-cutouts-application.yaml +++ b/science-platform/templates/vo-cutouts-application.yaml @@ -2,28 +2,38 @@ apiVersion: v1 kind: Namespace metadata: - name: vo-cutouts + name: "vo-cutouts" spec: finalizers: - - kubernetes + - "kubernetes" --- apiVersion: argoproj.io/v1alpha1 kind: Application metadata: - name: vo-cutouts - namespace: argocd + name: "vo-cutouts" + namespace: "argocd" finalizers: - - resources-finalizer.argocd.argoproj.io + - "resources-finalizer.argocd.argoproj.io" spec: destination: - namespace: vo-cutouts - server: https://kubernetes.default.svc - project: default + namespace: "vo-cutouts" + server: "https://kubernetes.default.svc" + project: "default" source: - path: services/vo-cutouts - repoURL: {{ .Values.repoURL }} - targetRevision: {{ .Values.revision }} + path: "services/vo-cutouts" + repoURL: {{ .Values.repoURL | quote }} + targetRevision: {{ .Values.revision | quote }} helm: + parameters: + - name: "global.baseUrl" + value: "https://{{ .Values.fqdn }}" + - name: "global.butlerRepositoryIndex" + value: {{ .Values.butlerRepositoryIndex | quote }} + - name: "global.host" + value: {{ .Values.fqdn | quote }} + - name: "global.vaultSecretsPath" + value: {{ .Values.vault_path_prefix | quote }} valueFiles: - - values-{{ .Values.environment }}.yaml + - "values.yaml" + - "values-{{ .Values.environment }}.yaml" {{- end -}} diff --git a/science-platform/values-base.yaml b/science-platform/values-base.yaml index f6fc25bcdf..b08a617123 100644 --- a/science-platform/values-base.yaml +++ b/science-platform/values-base.yaml @@ -6,8 +6,6 @@ alert_stream_broker: enabled: false cachemachine: enabled: true -cert_issuer: - enabled: true cert_manager: enabled: true datalinker: @@ -16,20 +14,20 @@ exposurelog: enabled: true gafaelfawr: enabled: true +hips: + enabled: false +ingress_nginx: + enabled: true mobu: enabled: false moneypenny: 
enabled: true -ingress_nginx: - enabled: true narrativelog: enabled: true noteburst: enabled: false nublado2: enabled: true -obstap: - enabled: false plot_navigator: enabled: false portal: @@ -38,6 +36,8 @@ postgres: enabled: true sasquatch: enabled: false +production_tools: + enabled: false semaphore: enabled: false squareone: @@ -52,6 +52,12 @@ tap: enabled: false tap_schema: enabled: false +telegraf: + enabled: true +telegraf-ds: + enabled: true +times_square: + enabled: false vault_secrets_operator: enabled: true vo_cutouts: diff --git a/science-platform/values-int.yaml b/science-platform/values-ccin2p3.yaml similarity index 79% rename from science-platform/values-int.yaml rename to science-platform/values-ccin2p3.yaml index 8a3e73bc12..0dc2f088ea 100644 --- a/science-platform/values-int.yaml +++ b/science-platform/values-ccin2p3.yaml @@ -1,15 +1,13 @@ -environment: int -fqdn: lsst-lsp-int.ncsa.illinois.edu -vault_path_prefix: secret/k8s_operator/lsst-lsp-int.lsst.codes +environment: ccin2p3 +fqdn: data-dev.lsst.eu +vault_path_prefix: secret/k8s_operator/rsp-cc alert_stream_broker: enabled: false cachemachine: enabled: true -cert_issuer: - enabled: false cert_manager: - enabled: false + enabled: true datalinker: enabled: true exposurelog: @@ -17,19 +15,17 @@ exposurelog: gafaelfawr: enabled: true mobu: - enabled: true + enabled: false moneypenny: enabled: true ingress_nginx: - enabled: false + enabled: true narrativelog: enabled: false noteburst: enabled: false nublado2: enabled: true -obstap: - enabled: true plot_navigator: enabled: false portal: @@ -38,10 +34,12 @@ postgres: enabled: true sasquatch: enabled: false +production_tools: + enabled: false semaphore: enabled: false sherlock: - enabled: true + enabled: false squareone: enabled: true squash_api: @@ -54,6 +52,12 @@ tap: enabled: true tap_schema: enabled: true +telegraf: + enabled: false +telegraf-ds: + enabled: false +times_square: + enabled: false vault_secrets_operator: enabled: true vo_cutouts: diff 
--git a/science-platform/values-idfdev.yaml b/science-platform/values-idfdev.yaml index 092a95bd57..50b2a0a67d 100644 --- a/science-platform/values-idfdev.yaml +++ b/science-platform/values-idfdev.yaml @@ -1,13 +1,12 @@ environment: idfdev fqdn: data-dev.lsst.cloud vault_path_prefix: secret/k8s_operator/data-dev.lsst.cloud +butlerRepositoryIndex: "s3://butler-us-central1-repo-locations/data-int-repos.yaml" alert_stream_broker: enabled: false cachemachine: enabled: true -cert_issuer: - enabled: true cert_manager: enabled: true datalinker: @@ -16,20 +15,20 @@ exposurelog: enabled: false gafaelfawr: enabled: true +hips: + enabled: true +ingress_nginx: + enabled: true mobu: enabled: true moneypenny: enabled: true -ingress_nginx: - enabled: true narrativelog: enabled: false noteburst: enabled: true nublado2: enabled: true -obstap: - enabled: true plot_navigator: enabled: false portal: @@ -38,6 +37,8 @@ postgres: enabled: true sasquatch: enabled: true +production_tools: + enabled: false semaphore: enabled: true sherlock: @@ -46,15 +47,23 @@ squareone: enabled: true squash_api: enabled: false +sqlproxy_cross_project: + enabled: true strimzi: - enabled: false + enabled: true strimzi_registry_operator: enabled: false tap: enabled: true tap_schema: enabled: true +telegraf: + enabled: true +telegraf-ds: + enabled: true +times_square: + enabled: true vault_secrets_operator: enabled: true vo_cutouts: - enabled: false + enabled: true diff --git a/science-platform/values-idfint.yaml b/science-platform/values-idfint.yaml index 829ca638d1..60750530f0 100644 --- a/science-platform/values-idfint.yaml +++ b/science-platform/values-idfint.yaml @@ -1,13 +1,12 @@ environment: idfint fqdn: data-int.lsst.cloud vault_path_prefix: secret/k8s_operator/data-int.lsst.cloud +butlerRepositoryIndex: "s3://butler-us-central1-repo-locations/data-int-repos.yaml" alert_stream_broker: enabled: true cachemachine: enabled: true -cert_issuer: - enabled: true cert_manager: enabled: true datalinker: @@ 
-16,20 +15,22 @@ exposurelog: enabled: false gafaelfawr: enabled: true +hips: + enabled: true +ingress_nginx: + enabled: true +linters: + enabled: true mobu: enabled: true moneypenny: enabled: true -ingress_nginx: - enabled: true narrativelog: enabled: false noteburst: enabled: false nublado2: enabled: true -obstap: - enabled: true plot_navigator: enabled: true portal: @@ -37,23 +38,29 @@ portal: postgres: enabled: true sasquatch: - enabled: false + enabled: true +production_tools: + enabled: true semaphore: enabled: true sherlock: enabled: true squareone: enabled: true -squash_api: - enabled: false strimzi: enabled: true strimzi_registry_operator: - enabled: true + enabled: false tap: enabled: true tap_schema: enabled: true +telegraf: + enabled: true +telegraf-ds: + enabled: true +times_square: + enabled: false vault_secrets_operator: enabled: true vo_cutouts: diff --git a/science-platform/values-idfprod.yaml b/science-platform/values-idfprod.yaml index afb59831be..135031a9a1 100644 --- a/science-platform/values-idfprod.yaml +++ b/science-platform/values-idfprod.yaml @@ -1,13 +1,12 @@ environment: idfprod fqdn: data.lsst.cloud vault_path_prefix: secret/k8s_operator/data.lsst.cloud +butlerRepositoryIndex: "s3://butler-us-central1-repo-locations/data-repos.yaml" alert_stream_broker: enabled: false cachemachine: enabled: true -cert_issuer: - enabled: true cert_manager: enabled: true datalinker: @@ -16,20 +15,20 @@ exposurelog: enabled: false gafaelfawr: enabled: true +hips: + enabled: true +ingress_nginx: + enabled: true mobu: enabled: true moneypenny: enabled: true -ingress_nginx: - enabled: true narrativelog: enabled: false noteburst: enabled: false nublado2: enabled: true -obstap: - enabled: true plot_navigator: enabled: false portal: @@ -38,6 +37,8 @@ postgres: enabled: true sasquatch: enabled: false +production_tools: + enabled: false semaphore: enabled: true sherlock: @@ -54,7 +55,13 @@ tap: enabled: true tap_schema: enabled: true +telegraf: + enabled: true 
+telegraf-ds: + enabled: true +times_square: + enabled: false vault_secrets_operator: enabled: true vo_cutouts: - enabled: false + enabled: true diff --git a/science-platform/values-minikube.yaml b/science-platform/values-minikube.yaml index 61a43e4981..72f26c9c4d 100644 --- a/science-platform/values-minikube.yaml +++ b/science-platform/values-minikube.yaml @@ -6,8 +6,6 @@ alert_stream_broker: enabled: false cachemachine: enabled: true -cert_issuer: - enabled: false cert_manager: enabled: true datalinker: @@ -16,20 +14,20 @@ exposurelog: enabled: false gafaelfawr: enabled: true +hips: + enabled: true +ingress_nginx: + enabled: true mobu: enabled: true moneypenny: enabled: true -ingress_nginx: - enabled: true narrativelog: enabled: false noteburst: enabled: true nublado2: enabled: true -obstap: - enabled: true plot_navigator: enabled: false portal: @@ -38,6 +36,8 @@ postgres: enabled: true sasquatch: enabled: false +production_tools: + enabled: false semaphore: enabled: true sherlock: @@ -54,6 +54,12 @@ tap: enabled: true tap_schema: enabled: true +telegraf: + enabled: false +telegraf-ds: + enabled: false +times_square: + enabled: false vault_secrets_operator: enabled: true vo_cutouts: diff --git a/science-platform/values-red-five.yaml b/science-platform/values-red-five.yaml deleted file mode 100644 index 21b98f9439..0000000000 --- a/science-platform/values-red-five.yaml +++ /dev/null @@ -1,58 +0,0 @@ -environment: red-five -fqdn: red-five.lsst.codes -vault_path_prefix: secret/k8s_operator/red-five.lsst.codes - -alert_stream_broker: - enabled: false -cachemachine: - enabled: true -cert_issuer: - enabled: true -cert_manager: - enabled: true -datalinker: - enabled: true -exposurelog: - enabled: false -gafaelfawr: - enabled: true -mobu: - enabled: true -ingress_nginx: - enabled: true -moneypenny: - enabled: true -narrativelog: - enabled: false -noteburst: - enabled: false -nublado2: - enabled: true -obstap: - enabled: true -plot_navigator: - enabled: false -portal: - 
enabled: true -postgres: - enabled: true -sasquatch: - enabled: false -semaphore: - enabled: false -squareone: - enabled: true -squash_api: - enabled: false -strimzi: - enabled: false -strimzi_registry_operator: - enabled: false -tap: - enabled: true -tap_schema: - enabled: true -vault_secrets_operator: - enabled: true -vo_cutouts: - enabled: false diff --git a/science-platform/values-roe.yaml b/science-platform/values-roe.yaml index 02b6bd9779..4a588487a4 100644 --- a/science-platform/values-roe.yaml +++ b/science-platform/values-roe.yaml @@ -6,8 +6,6 @@ alert_stream_broker: enabled: false cachemachine: enabled: true -cert_issuer: - enabled: true cert_manager: enabled: true datalinker: @@ -16,20 +14,20 @@ exposurelog: enabled: false gafaelfawr: enabled: true +hips: + enabled: false +ingress_nginx: + enabled: true mobu: enabled: true moneypenny: enabled: true -ingress_nginx: - enabled: true narrativelog: enabled: false noteburst: enabled: false nublado2: enabled: true -obstap: - enabled: false plot_navigator: enabled: false portal: @@ -38,6 +36,8 @@ postgres: enabled: true sasquatch: enabled: false +production_tools: + enabled: false semaphore: enabled: false squareone: @@ -50,6 +50,12 @@ tap: enabled: true tap_schema: enabled: true +telegraf: + enabled: false +telegraf-ds: + enabled: false +times_square: + enabled: false vault_secrets_operator: enabled: true vo_cutouts: diff --git a/science-platform/values-squash-sandbox.yaml b/science-platform/values-squash-sandbox.yaml deleted file mode 100644 index 6ce8cd7738..0000000000 --- a/science-platform/values-squash-sandbox.yaml +++ /dev/null @@ -1,58 +0,0 @@ -environment: squash-sandbox -fqdn: squash-sandbox.lsst.codes -vault_path_prefix: secret/k8s_operator/squash-sandbox.lsst.codes - -alert_stream_broker: - enabled: false -cachemachine: - enabled: false -cert_issuer: - enabled: true -cert_manager: - enabled: true -datalinker: - enabled: false -exposurelog: - enabled: false -gafaelfawr: - enabled: true -mobu: - 
enabled: false -moneypenny: - enabled: false -ingress_nginx: - enabled: true -narrativelog: - enabled: false -noteburst: - enabled: false -nublado2: - enabled: false -obstap: - enabled: false -plot_navigator: - enabled: false -portal: - enabled: false -postgres: - enabled: true -sasquatch: - enabled: false -semaphore: - enabled: false -squareone: - enabled: false -squash_api: - enabled: true -strimzi: - enabled: false -strimzi_registry_operator: - enabled: false -tap: - enabled: false -tap_schema: - enabled: false -vault_secrets_operator: - enabled: true -vo_cutouts: - enabled: false diff --git a/science-platform/values-stable.yaml b/science-platform/values-stable.yaml deleted file mode 100644 index 19e55ae044..0000000000 --- a/science-platform/values-stable.yaml +++ /dev/null @@ -1,60 +0,0 @@ -environment: stable -fqdn: lsst-lsp-stable.ncsa.illinois.edu -vault_path_prefix: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu - -alert_stream_broker: - enabled: false -cachemachine: - enabled: true -cert_issuer: - enabled: false -cert_manager: - enabled: false -datalinker: - enabled: true -exposurelog: - enabled: false -gafaelfawr: - enabled: true -mobu: - enabled: true -moneypenny: - enabled: true -ingress_nginx: - enabled: false -narrativelog: - enabled: false -noteburst: - enabled: false -nublado2: - enabled: true -obstap: - enabled: true -plot_navigator: - enabled: false -portal: - enabled: true -postgres: - enabled: true -sasquatch: - enabled: false -semaphore: - enabled: false -sherlock: - enabled: true -squareone: - enabled: true -squash_api: - enabled: false -strimzi: - enabled: false -strimzi_registry_operator: - enabled: false -tap: - enabled: true -tap_schema: - enabled: true -vault_secrets_operator: - enabled: true -vo_cutouts: - enabled: false diff --git a/science-platform/values-summit.yaml b/science-platform/values-summit.yaml index 7502399305..ba6309972b 100644 --- a/science-platform/values-summit.yaml +++ b/science-platform/values-summit.yaml @@ 
-6,8 +6,6 @@ alert_stream_broker: enabled: false cachemachine: enabled: true -cert_issuer: - enabled: true cert_manager: enabled: true datalinker: @@ -16,20 +14,20 @@ exposurelog: enabled: true gafaelfawr: enabled: true +hips: + enabled: false +ingress_nginx: + enabled: true mobu: enabled: false moneypenny: enabled: true -ingress_nginx: - enabled: true narrativelog: enabled: true noteburst: enabled: false nublado2: enabled: true -obstap: - enabled: false plot_navigator: enabled: false portal: @@ -37,6 +35,8 @@ portal: postgres: enabled: true sasquatch: + enabled: true +production_tools: enabled: false semaphore: enabled: false @@ -47,13 +47,19 @@ squareone: squash_api: enabled: false strimzi: - enabled: false + enabled: true strimzi_registry_operator: enabled: false tap: enabled: false tap_schema: enabled: false +telegraf: + enabled: true +telegraf-ds: + enabled: true +times_square: + enabled: false vault_secrets_operator: enabled: true vo_cutouts: diff --git a/science-platform/values-tucson-teststand.yaml b/science-platform/values-tucson-teststand.yaml index e346915c05..57a43e17e3 100644 --- a/science-platform/values-tucson-teststand.yaml +++ b/science-platform/values-tucson-teststand.yaml @@ -6,8 +6,6 @@ alert_stream_broker: enabled: false cachemachine: enabled: true -cert_issuer: - enabled: true cert_manager: enabled: true datalinker: @@ -16,27 +14,29 @@ exposurelog: enabled: true gafaelfawr: enabled: true +hips: + enabled: false +ingress_nginx: + enabled: true mobu: enabled: false moneypenny: enabled: true -ingress_nginx: - enabled: true narrativelog: enabled: true noteburst: enabled: false nublado2: enabled: true -obstap: - enabled: false plot_navigator: enabled: false portal: enabled: true postgres: - enabled: true + enabled: false sasquatch: + enabled: true +production_tools: enabled: false semaphore: enabled: false @@ -45,13 +45,19 @@ squareone: squash_api: enabled: false strimzi: - enabled: false + enabled: true strimzi_registry_operator: enabled: false 
tap: enabled: false tap_schema: enabled: false +telegraf: + enabled: true +telegraf-ds: + enabled: true +times_square: + enabled: false vault_secrets_operator: enabled: true vo_cutouts: diff --git a/science-platform/values.yaml b/science-platform/values.yaml index aecf274369..bb5c66285c 100644 --- a/science-platform/values.yaml +++ b/science-platform/values.yaml @@ -2,8 +2,6 @@ alert_stream_broker: enabled: false cachemachine: enabled: false -cert_issuer: - enabled: false cert_manager: enabled: false datalinker: @@ -12,8 +10,12 @@ exposurelog: enabled: false gafaelfawr: enabled: false +hips: + enabled: false ingress_nginx: enabled: false +linters: + enabled: false mobu: enabled: false moneypenny: @@ -24,8 +26,6 @@ noteburst: enabled: false nublado2: enabled: false -obstap: - enabled: false plot_navigator: enabled: false portal: @@ -34,10 +34,14 @@ postgres: enabled: false sasquatch: enabled: false +production_tools: + enabled: false semaphore: enabled: false sherlock: enabled: false +sqlproxy_cross_project: + enabled: false squareone: enabled: false squash_api: @@ -50,6 +54,12 @@ tap: enabled: false tap_schema: enabled: false +telegraf: + enabled: false +telegraf-ds: + enabled: false +times_square: + enabled: false vault_secrets_operator: enabled: false vo_cutouts: diff --git a/services/alert-stream-broker/.idea/.gitignore b/services/alert-stream-broker/.idea/.gitignore new file mode 100644 index 0000000000..13566b81b0 --- /dev/null +++ b/services/alert-stream-broker/.idea/.gitignore @@ -0,0 +1,8 @@ +# Default ignored files +/shelf/ +/workspace.xml +# Editor-based HTTP Client requests +/httpRequests/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml diff --git a/services/alert-stream-broker/Chart.yaml b/services/alert-stream-broker/Chart.yaml index b213142182..a6042337fe 100644 --- a/services/alert-stream-broker/Chart.yaml +++ b/services/alert-stream-broker/Chart.yaml @@ -1,10 +1,13 @@ apiVersion: v2 name: alert-stream-broker version: 
"3" +description: Alert transmission to community brokers +sources: + - https://github.com/lsst-dm/alert_database_ingester + - https://github.com/lsst-dm/alert-stream-simulator dependencies: - name: alert-stream-broker version: 2.5.1 - repository: https://lsst-sqre.github.io/charts/ # The schema registry is bundled together in the same application as the # Kafka broker because Strimzi Registry Operator expects everything (the @@ -12,7 +15,6 @@ dependencies: # resource) to be in the same namespace. - name: alert-stream-schema-registry version: 2.1.0 - repository: https://lsst-sqre.github.io/charts/ # alert-stream-simulator is bundled together with the broker too for a # similar reason: the Strimzi EntityOperator can only watch a single @@ -21,8 +23,22 @@ dependencies: # connect. - name: alert-stream-simulator version: 1.6.2 - repository: https://lsst-sqre.github.io/charts/ - name: alert-database version: 2.1.0 + + - name: strimzi-registry-operator + version: 2.1.0 repository: https://lsst-sqre.github.io/charts/ + +annotations: + phalanx.lsst.io/docs: | + - id: "DMTN-093" + title: "Design of the LSST Alert Distribution System" + url: "https://dmtn-093.lsst.io/" + - id: "DMTN-210" + title: "Implementation of the LSST Alert Distribution System" + url: "https://dmtn-210.lsst.io/" + - id: "DMTN-214" + title: "Alert Distribution System Operator's Manual" + url: "https://dmtn-214.lsst.io/" diff --git a/services/alert-stream-broker/README.md b/services/alert-stream-broker/README.md new file mode 100644 index 0000000000..cffc94fec9 --- /dev/null +++ b/services/alert-stream-broker/README.md @@ -0,0 +1,17 @@ +# alert-stream-broker + +Alert transmission to community brokers + +## Source Code + +* +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| strimzi-registry-operator.clusterName | string | `"alert-broker"` | | +| strimzi-registry-operator.clusterNamespace | string | `"alert-stream-broker"` | | +| 
strimzi-registry-operator.operatorNamespace | string | `"alert-stream-broker"` | | +| strimzi-registry-operator.watchNamespace | string | `"alert-stream-broker"` | | diff --git a/services/alert-stream-broker/charts/alert-database/.helmignore b/services/alert-stream-broker/charts/alert-database/.helmignore new file mode 100644 index 0000000000..50af031725 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/services/alert-stream-broker/charts/alert-database/Chart.yaml b/services/alert-stream-broker/charts/alert-database/Chart.yaml new file mode 100644 index 0000000000..b2f94ebad0 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v2 +name: alert-database +version: 2.1.0 +description: Archival database of alerts sent through the alert stream. +maintainers: + - name: bsmart + email: drbsmart@uw.edu +appVersion: 1.0.0 +type: application diff --git a/services/alert-stream-broker/charts/alert-database/README.md b/services/alert-stream-broker/charts/alert-database/README.md new file mode 100644 index 0000000000..f08056fd22 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/README.md @@ -0,0 +1,41 @@ +# alert-database + +Archival database of alerts sent through the alert stream. 
+ +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| ingester.gcp.projectID | string | `""` | Project ID which has the above GCP IAM service account | +| ingester.gcp.serviceAccountName | string | `""` | Name of a service account which has credentials granting access to the alert database's backing storage buckets. | +| ingester.image.imagePullPolicy | string | `"IfNotPresent"` | | +| ingester.image.repository | string | `"lsstdm/alert_database_ingester"` | | +| ingester.image.tag | string | `"v2.0.1"` | | +| ingester.kafka.cluster | string | `"alert-broker"` | Name of a Strimzi Kafka cluster to connect to. | +| ingester.kafka.port | int | `9092` | Port to connect to on the Strimzi Kafka cluster. It should be an internal listener that expects SCRAM SHA-512 auth. | +| ingester.kafka.strimziAPIVersion | string | `"v1beta2"` | API version of the Strimzi installation's custom resource definitions | +| ingester.kafka.topic | string | `"alerts-simulated"` | Name of the topic which will holds alert data. | +| ingester.kafka.user | string | `"alert-database-ingester"` | The username of the Kafka user identity used to connect to the broker. | +| ingester.logLevel | string | `"verbose"` | set the log level of the application. can be 'info', or 'debug', or anything else to suppress logging. | +| ingester.schemaRegistryURL | string | `""` | URL of a schema registry instance | +| ingester.serviceAccountName | string | `"alert-database-ingester"` | The name of the Kubernetes ServiceAccount (*not* the Google Cloud IAM service account!) which is used by the alert database ingester. 
| +| ingress.annotations | object | `{}` | | +| ingress.enabled | bool | `true` | Whether to create an ingress | +| ingress.gafaelfawrAuthQuery | string | `"scope=read:alertdb"` | Query string for Gafaelfawr to authorize access | +| ingress.host | string | None, must be set if the ingress is enabled | Hostname for the ingress | +| ingress.path | string | `"/alertdb"` | Subpath to host the alert database application under the ingress | +| ingress.tls | list | `[]` | Configures TLS for the ingress if needed. If multiple ingresses share the same hostname, only one of them needs a TLS configuration. | +| nameOverride | string | `""` | Override the base name for resources | +| server.gcp.projectID | string | `""` | Project ID which has the above GCP IAM service account | +| server.gcp.serviceAccountName | string | `""` | Name of a service account which has credentials granting access to the alert database's backing storage buckets. | +| server.image.imagePullPolicy | string | `"IfNotPresent"` | | +| server.image.repository | string | `"lsstdm/alert_database_server"` | | +| server.image.tag | string | `"v2.1.0"` | | +| server.logLevel | string | `"verbose"` | set the log level of the application. can be 'info', or 'debug', or anything else to suppress logging. | +| server.service.port | int | `3000` | | +| server.service.type | string | `"ClusterIP"` | | +| server.serviceAccountName | string | `"alertdb-reader"` | The name of the Kubernetes ServiceAccount (*not* the Google Cloud IAM service account!) which is used by the alert database server. 
| +| storage.gcp.alertBucket | string | `""` | Name of a Google Cloud Storage bucket in GCP with alert data | +| storage.gcp.project | string | `""` | Name of a GCP project that has a bucket for database storage | +| storage.gcp.schemaBucket | string | `""` | Name of a Google Cloud Storage bucket in GCP with schema data | diff --git a/services/alert-stream-broker/charts/alert-database/ci/values-idfint.yaml b/services/alert-stream-broker/charts/alert-database/ci/values-idfint.yaml new file mode 100644 index 0000000000..d839602b98 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/ci/values-idfint.yaml @@ -0,0 +1,24 @@ +ingester: + schemaRegistryURL: https://alert-schemas-int.lsst.cloud + + serviceAccountName: alert-database-writer + + gcp: + serviceAccountName: alertdb-writer + projectID: science-platform-int-dc5d + +storage: + gcp: + project: science-platform-int-dc5d + alertBucket: rubin-alertdb-int-us-central1-packets + schemaBucket: rubin-alertdb-int-us-central1-schemas + +ingress: + host: data-int.lsst.cloud + +server: + serviceAccountName: alert-database-reader + + gcp: + serviceAccountName: alertdb-reader + projectID: science-platform-int-dc5d diff --git a/services/alert-stream-broker/charts/alert-database/templates/_helpers.tpl b/services/alert-stream-broker/charts/alert-database/templates/_helpers.tpl new file mode 100644 index 0000000000..b315385468 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/templates/_helpers.tpl @@ -0,0 +1,65 @@ +{{/* -*- go-template -*- */}} + +{{- define "alertDatabase.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "alertDatabase.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "alertDatabase.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* Name for the ingester */}} +{{- define "alertDatabase.ingesterName" -}} +{{- printf "%s-ingester-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* Name for the server */}} +{{- define "alertDatabase.serverName" -}} +{{- printf "%s-server-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "alertDatabase.labels" -}} +helm.sh/chart: {{ include "alertDatabase.chart" . }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Ingester selector labels +*/}} +{{- define "alertDatabase.ingesterSelectorLabels" -}} +app.kubernetes.io/name: {{ include "alertDatabase.ingesterName" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Server selector labels +*/}} +{{- define "alertDatabase.serverSelectorLabels" -}} +app.kubernetes.io/name: {{ include "alertDatabase.serverName" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/alert-stream-broker/charts/alert-database/templates/ingester-deployment.yaml b/services/alert-stream-broker/charts/alert-database/templates/ingester-deployment.yaml new file mode 100644 index 0000000000..cd794da932 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/templates/ingester-deployment.yaml @@ -0,0 +1,54 @@ +apiVersion: apps/v1 +kind: Deployment + +metadata: + name: {{ template "alertDatabase.ingesterName" . }} + labels: + {{- include "alertDatabase.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "alertDatabase.ingesterSelectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "alertDatabase.ingesterSelectorLabels" . | nindent 8 }} + spec: + containers: + - name: "alert-database-ingester" + image: "{{ .Values.ingester.image.repository }}:{{ .Values.ingester.image.tag }}" + volumeMounts: + - name: "kafka-client-secret" + mountPath: "/etc/kafka-client-secret" + readOnly: True + - name: "kafka-server-ca-cert" + mountPath: "/etc/kafka-server-ca-cert" + readOnly: True + command: + - "alertdb-ingester" + - "--kafka-host={{ .Values.ingester.kafka.cluster }}-kafka-bootstrap:{{ .Values.ingester.kafka.port }}" + - "--kafka-topic={{ .Values.ingester.kafka.topic }}" + - "--tls-client-key-location=/etc/kafka-client-secret/user.key" + - "--tls-client-crt-location=/etc/kafka-client-secret/user.crt" + - "--tls-server-ca-crt-location=/etc/kafka-server-ca-cert/ca.crt" + - "--kafka-auth-mechanism=mtls" + - "--schema-registry-address={{ required "A schema registry URL is required " .Values.ingester.schemaRegistryURL }}" + - "--gcp-project={{ required "A GCP project is required " .Values.storage.gcp.project }}" + - "--gcp-bucket-alerts={{ required "A GCP bucket name is required " .Values.storage.gcp.alertBucket }}" + - "--gcp-bucket-schemas={{ required "A GCP bucket name is required " 
.Values.storage.gcp.schemaBucket }}" + {{- if eq .Values.ingester.logLevel "debug" }} + - "--debug" + {{- end }} + {{- if eq .Values.ingester.logLevel "verbose" }} + - "--verbose" + {{- end }} + + volumes: + - name: "kafka-client-secret" + secret: + secretName: "{{ .Values.ingester.kafka.user}}" + - name: "kafka-server-ca-cert" + secret: + secretName: "{{ .Values.ingester.kafka.cluster}}-cluster-ca-cert" + serviceAccountName: "{{ .Values.ingester.serviceAccountName }}" diff --git a/services/alert-stream-broker/charts/alert-database/templates/ingester-serviceaccount.yaml b/services/alert-stream-broker/charts/alert-database/templates/ingester-serviceaccount.yaml new file mode 100644 index 0000000000..f6e1df0408 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/templates/ingester-serviceaccount.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Values.ingester.serviceAccountName }} + annotations: + # The following annotation connects the Kubernetes ServiceAccount to a GCP + # IAM Service Account, granting access to resources on GCP, via the + # "Workload Identity" framework. 
+ # + # https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity + iam.gke.io/gcp-service-account: "{{ .Values.ingester.gcp.serviceAccountName }}@{{ .Values.ingester.gcp.projectID }}.iam.gserviceaccount.com" diff --git a/services/alert-stream-broker/charts/alert-database/templates/ingress.yaml b/services/alert-stream-broker/charts/alert-database/templates/ingress.yaml new file mode 100644 index 0000000000..083c96d39b --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/templates/ingress.yaml @@ -0,0 +1,38 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + kubernetes.io/ingress.class: "nginx" + nginx.ingress.kubernetes.io/rewrite-target: /$2 + nginx.ingress.kubernetes.io/auth-method: "GET" + nginx.ingress.kubernetes.io/auth-url: "http://gafaelfawr.gafaelfawr.svc.cluster.local:8080/auth?{{ required "ingress.gafaelfawrAuthQuery must be set" .Values.ingress.gafaelfawrAuthQuery }}" + {{- with .Values.ingress.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ template "alertDatabase.fullname" . }} + labels: + {{- include "alertDatabase.labels" . | nindent 4 }} +spec: + rules: + - host: {{ required "ingress.host must be set" .Values.ingress.host | quote }} + http: + paths: + - path: "{{ .Values.ingress.path }}(/|$)(.*)" + pathType: Prefix + backend: + service: + name: {{ template "alertDatabase.fullname" . }} + port: + name: http + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . 
| quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} +{{- end }} diff --git a/services/alert-stream-broker/charts/alert-database/templates/kafka-user.yaml b/services/alert-stream-broker/charts/alert-database/templates/kafka-user.yaml new file mode 100644 index 0000000000..1cf0896802 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/templates/kafka-user.yaml @@ -0,0 +1,47 @@ +apiVersion: kafka.strimzi.io/{{ .Values.ingester.kafka.strimziAPIVersion }} +kind: KafkaUser +metadata: + name: {{ .Values.ingester.kafka.user }} + labels: + strimzi.io/cluster: {{ .Values.ingester.kafka.cluster }} +spec: + authentication: + type: tls + authorization: + type: simple + acls: + # Allow read and describe on the source topic + - resource: + type: topic + name: {{ .Values.ingester.kafka.topic }} + patternType: literal + operation: Read + type: allow + - resource: + type: topic + name: {{ .Values.ingester.kafka.topic }} + patternType: literal + operation: Describe + type: allow + + # Allow all on the __consumer_offsets topic + - resource: + type: topic + name: "__consumer_offsets" + patternType: literal + operation: All + type: allow + + # Allow running as a consumer group + - resource: + type: group + name: "*" + patternType: literal + operation: Describe + type: allow + - resource: + type: group + name: "*" + patternType: literal + operation: Read + type: allow diff --git a/services/alert-stream-broker/charts/alert-database/templates/server-deployment.yaml b/services/alert-stream-broker/charts/alert-database/templates/server-deployment.yaml new file mode 100644 index 0000000000..62839837a4 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/templates/server-deployment.yaml @@ -0,0 +1,48 @@ +apiVersion: apps/v1 +kind: Deployment + +metadata: + name: {{ template "alertDatabase.serverName" . }} + labels: + {{- include "alertDatabase.labels" . 
| nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "alertDatabase.serverSelectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "alertDatabase.serverSelectorLabels" . | nindent 8 }} + spec: + containers: + - name: "alert-database-server" + image: "{{ .Values.server.image.repository }}:{{ .Values.server.image.tag }}" + + ports: + - name: http + containerPort: 3000 + protocol: TCP + + livenessProbe: + httpGet: + path: /v1/health + port: http + + command: + - "alertdb" + - "--listen-host=0.0.0.0" + - "--listen-port=3000" + - "--backend=google-cloud" + - "--gcp-project={{ required "A GCP project is required " .Values.storage.gcp.project }}" + - "--gcp-bucket-alerts={{ required "A GCP bucket name is required " .Values.storage.gcp.alertBucket }}" + - "--gcp-bucket-schemas={{ required "A GCP bucket name is required " .Values.storage.gcp.schemaBucket }}" + {{- if eq .Values.ingester.logLevel "debug" }} + - "--debug" + {{- end }} + {{- if eq .Values.ingester.logLevel "verbose" }} + - "--verbose" + {{- end }} + + + serviceAccountName: "{{ .Values.server.serviceAccountName }}" diff --git a/services/alert-stream-broker/charts/alert-database/templates/server-serviceaccount.yaml b/services/alert-stream-broker/charts/alert-database/templates/server-serviceaccount.yaml new file mode 100644 index 0000000000..51dc67ccff --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/templates/server-serviceaccount.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ .Values.server.serviceAccountName }} + annotations: + # The following annotation connects the Kubernetes ServiceAccount to a GCP + # IAM Service Account, granting access to resources on GCP, via the + # "Workload Identity" framework. 
+ # + # https://cloud.google.com/kubernetes-engine/docs/how-to/workload-identity + iam.gke.io/gcp-service-account: "{{ .Values.server.gcp.serviceAccountName }}@{{ .Values.server.gcp.projectID }}.iam.gserviceaccount.com" diff --git a/services/alert-stream-broker/charts/alert-database/templates/service.yaml b/services/alert-stream-broker/charts/alert-database/templates/service.yaml new file mode 100644 index 0000000000..306e9900dd --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "alertDatabase.fullname" . }} + labels: + {{- include "alertDatabase.labels" . | nindent 4 }} +spec: + type: {{ .Values.server.service.type }} + ports: + - port: {{ .Values.server.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "alertDatabase.serverSelectorLabels" . | nindent 4 }} diff --git a/services/alert-stream-broker/charts/alert-database/values.yaml b/services/alert-stream-broker/charts/alert-database/values.yaml new file mode 100644 index 0000000000..d1005c263c --- /dev/null +++ b/services/alert-stream-broker/charts/alert-database/values.yaml @@ -0,0 +1,105 @@ +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# Configuration for the ingester, which pulls data out of Kafka and writes +# it to the database backend. + +ingester: + image: + repository: lsstdm/alert_database_ingester + tag: v2.0.1 + imagePullPolicy: IfNotPresent + + kafka: + # -- Name of a Strimzi Kafka cluster to connect to. + cluster: alert-broker + + # -- Port to connect to on the Strimzi Kafka cluster. It should be an + # internal listener that expects SCRAM SHA-512 auth. + port: 9092 + + # -- The username of the Kafka user identity used to connect to the broker. + user: alert-database-ingester + + # -- Name of the topic which will holds alert data. 
+ topic: alerts-simulated + + # -- API version of the Strimzi installation's custom resource definitions + strimziAPIVersion: v1beta2 + + # -- URL of a schema registry instance + schemaRegistryURL: "" + + gcp: + # -- Name of a service account which has credentials granting access to the + # alert database's backing storage buckets. + serviceAccountName: "" + # -- Project ID which has the above GCP IAM service account + projectID: "" + + # -- The name of the Kubernetes ServiceAccount (*not* the Google Cloud IAM + # service account!) which is used by the alert database ingester. + serviceAccountName: alert-database-ingester + + # -- set the log level of the application. can be 'info', or 'debug', or + # anything else to suppress logging. + logLevel: verbose + +server: + image: + repository: lsstdm/alert_database_server + tag: v2.1.0 + imagePullPolicy: IfNotPresent + + gcp: + # -- Name of a service account which has credentials granting access to the + # alert database's backing storage buckets. + serviceAccountName: "" + # -- Project ID which has the above GCP IAM service account + projectID: "" + + # -- The name of the Kubernetes ServiceAccount (*not* the Google Cloud IAM + # service account!) which is used by the alert database server. + serviceAccountName: alertdb-reader + + # -- set the log level of the application. can be 'info', or 'debug', or + # anything else to suppress logging. 
+ logLevel: verbose + + service: + type: ClusterIP + port: 3000 + + +storage: + gcp: + # -- Name of a GCP project that has a bucket for database storage + project: "" + # -- Name of a Google Cloud Storage bucket in GCP with alert data + alertBucket: "" + # -- Name of a Google Cloud Storage bucket in GCP with schema data + schemaBucket: "" + +ingress: + # -- Whether to create an ingress + enabled: true + + # Additional annotations to add to the ingress + annotations: {} + + # -- Hostname for the ingress + # @default -- None, must be set if the ingress is enabled + host: "" + + # -- Configures TLS for the ingress if needed. If multiple ingresses share + # the same hostname, only one of them needs a TLS configuration. + tls: [] + + # -- Subpath to host the alert database application under the ingress + path: "/alertdb" + + # -- Query string for Gafaelfawr to authorize access + gafaelfawrAuthQuery: "scope=read:alertdb" diff --git a/services/alert-stream-broker/charts/alert-stream-broker/.helmignore b/services/alert-stream-broker/charts/alert-stream-broker/.helmignore new file mode 100644 index 0000000000..50af031725 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-broker/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/services/alert-stream-broker/charts/alert-stream-broker/Chart.yaml b/services/alert-stream-broker/charts/alert-stream-broker/Chart.yaml new file mode 100644 index 0000000000..41df3cce85 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-broker/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v2 +name: alert-stream-broker +version: 2.5.1 +description: Kafka broker cluster for distributing alerts +maintainers: + - name: bsmart + email: drbsmart@uw.edu +appVersion: 1.0.0 +type: application diff --git a/services/alert-stream-broker/charts/alert-stream-broker/README.md b/services/alert-stream-broker/charts/alert-stream-broker/README.md new file mode 100644 index 0000000000..394f840d4c --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-broker/README.md @@ -0,0 +1,41 @@ +# alert-stream-broker + +Kafka broker cluster for distributing alerts + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| cluster.name | string | `"alert-broker"` | Name used for the Kafka broker, and used by Strimzi for many annotations. | +| fullnameOverride | string | `""` | Override for the full name used for Kubernetes resources; by default one will be created based on the chart name and helm release name. | +| kafka.config | object | `{"log.retention.bytes":"42949672960","log.retention.hours":168,"offsets.retention.minutes":1440}` | Configuration overrides for the Kafka server. | +| kafka.config."log.retention.bytes" | string | `"42949672960"` | Maximum retained number of bytes for a broker's data. This is a string to avoid YAML type conversion issues for large numbers. | +| kafka.config."log.retention.hours" | int | `168` | Number of hours for a brokers data to be retained. 
| +| kafka.config."offsets.retention.minutes" | int | `1440` | Number of minutes for a consumer group's offsets to be retained. | +| kafka.externalListener.bootstrap.annotations | object | `{}` | | +| kafka.externalListener.bootstrap.host | string | `""` | Hostname that should be used by clients who want to connect to the broker through the bootstrap address. | +| kafka.externalListener.bootstrap.ip | string | `""` | IP address that should be used by the broker's external bootstrap load balancer for access from the internet. The format of this is a string like "192.168.1.1". | +| kafka.externalListener.brokers | list | `[]` | List of hostname and IP for each broker. The format of this is a list of maps with 'ip' and 'host' keys. For example: - ip: "192.168.1.1" host: broker-0.example - ip: "192.168.1.2" host: broker-1.example Each replica should get a host and IP. If these are unset, then IP addresses will be chosen automatically by the Kubernetes cluster's LoadBalancer controller, and hostnames will be unset, which will break TLS connections. | +| kafka.externalListener.tls.certIssuerName | string | `"letsencrypt-dns"` | Name of the certificate issuer. | +| kafka.externalListener.tls.enabled | bool | `false` | Whether TLS encryption is enabled. | +| kafka.interBrokerProtocolVersion | float | `3.2` | Version of the protocol for inter-broker communication, see https://strimzi.io/docs/operators/latest/deploying.html#ref-kafka-versions-str. | +| kafka.logMessageFormatVersion | float | `3.2` | Encoding version for messages, see https://strimzi.io/docs/operators/latest/deploying.html#ref-kafka-versions-str. | +| kafka.nodePool.affinities | list | `[{"key":"kafka","value":"ok"}]` | List of node affinities to set for the broker's nodes. The key should be a label key, and the value should be a label value, and then the broker will prefer running Kafka and Zookeeper on nodes with those key-value pairs. 
| +| kafka.nodePool.tolerations | list | `[{"effect":"NoSchedule","key":"kafka","value":"ok"}]` | List of taint tolerations when scheduling the broker's pods onto nodes. The key should be a taint key, the value should be a taint value, and effect should be a taint effect that can be tolerated (ignored) when scheduling the broker's Kafka and Zookeeper pods. | +| kafka.replicas | int | `3` | Number of Kafka broker replicas to run. | +| kafka.storage.size | string | `"1000Gi"` | Size of the backing storage disk for each of the Kafka brokers. | +| kafka.storage.storageClassName | string | `"standard"` | Name of a StorageClass to use when requesting persistent volumes. | +| kafka.version | string | `"3.2.3"` | Version of Kafka to deploy. | +| nameOverride | string | `""` | | +| strimziAPIVersion | string | `"v1beta2"` | Version of the Strimzi Custom Resource API. The correct value depends on the deployed version of Strimzi. See [this blog post](https://strimzi.io/blog/2021/04/29/api-conversion/) for more. | +| superusers | list | `["kafka-admin"]` | A list of usernames for users who should have global admin permissions. These users will be created, along with their credentials. | +| tls.certIssuerName | string | `"letsencrypt-dns"` | Name of a ClusterIssuer capable of provisioning a TLS certificate for the broker. | +| tls.subject.organization | string | `"Vera C. Rubin Observatory"` | Organization to use in the 'Subject' field of the broker's TLS certificate. | +| users | list | `[{"groups":["rubin-testing"],"readonlyTopics":["alert-stream","alerts-simulated"],"username":"rubin-testing"}]` | A list of users that should be created and granted access. Passwords for these users are not generated automatically; they are expected to be stored as 1Password secrets which are replicated into Vault. Each username should have a "{{ $username }}-password" secret associated with it. 
| +| users[0].groups | list | `["rubin-testing"]` | A list of string prefixes for groups that the user should get admin access to, allowing them to create, delete, describe, etc consumer groups. Note that these are prefix-matched, not just literal exact matches. | +| users[0].readonlyTopics | list | `["alert-stream","alerts-simulated"]` | A list of topics that the user should get read-only access to. | +| users[0].username | string | `"rubin-testing"` | The username for the user that should be created. | +| vaultSecretsPath | string | `""` | Path to the secret resource in Vault | +| zookeeper.replicas | int | `3` | Number of Zookeeper replicas to run. | +| zookeeper.storage.size | string | `"1000Gi"` | Size of the backing storage disk for each of the Zookeeper instances. | +| zookeeper.storage.storageClassName | string | `"standard"` | Name of a StorageClass to use when requesting persistent volumes. | diff --git a/services/alert-stream-broker/charts/alert-stream-broker/templates/_helpers.tpl b/services/alert-stream-broker/charts/alert-stream-broker/templates/_helpers.tpl new file mode 100644 index 0000000000..edfa089a03 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-broker/templates/_helpers.tpl @@ -0,0 +1,17 @@ +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "alertStreamBroker.fullname" -}} + {{- if .Values.fullnameOverride }} + {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} + {{- else }} + {{- $name := default .Chart.Name .Values.nameOverride }} + {{- if contains $name .Release.Name }} + {{- .Release.Name | trunc 63 | trimSuffix "-" }} + {{- else }} + {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} + {{- end }} + {{- end }} +{{- end }} diff --git a/services/alert-stream-broker/charts/alert-stream-broker/templates/certs.yaml b/services/alert-stream-broker/charts/alert-stream-broker/templates/certs.yaml new file mode 100644 index 0000000000..94bfb26a72 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-broker/templates/certs.yaml @@ -0,0 +1,23 @@ +{{- if .Values.kafka.externalListener.bootstrap.host }} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ .Values.cluster.name }}-external-tls + +spec: + secretName: {{ .Values.cluster.name }}-external-tls + + issuerRef: + name: {{ .Values.tls.certIssuerName }} + kind: ClusterIssuer + + subject: + organizations: + - {{ .Values.tls.subject.organization }} + + dnsNames: + - {{ .Values.kafka.externalListener.bootstrap.host }} + {{- range $broker := .Values.kafka.externalListener.brokers }} + - {{ $broker.host }} + {{- end }} +{{- end }} diff --git a/services/alert-stream-broker/charts/alert-stream-broker/templates/kafka.yaml b/services/alert-stream-broker/charts/alert-stream-broker/templates/kafka.yaml new file mode 100644 index 0000000000..47bf8244f0 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-broker/templates/kafka.yaml @@ -0,0 +1,165 @@ +apiVersion: kafka.strimzi.io/{{ .Values.strimziAPIVersion }} +kind: Kafka +metadata: + name: {{ .Values.cluster.name }} +spec: + kafka: + version: {{ .Values.kafka.version }} + replicas: {{ .Values.kafka.replicas }} + listeners: + - name: internal + port: 9092 + type: internal + tls: true + authentication: + type: tls + - 
name: tls # Used by the schema registry; it has a fixed name it expects + port: 9093 + type: internal + tls: true + authentication: + type: tls + - name: external + port: 9094 + type: loadbalancer + tls: {{ .Values.kafka.externalListener.tls.enabled}} + authentication: + type: scram-sha-512 + configuration: + {{- /* + + This is complicated looking, but that's just because these are all + optional parameters. They're optional because we don't actually know + the right IP addresses to use on a fresh deployment. + + The LoadBalancer Service type triggers automatic creation of a cloud + load balancer, which will get provisioned with some IP address that + we don't actually choose - it's picked for us. Once that has been + done, these options make it possible to pin the IP address: we can + request the actual IP that we already have. This is important because + it lets us configure a DNS record, associating a hostname with that + pinned IP address. + + */}} + bootstrap: + + {{- if .Values.kafka.externalListener.bootstrap.ip }} + loadBalancerIP: {{ .Values.kafka.externalListener.bootstrap.ip }} + {{- end }} + {{- if .Values.kafka.externalListener.bootstrap.annotations }} + annotations: {{ .Values.kafka.externalListener.bootstrap.annotations }} + {{- end }} + + {{- if .Values.kafka.externalListener.brokers }} + brokers: + {{- range $idx, $broker := .Values.kafka.externalListener.brokers }} + - broker: {{ $idx }} + loadBalancerIP: {{ $broker.ip }} + advertisedHost: {{ $broker.host }} + {{- end }} + {{- end }} + + {{- if and (.Values.kafka.externalListener.tls.enabled) (.Values.kafka.externalListener.bootstrap.host) }} + brokerCertChainAndKey: + secretName: {{ .Values.cluster.name }}-external-tls + certificate: tls.crt + key: tls.key + {{- end }} + + authorization: + type: simple +{{- if .Values.superusers }} + superUsers: +{{- range .Values.superusers }} + - {{ . 
}} +{{- end }} +{{- end }} + + config: + offsets.topic.replication.factor: 3 + transaction.state.log.replication.factor: 3 + transaction.state.log.min.isr: 2 + log.message.format.version: {{ .Values.kafka.logMessageFormatVersion }} + inter.broker.protocol.version: {{ .Values.kafka.interBrokerProtocolVersion }} + ssl.client.auth: required + {{- range $key, $value := .Values.kafka.config }} + {{ $key }}: {{ $value }} + {{- end }} + storage: + type: jbod + volumes: + # Note that storage is configured per replica. If there are 3 replicas, + # and 2 volumes in this array, each replica will get 2 + # PersistentVolumeClaims for the configured size, for a total of 6 + # volumes. + - id: 0 + type: persistent-claim + size: {{ .Values.kafka.storage.size }} + class: {{ .Values.kafka.storage.storageClassName }} + deleteClaim: false + + template: + pod: + {{- if .Values.kafka.nodePool.tolerations }} + tolerations: + {{- range $tol := .Values.kafka.nodePool.tolerations }} + - key: {{ $tol.key }} + operator: "Equal" + value: {{ $tol.value }} + effect: {{ $tol.effect }} + {{- end }} + {{- end }} + + {{- if .Values.kafka.nodePool.affinities }} + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + {{- range $affinity := .Values.kafka.nodePool.affinities }} + - weight: 1 + preference: + matchExpressions: + - key: {{ $affinity.key }} + operator: In + values: [{{ $affinity.value }}] + {{- end }} + {{- end }} + + zookeeper: + replicas: {{ .Values.zookeeper.replicas }} + storage: + # Note that storage is configured per replica. If there are 3 replicas, + # each will get its own PersistentVolumeClaim for the configured size. 
+ type: persistent-claim + size: {{ .Values.zookeeper.storage.size }} + class: {{ .Values.zookeeper.storage.storageClassName }} + deleteClaim: false + + template: + pod: + {{- if .Values.kafka.nodePool.tolerations }} + tolerations: + {{- range $tol := .Values.kafka.nodePool.tolerations }} + - key: {{ $tol.key }} + operator: "Equal" + value: {{ $tol.value }} + effect: {{ $tol.effect }} + {{- end }} + {{- end }} + + {{- if .Values.kafka.nodePool.affinities }} + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + {{- range $affinity := .Values.kafka.nodePool.affinities }} + - weight: 1 + preference: + matchExpressions: + - key: {{ $affinity.key }} + operator: In + values: [{{ $affinity.value }}] + {{- end }} + {{- end }} + + entityOperator: + topicOperator: {} + userOperator: {} diff --git a/services/alert-stream-broker/charts/alert-stream-broker/templates/superuser.yaml b/services/alert-stream-broker/charts/alert-stream-broker/templates/superuser.yaml new file mode 100644 index 0000000000..2812678ddf --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-broker/templates/superuser.yaml @@ -0,0 +1,27 @@ +{{ range $idx, $username := .Values.superusers }} +--- +apiVersion: kafka.strimzi.io/{{ $.Values.strimziAPIVersion }} +kind: KafkaUser +metadata: + name: {{ $username }} + labels: + strimzi.io/cluster: {{ $.Values.cluster.name }} +spec: + authentication: + type: scram-sha-512 + password: + valueFrom: + secretKeyRef: + name: {{ template "alertStreamBroker.fullname" $ }}-secrets + key: {{ $username }}-password + authorization: + type: simple + acls: + - resource: + type: topic + name: "*" + patternType: literal + type: allow + host: "*" + operation: All +{{ end }} diff --git a/services/alert-stream-broker/charts/alert-stream-broker/templates/users.yaml b/services/alert-stream-broker/charts/alert-stream-broker/templates/users.yaml new file mode 100644 index 0000000000..80d484775d --- /dev/null +++ 
b/services/alert-stream-broker/charts/alert-stream-broker/templates/users.yaml @@ -0,0 +1,52 @@ +{{ range $idx, $user := $.Values.users }} +--- +apiVersion: kafka.strimzi.io/{{ $.Values.strimziAPIVersion }} +kind: KafkaUser +metadata: + name: {{ $user.username }} + labels: + strimzi.io/cluster: {{ $.Values.cluster.name }} +spec: + authentication: + type: scram-sha-512 + password: + valueFrom: + secretKeyRef: + name: {{ template "alertStreamBroker.fullname" $ }}-secrets + key: {{ $user.username }}-password + authorization: + type: simple + acls: + {{- range $idx, $topic := $user.readonlyTopics }} + - resource: + type: topic + name: {{ $topic | quote }} + patternType: literal + type: allow + host: "*" + operation: Read + - resource: + type: topic + name: {{ $topic | quote }} + patternType: literal + type: allow + host: "*" + operation: Describe + - resource: + type: topic + name: {{ $topic | quote }} + patternType: literal + type: allow + host: "*" + operation: DescribeConfigs + {{- end }} + {{- range $idx, $group := $user.groups }} + - resource: + type: group + name: {{ $group | quote }} + patternType: prefix + type: allow + host: "*" + operation: All + {{- end }} +{{- end }} diff --git a/services/alert-stream-broker/charts/alert-stream-broker/templates/vault-secret.yaml b/services/alert-stream-broker/charts/alert-stream-broker/templates/vault-secret.yaml new file mode 100644 index 0000000000..d924e52bb7 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-broker/templates/vault-secret.yaml @@ -0,0 +1,7 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ template "alertStreamBroker.fullname" . 
}}-secrets +spec: + path: {{ required "vaultSecretsPath must be set" .Values.vaultSecretsPath | quote }} + type: Opaque diff --git a/services/alert-stream-broker/charts/alert-stream-broker/values.yaml b/services/alert-stream-broker/charts/alert-stream-broker/values.yaml new file mode 100644 index 0000000000..35e107ae7a --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-broker/values.yaml @@ -0,0 +1,135 @@ +# -- Version of the Strimzi Custom Resource API. The correct value depends on +# the deployed version of Strimzi. See [this blog +# post](https://strimzi.io/blog/2021/04/29/api-conversion/) for more. +strimziAPIVersion: v1beta2 + +cluster: + # -- Name used for the Kafka broker, and used by Strimzi for many annotations. + name: alert-broker + +kafka: + # -- Version of Kafka to deploy. + version: 3.2.3 + # -- Encoding version for messages, see + # https://strimzi.io/docs/operators/latest/deploying.html#ref-kafka-versions-str. + logMessageFormatVersion: 3.2 + # -- Version of the protocol for inter-broker communication, see + # https://strimzi.io/docs/operators/latest/deploying.html#ref-kafka-versions-str. + interBrokerProtocolVersion: 3.2 + + # -- Number of Kafka broker replicas to run. + replicas: 3 + + storage: + # -- Size of the backing storage disk for each of the Kafka brokers. + size: 1000Gi + # -- Name of a StorageClass to use when requesting persistent volumes. + storageClassName: standard + + # -- Configuration overrides for the Kafka server. + config: + # -- Number of minutes for a consumer group's offsets to be retained. + offsets.retention.minutes: 1440 + # -- Number of hours for a brokers data to be retained. + log.retention.hours: 168 + # -- Maximum retained number of bytes for a broker's data. This is a string + # to avoid YAML type conversion issues for large numbers. + log.retention.bytes: "42949672960" + + externalListener: + tls: + # -- Whether TLS encryption is enabled. + enabled: false + # -- Name of the certificate issuer. 
+ certIssuerName: "letsencrypt-dns" + bootstrap: + # -- IP address that should be used by the broker's external bootstrap load + # balancer for access from the internet. The format of this is a string like + # "192.168.1.1". + ip: "" + # -- Hostname that should be used by clients who want to connect to the + # broker through the bootstrap address. + host: "" + annotations: {} + + # -- List of hostname and IP for each broker. The format of this is a list + # of maps with 'ip' and 'host' keys. For example: + # + # - ip: "192.168.1.1" + # host: broker-0.example + # - ip: "192.168.1.2" + # host: broker-1.example + # + # Each replica should get a host and IP. If these are unset, then IP + # addresses will be chosen automatically by the Kubernetes cluster's + # LoadBalancer controller, and hostnames will be unset, which will break + # TLS connections. + brokers: [] + + nodePool: + # -- List of node affinities to set for the broker's nodes. The key should + # be a label key, and the value should be a label value, and then the + # broker will prefer running Kafka and Zookeeper on nodes with those + # key-value pairs. + affinities: + - key: kafka + value: ok + + # -- List of taint tolerations when scheduling the broker's pods onto + # nodes. The key should be a taint key, the value should be a taint + # value, and effect should be a taint effect that can be tolerated + # (ignored) when scheduling the broker's Kafka and Zookeeper pods. + tolerations: + - key: kafka + value: ok + effect: NoSchedule + + +# -- A list of usernames for users who should have global admin permissions. +# These users will be created, along with their credentials. +superusers: + - kafka-admin + +# -- A list of users that should be created and granted access. +# +# Passwords for these users are not generated automatically; they are expected +# to be stored as 1Password secrets which are replicated into Vault. Each +# username should have a "{{ $username }}-password" secret associated with it. 
+users: + - # -- The username for the user that should be created. + username: rubin-testing + # -- A list of topics that the user should get read-only access to. + readonlyTopics: ["alert-stream", "alerts-simulated"] + # -- A list of string prefixes for groups that the user should get admin + # access to, allowing them to create, delete, describe, etc consumer + # groups. Note that these are prefix-matched, not just literal exact + # matches. + groups: ["rubin-testing"] + + +zookeeper: + # -- Number of Zookeeper replicas to run. + replicas: 3 + + storage: + # -- Size of the backing storage disk for each of the Zookeeper instances. + size: 1000Gi + # -- Name of a StorageClass to use when requesting persistent volumes. + storageClassName: standard + +tls: + subject: + # -- Organization to use in the 'Subject' field of the broker's TLS certificate. + organization: "Vera C. Rubin Observatory" + # -- Name of a ClusterIssuer capable of provisioning a TLS certificate for + # the broker. + certIssuerName: "letsencrypt-dns" + +# -- Path to the secret resource in Vault +vaultSecretsPath: "" + +# -- Override for the full name used for Kubernetes resources; by default one +# will be created based on the chart name and helm release name. +fullnameOverride: "" + +nameOverride: "" diff --git a/services/alert-stream-broker/charts/alert-stream-schema-registry/.helmignore b/services/alert-stream-broker/charts/alert-stream-schema-registry/.helmignore new file mode 100644 index 0000000000..50af031725 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-schema-registry/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/services/alert-stream-broker/charts/alert-stream-schema-registry/Chart.yaml b/services/alert-stream-broker/charts/alert-stream-schema-registry/Chart.yaml new file mode 100644 index 0000000000..7cc9618d20 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-schema-registry/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v2 +name: alert-stream-schema-registry +version: 2.1.0 +description: Confluent Schema Registry for managing schema versions for the Alert Stream +maintainers: + - name: bsmart + email: drbsmart@uw.edu +appVersion: 1.0.0 +type: application diff --git a/services/alert-stream-broker/charts/alert-stream-schema-registry/README.md b/services/alert-stream-broker/charts/alert-stream-schema-registry/README.md new file mode 100644 index 0000000000..cc6ac85074 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-schema-registry/README.md @@ -0,0 +1,18 @@ +# alert-stream-schema-registry + +Confluent Schema Registry for managing schema versions for the Alert Stream + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| clusterName | string | `"alert-broker"` | Strimzi "cluster name" of the broker to use as a backend. | +| hostname | string | `"alert-schemas-int.lsst.cloud"` | Hostname for an ingress which sends traffic to the Schema Registry. | +| name | string | `"alert-schema-registry"` | Name used by the registry, and by its users. | +| port | int | `8081` | Port where the registry is listening. NOTE: Not actually configurable in strimzi-registry-operator, so this basically cannot be changed. 
| +| schemaSync | object | `{"image":{"repository":"lsstdm/lsst_alert_packet","tag":"tickets-DM-32743"},"subject":"alert-packet"}` | Configuration for the Job which injects the most recent alert_packet schema into the Schema Registry | +| schemaSync.image.repository | string | `"lsstdm/lsst_alert_packet"` | Repository of a container which has the alert_packet syncLatestSchemaToRegistry.py program | +| schemaSync.image.tag | string | `"tickets-DM-32743"` | Version of the container to use | +| schemaSync.subject | string | `"alert-packet"` | Subject name to use when inserting data into the Schema Registry | +| schemaTopic | string | `"registry-schemas"` | Name of the topic used by the Schema Registry to store data. | +| strimziAPIVersion | string | `"v1beta2"` | Version of the Strimzi Custom Resource API. The correct value depends on the deployed version of Strimzi. See [this blog post](https://strimzi.io/blog/2021/04/29/api-conversion/) for more. | diff --git a/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/ingress.yaml b/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/ingress.yaml new file mode 100644 index 0000000000..e33ddf4c29 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/ingress.yaml @@ -0,0 +1,31 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: "{{ .Values.name }}" + annotations: + kubernetes.io/ingress.class: "nginx" + cert-manager.io/cluster-issuer: letsencrypt-dns + nginx.ingress.kubernetes.io/configuration-snippet: | + # Forbid everything except GET since this should be a read-only ingress + # to the schema registry. 
+ limit_except GET { + deny all; + } + +spec: + tls: + - hosts: [{{ .Values.hostname | quote }}] + secretName: "{{ .Values.name }}-tls" + + rules: + - host: {{ .Values.hostname | quote }} + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: {{ .Values.name }} + port: + # TODO: not configurable in strimzi-registry-operator + number: {{ .Values.port }} diff --git a/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/schema-registry-server.yaml b/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/schema-registry-server.yaml new file mode 100644 index 0000000000..492a383002 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/schema-registry-server.yaml @@ -0,0 +1,7 @@ +apiVersion: roundtable.lsst.codes/v1beta1 +kind: StrimziSchemaRegistry +metadata: + name: {{ .Values.name }} +spec: + strimzi-version: {{ .Values.strimziAPIVersion }} + listener: internal diff --git a/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/schema-registry-topic.yaml b/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/schema-registry-topic.yaml new file mode 100644 index 0000000000..3eaa139011 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/schema-registry-topic.yaml @@ -0,0 +1,11 @@ +apiVersion: "kafka.strimzi.io/{{ .Values.strimziAPIVersion }}" +kind: KafkaTopic +metadata: + name: "{{ .Values.schemaTopic }}" + labels: + strimzi.io/cluster: "{{ .Values.clusterName }}" +spec: + partitions: 1 + replicas: 3 + config: + cleanup.policy: compact diff --git a/services/sasquatch/charts/strimzi-kafka/templates/schema-registry-user.yaml b/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/schema-registry-user.yaml similarity index 80% rename from services/sasquatch/charts/strimzi-kafka/templates/schema-registry-user.yaml rename to 
services/alert-stream-broker/charts/alert-stream-schema-registry/templates/schema-registry-user.yaml index 88c84bf126..60b7ae4a23 100644 --- a/services/sasquatch/charts/strimzi-kafka/templates/schema-registry-user.yaml +++ b/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/schema-registry-user.yaml @@ -1,9 +1,9 @@ -apiVersion: kafka.strimzi.io/v1beta2 +apiVersion: kafka.strimzi.io/{{ .Values.strimziAPIVersion }} kind: KafkaUser metadata: - name: {{ .Values.cluster.name }}-schema-registry + name: {{ .Values.name }} labels: - strimzi.io/cluster: {{ .Values.cluster.name }} + strimzi.io/cluster: {{ .Values.clusterName }} spec: authentication: type: tls @@ -16,19 +16,19 @@ spec: # schemas topic - resource: type: topic - name: {{ .Values.registry.schemaTopic }} + name: "{{ .Values.schemaTopic }}" patternType: literal operation: Read type: allow - resource: type: topic - name: {{ .Values.registry.schemaTopic }} + name: "{{ .Values.schemaTopic }}" patternType: literal operation: Write type: allow - resource: type: topic - name: {{ .Values.registry.schemaTopic }} + name: "{{ .Values.schemaTopic }}" patternType: literal operation: DescribeConfigs type: allow diff --git a/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/sync-schema-job.yaml b/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/sync-schema-job.yaml new file mode 100644 index 0000000000..a83f6bae0d --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-schema-registry/templates/sync-schema-job.yaml @@ -0,0 +1,29 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{ .Release.Name }}-sync-schema" + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + app.kubernetes.io/version: {{ .Chart.AppVersion }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + annotations: + argocd.argoproj.io/hook: Sync +spec: + ttlSecondsAfterFinished: 600 + 
template: + metadata: + name: "{{ .Release.Name }}" + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + spec: + restartPolicy: Never + containers: + - name: sync-schema-job + image: "{{ .Values.schemaSync.image.repository }}:{{ .Values.schemaSync.image.tag | default .Chart.AppVersion }}" + command: + - "syncLatestSchemaToRegistry.py" + - "--schema-registry-url=http://{{ .Values.name }}:{{ .Values.port }}" + - "--subject={{ .Values.schemaSync.subject }}" diff --git a/services/alert-stream-broker/charts/alert-stream-schema-registry/values.yaml b/services/alert-stream-broker/charts/alert-stream-schema-registry/values.yaml new file mode 100644 index 0000000000..b84d04abf9 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-schema-registry/values.yaml @@ -0,0 +1,33 @@ +# -- Name used by the registry, and by its users. +name: alert-schema-registry + +# -- Port where the registry is listening. NOTE: Not actually configurable in +# strimzi-registry-operator, so this basically cannot be changed. +port: 8081 + +# -- Version of the Strimzi Custom Resource API. The correct value depends on +# the deployed version of Strimzi. See [this blog +# post](https://strimzi.io/blog/2021/04/29/api-conversion/) for more. +strimziAPIVersion: v1beta2 + +# -- Strimzi "cluster name" of the broker to use as a backend. +clusterName: alert-broker + +# -- Name of the topic used by the Schema Registry to store data. +schemaTopic: registry-schemas + +# -- Hostname for an ingress which sends traffic to the Schema Registry. 
+hostname: alert-schemas-int.lsst.cloud + +# -- Configuration for the Job which injects the most recent alert_packet +# schema into the Schema Registry +schemaSync: + image: + # -- Repository of a container which has the alert_packet + # syncLatestSchemaToRegistry.py program + repository: lsstdm/lsst_alert_packet + # -- Version of the container to use + tag: tickets-DM-32743 + + # -- Subject name to use when inserting data into the Schema Registry + subject: alert-packet diff --git a/services/alert-stream-broker/charts/alert-stream-simulator/.helmignore b/services/alert-stream-broker/charts/alert-stream-simulator/.helmignore new file mode 100644 index 0000000000..50af031725 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-simulator/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/services/alert-stream-broker/charts/alert-stream-simulator/Chart.yaml b/services/alert-stream-broker/charts/alert-stream-simulator/Chart.yaml new file mode 100644 index 0000000000..c2255fd9ac --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-simulator/Chart.yaml @@ -0,0 +1,9 @@ +apiVersion: v2 +name: alert-stream-simulator +version: 1.6.2 +description: Producer which repeatedly publishes a static set of alerts into a Kafka topic +maintainers: + - name: bsmart + email: drbsmart@uw.edu +appVersion: 1.2.1 +type: application diff --git a/services/alert-stream-broker/charts/alert-stream-simulator/README.md b/services/alert-stream-broker/charts/alert-stream-simulator/README.md new file mode 100644 index 0000000000..e0833c4138 --- /dev/null +++ 
b/services/alert-stream-broker/charts/alert-stream-simulator/README.md @@ -0,0 +1,25 @@ +# alert-stream-simulator + +Producer which repeatedly publishes a static set of alerts into a Kafka topic + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| clusterName | string | `"alert-broker"` | Name of a Strimzi Kafka cluster to connect to. | +| clusterPort | int | `9092` | Port to connect to on the Strimzi Kafka cluster. It should be an internal TLS listener. | +| fullnameOverride | string | `""` | Explicitly sets the full name used for the deployment and job (includes the release name). | +| image.imagePullPolicy | string | `"IfNotPresent"` | Pull policy for the Deployment | +| image.repository | string | `"lsstdm/alert-stream-simulator"` | Source repository for the image which holds the rubin-alert-stream program. | +| image.tag | string | `"v1.2.1"` | Tag to use for the rubin-alert-stream container. | +| kafkaUserName | string | `"alert-stream-simulator"` | The username of the Kafka user identity used to connect to the broker. | +| maxBytesRetained | string | `"24000000000"` | Maximum number of bytes for the replay topic, per partition, per replica. Default is 100GB, but should be lower to not fill storage. | +| maxMillisecondsRetained | string | `"604800000"` | Maximum amount of time to save simulated alerts in the replay topic, in milliseconds. Default is 7 days. | +| nameOverride | string | `""` | Explicitly sets the name of the deployment and job. | +| repeatInterval | int | `37` | How often (in seconds) to repeat the sample data into the replay topic. | +| replayTopicName | string | `"alerts-simulated"` | Name of the topic which will receive the repeated alerts on an interval. | +| replayTopicPartitions | int | `8` | | +| replayTopicReplicas | int | `2` | | +| schemaID | int | `1` | Integer ID to use in the prefix of alert data packets. 
This should be a valid Confluent Schema Registry ID associated with the schema used. | +| staticTopicName | string | `"alerts-static"` | Name of the topic which will hold a static single visit of sample data. | +| strimziAPIVersion | string | `"v1beta2"` | API version of the Strimzi installation's custom resource definitions | diff --git a/services/alert-stream-broker/charts/alert-stream-simulator/templates/_helpers.tpl b/services/alert-stream-broker/charts/alert-stream-simulator/templates/_helpers.tpl new file mode 100644 index 0000000000..715cf9d667 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-simulator/templates/_helpers.tpl @@ -0,0 +1,55 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "alertStreamSimulator.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "alertStreamSimulator.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "alertStreamSimulator.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "alertStreamSimulator.labels" -}} +helm.sh/chart: {{ include "alertStreamSimulator.chart" . 
}} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{ include "alertStreamSimulator.selectorLabels" . }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "alertStreamSimulator.selectorLabels" -}} +app.kubernetes.io/name: {{ include "alertStreamSimulator.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Name for the static alerts topic. +*/}} +{{- define "alertStreamSimulator.staticTopicName" -}}alerts-static{{- end }} diff --git a/services/alert-stream-broker/charts/alert-stream-simulator/templates/deployment.yaml b/services/alert-stream-broker/charts/alert-stream-simulator/templates/deployment.yaml new file mode 100644 index 0000000000..1bd137bbe4 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-simulator/templates/deployment.yaml @@ -0,0 +1,44 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "alertStreamSimulator.fullname" . }} + labels: + {{- include "alertStreamSimulator.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "alertStreamSimulator.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "alertStreamSimulator.selectorLabels" . | nindent 8 }} + spec: + containers: + - name: "alert-stream-simulator" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + volumeMounts: + - name: "kafka-client-secret" + mountPath: "/etc/kafka-client-secret" + readOnly: True + - name: "kafka-server-ca-cert" + mountPath: "/etc/kafka-server-ca-cert" + readOnly: True + command: + - "rubin-alert-sim" + - "--debug" + - "play-stream" + - "--broker={{ .Values.clusterName }}-kafka-bootstrap:{{ .Values.clusterPort }}" + - "--dst-topic={{ .Values.replayTopicName }}" + - "--src-topic={{ template "alertStreamSimulator.staticTopicName" . 
}}"
+        - "--tls-client-key-location=/etc/kafka-client-secret/user.key"
+        - "--tls-client-crt-location=/etc/kafka-client-secret/user.crt"
+        - "--tls-server-ca-crt-location=/etc/kafka-server-ca-cert/ca.crt"
+        - "--repeat-interval={{ .Values.repeatInterval }}"
+      volumes:
+        - name: "kafka-client-secret"
+          secret:
+            secretName: "{{ .Values.kafkaUserName}}"
+        - name: "kafka-server-ca-cert"
+          secret:
+            secretName: "{{ .Values.clusterName}}-cluster-ca-cert" diff --git a/services/alert-stream-broker/charts/alert-stream-simulator/templates/kafka-topics.yaml b/services/alert-stream-broker/charts/alert-stream-simulator/templates/kafka-topics.yaml new file mode 100644 index 0000000000..e3998adb52 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-simulator/templates/kafka-topics.yaml @@ -0,0 +1,13 @@ +apiVersion: "kafka.strimzi.io/{{ .Values.strimziAPIVersion }}" +kind: KafkaTopic +metadata: + name: "{{ .Values.replayTopicName }}" + labels: + strimzi.io/cluster: "{{ .Values.clusterName }}" +spec: + partitions: {{ .Values.replayTopicPartitions }} + replicas: {{ .Values.replayTopicReplicas }} + config: + cleanup.policy: "delete" + retention.ms: {{ .Values.maxMillisecondsRetained }} # chart default: 7 days + retention.bytes: {{ .Values.maxBytesRetained }} diff --git a/services/alert-stream-broker/charts/alert-stream-simulator/templates/kafka-user.yaml b/services/alert-stream-broker/charts/alert-stream-simulator/templates/kafka-user.yaml new file mode 100644 index 0000000000..42a35f8d9c --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-simulator/templates/kafka-user.yaml @@ -0,0 +1,45 @@ +apiVersion: kafka.strimzi.io/{{ .Values.strimziAPIVersion }} +kind: KafkaUser +metadata: + name: {{ .Values.kafkaUserName }} + labels: + strimzi.io/cluster: {{ .Values.clusterName }} +spec: + authentication: + type: tls + authorization: + type: simple + acls: + # Allow all operations on both topics + - resource: + type: topic + name: {{ template 
"alertStreamSimulator.staticTopicName" . }} + patternType: literal + operation: All + type: allow + - resource: + type: topic + name: "{{ .Values.replayTopicName }}" + patternType: literal + operation: All + type: allow + # Allow all on the __consumer_offsets topic + - resource: + type: topic + name: "__consumer_offsets" + patternType: literal + operation: All + type: allow + # Allow running as a consumer group + - resource: + type: group + name: "*" + patternType: literal + operation: Describe + type: allow + - resource: + type: group + name: "*" + patternType: literal + operation: Read + type: allow diff --git a/services/alert-stream-broker/charts/alert-stream-simulator/templates/load-data-job.yaml b/services/alert-stream-broker/charts/alert-stream-simulator/templates/load-data-job.yaml new file mode 100644 index 0000000000..ede343ff73 --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-simulator/templates/load-data-job.yaml @@ -0,0 +1,50 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{ .Release.Name }}-load-data" + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + app.kubernetes.io/version: {{ .Chart.AppVersion }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + annotations: + argocd.argoproj.io/hook: Sync +spec: + template: + metadata: + name: "{{ .Release.Name }}" + labels: + app.kubernetes.io/managed-by: {{ .Release.Service | quote }} + app.kubernetes.io/instance: {{ .Release.Name | quote }} + helm.sh/chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" + spec: + restartPolicy: Never + containers: + - name: pre-install-job + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + volumeMounts: + - name: "kafka-client-secret" + mountPath: "/etc/kafka-client-secret" + readOnly: True + - name: "kafka-server-ca-cert" + mountPath: "/etc/kafka-server-ca-cert" + readOnly: True + command: + - "rubin-alert-sim" + - 
"--debug" + - "create-stream" + - "--broker={{ .Values.clusterName }}-kafka-bootstrap:{{ .Values.clusterPort }}" + - "--dst-topic={{ template "alertStreamSimulator.staticTopicName" . }}" + - "--create-topic" + - "--schema-id={{ .Values.schemaID }}" + - "--tls-client-key-location=/etc/kafka-client-secret/user.key" + - "--tls-client-crt-location=/etc/kafka-client-secret/user.crt" + - "--tls-server-ca-crt-location=/etc/kafka-server-ca-cert/ca.crt" + - "/var/sample_alert_data/rubin_single_visit_sample.avro" + volumes: + - name: "kafka-client-secret" + secret: + secretName: "{{ .Values.kafkaUserName}}" + - name: "kafka-server-ca-cert" + secret: + secretName: "{{ .Values.clusterName}}-cluster-ca-cert" diff --git a/services/alert-stream-broker/charts/alert-stream-simulator/values.yaml b/services/alert-stream-broker/charts/alert-stream-simulator/values.yaml new file mode 100644 index 0000000000..ef7ad6ddaa --- /dev/null +++ b/services/alert-stream-broker/charts/alert-stream-simulator/values.yaml @@ -0,0 +1,52 @@ +# -- Explicitly sets the name of the deployment and job. +nameOverride: "" + +# -- Explicitly sets the full name used for the deployment and job (includes +# the release name). +fullnameOverride: "" + +# -- The username of the Kafka user identity used to connect to the broker. +kafkaUserName: alert-stream-simulator + +# -- Name of the topic which will hold a static single visit of sample data. +staticTopicName: alerts-static + +# -- Name of the topic which will receive the repeated alerts on an interval. +replayTopicName: alerts-simulated + +# -- Integer ID to use in the prefix of alert data packets. This should be a +# valid Confluent Schema Registry ID associated with the schema used. +schemaID: 1 + +# -- Name of a Strimzi Kafka cluster to connect to. +clusterName: alert-broker + +# -- Port to connect to on the Strimzi Kafka cluster. It should be an internal +# TLS listener. 
+clusterPort: 9092 + +# -- API version of the Strimzi installation's custom resource definitions +strimziAPIVersion: v1beta2 + +image: + # -- Source repository for the image which holds the rubin-alert-stream program. + repository: lsstdm/alert-stream-simulator + # -- Tag to use for the rubin-alert-stream container. + tag: v1.2.1 + # -- Pull policy for the Deployment + imagePullPolicy: IfNotPresent + +# -- How often (in seconds) to repeat the sample data into the replay topic. +repeatInterval: 37 + +# -- Maximum amount of time to save simulated alerts in the replay topic, in +# milliseconds. Default is 7 days. +maxMillisecondsRetained: "604800000" + +# -- Maximum number of bytes for the replay topic, per partition, per replica. +# Kept well below the upstream 100GB figure (24GB here) to avoid filling storage. +maxBytesRetained: "24000000000" + +replayTopicPartitions: 8 + +replayTopicReplicas: 2 diff --git a/services/alert-stream-broker/values-idfint.yaml b/services/alert-stream-broker/values-idfint.yaml index 605bfe8c40..84791f748e 100644 --- a/services/alert-stream-broker/values-idfint.yaml +++ b/services/alert-stream-broker/values-idfint.yaml @@ -6,16 +6,20 @@ alert-stream-broker: # Addresses based on the state as of 2021-12-02; these were assigned by # Google and now we're pinning them. 
externalListener: + tls: + enabled: true bootstrap: - ip: 35.188.169.31 + ip: "35.224.176.103" host: alert-stream-int.lsst.cloud brokers: - - ip: 35.239.64.164 + - ip: "34.28.80.188" host: alert-stream-int-broker-0.lsst.cloud - - ip: 34.122.165.155 + - ip: "35.188.136.140" host: alert-stream-int-broker-1.lsst.cloud - - ip: 35.238.120.127 + - ip: "35.238.84.221" host: alert-stream-int-broker-2.lsst.cloud + + storage: size: 1500Gi diff --git a/services/alert-stream-broker/values.yaml b/services/alert-stream-broker/values.yaml index a12314beaa..f7ebb0373b 100644 --- a/services/alert-stream-broker/values.yaml +++ b/services/alert-stream-broker/values.yaml @@ -1 +1,8 @@ -# This file intentionally blank - no customization needed +strimzi-registry-operator: + # Should match the cluster name used by the alert-stream-broker + clusterName: alert-broker + clusterNamespace: alert-stream-broker + # Should match the namespace where the alert-broker cluster runs + watchNamespace: alert-stream-broker + + operatorNamespace: "alert-stream-broker" diff --git a/services/argocd/Chart.yaml b/services/argocd/Chart.yaml index e406c500dd..99dfa8454d 100644 --- a/services/argocd/Chart.yaml +++ b/services/argocd/Chart.yaml @@ -1,10 +1,12 @@ apiVersion: v2 name: argo-cd version: 1.0.0 +description: Kubernetes application manager +home: https://argoproj.github.io/cd/ +sources: + - https://github.com/argoproj/argo-cd + - https://github.com/argoproj/argo-helm dependencies: -- name: argo-cd - version: 3.33.5 - repository: https://argoproj.github.io/argo-helm -- name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ + - name: argo-cd + version: 5.19.12 + repository: https://argoproj.github.io/argo-helm diff --git a/services/argocd/README.md b/services/argocd/README.md new file mode 100644 index 0000000000..13c3499ab3 --- /dev/null +++ b/services/argocd/README.md @@ -0,0 +1,35 @@ +# argo-cd + +Kubernetes application manager + +**Homepage:** + +## Source Code + +* +* + 
+## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| argo-cd.configs.secret.createSecret | bool | `false` | | +| argo-cd.controller.metrics.applicationLabels.enabled | bool | `true` | | +| argo-cd.controller.metrics.applicationLabels.labels[0] | string | `"name"` | | +| argo-cd.controller.metrics.applicationLabels.labels[1] | string | `"instance"` | | +| argo-cd.controller.metrics.enabled | bool | `true` | | +| argo-cd.notifications.metrics.enabled | bool | `true` | | +| argo-cd.redis.enabled | bool | `true` | | +| argo-cd.redis.metrics.enabled | bool | `true` | | +| argo-cd.repoServer.metrics.enabled | bool | `true` | | +| argo-cd.server.config."helm.repositories" | string | `"- url: https://lsst-sqre.github.io/charts/\n name: lsst-sqre\n- url: https://ricoberger.github.io/helm-charts/\n name: ricoberger\n- url: https://kubernetes.github.io/ingress-nginx/\n name: ingress-nginx\n- url: https://charts.helm.sh/stable\n name: stable\n- url: https://strimzi.io/charts/\n name: strimzi\n"` | | +| argo-cd.server.config."resource.compareoptions" | string | `"ignoreAggregatedRoles: true\n"` | | +| argo-cd.server.extraArgs[0] | string | `"--basehref=/argo-cd"` | | +| argo-cd.server.extraArgs[1] | string | `"--insecure=true"` | | +| argo-cd.server.ingress.annotations."nginx.ingress.kubernetes.io/rewrite-target" | string | `"/$2"` | | +| argo-cd.server.ingress.enabled | bool | `true` | | +| argo-cd.server.ingress.ingressClassName | string | `"nginx"` | | +| argo-cd.server.ingress.pathType | string | `"ImplementationSpecific"` | | +| argo-cd.server.ingress.paths[0] | string | `"/argo-cd(/|$)(.*)"` | | +| argo-cd.server.metrics.enabled | bool | `true` | | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | diff --git a/services/argocd/templates/vault-secret.yaml b/services/argocd/templates/vault-secrets.yaml similarity index 54% rename from services/argocd/templates/vault-secret.yaml rename to 
services/argocd/templates/vault-secrets.yaml index 598154025d..92bae63785 100644 --- a/services/argocd/templates/vault-secret.yaml +++ b/services/argocd/templates/vault-secrets.yaml @@ -1,9 +1,7 @@ -{{ if .Values.vault_secret.enabled }} apiVersion: ricoberger.de/v1alpha1 kind: VaultSecret metadata: name: argocd-secret spec: - path: {{ .Values.vault_secret.path }} + path: "{{ .Values.global.vaultSecretsPath }}/argocd" type: Opaque -{{ end }} diff --git a/services/argocd/values-base.yaml b/services/argocd/values-base.yaml index b1a580136e..5462a9042f 100644 --- a/services/argocd/values-base.yaml +++ b/services/argocd/values-base.yaml @@ -1,24 +1,10 @@ argo-cd: - redis: - enabled: true - server: ingress: - enabled: true hosts: - "base-lsp.lsst.codes" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" - config: - url: https://base-lsp.lsst.codes/argo-cd + url: "https://base-lsp.lsst.codes/argo-cd" dex.config: | connectors: # Auth using GitHub. 
@@ -32,30 +18,8 @@ argo-cd: clientSecret: $dex.clientSecret orgs: - name: lsst-sqre - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi rbacConfig: policy.csv: | + g, lsst-sqre:friends, role:admin g, lsst-sqre:square, role:admin - - configs: - secret: - createSecret: false - -vault_secret: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/argocd - -pull-secret: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/pull-secret diff --git a/services/argocd/values-ccin2p3.yaml b/services/argocd/values-ccin2p3.yaml new file mode 100644 index 0000000000..43d84d21df --- /dev/null +++ b/services/argocd/values-ccin2p3.yaml @@ -0,0 +1,34 @@ +argo-cd: + server: + ingress: + hosts: + - "data-dev.lsst.eu" + config: + url: https://data-dev.lsst.eu/argo-cd + dex.config: | + connectors: + # Auth using GitHub. 
+ # See https://dexidp.io/docs/connectors/github/ + - type: github + id: github + name: GitHub + config: + clientID: ae314e45a6af43ea910a + # Reference to key in argo-secret Kubernetes resource + clientSecret: $dex.clientSecret + orgs: + - name: rubin-lsst + # resource.customizations: | + # networking.k8s.io/Ingress: + # health.lua: | + # hs = {} + # hs.status = "Healthy" + # return hs + + rbacConfig: + policy.csv: | + g, rubin-lsst:admin, role:admin + + # configs: + # secret: + # createSecret: true diff --git a/services/argocd/values-idfdev.yaml b/services/argocd/values-idfdev.yaml index 8b94cfff84..c976c699dc 100644 --- a/services/argocd/values-idfdev.yaml +++ b/services/argocd/values-idfdev.yaml @@ -1,24 +1,11 @@ argo-cd: - redis: - enabled: true - server: ingress: - enabled: true hosts: - "data-dev.lsst.cloud" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" config: - url: https://data-dev.lsst.cloud/argo-cd + url: "https://data-dev.lsst.cloud/argo-cd" dex.config: | connectors: # Auth using Google. 
@@ -32,38 +19,18 @@ argo-cd: hostedDomains: - lsst.cloud redirectURI: https://data-dev.lsst.cloud/argo-cd/api/dex/callback - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi rbacConfig: policy.csv: | g, adam@lsst.cloud, role:admin g, afausti@lsst.cloud, role:admin g, christine@lsst.cloud, role:admin + g, dspeck@lsst.cloud, role:admin g, frossie@lsst.cloud, role:admin g, jsick@lsst.cloud, role:admin g, krughoff@lsst.cloud, role:admin g, rra@lsst.cloud, role:admin + g, gpdf@lsst.cloud, role:admin + g, loi@lsst.cloud, role:admin + g, roby@lsst.cloud, role:admin scopes: "[email]" - - configs: - secret: - createSecret: false - -vault_secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/argocd - - -pull-secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/pull-secret diff --git a/services/argocd/values-idfint.yaml b/services/argocd/values-idfint.yaml index 7d6e87e9a3..c2745b744d 100644 --- a/services/argocd/values-idfint.yaml +++ b/services/argocd/values-idfint.yaml @@ -1,24 +1,11 @@ argo-cd: - redis: - enabled: true - server: ingress: - enabled: true hosts: - "data-int.lsst.cloud" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" config: - url: https://data-int.lsst.cloud/argo-cd + url: "https://data-int.lsst.cloud/argo-cd" dex.config: | connectors: # Auth using Google. 
@@ -32,40 +19,22 @@ argo-cd: hostedDomains: - lsst.cloud redirectURI: https://data-int.lsst.cloud/argo-cd/api/dex/callback - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi rbacConfig: policy.csv: | g, adam@lsst.cloud, role:admin g, afausti@lsst.cloud, role:admin g, christine@lsst.cloud, role:admin + g, dspeck@lsst.cloud, role:admin g, frossie@lsst.cloud, role:admin g, jsick@lsst.cloud, role:admin g, krughoff@lsst.cloud, role:admin g, rra@lsst.cloud, role:admin g, ctslater@lsst.cloud, role:admin - g, swnelson@lsst.cloud, role:admin g, gpdf@lsst.cloud, role:admin + g, loi@lsst.cloud, role:admin + g, roby@lsst.cloud, role:admin + g, fritzm@lsst.cloud, role:admin + g, drbsmart@lsst.cloud, role:admin + g, ecbellm@lsst.cloud, role:admin scopes: "[email]" - - configs: - secret: - createSecret: false - -vault_secret: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/argocd - -pull-secret: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/pull-secret diff --git a/services/argocd/values-idfprod.yaml b/services/argocd/values-idfprod.yaml index ade495e7c6..b81d4fb937 100644 --- a/services/argocd/values-idfprod.yaml +++ b/services/argocd/values-idfprod.yaml @@ -1,24 +1,11 @@ argo-cd: - redis: - enabled: true - server: ingress: - enabled: true hosts: - "data.lsst.cloud" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" config: - url: https://data.lsst.cloud/argo-cd + url: "https://data.lsst.cloud/argo-cd" dex.config: | connectors: # Auth using Google. 
@@ -32,37 +19,18 @@ argo-cd: hostedDomains: - lsst.cloud redirectURI: https://data.lsst.cloud/argo-cd/api/dex/callback - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi rbacConfig: policy.csv: | g, adam@lsst.cloud, role:admin g, afausti@lsst.cloud, role:admin g, christine@lsst.cloud, role:admin + g, dspeck@lsst.cloud, role:admin g, frossie@lsst.cloud, role:admin g, jsick@lsst.cloud, role:admin g, krughoff@lsst.cloud, role:admin g, rra@lsst.cloud, role:admin + g, gpdf@lsst.cloud, role:admin + g, loi@lsst.cloud, role:admin + g, roby@lsst.cloud, role:admin scopes: "[email]" - - configs: - secret: - createSecret: false - -vault_secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/argocd - -pull-secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/pull-secret diff --git a/services/argocd/values-int.yaml b/services/argocd/values-int.yaml deleted file mode 100644 index 63b27711d9..0000000000 --- a/services/argocd/values-int.yaml +++ /dev/null @@ -1,61 +0,0 @@ -argo-cd: - redis: - enabled: true - - server: - ingress: - enabled: true - hosts: - - "lsst-lsp-int.ncsa.illinois.edu" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" - - config: - url: https://lsst-lsp-int.ncsa.illinois.edu/argo-cd - dex.config: | - connectors: - # Auth using GitHub. 
- # See https://dexidp.io/docs/connectors/github/ - - type: github - id: github - name: GitHub - config: - clientID: 3f4383ff79915ace05d7 - # Reference to key in argo-secret Kubernetes resource - clientSecret: $dex.clientSecret - orgs: - - name: lsst-sqre - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi - - rbacConfig: - policy.csv: | - g, lsst-sqre:square, role:admin - - configs: - secret: - createSecret: false - -vault_secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/argocd - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret diff --git a/services/argocd/values-minikube.yaml b/services/argocd/values-minikube.yaml index 1de5014008..86966dd3e3 100644 --- a/services/argocd/values-minikube.yaml +++ b/services/argocd/values-minikube.yaml @@ -3,45 +3,7 @@ argo-cd: args: repoServerTimeoutSeconds: "180" - redis: - enabled: true - server: ingress: - enabled: true hosts: - "minikube.lsst.codes" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" - - config: - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi - - configs: - secret: - createSecret: false - -vault_secret: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/argocd - -pull-secret: - enabled: true - path: 
secret/k8s_operator/minikube.lsst.codes/pull-secret diff --git a/services/argocd/values-red-five.yaml b/services/argocd/values-red-five.yaml deleted file mode 100644 index 30691c666f..0000000000 --- a/services/argocd/values-red-five.yaml +++ /dev/null @@ -1,43 +0,0 @@ -argo-cd: - redis: - enabled: true - - server: - ingress: - enabled: true - hosts: - - "red-five.lsst.codes" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" - - config: - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi - - configs: - secret: - createSecret: false - -vault_secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/argocd - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git a/services/argocd/values-roe.yaml b/services/argocd/values-roe.yaml index f35ff0781a..c129191161 100644 --- a/services/argocd/values-roe.yaml +++ b/services/argocd/values-roe.yaml @@ -1,36 +1,12 @@ argo-cd: - redis: - enabled: true - server: ingress: - enabled: true hosts: - "rsp.lsst.ac.uk" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" config: - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable -pull-secret: - enabled: true - path: 
secret/k8s_operator/roe/pull-secret + url: "https://rsp.lsst.ac.uk/argo-cd" -vault_secret: - enabled: true - path: secret/k8s_operator/roe/argocd + configs: + secret: + createSecret: true diff --git a/services/argocd/values-squash-sandbox.yaml b/services/argocd/values-squash-sandbox.yaml deleted file mode 100644 index 5b5a9ca319..0000000000 --- a/services/argocd/values-squash-sandbox.yaml +++ /dev/null @@ -1,34 +0,0 @@ -argo-cd: - redis: - enabled: true - - server: - ingress: - enabled: true - hosts: - - "squash-sandbox.lsst.codes" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" - - config: - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi - -vault_secret: - enabled: false diff --git a/services/argocd/values-stable.yaml b/services/argocd/values-stable.yaml deleted file mode 100644 index d1b3435b29..0000000000 --- a/services/argocd/values-stable.yaml +++ /dev/null @@ -1,61 +0,0 @@ -argo-cd: - redis: - enabled: true - - server: - ingress: - enabled: true - hosts: - - "lsst-lsp-stable.ncsa.illinois.edu" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" - - config: - url: https://lsst-lsp-stable.ncsa.illinois.edu/argo-cd - dex.config: | - connectors: - # Auth using GitHub. 
- # See https://dexidp.io/docs/connectors/github/ - - type: github - id: github - name: GitHub - config: - clientID: 5e20005bc8739cea5035 - # Reference to key in argo-secret Kubernetes resource - clientSecret: $dex.clientSecret - orgs: - - name: lsst-sqre - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi - - rbacConfig: - policy.csv: | - g, lsst-sqre:square, role:admin - - configs: - secret: - createSecret: false - -vault_secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/argocd - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret diff --git a/services/argocd/values-summit.yaml b/services/argocd/values-summit.yaml index 5680f76fda..0f5710ce2a 100644 --- a/services/argocd/values-summit.yaml +++ b/services/argocd/values-summit.yaml @@ -1,24 +1,11 @@ argo-cd: - redis: - enabled: true - server: ingress: - enabled: true hosts: - "summit-lsp.lsst.codes" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" config: - url: https://summit-lsp.lsst.codes/argo-cd + url: "https://summit-lsp.lsst.codes/argo-cd" dex.config: | connectors: # Auth using GitHub. 
@@ -32,30 +19,7 @@ argo-cd: clientSecret: $dex.clientSecret orgs: - name: lsst-sqre - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi - rbacConfig: policy.csv: | + g, lsst-sqre:friends, role:admin g, lsst-sqre:square, role:admin - - configs: - secret: - createSecret: false - -vault_secret: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/argocd - -pull-secret: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/pull-secret diff --git a/services/argocd/values-tucson-teststand.yaml b/services/argocd/values-tucson-teststand.yaml index baee8b0cfa..b363102534 100644 --- a/services/argocd/values-tucson-teststand.yaml +++ b/services/argocd/values-tucson-teststand.yaml @@ -1,24 +1,11 @@ argo-cd: - redis: - enabled: true - server: ingress: - enabled: true hosts: - "tucson-teststand.lsst.codes" - annotations: - kubernetes.io/ingress.class: nginx - nginx.ingress.kubernetes.io/rewrite-target: "/$2" - paths: - - /argo-cd(/|$)(.*) - - extraArgs: - - "--basehref=/argo-cd" - - "--insecure=true" config: - url: https://tucson-teststand.lsst.codes/argo-cd + url: "https://tucson-teststand.lsst.codes/argo-cd" dex.config: | connectors: # Auth using GitHub. 
@@ -32,30 +19,7 @@ argo-cd: clientSecret: $dex.clientSecret orgs: - name: lsst-sqre - helm.repositories: | - - url: https://lsst-sqre.github.io/charts/ - name: lsst-sqre - - url: https://ricoberger.github.io/helm-charts/ - name: ricoberger - - url: https://kubernetes.github.io/ingress-nginx/ - name: ingress-nginx - - url: https://charts.helm.sh/stable - name: stable - - url: https://strimzi.io/charts/ - name: strimzi - rbacConfig: policy.csv: | + g, lsst-sqre:friends, role:admin g, lsst-sqre:square, role:admin - - configs: - secret: - createSecret: false - -vault_secret: - enabled: true - path: secret/k8s_operator/tucson-teststand.lsst.codes/argocd - -pull-secret: - enabled: true - path: secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret diff --git a/services/argocd/values.yaml b/services/argocd/values.yaml new file mode 100644 index 0000000000..841a442791 --- /dev/null +++ b/services/argocd/values.yaml @@ -0,0 +1,64 @@ +## Argo CD configuration +## https://github.com/argoproj/argo-helm/blob/main/charts/argo-cd/values.yaml +argo-cd: + redis: + enabled: true + metrics: + enabled: true + + controller: + metrics: + enabled: true + applicationLabels: + enabled: true + labels: ["name", "instance"] + + repoServer: + metrics: + enabled: true + + notifications: + metrics: + enabled: true + + server: + metrics: + enabled: true + ingress: + enabled: true + annotations: + nginx.ingress.kubernetes.io/rewrite-target: "/$2" + ingressClassName: "nginx" + paths: + - "/argo-cd(/|$)(.*)" + pathType: "ImplementationSpecific" + + extraArgs: + - "--basehref=/argo-cd" + - "--insecure=true" + + config: + helm.repositories: | + - url: https://lsst-sqre.github.io/charts/ + name: lsst-sqre + - url: https://ricoberger.github.io/helm-charts/ + name: ricoberger + - url: https://kubernetes.github.io/ingress-nginx/ + name: ingress-nginx + - url: https://charts.helm.sh/stable + name: stable + - url: https://strimzi.io/charts/ + name: strimzi + resource.compareoptions: | + 
ignoreAggregatedRoles: true + + configs: + secret: + createSecret: false + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/cachemachine/Chart.yaml b/services/cachemachine/Chart.yaml index 8a3a424fc5..fd8100af9e 100644 --- a/services/cachemachine/Chart.yaml +++ b/services/cachemachine/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: cachemachine version: 1.0.0 -dependencies: - - name: cachemachine - version: 1.2.2 - repository: https://lsst-sqre.github.io/charts/ +description: JupyterLab image prepuller +sources: + - https://github.com/lsst-sqre/cachemachine +appVersion: 1.2.2 diff --git a/services/cachemachine/README.md b/services/cachemachine/README.md new file mode 100644 index 0000000000..1ed392e993 --- /dev/null +++ b/services/cachemachine/README.md @@ -0,0 +1,31 @@ +# cachemachine + +JupyterLab image prepuller + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the cachemachine frontend pod | +| autostart | object | `{}` | Autostart configuration. Each key is the name of a class of images to pull, and the value is the JSON specification for which and how many images to pull. 
| +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the cachemachine image | +| image.repository | string | `"lsstsqre/cachemachine"` | cachemachine image to use | +| image.tag | string | The appVersion of the chart | Tag of cachemachine image to use | +| ingress.annotations | object | `{}` | Additional annotations to add for endpoints that are authenticated | +| ingress.anonymousAnnotations | object | `{}` | Additional annotations to add for endpoints that allow anonymous access, such as `/*/available` | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the cachemachine frontend pod | +| podAnnotations | object | `{}` | Annotations for the cachemachine frontend pod | +| resources | object | `{}` | Resource limits and requests for the cachemachine frontend pod | +| serviceAccount | object | `{"annotations":{},"name":""}` | Secret names to use for all Docker pulls | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account | +| serviceAccount.name | string | Name based on the fullname template | Name of the service account to use | +| tolerations | list | `[]` | Tolerations for the cachemachine frontend pod | diff --git a/services/cachemachine/templates/_helpers.tpl b/services/cachemachine/templates/_helpers.tpl new file mode 100644 index 0000000000..6599ed07b6 --- /dev/null +++ b/services/cachemachine/templates/_helpers.tpl @@ -0,0 +1,60 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. 
+*/}} +{{- define "cachemachine.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "cachemachine.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "cachemachine.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "cachemachine.labels" -}} +app.kubernetes.io/name: {{ include "cachemachine.name" . }} +helm.sh/chart: {{ include "cachemachine.chart" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "cachemachine.selectorLabels" -}} +app.kubernetes.io/name: {{ include "cachemachine.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "cachemachine.serviceAccountName" -}} +{{ default (include "cachemachine.fullname" .) 
.Values.serviceAccount.name }} +{{- end -}} diff --git a/services/cachemachine/templates/configmap.yaml b/services/cachemachine/templates/configmap.yaml new file mode 100644 index 0000000000..013ff04860 --- /dev/null +++ b/services/cachemachine/templates/configmap.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "cachemachine.fullname" . }}-autostart + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +data: + {{- toYaml .Values.autostart | nindent 2 }} diff --git a/services/cachemachine/templates/deployment.yaml b/services/cachemachine/templates/deployment.yaml new file mode 100644 index 0000000000..b8105098c2 --- /dev/null +++ b/services/cachemachine/templates/deployment.yaml @@ -0,0 +1,96 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "cachemachine.fullname" . }} + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "cachemachine.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "cachemachine.selectorLabels" . | nindent 8 }} + spec: + imagePullSecrets: + - name: "pull-secret" + serviceAccountName: {{ template "cachemachine.serviceAccountName" . 
}} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + - name: {{ .Chart.Name }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + env: + - name: DOCKER_SECRET_NAME + value: "pull-secret" + ports: + - name: "http" + containerPort: 8080 + protocol: "TCP" + readinessProbe: + httpGet: + path: "/" + port: "http" + {{- with .Values.resources }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: "docker-creds" + mountPath: "/etc/secrets" + readOnly: true + - name: autostart + mountPath: "/etc/cachemachine" + readOnly: true + - name: podinfo + mountPath: /etc/podinfo + volumes: + - name: docker-creds + secret: + secretName: pull-secret + - name: autostart + configMap: + name: {{ include "cachemachine.fullname" . }}-autostart + - name: podinfo + downwardAPI: + items: + - path: "annotations" + fieldRef: + fieldPath: metadata.annotations + - path: "labels" + fieldRef: + fieldPath: metadata.labels + - path: "name" + fieldRef: + fieldPath: metadata.name + - path: "uid" + fieldRef: + fieldPath: metadata.uid + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/cachemachine/templates/ingress-anonymous.yaml b/services/cachemachine/templates/ingress-anonymous.yaml new file mode 100644 index 0000000000..4ac68ad654 --- /dev/null +++ b/services/cachemachine/templates/ingress-anonymous.yaml @@ -0,0 +1,30 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "cachemachine.fullname" . 
}}-anonymous + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + anonymous: true +template: + metadata: + name: {{ template "cachemachine.fullname" . }}-anonymous + annotations: + nginx.ingress.kubernetes.io/use-regex: "true" + {{- with .Values.ingress.anonymousAnnotations }} + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/cachemachine/.*/(available|desired)" + pathType: "ImplementationSpecific" + backend: + service: + name: {{ template "cachemachine.fullname" . }} + port: + number: 80 diff --git a/services/cachemachine/templates/ingress.yaml b/services/cachemachine/templates/ingress.yaml new file mode 100644 index 0000000000..0fe53f9cee --- /dev/null +++ b/services/cachemachine/templates/ingress.yaml @@ -0,0 +1,31 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "cachemachine.fullname" . }} + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "exec:admin" + loginRedirect: true +template: + metadata: + name: {{ template "cachemachine.fullname" . }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/cachemachine" + pathType: "Prefix" + backend: + service: + name: {{ template "cachemachine.fullname" . 
}} + port: + number: 80 diff --git a/services/cachemachine/templates/networkpolicy-pull.yaml b/services/cachemachine/templates/networkpolicy-pull.yaml new file mode 100644 index 0000000000..de3104385d --- /dev/null +++ b/services/cachemachine/templates/networkpolicy-pull.yaml @@ -0,0 +1,15 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "cachemachine.fullname" . }}-pull + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +spec: + # Reject all inbound and outbound connections to the pods that exist solely + # to pull Docker images. + podSelector: + matchLabels: + cachemachine: "pull" + policyTypes: + - Ingress + - Egress diff --git a/services/cachemachine/templates/networkpolicy.yaml b/services/cachemachine/templates/networkpolicy.yaml new file mode 100644 index 0000000000..2741f62d58 --- /dev/null +++ b/services/cachemachine/templates/networkpolicy.yaml @@ -0,0 +1,21 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "cachemachine.fullname" . }} +spec: + podSelector: + matchLabels: + {{- include "cachemachine.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/cachemachine/templates/service.yaml b/services/cachemachine/templates/service.yaml new file mode 100644 index 0000000000..63ccbc2ed1 --- /dev/null +++ b/services/cachemachine/templates/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "cachemachine.fullname" . }} + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: "http" + protocol: "TCP" + selector: + {{- include "cachemachine.selectorLabels" . 
| nindent 4 }} diff --git a/services/cachemachine/templates/serviceaccount.yaml b/services/cachemachine/templates/serviceaccount.yaml new file mode 100644 index 0000000000..81a80ff760 --- /dev/null +++ b/services/cachemachine/templates/serviceaccount.yaml @@ -0,0 +1,65 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "cachemachine.serviceAccountName" . }} + labels: + {{- include "cachemachine.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "cachemachine.serviceAccountName" . }} + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +rules: + - apiGroups: [""] + resources: ["nodes"] + verbs: ["list"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "cachemachine.serviceAccountName" . }} + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +subjects: + - kind: ServiceAccount + name: {{ template "cachemachine.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ template "cachemachine.serviceAccountName" . }} + apiGroup: rbac.authorization.k8s.io +--- +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "cachemachine.serviceAccountName" . }} + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +rules: + - apiGroups: ["apps"] + resources: ["daemonsets"] + verbs: ["create", "delete"] + - apiGroups: ["apps"] + resources: ["daemonsets/status"] + verbs: ["get"] +--- +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "cachemachine.serviceAccountName" . }} + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +subjects: + - kind: ServiceAccount + name: {{ template "cachemachine.serviceAccountName" . 
}} + namespace: {{ .Release.Namespace }} +roleRef: + kind: Role + name: {{ template "cachemachine.serviceAccountName" . }} + apiGroup: rbac.authorization.k8s.io diff --git a/services/cachemachine/templates/tests/test-connection.yaml b/services/cachemachine/templates/tests/test-connection.yaml new file mode 100644 index 0000000000..35c987cdcc --- /dev/null +++ b/services/cachemachine/templates/tests/test-connection.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Pod +metadata: + name: {{ include "cachemachine.fullname" . }}-test-connection + annotations: + "helm.sh/hook": "test-success" + labels: + {{- include "cachemachine.labels" . | nindent 4 }} +spec: + containers: + - name: "wget" + image: "busybox" + command: ['wget'] + args: + - '{{ include "cachemachine.fullname" . }}:8080' + restartPolicy: Never diff --git a/services/cachemachine/templates/vault-secrets.yaml b/services/cachemachine/templates/vault-secrets.yaml new file mode 100644 index 0000000000..6f813c9b7d --- /dev/null +++ b/services/cachemachine/templates/vault-secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret + labels: + {{- include "cachemachine.labels" . 
| nindent 4 }} +spec: + path: "{{- .Values.global.vaultSecretsPath }}/pull-secret" + type: kubernetes.io/dockerconfigjson diff --git a/services/cachemachine/values-base.yaml b/services/cachemachine/values-base.yaml index d0cd8faa4a..688d5c65b4 100644 --- a/services/cachemachine/values-base.yaml +++ b/services/cachemachine/values-base.yaml @@ -1,35 +1,25 @@ -cachemachine: - imagePullSecrets: - - name: "cachemachine-secret" - - ingress: - enabled: true - host: "base-lsp.lsst.codes" - - vaultSecretsPath: "secret/k8s_operator/base-lsp.lsst.codes/pull-secret" - - autostart: - jupyter: | - { - "name": "jupyter", - "labels": { - "jupyterlab": "ok" - }, - "repomen": [ - { - "type": "RubinRepoMan", - "registry_url": "registry.hub.docker.com", - "repo": "lsstts/sal-sciplat-lab", - "recommended_tag": "recommended", - "num_releases": 0, - "num_weeklies": 3, - "num_dailies": 2, - "cycle": 20, - "alias_tags": [ - "latest", - "latest_daily", - "latest_weekly" - ] - } - ] - } +autostart: + jupyter: | + { + "name": "jupyter", + "labels": { + "jupyterlab": "ok" + }, + "repomen": [ + { + "type": "RubinRepoMan", + "registry_url": "ts-dockerhub.lsst.org", + "repo": "sal-sciplat-lab", + "recommended_tag": "recommended_c0026", + "num_releases": 0, + "num_weeklies": 3, + "num_dailies": 2, + "cycle": 26, + "alias_tags": [ + "latest", + "latest_daily", + "latest_weekly" + ] + } + ] + } diff --git a/services/cachemachine/values-ccin2p3.yaml b/services/cachemachine/values-ccin2p3.yaml new file mode 100644 index 0000000000..a5b8e8aef5 --- /dev/null +++ b/services/cachemachine/values-ccin2p3.yaml @@ -0,0 +1,17 @@ +autostart: + jupyter: | + { + "name": "jupyter", + "labels": {}, + "repomen": [ + { + "type": "RubinRepoMan", + "registry_url": "registry.hub.docker.com", + "repo": "lsstsqre/sciplat-lab", + "recommended_tag": "recommended", + "num_releases": 1, + "num_weeklies": 2, + "num_dailies": 3 + } + ] + } diff --git a/services/cachemachine/values-idfdev.yaml 
b/services/cachemachine/values-idfdev.yaml index b334bbd647..3c1b6dad90 100644 --- a/services/cachemachine/values-idfdev.yaml +++ b/services/cachemachine/values-idfdev.yaml @@ -1,39 +1,34 @@ -cachemachine: - imagePullSecrets: - - name: "cachemachine-secret" +serviceAccount: + annotations: { + iam.gke.io/gcp-service-account: cachemachine-wi@science-platform-dev-7696.iam.gserviceaccount.com + } - ingress: - enabled: true - host: "data-dev.lsst.cloud" - - vaultSecretsPath: "secret/k8s_operator/data-dev.lsst.cloud/pull-secret" - - autostart: - jupyter: | - { - "name": "jupyter", - "labels": {}, - "repomen": [ - { - "type": "SimpleRepoMan", - "images": [ - { - "image_url": "us-central1-docker.pkg.dev/rubin-shared-services-71ec/sciplat/sciplat-lab:exp_w_2022_06_pdf", - "name": "Experimental Weekly 2022_06 (PDF)" - }, - { - "image_url": "us-central1-docker.pkg.dev/rubin-shared-services-71ec/sciplat/sciplat-lab:exp_w_2022_06_tickets-DM-33448", - "name": "Experimental Weekly 2022_06 (tickets/DM-33448)" - }, - { - "image_url": "us-central1-docker.pkg.dev/rubin-shared-services-71ec/sciplat/sciplat-lab:exp_w_2022_05_shallowclone", - "name": "Experimental Weekly 2022_05 (shallowclone)" - }, - { - "image_url": "us-central1-docker.pkg.dev/rubin-shared-services-71ec/sciplat/sciplat-lab:exp_w_2022_04_mamba", - "name": "Experimental Weekly 2022_04 (mamba)" - } - ] - } - ] - } +autostart: + jupyter: | + { + "name": "jupyter", + "labels": {}, + "repomen": [ + { + "type": "RubinRepoGar", + "registry_url": "us-central1-docker.pkg.dev", + "gar_repository": "sciplat", + "gar_image": "sciplat-lab", + "project_id": "rubin-shared-services-71ec", + "location": "us-central1", + "recommended_tag": "recommended", + "num_releases": 1, + "num_weeklies": 2, + "num_dailies": 3 + }, + { + "type": "SimpleRepoMan", + "images": [ + { + "image_url": "us-central1-docker.pkg.dev/rubin-shared-services-71ec/sciplat/sciplat-lab:w_2022_40", + "name": "Weekly 2022_40" + } + ] + } + ] + } diff --git 
a/services/cachemachine/values-idfint.yaml b/services/cachemachine/values-idfint.yaml index eb3ccb8c86..4edfa8adc3 100644 --- a/services/cachemachine/values-idfint.yaml +++ b/services/cachemachine/values-idfint.yaml @@ -1,36 +1,34 @@ -cachemachine: - imagePullSecrets: - - name: "cachemachine-secret" +serviceAccount: + annotations: { + iam.gke.io/gcp-service-account: cachemachine-wi@science-platform-int-dc5d.iam.gserviceaccount.com + } - ingress: - enabled: true - host: "data-int.lsst.cloud" - - vaultSecretsPath: "secret/k8s_operator/data-int.lsst.cloud/pull-secret" - - autostart: - jupyter: | - { - "name": "jupyter", - "labels": {}, - "repomen": [ - { - "type": "RubinRepoMan", - "registry_url": "registry.hub.docker.com", - "repo": "lsstsqre/sciplat-lab", - "recommended_tag": "recommended", - "num_releases": 1, - "num_weeklies": 2, - "num_dailies": 3 - }, - { - "type": "SimpleRepoMan", - "images": [ - { - "image_url": "registry.hub.docker.com/lsstsqre/sciplat-lab:w_2021_49", - "name": "Weekly 2021_49" - } - ] - } - ] - } +autostart: + jupyter: | + { + "name": "jupyter", + "labels": {}, + "repomen": [ + { + "type": "RubinRepoGar", + "registry_url": "us-central1-docker.pkg.dev", + "gar_repository": "sciplat", + "gar_image": "sciplat-lab", + "project_id": "rubin-shared-services-71ec", + "location": "us-central1", + "recommended_tag": "recommended", + "num_releases": 1, + "num_weeklies": 2, + "num_dailies": 3 + }, + { + "type": "SimpleRepoMan", + "images": [ + { + "image_url": "us-central1-docker.pkg.dev/rubin-shared-services-71ec/sciplat/sciplat-lab:w_2022_40", + "name": "Weekly 2022_40" + } + ] + } + ] + } diff --git a/services/cachemachine/values-idfprod.yaml b/services/cachemachine/values-idfprod.yaml index 8d5b007bb6..b7fb4ac4a8 100644 --- a/services/cachemachine/values-idfprod.yaml +++ b/services/cachemachine/values-idfprod.yaml @@ -1,36 +1,34 @@ -cachemachine: - imagePullSecrets: - - name: "cachemachine-secret" +serviceAccount: + annotations: { + 
iam.gke.io/gcp-service-account: cachemachine-wi@science-platform-stable-6994.iam.gserviceaccount.com + } - ingress: - enabled: true - host: "data.lsst.cloud" - - vaultSecretsPath: "secret/k8s_operator/data.lsst.cloud/pull-secret" - - autostart: - jupyter: | - { - "name": "jupyter", - "labels": {}, - "repomen": [ - { - "type": "RubinRepoMan", - "registry_url": "registry.hub.docker.com", - "repo": "lsstsqre/sciplat-lab", - "recommended_tag": "recommended", - "num_releases": 1, - "num_weeklies": 2, - "num_dailies": 3 - }, - { - "type": "SimpleRepoMan", - "images": [ - { - "image_url": "registry.hub.docker.com/lsstsqre/sciplat-lab:w_2021_49", - "name": "Weekly 2021_49" - } - ] - } - ] - } +autostart: + jupyter: | + { + "name": "jupyter", + "labels": {}, + "repomen": [ + { + "type": "RubinRepoGar", + "registry_url": "us-central1-docker.pkg.dev", + "gar_repository": "sciplat", + "gar_image": "sciplat-lab", + "project_id": "rubin-shared-services-71ec", + "location": "us-central1", + "recommended_tag": "recommended", + "num_releases": 1, + "num_weeklies": 2, + "num_dailies": 3 + }, + { + "type": "SimpleRepoMan", + "images": [ + { + "image_url": "us-central1-docker.pkg.dev/rubin-shared-services-71ec/sciplat/sciplat-lab:w_2022_40", + "name": "Weekly 2022_40" + } + ] + } + ] + } diff --git a/services/cachemachine/values-int.yaml b/services/cachemachine/values-int.yaml deleted file mode 100644 index 2454bc8628..0000000000 --- a/services/cachemachine/values-int.yaml +++ /dev/null @@ -1,40 +0,0 @@ -cachemachine: - imagePullSecrets: - - name: "cachemachine-secret" - - ingress: - enabled: true - host: "lsst-lsp-int.ncsa.illinois.edu" - annotations: - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-int.ncsa.illinois.edu/auth?scope=exec:admin" - - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret" - - autostart: - jupyter: | - { - "name": "jupyter", - "labels": { - "jupyterlab": "ok" - }, - "repomen": [ - { - "type": "RubinRepoMan", - 
"registry_url": "registry.hub.docker.com", - "repo": "lsstsqre/sciplat-lab", - "recommended_tag": "recommended", - "num_releases": 1, - "num_weeklies": 2, - "num_dailies": 3 - }, - { - "type": "SimpleRepoMan", - "images": [ - { - "image_url": "registry.hub.docker.com/lsstsqre/sciplat-lab:w_2021_49", - "name": "Weekly 2021_49" - } - ] - } - ] - } diff --git a/services/cachemachine/values-minikube.yaml b/services/cachemachine/values-minikube.yaml index 47696d94c9..4369a6be97 100644 --- a/services/cachemachine/values-minikube.yaml +++ b/services/cachemachine/values-minikube.yaml @@ -1,27 +1,17 @@ -cachemachine: - imagePullSecrets: - - name: "cachemachine-secret" - - ingress: - enabled: true - host: "minikube.lsst.codes" - - vaultSecretsPath: "secret/k8s_operator/minikube.lsst.codes/pull-secret" - - autostart: - jupyter: | - { - "name": "jupyter", - "labels": {}, - "repomen": [ - { - "type": "RubinRepoMan", - "registry_url": "registry.hub.docker.com", - "repo": "lsstsqre/sciplat-lab", - "recommended_tag": "recommended", - "num_releases": 0, - "num_weeklies": 0, - "num_dailies": 0 - } - ] - } +autostart: + jupyter: | + { + "name": "jupyter", + "labels": {}, + "repomen": [ + { + "type": "RubinRepoMan", + "registry_url": "registry.hub.docker.com", + "repo": "lsstsqre/sciplat-lab", + "recommended_tag": "recommended", + "num_releases": 0, + "num_weeklies": 0, + "num_dailies": 0 + } + ] + } diff --git a/services/cachemachine/values-roe.yaml b/services/cachemachine/values-roe.yaml index 17b21ba571..a5b8e8aef5 100644 --- a/services/cachemachine/values-roe.yaml +++ b/services/cachemachine/values-roe.yaml @@ -1,28 +1,17 @@ -cachemachine: - imagePullSecrets: - - name: "cachemachine-secret" - - ingress: - enabled: true - host: "rsp.lsst.ac.uk" - - vaultSecretsPath: "secret/k8s_operator/roe/pull-secret" - - - autostart: - jupyter: | - { - "name": "jupyter", - "labels": {}, - "repomen": [ - { - "type": "RubinRepoMan", - "registry_url": "registry.hub.docker.com", - "repo": 
"lsstsqre/sciplat-lab", - "recommended_tag": "recommended", - "num_releases": 1, - "num_weeklies": 2, - "num_dailies": 3 - } - ] - } +autostart: + jupyter: | + { + "name": "jupyter", + "labels": {}, + "repomen": [ + { + "type": "RubinRepoMan", + "registry_url": "registry.hub.docker.com", + "repo": "lsstsqre/sciplat-lab", + "recommended_tag": "recommended", + "num_releases": 1, + "num_weeklies": 2, + "num_dailies": 3 + } + ] + } diff --git a/services/cachemachine/values-stable.yaml b/services/cachemachine/values-stable.yaml deleted file mode 100644 index 3c0597044f..0000000000 --- a/services/cachemachine/values-stable.yaml +++ /dev/null @@ -1,40 +0,0 @@ -cachemachine: - imagePullSecrets: - - name: "cachemachine-secret" - - ingress: - enabled: true - host: "lsst-lsp-stable.ncsa.illinois.edu" - annotations: - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-stable.ncsa.illinois.edu/auth?scope=exec:admin" - - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret" - - autostart: - jupyter: | - { - "name": "jupyter", - "labels": { - "jupyterlab": "ok" - }, - "repomen": [ - { - "type": "RubinRepoMan", - "registry_url": "registry.hub.docker.com", - "repo": "lsstsqre/sciplat-lab", - "recommended_tag": "recommended", - "num_releases": 1, - "num_weeklies": 2, - "num_dailies": 3 - }, - { - "type": "SimpleRepoMan", - "images": [ - { - "image_url": "registry.hub.docker.com/lsstsqre/sciplat-lab:w_2021_49", - "name": "Weekly 2021_49" - } - ] - } - ] - } diff --git a/services/cachemachine/values-summit.yaml b/services/cachemachine/values-summit.yaml index 8e57f44424..360bbaaa90 100644 --- a/services/cachemachine/values-summit.yaml +++ b/services/cachemachine/values-summit.yaml @@ -1,35 +1,23 @@ -cachemachine: - imagePullSecrets: - - name: "cachemachine-secret" - - ingress: - enabled: true - host: "summit-lsp.lsst.codes" - - vaultSecretsPath: "secret/k8s_operator/summit-lsp.lsst.codes/pull-secret" - - autostart: - jupyter: | - { - "name": 
"jupyter", - "labels": { - "jupyterlab": "ok" - }, - "repomen": [ - { - "type": "RubinRepoMan", - "registry_url": "ts-dockerhub.lsst.org", - "repo": "sal-sciplat-lab", - "recommended_tag": "recommended", - "num_releases": 0, - "num_weeklies": 3, - "num_dailies": 2, - "cycle": 24, - "alias_tags": [ - "latest", - "latest_daily", - "latest_weekly" - ] - } - ] - } +autostart: + jupyter: | + { + "name": "jupyter", + "labels": {}, + "repomen": [ + { + "type": "RubinRepoMan", + "registry_url": "ts-dockerhub.lsst.org", + "repo": "sal-sciplat-lab", + "recommended_tag": "recommended_c0028", + "num_releases": 0, + "num_weeklies": 3, + "num_dailies": 2, + "cycle": 28, + "alias_tags": [ + "latest", + "latest_daily", + "latest_weekly" + ] + } + ] + } diff --git a/services/cachemachine/values-tucson-teststand.yaml b/services/cachemachine/values-tucson-teststand.yaml index 8c0a7cc763..e53cdba1fa 100644 --- a/services/cachemachine/values-tucson-teststand.yaml +++ b/services/cachemachine/values-tucson-teststand.yaml @@ -1,33 +1,23 @@ -cachemachine: - imagePullSecrets: - - name: "cachemachine-secret" - - ingress: - enabled: true - host: "tucson-teststand.lsst.codes" - - vaultSecretsPath: "secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret" - - autostart: - jupyter: | - { - "name": "jupyter", - "labels": {}, - "repomen": [ - { - "type": "RubinRepoMan", - "registry_url": "ts-dockerhub.lsst.org", - "repo": "sal-sciplat-lab", - "recommended_tag": "recommended", - "num_releases": 1, - "num_weeklies": 3, - "num_dailies": 2, - "cycle": 24, - "alias_tags": [ - "latest", - "latest_daily", - "latest_weekly" - ] - } - ] - } +autostart: + jupyter: | + { + "name": "jupyter", + "labels": {}, + "repomen": [ + { + "type": "RubinRepoMan", + "registry_url": "ts-dockerhub.lsst.org", + "repo": "sal-sciplat-lab", + "recommended_tag": "recommended_c0029", + "num_releases": 1, + "num_weeklies": 3, + "num_dailies": 2, + "cycle": 29, + "alias_tags": [ + "latest", + "latest_daily", + "latest_weekly" 
+ ] + } + ] + } diff --git a/services/cachemachine/values.yaml b/services/cachemachine/values.yaml new file mode 100644 index 0000000000..f6c7d38961 --- /dev/null +++ b/services/cachemachine/values.yaml @@ -0,0 +1,70 @@ +# Default values for cachemachine. + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +image: + # -- cachemachine image to use + repository: lsstsqre/cachemachine + + # -- Pull policy for the cachemachine image + pullPolicy: IfNotPresent + + # -- Tag of cachemachine image to use + # @default -- The appVersion of the chart + tag: "" + +# -- Secret names to use for all Docker pulls +serviceAccount: + # -- Name of the service account to use + # @default -- Name based on the fullname template + name: "" + + # -- Annotations to add to the service account + annotations: {} + +ingress: + # -- Additional annotations to add for endpoints that are authenticated + annotations: {} + + # -- Additional annotations to add for endpoints that allow anonymous + # access, such as `/*/available` + anonymousAnnotations: {} + +# -- Resource limits and requests for the cachemachine frontend pod +resources: {} + +# -- Annotations for the cachemachine frontend pod +podAnnotations: {} + +# -- Node selector rules for the cachemachine frontend pod +nodeSelector: {} + +# -- Tolerations for the cachemachine frontend pod +tolerations: [] + +# -- Affinity rules for the cachemachine frontend pod +affinity: {} + +# -- Autostart configuration. Each key is the name of a class of images to +# pull, and the value is the JSON specification for which and how many images +# to pull. +autostart: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. 
+global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/cert-issuer/Chart.yaml b/services/cert-issuer/Chart.yaml deleted file mode 100644 index f82895ba32..0000000000 --- a/services/cert-issuer/Chart.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v2 -name: cert-issuer -version: 1.0.0 -dependencies: - - name: cert-issuer - version: 1.0.0 - repository: https://lsst-sqre.github.io/charts/ diff --git a/services/cert-issuer/README.md b/services/cert-issuer/README.md deleted file mode 100644 index 8e183b2b21..0000000000 --- a/services/cert-issuer/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# cert-issuer - -Set up a cert-manager cluster issuer for a Science Platform environment. -Only used in environments where we control our own certificates. -The issuer is separate from the cert-manager application to support environments where someone else manages cert-manager. 
diff --git a/services/cert-issuer/values-base.yaml b/services/cert-issuer/values-base.yaml deleted file mode 100644 index ff5c31ee08..0000000000 --- a/services/cert-issuer/values-base.yaml +++ /dev/null @@ -1,7 +0,0 @@ -cert-issuer: - config: - email: "sqre-admin@lists.lsst.org" - route53: - awsAccessKeyId: "AKIAQSJOS2SFLUEVXZDB" - hostedZone: "Z06873202D7WVTZUFOQ42" - vaultSecretPath: "secret/k8s_operator/base-lsp.lsst.codes/cert-manager" diff --git a/services/cert-issuer/values-idfdev.yaml b/services/cert-issuer/values-idfdev.yaml deleted file mode 100644 index 8acbacb82d..0000000000 --- a/services/cert-issuer/values-idfdev.yaml +++ /dev/null @@ -1,7 +0,0 @@ -cert-issuer: - config: - email: "sqre-admin@lists.lsst.org" - route53: - awsAccessKeyId: "AKIAQSJOS2SFL5I4TYND" - hostedZone: "Z0567328105IEHEMIXLCO" - vaultSecretPath: "secret/k8s_operator/data-dev.lsst.cloud/cert-manager" diff --git a/services/cert-issuer/values-idfint.yaml b/services/cert-issuer/values-idfint.yaml deleted file mode 100644 index 19689bb194..0000000000 --- a/services/cert-issuer/values-idfint.yaml +++ /dev/null @@ -1,7 +0,0 @@ -cert-issuer: - config: - email: "sqre-admin@lists.lsst.org" - route53: - awsAccessKeyId: "AKIAQSJOS2SFL5I4TYND" - hostedZone: "Z0567328105IEHEMIXLCO" - vaultSecretPath: "secret/k8s_operator/data-int.lsst.cloud/cert-manager" diff --git a/services/cert-issuer/values-idfprod.yaml b/services/cert-issuer/values-idfprod.yaml deleted file mode 100644 index 84a5d4a1f4..0000000000 --- a/services/cert-issuer/values-idfprod.yaml +++ /dev/null @@ -1,7 +0,0 @@ -cert-issuer: - config: - email: "sqre-admin@lists.lsst.org" - route53: - awsAccessKeyId: "AKIAQSJOS2SFL5I4TYND" - hostedZone: "Z0567328105IEHEMIXLCO" - vaultSecretPath: "secret/k8s_operator/data.lsst.cloud/cert-manager" diff --git a/services/cert-issuer/values-minikube.yaml b/services/cert-issuer/values-minikube.yaml deleted file mode 100644 index 5da7cbb959..0000000000 --- a/services/cert-issuer/values-minikube.yaml +++ 
/dev/null @@ -1,7 +0,0 @@ -cert-issuer: - config: - email: "sqre-admin@lists.lsst.org" - route53: - awsAccessKeyId: "AKIAQSJOS2SFLUEVXZDB" - hostedZone: "Z06873202D7WVTZUFOQ42" - vaultSecretPath: "secret/k8s_operator/minikube.lsst.codes/cert-manager" diff --git a/services/cert-issuer/values-red-five.yaml b/services/cert-issuer/values-red-five.yaml deleted file mode 100644 index a19ce460d3..0000000000 --- a/services/cert-issuer/values-red-five.yaml +++ /dev/null @@ -1,7 +0,0 @@ -cert-issuer: - config: - email: "sqre-admin@lists.lsst.org" - route53: - awsAccessKeyId: "AKIAQSJOS2SFLUEVXZDB" - hostedZone: "Z06873202D7WVTZUFOQ42" - vaultSecretPath: "secret/k8s_operator/red-five.lsst.codes/cert-manager" diff --git a/services/cert-issuer/values-roe.yaml b/services/cert-issuer/values-roe.yaml deleted file mode 100644 index fe52807d9b..0000000000 --- a/services/cert-issuer/values-roe.yaml +++ /dev/null @@ -1,7 +0,0 @@ -cert-issuer: - config: - email: "rsp@roe.ac.uk" - route53: - awsAccessKeyId: "AKIAQSJOS2SFL5I4TYND" - hostedZone: "Z0567328105IEHEMIXLCO" - vaultSecretPath: "secret/k8s_operator/roe/cert-manager" diff --git a/services/cert-issuer/values-squash-sandbox.yaml b/services/cert-issuer/values-squash-sandbox.yaml deleted file mode 100644 index 3939cfc204..0000000000 --- a/services/cert-issuer/values-squash-sandbox.yaml +++ /dev/null @@ -1,7 +0,0 @@ -cert-issuer: - config: - email: "sqre-admin@lists.lsst.org" - route53: - awsAccessKeyId: "AKIAQSJOS2SFLUEVXZDB" - hostedZone: "Z06873202D7WVTZUFOQ42" - vaultSecretPath: "secret/k8s_operator/squash-sandbox.lsst.codes/cert-manager" diff --git a/services/cert-issuer/values-summit.yaml b/services/cert-issuer/values-summit.yaml deleted file mode 100644 index 579d9f243b..0000000000 --- a/services/cert-issuer/values-summit.yaml +++ /dev/null @@ -1,7 +0,0 @@ -cert-issuer: - config: - email: "sqre-admin@lists.lsst.org" - route53: - awsAccessKeyId: "AKIAQSJOS2SFLUEVXZDB" - hostedZone: "Z06873202D7WVTZUFOQ42" - vaultSecretPath: 
"secret/k8s_operator/summit-lsp.lsst.codes/cert-manager" diff --git a/services/cert-issuer/values-tucson-teststand.yaml b/services/cert-issuer/values-tucson-teststand.yaml deleted file mode 100644 index cf206b9d87..0000000000 --- a/services/cert-issuer/values-tucson-teststand.yaml +++ /dev/null @@ -1,7 +0,0 @@ -cert-issuer: - config: - email: "sqre-admin@lists.lsst.org" - route53: - awsAccessKeyId: "AKIAQSJOS2SFLUEVXZDB" - hostedZone: "Z06873202D7WVTZUFOQ42" - vaultSecretPath: "secret/k8s_operator/tucson-teststand.lsst.codes/cert-manager" diff --git a/services/cert-manager/Chart.yaml b/services/cert-manager/Chart.yaml index 1744880ef8..cc4e84074c 100644 --- a/services/cert-manager/Chart.yaml +++ b/services/cert-manager/Chart.yaml @@ -1,10 +1,11 @@ apiVersion: v2 name: cert-manager version: 1.0.0 +description: TLS certificate manager +home: https://cert-manager.io/ +sources: + - https://github.com/cert-manager/cert-manager dependencies: -- name: cert-manager - version: v1.7.1 - repository: https://charts.jetstack.io -- name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ + - name: cert-manager + version: v1.11.0 + repository: https://charts.jetstack.io diff --git a/services/cert-manager/README.md b/services/cert-manager/README.md new file mode 100644 index 0000000000..1d4e1116ec --- /dev/null +++ b/services/cert-manager/README.md @@ -0,0 +1,22 @@ +# cert-manager + +TLS certificate manager + +**Homepage:** + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| cert-manager | object | Install CRDs, force use of Google and Cloudfront DNS servers | Configuration for upstream cert-manager chart | +| config.createIssuer | bool | `true` | Whether to create a Let's Encrypt DNS-based cluster issuer | +| config.email | string | sqre-admin | Contact email address registered with Let's Encrypt | +| config.route53.awsAccessKeyId | string | None, must be set if `createIssuer` is true 
| AWS access key ID for Route 53 (must match `aws-secret-access-key` in Vault secret referenced by `config.vaultSecretPath`) | +| config.route53.hostedZone | string | None, must be set if `createIssuer` is true | Route 53 hosted zone in which to create challenge records | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| nameOverride | string | `""` | Override the base name for resources | diff --git a/services/cert-manager/templates/_helpers.tpl b/services/cert-manager/templates/_helpers.tpl new file mode 100644 index 0000000000..8707bd4fcd --- /dev/null +++ b/services/cert-manager/templates/_helpers.tpl @@ -0,0 +1,52 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "cert-manager.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "cert-manager.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "cert-manager.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "cert-manager.labels" -}} +helm.sh/chart: {{ include "cert-manager.chart" . 
}} +{{ include "cert-manager.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "cert-manager.selectorLabels" -}} +app.kubernetes.io/name: {{ include "cert-manager.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/cert-manager/templates/cluster-issuer.yaml b/services/cert-manager/templates/cluster-issuer.yaml new file mode 100644 index 0000000000..f36dddfceb --- /dev/null +++ b/services/cert-manager/templates/cluster-issuer.yaml @@ -0,0 +1,24 @@ +{{- if .Values.config.createIssuer -}} +apiVersion: cert-manager.io/v1 +kind: ClusterIssuer +metadata: + name: "letsencrypt-dns" + labels: + {{- include "cert-manager.labels" . | nindent 4 }} +spec: + acme: + email: {{ required "config.email must be set" .Values.config.email | quote }} + server: "https://acme-v02.api.letsencrypt.org/directory" + privateKeySecretRef: + name: {{ include "cert-manager.fullname" . }}-letsencrypt + solvers: + - dns01: + cnameStrategy: "Follow" + route53: + region: "us-east-1" + accessKeyID: {{ required "config.route53.awsAccessKeyId must be set" .Values.config.route53.awsAccessKeyId | quote }} + hostedZoneID: {{ required "config.route53.hostedZone must be set" .Values.config.route53.hostedZone | quote }} + secretAccessKeySecretRef: + name: {{ include "cert-manager.fullname" . }} + key: "aws-secret-access-key" +{{- end }} diff --git a/services/cert-manager/templates/vault-secrets.yaml b/services/cert-manager/templates/vault-secrets.yaml new file mode 100644 index 0000000000..85cd69ef37 --- /dev/null +++ b/services/cert-manager/templates/vault-secrets.yaml @@ -0,0 +1,11 @@ +{{- if .Values.config.createIssuer -}} +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ include "cert-manager.fullname" . }} + labels: + {{- include "cert-manager.labels" . 
| nindent 4 }} +spec: + path: "{{ required "global.vaultSecretsPath must be set" .Values.global.vaultSecretsPath }}/cert-manager" + type: Opaque +{{- end }} diff --git a/services/cert-manager/values-base.yaml b/services/cert-manager/values-base.yaml index f9df5cd28d..958b34c026 100644 --- a/services/cert-manager/values-base.yaml +++ b/services/cert-manager/values-base.yaml @@ -1,9 +1,4 @@ -cert-manager: - installCRDs: true - extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/pull-secret +config: + route53: + awsAccessKeyId: "AKIAQSJOS2SFLUEVXZDB" + hostedZone: "Z06873202D7WVTZUFOQ42" diff --git a/services/cert-manager/values-ccin2p3.yaml b/services/cert-manager/values-ccin2p3.yaml new file mode 100644 index 0000000000..a311844928 --- /dev/null +++ b/services/cert-manager/values-ccin2p3.yaml @@ -0,0 +1,2 @@ +config: + createIssuer: false diff --git a/services/cert-manager/values-idfdev.yaml b/services/cert-manager/values-idfdev.yaml index b106943dd5..b1676a6375 100644 --- a/services/cert-manager/values-idfdev.yaml +++ b/services/cert-manager/values-idfdev.yaml @@ -1,9 +1,4 @@ -cert-manager: - installCRDs: true - extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/pull-secret +config: + route53: + awsAccessKeyId: "AKIAQSJOS2SFL5I4TYND" + hostedZone: "Z0567328105IEHEMIXLCO" diff --git a/services/cert-manager/values-idfint.yaml b/services/cert-manager/values-idfint.yaml index 708eb5566f..b1676a6375 100644 --- a/services/cert-manager/values-idfint.yaml +++ b/services/cert-manager/values-idfint.yaml @@ -1,9 +1,4 @@ -cert-manager: - installCRDs: true - extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: 
secret/k8s_operator/data-int.lsst.cloud/pull-secret +config: + route53: + awsAccessKeyId: "AKIAQSJOS2SFL5I4TYND" + hostedZone: "Z0567328105IEHEMIXLCO" diff --git a/services/cert-manager/values-idfprod.yaml b/services/cert-manager/values-idfprod.yaml index c43fef53b1..b1676a6375 100644 --- a/services/cert-manager/values-idfprod.yaml +++ b/services/cert-manager/values-idfprod.yaml @@ -1,9 +1,4 @@ -cert-manager: - installCRDs: true - extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/pull-secret +config: + route53: + awsAccessKeyId: "AKIAQSJOS2SFL5I4TYND" + hostedZone: "Z0567328105IEHEMIXLCO" diff --git a/services/cert-manager/values-int.yaml b/services/cert-manager/values-int.yaml deleted file mode 100644 index e4ace2b5d9..0000000000 --- a/services/cert-manager/values-int.yaml +++ /dev/null @@ -1,9 +0,0 @@ -cert-manager: - installCRDs: true - extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret diff --git a/services/cert-manager/values-minikube.yaml b/services/cert-manager/values-minikube.yaml index 329b8ef770..a311844928 100644 --- a/services/cert-manager/values-minikube.yaml +++ b/services/cert-manager/values-minikube.yaml @@ -1,9 +1,2 @@ -cert-manager: - installCRDs: true - extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/pull-secret +config: + createIssuer: false diff --git a/services/cert-manager/values-red-five.yaml b/services/cert-manager/values-red-five.yaml deleted file mode 100644 index a3e61a2a8f..0000000000 --- a/services/cert-manager/values-red-five.yaml +++ /dev/null @@ -1,9 +0,0 @@ -cert-manager: - installCRDs: true - extraArgs: - - 
--dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git a/services/cert-manager/values-roe.yaml b/services/cert-manager/values-roe.yaml index 113b9af27d..2c27644d49 100644 --- a/services/cert-manager/values-roe.yaml +++ b/services/cert-manager/values-roe.yaml @@ -1,9 +1,5 @@ -cert-manager: - installCRDs: true - extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: secret/k8s_operator/roe/pull-secret +config: + email: "rsp@roe.ac.uk" + route53: + awsAccessKeyId: "AKIAQSJOS2SFL5I4TYND" + hostedZone: "Z0567328105IEHEMIXLCO" diff --git a/services/cert-manager/values-stable.yaml b/services/cert-manager/values-stable.yaml deleted file mode 100644 index 580d528f76..0000000000 --- a/services/cert-manager/values-stable.yaml +++ /dev/null @@ -1,9 +0,0 @@ -cert-manager: - installCRDs: true - extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret diff --git a/services/cert-manager/values-summit.yaml b/services/cert-manager/values-summit.yaml index 9368f1e2fe..958b34c026 100644 --- a/services/cert-manager/values-summit.yaml +++ b/services/cert-manager/values-summit.yaml @@ -1,9 +1,4 @@ -cert-manager: - installCRDs: true - extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/pull-secret +config: + route53: + awsAccessKeyId: "AKIAQSJOS2SFLUEVXZDB" + hostedZone: "Z06873202D7WVTZUFOQ42" diff --git a/services/cert-manager/values-tucson-teststand.yaml b/services/cert-manager/values-tucson-teststand.yaml index 2ee2048b5d..958b34c026 100644 --- 
a/services/cert-manager/values-tucson-teststand.yaml +++ b/services/cert-manager/values-tucson-teststand.yaml @@ -1,9 +1,4 @@ -cert-manager: - installCRDs: true - extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 - -pull-secret: - enabled: true - path: secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret +config: + route53: + awsAccessKeyId: "AKIAQSJOS2SFLUEVXZDB" + hostedZone: "Z06873202D7WVTZUFOQ42" diff --git a/services/cert-manager/values.yaml b/services/cert-manager/values.yaml index bdbcf1edf0..760b83fe1e 100644 --- a/services/cert-manager/values.yaml +++ b/services/cert-manager/values.yaml @@ -1,5 +1,39 @@ +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +config: + # -- Whether to create a Let's Encrypt DNS-based cluster issuer + createIssuer: true + + # -- Contact email address registered with Let's Encrypt + # @default -- sqre-admin + email: "sqre-admin@lists.lsst.org" + + # Currently, DNS with Route 53 is the only supported solver mechanism + route53: + # -- AWS access key ID for Route 53 (must match `aws-secret-access-key` in + # Vault secret referenced by `config.vaultSecretPath`) + # @default -- None, must be set if `createIssuer` is true + awsAccessKeyId: "" + + # -- Route 53 hosted zone in which to create challenge records + # @default -- None, must be set if `createIssuer` is true + hostedZone: "" + +# -- Configuration for upstream cert-manager chart +# @default -- Install CRDs, force use of Google and Cloudfront DNS servers cert-manager: installCRDs: true extraArgs: - - --dns01-recursive-nameservers-only - - --dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53 + - "--dns01-recursive-nameservers-only" + - "--dns01-recursive-nameservers=8.8.8.8:53,1.1.1.1:53" + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment 
values files. +global: + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/datalinker/Chart.yaml b/services/datalinker/Chart.yaml index c31c766d03..90e29e1e7f 100644 --- a/services/datalinker/Chart.yaml +++ b/services/datalinker/Chart.yaml @@ -1,10 +1,12 @@ apiVersion: v2 name: datalinker version: 1.0.0 -dependencies: - - name: datalinker - version: 0.1.6 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +description: IVOA DataLink-based service and data discovery +sources: + - https://github.com/lsst-sqre/datalinker +appVersion: 1.5.2 +annotations: + phalanx.lsst.io/docs: | + - id: "DMTN-238" + title: "RSP DataLink service implementation strategy" + url: "https://dmtn-238.lsst.io/" diff --git a/services/datalinker/README.md b/services/datalinker/README.md new file mode 100644 index 0000000000..a3cfbd378f --- /dev/null +++ b/services/datalinker/README.md @@ -0,0 +1,33 @@ +# datalinker + +IVOA DataLink-based service and data discovery + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the datalinker deployment pod | +| autoscaling.enabled | bool | `false` | Enable autoscaling of datalinker deployment | +| autoscaling.maxReplicas | int | `100` | Maximum number of datalinker deployment pods | +| autoscaling.minReplicas | int | `1` | Minimum number of datalinker deployment pods | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization of datalinker deployment pods | +| config.tapMetadataUrl | string | `"https://github.com/lsst/sdm_schemas/releases/download/1.2.0/datalink-columns.zip"` | URL containing TAP schema metadata used to construct queries | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | 
Set by Argo CD | Base URL for the environment | +| global.butlerRepositoryIndex | string | Set by Argo CD | URI to the Butler configuration of available repositories | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the datalinker image | +| image.repository | string | `"ghcr.io/lsst-sqre/datalinker"` | Image to use in the datalinker deployment | +| image.tag | string | `""` | Overrides the image tag whose default is the chart appVersion. | +| ingress.annotations | object | `{}` | Additional annotations for the ingresses | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selection rules for the datalinker deployment pod | +| podAnnotations | object | `{}` | Annotations for the datalinker deployment pod | +| replicaCount | int | `1` | Number of datalinker deployment pods to start | +| resources | object | `{}` | Resource limits and requests for the datalinker deployment pod | +| tolerations | list | `[]` | Tolerations for the datalinker deployment pod | diff --git a/services/datalinker/templates/_helpers.tpl b/services/datalinker/templates/_helpers.tpl new file mode 100644 index 0000000000..eb7efad489 --- /dev/null +++ b/services/datalinker/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "datalinker.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name.
+*/}} +{{- define "datalinker.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "datalinker.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "datalinker.labels" -}} +helm.sh/chart: {{ include "datalinker.chart" . }} +{{ include "datalinker.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "datalinker.selectorLabels" -}} +app.kubernetes.io/name: {{ include "datalinker.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/datalinker/templates/deployment.yaml b/services/datalinker/templates/deployment.yaml new file mode 100644 index 0000000000..3838890aac --- /dev/null +++ b/services/datalinker/templates/deployment.yaml @@ -0,0 +1,105 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "datalinker.fullname" . }} + labels: + {{- include "datalinker.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "datalinker.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "datalinker.selectorLabels" . 
| nindent 8 }} + spec: + automountServiceAccountToken: false + imagePullSecrets: + - name: "pull-secret" + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + fsGroup: 1000 + containers: + - name: {{ .Chart.Name }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - all + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: "DATALINKER_CUTOUT_SYNC_URL" + value: "{{ .Values.global.baseUrl }}/api/cutout/sync" + - name: "DATALINKER_HIPS_BASE_URL" + value: "{{ .Values.global.baseUrl }}/api/hips" + {{- if .Values.config.tapMetadataUrl }} + - name: "DATALINKER_TAP_METADATA_DIR" + value: "/tmp/tap-metadata" + - name: "DATALINKER_TAP_METADATA_URL" + value: {{ .Values.config.tapMetadataUrl | quote }} + {{- end }} + - name: "DATALINKER_TOKEN" + valueFrom: + secretKeyRef: + name: {{ include "datalinker.fullname" . }}-gafaelfawr-token + key: "token" + # The following are used by Butler to retrieve its configuration + # and authenticate to its database. + - name: "AWS_SHARED_CREDENTIALS_FILE" + value: "/tmp/secrets/aws-credentials.ini" + - name: "DAF_BUTLER_REPOSITORY_INDEX" + value: {{ .Values.global.butlerRepositoryIndex | quote }} + - name: "PGPASSFILE" + value: "/tmp/secrets/postgres-credentials.txt" + - name: "S3_ENDPOINT_URL" + value: "https://storage.googleapis.com" + - name: "GOOGLE_APPLICATION_CREDENTIALS" + value: "/tmp/secrets/butler-gcs-idf-creds.json" + ports: + - name: http + containerPort: 8080 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: "butler-secret" + mountPath: "/etc/butler/secrets" + - name: "tmp" + mountPath: "/tmp" + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: "butler-secret" + secret: + secretName: {{ template "datalinker.fullname" . }}-butler-secret + - name: "tmp" + emptyDir: {} diff --git a/services/datalinker/templates/gafaelfawr-token.yaml b/services/datalinker/templates/gafaelfawr-token.yaml new file mode 100644 index 0000000000..2d55ce2b8f --- /dev/null +++ b/services/datalinker/templates/gafaelfawr-token.yaml @@ -0,0 +1,10 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrServiceToken +metadata: + name: {{ include "datalinker.fullname" . }}-gafaelfawr-token + labels: + {{- include "datalinker.labels" . | nindent 4 }} +spec: + service: "bot-datalinker" + scopes: + - "read:image" diff --git a/services/datalinker/templates/hpa.yaml b/services/datalinker/templates/hpa.yaml new file mode 100644 index 0000000000..0df24ad67a --- /dev/null +++ b/services/datalinker/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "datalinker.fullname" . }} + labels: + {{- include "datalinker.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "datalinker.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/services/datalinker/templates/ingress-anonymous.yaml b/services/datalinker/templates/ingress-anonymous.yaml new file mode 100644 index 0000000000..8c8ab0c86f --- /dev/null +++ b/services/datalinker/templates/ingress-anonymous.yaml @@ -0,0 +1,29 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ include "datalinker.fullname" . }}-anonymous + labels: + {{- include "datalinker.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + anonymous: true +template: + metadata: + name: {{ include "datalinker.fullname" . }}-anonymous + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/api/hips/list" + pathType: "Exact" + backend: + service: + name: {{ include "datalinker.fullname" . }} + port: + number: 8080 diff --git a/services/datalinker/templates/ingress-image.yaml b/services/datalinker/templates/ingress-image.yaml new file mode 100644 index 0000000000..889ba5e5ca --- /dev/null +++ b/services/datalinker/templates/ingress-image.yaml @@ -0,0 +1,30 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ include "datalinker.fullname" . }}-image + labels: + {{- include "datalinker.labels" . 
| nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "read:image" +template: + metadata: + name: {{ include "datalinker.fullname" . }}-image + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/api/datalink/links" + pathType: "Exact" + backend: + service: + name: {{ include "datalinker.fullname" . }} + port: + number: 8080 diff --git a/services/datalinker/templates/ingress-tap.yaml b/services/datalinker/templates/ingress-tap.yaml new file mode 100644 index 0000000000..107229fcf1 --- /dev/null +++ b/services/datalinker/templates/ingress-tap.yaml @@ -0,0 +1,30 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ include "datalinker.fullname" . }}-tap + labels: + {{- include "datalinker.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "read:tap" +template: + metadata: + name: {{ include "datalinker.fullname" . }}-tap + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/api/datalink" + pathType: "Prefix" + backend: + service: + name: {{ include "datalinker.fullname" . }} + port: + number: 8080 diff --git a/services/datalinker/templates/networkpolicy.yaml b/services/datalinker/templates/networkpolicy.yaml new file mode 100644 index 0000000000..0bcf4940a4 --- /dev/null +++ b/services/datalinker/templates/networkpolicy.yaml @@ -0,0 +1,21 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "datalinker.fullname" . }} +spec: + podSelector: + matchLabels: + {{- include "datalinker.selectorLabels" . 
| nindent 6 }} + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/datalinker/templates/service.yaml b/services/datalinker/templates/service.yaml new file mode 100644 index 0000000000..20bf10e5d9 --- /dev/null +++ b/services/datalinker/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "datalinker.fullname" . }} + labels: + {{- include "datalinker.labels" . | nindent 4 }} +spec: + type: "ClusterIP" + ports: + - port: 8080 + targetPort: "http" + protocol: "TCP" + name: "http" + selector: + {{- include "datalinker.selectorLabels" . | nindent 4 }} diff --git a/services/datalinker/templates/vault-secrets.yaml b/services/datalinker/templates/vault-secrets.yaml new file mode 100644 index 0000000000..6386737511 --- /dev/null +++ b/services/datalinker/templates/vault-secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ template "datalinker.fullname" . }}-butler-secret + labels: + {{- include "datalinker.labels" . 
| nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/butler-secret" + type: Opaque diff --git a/services/datalinker/values-ccin2p3.yaml b/services/datalinker/values-ccin2p3.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/datalinker/values-idfdev.yaml b/services/datalinker/values-idfdev.yaml index 6c4087ba4e..e69de29bb2 100644 --- a/services/datalinker/values-idfdev.yaml +++ b/services/datalinker/values-idfdev.yaml @@ -1,8 +0,0 @@ -datalinker: - ingress: - enabled: true - host: "data-dev.lsst.cloud" - -pull-secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/pull-secret diff --git a/services/datalinker/values-idfint.yaml b/services/datalinker/values-idfint.yaml index 68e40deab8..e69de29bb2 100644 --- a/services/datalinker/values-idfint.yaml +++ b/services/datalinker/values-idfint.yaml @@ -1,8 +0,0 @@ -datalinker: - ingress: - enabled: true - host: "data-int.lsst.cloud" - -pull-secret: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/pull-secret diff --git a/services/datalinker/values-idfprod.yaml b/services/datalinker/values-idfprod.yaml index e49b8e90bc..e69de29bb2 100644 --- a/services/datalinker/values-idfprod.yaml +++ b/services/datalinker/values-idfprod.yaml @@ -1,8 +0,0 @@ -datalinker: - ingress: - enabled: true - host: "data.lsst.cloud" - -pull-secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/pull-secret diff --git a/services/datalinker/values-int.yaml b/services/datalinker/values-int.yaml deleted file mode 100644 index f40119de59..0000000000 --- a/services/datalinker/values-int.yaml +++ /dev/null @@ -1,8 +0,0 @@ -datalinker: - ingress: - enabled: true - host: "lsst-lsp-int.ncsa.illinois.edu" - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret diff --git a/services/datalinker/values-minikube.yaml b/services/datalinker/values-minikube.yaml index 0ddc95f5a5..e69de29bb2 100644 --- 
a/services/datalinker/values-minikube.yaml +++ b/services/datalinker/values-minikube.yaml @@ -1,8 +0,0 @@ -datalinker: - ingress: - enabled: true - host: "minikube.lsst.cloud" - -pull-secret: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/pull-secret diff --git a/services/datalinker/values-red-five.yaml b/services/datalinker/values-red-five.yaml deleted file mode 100644 index ea01b10a25..0000000000 --- a/services/datalinker/values-red-five.yaml +++ /dev/null @@ -1,8 +0,0 @@ -datalinker: - ingress: - enabled: true - host: "red-five.lsst.codes" - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git a/services/datalinker/values-stable.yaml b/services/datalinker/values-stable.yaml deleted file mode 100644 index eadcb995c2..0000000000 --- a/services/datalinker/values-stable.yaml +++ /dev/null @@ -1,8 +0,0 @@ -datalinker: - ingress: - enabled: true - host: "lsst-lsp-stable.ncsa.illinois.edu" - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret diff --git a/services/datalinker/values.yaml b/services/datalinker/values.yaml new file mode 100644 index 0000000000..e88ca255eb --- /dev/null +++ b/services/datalinker/values.yaml @@ -0,0 +1,78 @@ +# Default values for datalinker. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Number of datalinker deployment pods to start +replicaCount: 1 + +image: + # -- Image to use in the datalinker deployment + repository: "ghcr.io/lsst-sqre/datalinker" + + # -- Pull policy for the datalinker image + pullPolicy: "IfNotPresent" + + # -- Overrides the image tag whose default is the chart appVersion.
+ tag: "" + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +ingress: + # -- Additional annotations for the ingresses + annotations: {} + +autoscaling: + # -- Enable autoscaling of datalinker deployment + enabled: false + + # -- Minimum number of datalinker deployment pods + minReplicas: 1 + + # -- Maximum number of datalinker deployment pods + maxReplicas: 100 + + # -- Target CPU utilization of datalinker deployment pods + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +config: + # -- URL containing TAP schema metadata used to construct queries + tapMetadataUrl: "https://github.com/lsst/sdm_schemas/releases/download/1.2.0/datalink-columns.zip" + +# -- Annotations for the datalinker deployment pod +podAnnotations: {} + +# -- Resource limits and requests for the datalinker deployment pod +resources: {} + +# -- Node selection rules for the datalinker deployment pod +nodeSelector: {} + +# -- Tolerations for the datalinker deployment pod +tolerations: [] + +# -- Affinity rules for the datalinker deployment pod +affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. 
+global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- URI to the Butler configuration of available repositories + # @default -- Set by Argo CD + butlerRepositoryIndex: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/exposurelog/Chart.yaml b/services/exposurelog/Chart.yaml index ef1bd0ba94..9de9651aac 100644 --- a/services/exposurelog/Chart.yaml +++ b/services/exposurelog/Chart.yaml @@ -1,10 +1,15 @@ apiVersion: v2 name: exposurelog -version: 0.0.1 -dependencies: -- name: exposurelog - version: ">=0.1.0" - repository: https://lsst-sqre.github.io/charts/ -- name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +description: Log messages related to an exposure +type: application +sources: + - https://github.com/lsst-sqre/exposurelog + +# The chart version. SQuaRE convention is to use 1.0.0 +version: 1.0.0 + +# This is the version number of the application being deployed. This version +# number should be incremented each time you make changes to the +# application. Versions are not expected to follow Semantic Versioning. They +# should reflect the version the application is using. 
+appVersion: 0.9.6 diff --git a/services/exposurelog/README.md b/services/exposurelog/README.md new file mode 100644 index 0000000000..da67d864e9 --- /dev/null +++ b/services/exposurelog/README.md @@ -0,0 +1,47 @@ +# exposurelog + +Log messages related to an exposure + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the exposurelog pod | +| autoscaling | object | `{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80,"targetMemoryUtilizationPercentage":80}` | Exposurelog autoscaling settings | +| autoscaling.enabled | bool | false | enable exposurelog autoscaling | +| autoscaling.maxReplicas | int | `100` | maximum number of exposurelog replicas | +| autoscaling.minReplicas | int | `1` | minimum number of exposurelog replicas | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization for exposurelog pod autoscale calculations | +| autoscaling.targetMemoryUtilizationPercentage | int | `80` | Target memory utilization for exposurelog pod autoscale calculations | +| config | object | `{"butler_uri_1":"","butler_uri_2":"","nfs_path_1":"","nfs_path_2":"","nfs_server_1":"","nfs_server_2":"","site_id":""}` | Application-specific configuration | +| config.butler_uri_1 | string | `""` | URI for butler registry 1 (required). Format: * For a volume mounted using `nfs_path_1` (see above): An absolute path starting with `/volume_1/`. * For a network URI: see the daf_butler documentation. * For a sandbox deployment: specify `LSSTCam` for butler_uri_1. | +| config.butler_uri_2 | string | `""` | URI for butler registry 2 (optional). Format: * For a volume mounted using `nfs_path_2` (see above): An absolute path starting with `/volume_2/`. * For a network URI: see the daf_butler documentation. * For a sandbox deployment: specify `LATISS` for butler_uri_2. 
| +| config.nfs_path_1 | string | `""` | NFS path to butler registry 1 Only specify a non-blank value if reading the registry from an NFS-mounted file. If not blank then mount the specified NFS path as internal volume /volume1 | +| config.nfs_path_2 | string | `""` | NFS path to butler registry 2 Only specify a non-blank value if reading the registry from an NFS-mounted file. If not blank then mount the specified NFS path as internal volume /volume2 | +| config.nfs_server_1 | string | `""` | Name of the NFS server that exports nfs_path_1 Specify a non-blank value if and only if the corresponding nfs_path_1 is not blank. | +| config.nfs_server_2 | string | `""` | Name of the NFS server that exports nfs_path_2 Specify a non-blank value if and only if the corresponding nfs_path_2 is not blank. | +| config.site_id | string | `""` | Site ID; a non-empty string of up to 16 characters. This should be different for each non-sandbox deployment. Sandboxes should use `test`. | +| db.database | string | `"exposurelog"` | database name | +| db.host | string | `"postgres.postgres"` | database host | +| db.port | int | `5432` | database port | +| db.user | string | `"exposurelog"` | database user | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"Always"` | Pull policy for the exposurelog image | +| image.repository | string | `"lsstsqre/exposurelog"` | exposurelog image to use | +| image.tag | string | The appVersion of the chart | Tag of exposurelog image to use | +| ingress.gafaelfawrAuthQuery | string | `""` | Gafaelfawr auth query string | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules
for the exposurelog pod | +| podAnnotations | object | `{}` | Annotations for the exposurelog pod | +| podSecurityContext | object | `{}` | Security context for the exposurelog pod | +| replicaCount | int | `1` | How many exposurelog pods to run | +| resources | object | `{}` | Resource limits and requests for the exposurelog pod | +| securityContext | object | `{}` | Security context for the exposurelog deployment | +| tolerations | list | `[]` | Tolerations for the exposurelog pod | diff --git a/services/exposurelog/templates/_helpers.tpl b/services/exposurelog/templates/_helpers.tpl new file mode 100644 index 0000000000..c8389c67db --- /dev/null +++ b/services/exposurelog/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "exposurelog.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "exposurelog.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "exposurelog.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "exposurelog.labels" -}} +helm.sh/chart: {{ include "exposurelog.chart" . }} +{{ include "exposurelog.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "exposurelog.selectorLabels" -}} +app.kubernetes.io/name: {{ include "exposurelog.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/exposurelog/templates/deployment.yaml b/services/exposurelog/templates/deployment.yaml new file mode 100644 index 0000000000..c3c37e2734 --- /dev/null +++ b/services/exposurelog/templates/deployment.yaml @@ -0,0 +1,119 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "exposurelog.fullname" . }} + labels: + {{- include "exposurelog.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "exposurelog.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "exposurelog.selectorLabels" . 
| nindent 8 }} + spec: + imagePullSecrets: + - name: "pull-secret" + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + - name: {{ .Chart.Name }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - all + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: 8080 + protocol: TCP + livenessProbe: + httpGet: + path: /exposurelog + port: http + readinessProbe: + httpGet: + path: /exposurelog + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + env: + - name: BUTLER_URI_1 + value: {{ .Values.config.butler_uri_1 | quote }} + - name: BUTLER_URI_2 + value: {{ .Values.config.butler_uri_2 | quote }} + - name: EXPOSURELOG_DB_USER + value: {{ .Values.db.user | quote }} + - name: PGUSER + value: {{ .Values.db.user | quote }} + - name: EXPOSURELOG_DB_PASSWORD + valueFrom: + secretKeyRef: + name: exposurelog + key: exposurelog_password + - name: PGPASSWORD + valueFrom: + secretKeyRef: + name: exposurelog + key: exposurelog_password + - name: EXPOSURELOG_DB_HOST + value: {{ .Values.db.host | quote }} + - name: EXPOSURELOG_DB_PORT + value: {{ .Values.db.port | quote }} + - name: EXPOSURELOG_DB_DATABASE + value: {{ .Values.db.database | quote }} + - name: SITE_ID + value: {{ .Values.config.site_id | quote }} + volumeMounts: + {{- if .Values.config.nfs_path_1 }} + - name: volume1 + mountPath: /volume_1 + {{- end }} + {{- if .Values.config.nfs_path_2 }} + - name: volume2 + mountPath: /volume_2 + {{- end }} + - name: tmp + mountPath: /tmp + volumes: + {{- if .Values.config.nfs_path_1 }} + - name: volume1 + nfs: + path: {{ .Values.config.nfs_path_1 }} + readOnly: true + server: {{ .Values.config.nfs_server_1 }} + {{- end }} + {{- if .Values.config.nfs_path_2 }} + - name: volume2 + nfs: + path: {{ .Values.config.nfs_path_2 }} + readOnly: 
true + server: {{ .Values.config.nfs_server_2 }} + {{- end }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/exposurelog/templates/hpa.yaml b/services/exposurelog/templates/hpa.yaml new file mode 100644 index 0000000000..d7e30c1a63 --- /dev/null +++ b/services/exposurelog/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "exposurelog.fullname" . }} + labels: + {{- include "exposurelog.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "exposurelog.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/services/exposurelog/templates/ingress.yaml b/services/exposurelog/templates/ingress.yaml new file mode 100644 index 0000000000..aa26a054db --- /dev/null +++ b/services/exposurelog/templates/ingress.yaml @@ -0,0 +1,30 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ template "exposurelog.fullname" . }} + labels: + {{- include "exposurelog.labels" . 
| nindent 4 }} + annotations: + {{- if .Values.ingress.gafaelfawrAuthQuery }} + nginx.ingress.kubernetes.io/auth-method: "GET" + nginx.ingress.kubernetes.io/auth-response-headers: "X-Auth-Request-User,X-Auth-Request-Email,X-Auth-Request-Token" + nginx.ingress.kubernetes.io/auth-signin: "{{ .Values.global.baseUrl }}/login" + nginx.ingress.kubernetes.io/auth-url: "{{ .Values.global.baseUrl }}/auth?{{ .Values.ingress.gafaelfawrAuthQuery }}" + {{- end }} + {{- with .Values.ingress.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ingressClassName: "nginx" + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/exposurelog" + pathType: "Prefix" + backend: + service: + name: {{ include "exposurelog.fullname" . }} + port: + number: 8080 + diff --git a/services/exposurelog/templates/service.yaml b/services/exposurelog/templates/service.yaml new file mode 100644 index 0000000000..fcef7a178c --- /dev/null +++ b/services/exposurelog/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "exposurelog.fullname" . }} + labels: + {{- include "exposurelog.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: http + protocol: TCP + name: http + selector: + {{- include "exposurelog.selectorLabels" . | nindent 4 }} diff --git a/services/exposurelog/templates/tests/test-connection.yaml b/services/exposurelog/templates/tests/test-connection.yaml new file mode 100644 index 0000000000..a910c41e41 --- /dev/null +++ b/services/exposurelog/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "exposurelog.fullname" . }}-test-connection" + labels: + {{- include "exposurelog.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "exposurelog.fullname" . 
}}:8080'] + restartPolicy: Never diff --git a/services/exposurelog/templates/vault-secrets.yaml b/services/exposurelog/templates/vault-secrets.yaml index 997bfec204..2d30e8e123 100644 --- a/services/exposurelog/templates/vault-secrets.yaml +++ b/services/exposurelog/templates/vault-secrets.yaml @@ -1,8 +1,18 @@ apiVersion: ricoberger.de/v1alpha1 kind: VaultSecret metadata: - name: postgres + name: exposurelog namespace: exposurelog spec: - path: {{ .Values.vault_path }} + path: "{{- .Values.global.vaultSecretsPath }}/exposurelog" type: Opaque +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret + labels: + {{- include "exposurelog.labels" . | nindent 4 }} +spec: + path: "{{- .Values.global.vaultSecretsPath }}/pull-secret" + type: kubernetes.io/dockerconfigjson diff --git a/services/exposurelog/values-base.yaml b/services/exposurelog/values-base.yaml index db15e4c24f..d003906f49 100644 --- a/services/exposurelog/values-base.yaml +++ b/services/exposurelog/values-base.yaml @@ -1,30 +1,8 @@ -# WARNING: this is a "playground" deployment -# using exposurelog's built-in test butler registries. -exposurelog: - pull_secret: pull-secret - +config: + # WARNING: this is a "playground" deployment + # using exposurelog's built-in test butler registries. site_id: test # Use the test butler registries. 
# Note: exposurelog's Dockerfile copies the test repos to the top of the container butler_uri_1: LSSTCam butler_uri_2: LATISS - - ingress: - enabled: true - annotations: {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - hosts: - - host: base-lsp.lsst.codes - paths: - - /exposurelog - tls: [] - # - secretName: chart-example-tls - # hosts: - # - chart-example.local - -vault_path: secret/k8s_operator/base-lsp.lsst.codes/postgres - -pull-secret: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/pull-secret diff --git a/services/exposurelog/values-minikube.yaml b/services/exposurelog/values-minikube.yaml new file mode 100644 index 0000000000..45d77ff9ce --- /dev/null +++ b/services/exposurelog/values-minikube.yaml @@ -0,0 +1,2 @@ +config: + site_id: minikube diff --git a/services/exposurelog/values-roe.yaml b/services/exposurelog/values-roe.yaml index da905413c0..d003906f49 100644 --- a/services/exposurelog/values-roe.yaml +++ b/services/exposurelog/values-roe.yaml @@ -1,30 +1,8 @@ -# WARNING: this is a "playground" deployment -# using exposurelog's built-in test butler registries. -exposurelog: - pull_secret: pull-secret - +config: + # WARNING: this is a "playground" deployment + # using exposurelog's built-in test butler registries. site_id: test # Use the test butler registries. 
# Note: exposurelog's Dockerfile copies the test repos to the top of the container butler_uri_1: LSSTCam butler_uri_2: LATISS - - ingress: - enabled: true - annotations: {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - hosts: - - host: rsp.lsst.ac.uk - paths: - - /exposurelog - tls: [] - # - secretName: chart-example-tls - # hosts: - # - chart-example.local - -vault_path: secret/k8s_operator/roe/postgres - -pull-secret: - enabled: true - path: secret/k8s_operator/roe/pull-secret diff --git a/services/exposurelog/values-summit.yaml b/services/exposurelog/values-summit.yaml index d3f9f94552..991b8e96a1 100644 --- a/services/exposurelog/values-summit.yaml +++ b/services/exposurelog/values-summit.yaml @@ -1,31 +1,11 @@ -exposurelog: - pull_secret: pull-secret - +config: site_id: summit nfs_path_1: /repo/LSSTComCam # Mounted as /volume_1 - nfs_server_1: comcam-arctl01.cp.lsst.org + nfs_server_1: comcam-archiver.cp.lsst.org butler_uri_1: /volume_1 nfs_path_2: /repo/LATISS # Mounted as /volume_2 - nfs_server_2: atarchiver.cp.lsst.org + nfs_server_2: auxtel-archiver.cp.lsst.org butler_uri_2: /volume_2 - - ingress: - enabled: true - annotations: {} - # kubernetes.io/ingress.class: nginx - # kubernetes.io/tls-acme: "true" - hosts: - - host: summit-lsp.lsst.codes - paths: - - /exposurelog - tls: [] - # - secretName: chart-example-tls - # hosts: - # - chart-example.local - -vault_path: secret/k8s_operator/summit-lsp.lsst.codes/postgres - -pull-secret: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/pull-secret +db: + host: postgresdb01.cp.lsst.org diff --git a/services/exposurelog/values-tucson-teststand.yaml b/services/exposurelog/values-tucson-teststand.yaml new file mode 100644 index 0000000000..8382031cf3 --- /dev/null +++ b/services/exposurelog/values-tucson-teststand.yaml @@ -0,0 +1,11 @@ +config: + site_id: tucson + nfs_path_1: /repo/LSSTComCam # Mounted as /volume_1 + nfs_server_1: comcam-archiver.tu.lsst.org + 
butler_uri_1: /volume_1 + + nfs_path_2: /repo/LATISS # Mounted as /volume_2 + nfs_server_2: auxtel-archiver.tu.lsst.org + butler_uri_2: /volume_2 +db: + host: squoint.tu.lsst.org diff --git a/services/exposurelog/values.yaml b/services/exposurelog/values.yaml new file mode 100644 index 0000000000..d9a807127e --- /dev/null +++ b/services/exposurelog/values.yaml @@ -0,0 +1,140 @@ +# Default values for exposurelog. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# -- How many exposurelog pods to run +replicaCount: 1 + +image: + # -- exposurelog image to use + repository: lsstsqre/exposurelog + # -- Pull policy for the exposurelog image + pullPolicy: Always + # -- Tag of exposurelog image to use + # @default -- The appVersion of the chart + tag: "" + +db: + # -- database host + host: postgres.postgres + # -- database port + port: 5432 + # -- database user + user: exposurelog + # -- database name + database: exposurelog + +ingress: + # -- Gafaelfawr auth query string + gafaelfawrAuthQuery: "" + +# -- Application-specific configuration +config: + # -- NFS path to butler registry 1 + # Only specify a non-blank value if reading the registry from an NFS-mounted file. + # If not blank then mount the specified NFS path as internal volume /volume1 + nfs_path_1: "" + # -- NFS path to butler registry 2 + # Only specify a non-blank value if reading the registry from an NFS-mounted file. + # If not blank then mount the specified NFS path as internal volume /volume2 + nfs_path_2: "" + + # -- Name of the NFS server that exports nfs_path_1 + # Specify a non-blank value if and only if the corresponding + # nfs_path_1 is not blank. 
+ nfs_server_1: "" + # -- Name of the NFS server that exports nfs_path_2 + # Specify a non-blank value if and only if the corresponding + # nfs_path_2 is not blank. + nfs_server_2: "" + + # -- URI for butler registry 1 (required). Format: + # * For a volume mounted using `nfs_path_1` (see above): + # An absolute path starting with `/volume_1/`. + # * For a network URI: see the daf_butler documentation. + # * For a sandbox deployment: specify `LSSTCam` for butler_uri_1. + butler_uri_1: "" + # -- URI for butler registry 2 (optional). Format: + # * For a volume mounted using `nfs_path_2` (see above): + # An absolute path starting with `/volume_2/`. + # * For a network URI: see the daf_butler documentation. + # * For a sandbox deployment: specify `LATISS` for butler_uri_2. + butler_uri_2: "" + + # -- Site ID; a non-empty string of up to 16 characters. + # This should be different for each non-sandbox deployment. + # Sandboxes should use `test`. + site_id: "" + +# -- Annotations for the exposurelog pod +podAnnotations: {} + +# -- Security context for the exposurelog pod +podSecurityContext: {} + # fsGroup: 2000 + +# -- Security context for the exposurelog deployment +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +# -- Resource limits and requests for the exposurelog pod +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# -- Exposurelog autoscaling settings +autoscaling: + # -- enable exposurelog autoscaling + # @default -- false + enabled: false + # -- minimum number of exposurelog replicas + minReplicas: 1 + # -- maximum number of exposurelog replicas + maxReplicas: 100 + # -- Target CPU utilization for exposurelog pod autoscale calculations + targetCPUUtilizationPercentage: 80 + # -- Target memory utilization for exposurelog pod autoscale calculations + targetMemoryUtilizationPercentage: 80 + +# -- Node selector rules for the exposurelog pod +nodeSelector: {} + +# -- Tolerations for the exposurelog pod +tolerations: [] + +# -- Affinity rules for the exposurelog pod +affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/gafaelfawr/Chart.yaml b/services/gafaelfawr/Chart.yaml index 69d8b6684a..ab5af9c92d 100644 --- a/services/gafaelfawr/Chart.yaml +++ b/services/gafaelfawr/Chart.yaml @@ -1,10 +1,23 @@ apiVersion: v2 name: gafaelfawr version: 1.0.0 -dependencies: - - name: gafaelfawr - version: 4.5.5 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +description: Authentication and identity system +home: https://gafaelfawr.lsst.io/ +sources: + - https://github.com/lsst-sqre/gafaelfawr +appVersion: 9.0.0 + +annotations: + phalanx.lsst.io/docs: | + - id: "DMTN-234" + title: "RSP identity management design" + url: "https://dmtn-234.lsst.io/" + - id: "DMTN-224" + title: "RSP identity management implementation strategy" + url: 
"https://dmtn-224.lsst.io/" + - id: "SQR-055" + title: "COmanage configuration for Rubin Science Platform" + url: "https://sqr-055.lsst.io/" + - id: "SQR-069" + title: "Implementation decisions for RSP identity management" + url: "https://sqr-069.lsst.io/" diff --git a/services/gafaelfawr/README.md b/services/gafaelfawr/README.md new file mode 100644 index 0000000000..301bb5dd89 --- /dev/null +++ b/services/gafaelfawr/README.md @@ -0,0 +1,107 @@ +# gafaelfawr + +Authentication and identity system + +**Homepage:** + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the Gafaelfawr frontend pod | +| cloudsql.affinity | object | `{}` | Affinity rules for the Cloud SQL Proxy pod | +| cloudsql.enabled | bool | `false` | Enable the Cloud SQL Auth Proxy, used with CloudSQL databases on Google Cloud. This will be run as a sidecar for the main Gafaelfawr pods, and as a separate service (behind a `NetworkPolicy`) for other, lower-traffic services. 
| +| cloudsql.image.pullPolicy | string | `"IfNotPresent"` | Pull policy for Cloud SQL Auth Proxy images | +| cloudsql.image.repository | string | `"gcr.io/cloudsql-docker/gce-proxy"` | Cloud SQL Auth Proxy image to use | +| cloudsql.image.tag | string | `"1.33.2"` | Cloud SQL Auth Proxy tag to use | +| cloudsql.instanceConnectionName | string | None, must be set if Cloud SQL Auth Proxy is enabled | Instance connection name for a CloudSQL PostgreSQL instance | +| cloudsql.nodeSelector | object | `{}` | Node selection rules for the Cloud SQL Proxy pod | +| cloudsql.podAnnotations | object | `{}` | Annotations for the Cloud SQL Proxy pod | +| cloudsql.resources | object | `{}` | Resource limits and requests for the Cloud SQL Proxy pod | +| cloudsql.serviceAccount | string | None, must be set if Cloud SQL Auth Proxy is enabled | The Google service account that has an IAM binding to the `gafaelfawr` Kubernetes service account and has the `cloudsql.client` role | +| cloudsql.tolerations | list | `[]` | Tolerations for the Cloud SQL Proxy pod | +| config.cilogon.clientId | string | `""` | CILogon client ID. One and only one of this, `config.github.clientId`, or `config.oidc.clientId` must be set. 
| +| config.cilogon.enrollmentUrl | string | Login fails with an error | Where to send the user if their username cannot be found in LDAP | +| config.cilogon.gidClaim | string | Do not set a primary GID | Claim from which to get the primary GID (only used if not retrieved from LDAP or Firestore) | +| config.cilogon.groupsClaim | string | `"isMemberOf"` | Claim from which to get the group membership (only used if not retrieved from LDAP) | +| config.cilogon.loginParams | object | `{"skin":"LSST"}` | Additional parameters to add | +| config.cilogon.test | bool | `false` | Whether to use the test instance of CILogon | +| config.cilogon.uidClaim | string | `"uidNumber"` | Claim from which to get the numeric UID (only used if not retrieved from LDAP or Firestore) | +| config.cilogon.usernameClaim | string | `"uid"` | Claim from which to get the username | +| config.databaseUrl | string | None, must be set if `cloudsql.enabled` is not true | URL for the PostgreSQL database | +| config.errorFooter | string | `""` | HTML footer to add to any login error page (inside a

tag). | +| config.firestore.project | string | Firestore support is disabled | If set, assign UIDs and GIDs using Google Firestore in the given project. Cloud SQL must be enabled and the Cloud SQL service account must have read/write access to that Firestore instance. | +| config.github.clientId | string | `""` | GitHub client ID. One and only one of this, `config.cilogon.clientId`, or `config.oidc.clientId` must be set. | +| config.groupMapping | object | `{}` | Defines a mapping of scopes to groups that provide that scope. See [DMTN-235](https://dmtn-235.lsst.io/) for more details on scopes. | +| config.initialAdmins | list | `[]` | Usernames to add as administrators when initializing a new database. Used only if there are no administrators. | +| config.knownScopes | object | See the `values.yaml` file | Names and descriptions of all scopes in use. This is used to populate the new token creation page. Only scopes listed here will be options when creating a new token. See [DMTN-235](https://dmtn-235.lsst.io/). | +| config.ldap.addUserGroup | bool | `false` | Whether to synthesize a user private group for each user with a GID equal to their UID | +| config.ldap.emailAttr | string | `"mail"` | Attribute containing the user's email address | +| config.ldap.gidAttr | string | Use GID of user private group | Attribute containing the user's primary GID (set to `gidNumber` for most LDAP servers) | +| config.ldap.groupBaseDn | string | None, must be set | Base DN for the LDAP search to find a user's groups | +| config.ldap.groupMemberAttr | string | `"member"` | Member attribute of the object class. Values must match the username returned in the token from the OpenID Connect authentication server. 
| +| config.ldap.groupObjectClass | string | `"posixGroup"` | Object class containing group information | +| config.ldap.nameAttr | string | `"displayName"` | Attribute containing the user's full name | +| config.ldap.uidAttr | string | Get UID from upstream authentication provider | Attribute containing the user's UID number (set to `uidNumber` for most LDAP servers) | +| config.ldap.url | string | Do not use LDAP | LDAP server URL from which to retrieve user group information | +| config.ldap.userBaseDn | string | Get user metadata from the upstream authentication provider | Base DN for the LDAP search to find a user's entry | +| config.ldap.userDn | string | Use anonymous binds | Bind DN for simple bind authentication. If set, `ldap-secret` must be set in the Gafaelfawr secret | +| config.ldap.userSearchAttr | string | `"uid"` | Search attribute containing the user's username | +| config.loglevel | string | `"INFO"` | Choose from the text form of Python logging levels | +| config.oidc.audience | string | Value of `config.oidc.clientId` | Audience for the JWT token | +| config.oidc.clientId | string | `""` | Client ID for generic OpenID Connect support. One and only one of this, `config.cilogon.clientId`, or `config.github.clientId` must be set. 
| +| config.oidc.enrollmentUrl | string | Login fails with an error | Where to send the user if their username cannot be found in LDAP | +| config.oidc.gidClaim | string | Do not set a primary GID | Claim from which to get the primary GID (only used if not retrieved from LDAP or Firestore) | +| config.oidc.groupsClaim | string | `"isMemberOf"` | Claim from which to get the group membership (only used if not retrieved from LDAP) | +| config.oidc.issuer | string | None, must be set | Issuer for the JWT token | +| config.oidc.loginParams | object | `{}` | Additional parameters to add to the login request | +| config.oidc.loginUrl | string | None, must be set | URL to which to redirect the user for authorization | +| config.oidc.scopes | list | `["openid"]` | Scopes to request from the OpenID Connect provider | +| config.oidc.tokenUrl | string | None, must be set | URL from which to retrieve the token for the user | +| config.oidc.uidClaim | string | `"uidNumber"` | Claim from which to get the numeric UID (only used if not retrieved from LDAP or Firestore) | +| config.oidc.usernameClaim | string | `"sub"` | Claim from which to get the username | +| config.oidcServer.enabled | bool | `false` | Whether to support OpenID Connect clients. If set to true, `oidc-server-secrets` must be set in the Gafaelfawr secret. | +| config.proxies | list | [`10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`] | List of netblocks used for internal Kubernetes IP addresses, used to determine the true client IP for logging | +| config.slackAlerts | bool | `false` | Whether to send certain serious alerts to Slack. If `true`, the `slack-webhook` secret must also be set. 
| +| config.tokenLifetimeMinutes | int | `43200` (30 days) | Session length and token expiration (in minutes) | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the Gafaelfawr image | +| image.repository | string | `"ghcr.io/lsst-sqre/gafaelfawr"` | Gafaelfawr image to use | +| image.tag | string | The appVersion of the chart | Tag of Gafaelfawr image to use | +| maintenance.affinity | object | `{}` | Affinity rules for Gafaelfawr maintenance and audit pods | +| maintenance.auditSchedule | string | `"30 3 * * *"` | Cron schedule string for Gafaelfawr data consistency audit (in UTC) | +| maintenance.maintenanceSchedule | string | `"5 * * * *"` | Cron schedule string for Gafaelfawr periodic maintenance (in UTC) | +| maintenance.nodeSelector | object | `{}` | Node selection rules for Gafaelfawr maintenance and audit pods | +| maintenance.podAnnotations | object | `{}` | Annotations for Gafaelfawr maintenance and audit pods | +| maintenance.resources | object | `{}` | Resource limits and requests for Gafaelfawr maintenance and audit pods | +| maintenance.tolerations | list | `[]` | Tolerations for Gafaelfawr maintenance and audit pods | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the Gafaelfawr frontend pod | +| operator.affinity | object | `{}` | Affinity rules for the token management pod | +| operator.nodeSelector | object | `{}` | Node selection rules for the token management pod | +| operator.podAnnotations | object | `{}` | Annotations for the token management pod | +| operator.resources | object | `{}` | Resource limits and requests 
for the Gafaelfawr Kubernetes operator | +| operator.tolerations | list | `[]` | Tolerations for the token management pod | +| podAnnotations | object | `{}` | Annotations for the Gafaelfawr frontend pod | +| redis.affinity | object | `{}` | Affinity rules for the Redis pod | +| redis.image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the Redis image | +| redis.image.repository | string | `"redis"` | Redis image to use | +| redis.image.tag | string | `"7.0.8"` | Redis image tag to use | +| redis.nodeSelector | object | `{}` | Node selection rules for the Redis pod | +| redis.persistence.accessMode | string | `"ReadWriteOnce"` | Access mode of storage to request | +| redis.persistence.enabled | bool | `true` | Whether to persist Redis storage and thus tokens. Setting this to false will use `emptyDir` and reset all tokens on every restart. Only use this for a test deployment. | +| redis.persistence.size | string | `"1Gi"` | Amount of persistent storage to request | +| redis.persistence.storageClass | string | `""` | Class of storage to request | +| redis.persistence.volumeClaimName | string | `""` | Use an existing PVC, not dynamic provisioning. If this is set, the size, storageClass, and accessMode settings are ignored. 
| +| redis.podAnnotations | object | `{}` | Pod annotations for the Redis pod | +| redis.tolerations | list | `[]` | Tolerations for the Redis pod | +| replicaCount | int | `1` | Number of web frontend pods to start | +| resources | object | `{}` | Resource limits and requests for the Gafaelfawr frontend pod | +| tolerations | list | `[]` | Tolerations for the Gafaelfawr frontend pod | diff --git a/services/gafaelfawr/crds/ingress.yaml b/services/gafaelfawr/crds/ingress.yaml new file mode 100644 index 0000000000..d81837e8f4 --- /dev/null +++ b/services/gafaelfawr/crds/ingress.yaml @@ -0,0 +1,341 @@ +# The canonical version of this file is in the Gafaelfawr repository: +# https://github.com/lsst-sqre/gafaelfawr/blob/main/crds/ingress.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: gafaelfawringresses.gafaelfawr.lsst.io + labels: + app.kubernetes.io/name: gafaelfawr.lsst.io + app.kubernetes.io/part-of: gafaelfawr + annotations: + helm.sh/hook: crd-install +spec: + group: gafaelfawr.lsst.io + scope: Namespaced + names: + plural: gafaelfawringresses + singular: gafaelfawringress + kind: GafaelfawrIngress + versions: + - name: v1alpha1 + served: true + storage: true + additionalPrinterColumns: + - description: "If the ingress was created successfully" + jsonPath: .status.create.status + name: "Succeeded" + type: string + - description: "Reason for the current status" + jsonPath: .status.create.reason + name: "Reason" + type: string + - description: "More information about the current status" + jsonPath: .status.create.message + name: "Message" + type: string + - description: "Time when the condition was last updated" + jsonPath: .status.create.lastTransitionTime + name: "Last Transition" + type: date + - description: "Time when the GafaelfawrIngress was created" + jsonPath: .metadata.creationTimestamp + name: "Age" + type: date + subresources: + status: {} + schema: + openAPIV3Schema: + description: >- + GafaelfawrIngress 
defines the parameters used to create an Ingress + resource. + type: object + required: + - config + - template + properties: + config: + type: object + description: "Configuration for the ingress to create." + required: + - baseUrl + properties: + baseUrl: + type: string + description: "Base URL for Gafaelfawr APIs." + pattern: "^https://[a-z.-]+" + authType: + type: string + enum: + - basic + - bearer + description: >- + Controls the authentication type in the challenge + returned in the `WWW-Authenticate` header if the user + is not authenticated. By default, this is `bearer`. + delegate: + type: object + description: >- + Create a (or reuse a cached) delegated token and + include it in the request to the backend service. + properties: + internal: + type: object + description: >- + Delegate an internal token to this service. + required: + - scopes + - service + properties: + scopes: + type: array + description: >- + Scopes to include in the delegated token if + they are available. These scopes are not + required to access the service; to make them + required, include them in spec.scopes as well. + items: + type: string + service: + type: string + description: >- + Name of the service to which the token is + delegated. + notebook: + type: object + description: >- + Delegate a notebook token to this service. + minimumLifetime: + type: integer + description: >- + Minimum lifetime of delegated token in seconds. If + the user's token has less than that time + remaining, force them to reauthenticate. + useAuthorization: + type: boolean + description: >- + If set to true, put the delegated token in the + Authorization header of the request as a bearer token, + in addition to X-Auth-Request-Token. + oneOf: + - required: + - internal + - required: + - notebook + loginRedirect: + type: boolean + description: >- + Whether to redirect to the login flow if the user is + not currently authenticated. 
+ replace403: + type: boolean + description: >- + Whether to replace 403 responses with a custom 403 + response from Gafaelfawr that disables caching and + includes authorization-related errors in the + `WWW-Authenticate` header. + scopes: + type: object + description: >- + The token scope or scopes required to access this + service. May be omitted if the service allows + anonymous access. + properties: + any: + type: array + description: >- + Access is granted if any of the listed scopes are + present. + items: + type: string + all: + type: array + description: >- + Access is granted if all of the listed scopes are + present. + items: + type: string + anonymous: + type: boolean + description: >- + Allow anonymous access to this ingress. No access + control checks will be made and no token delegation is + possible, but Gafaelfawr tokens will still be stripped + from the `Authorization` and `Cookie` headers. + oneOf: + - required: + - any + - required: + - all + - properties: + anonymous: + enum: + - true + required: + - anonymous + template: + type: object + description: "The template used to create the ingress." + required: + - metadata + - spec + properties: + metadata: + type: object + description: "Metadata attributes for the generated ingress." + properties: + annotations: + type: object + description: >- + Annotations to apply to the generated ingress. These + will be merged with the annotations required by + Gafaelfawr. If there is a conflict, the + Gafaelfawr-generated annotations will override the + ones provided in this field. + additionalProperties: + type: string + labels: + type: object + description: "Labels to apply to the generated ingress." + additionalProperties: + type: string + name: + type: string + description: "Name of the generated ingress" + spec: + type: object + description: "Spec for the generated ingress." + required: + - rules + properties: + rules: + type: array + description: >- + Host rules for the generated ingress. 
See the schema + for the regular Ingress resource for descriptions of + the individual fields. + items: + type: object + properties: + host: + type: string + http: + type: object + required: + - paths + properties: + paths: + type: array + items: + type: object + required: + - path + - pathType + - backend + properties: + path: + type: string + pathType: + type: string + enum: + - Exact + - ImplementationSpecific + - Prefix + backend: + type: object + properties: + service: + type: object + properties: + name: + type: string + port: + type: object + properties: + number: + type: integer + name: + type: string + tls: + type: array + description: >- + TLS configuration if one should be added to this + generated ingress. See the schema for the regular + Ingress resource for descriptions of the individual + fields. + items: + type: object + properties: + hosts: + type: array + items: + type: string + secretName: + type: string + status: + type: object + description: >- + The current state of the GafaelfawrIngress, its processing by + Gafaelfawr, and its child resources. + x-kubernetes-preserve-unknown-fields: true + properties: + create: + type: object + description: >- + Status of processing of the last creation or update of the + GafaelfawrIngress object. + required: + - lastTransitionTime + - message + - reason + - status + - type + properties: + lastTransitionTime: + type: string + format: date-time + description: > + The last time the child Ingress status changed. + message: + type: string + description: > + A human readable message indicating details about the + transition. This may be an empty string. + maxLength: 32768 + observedGeneration: + description: > + The .metadata.generation that the condition was set + based upon. For instance, if .metadata.generation is + currently 12, but the + .status.create.observedGeneration is 9, the condition + is out of date with respect to the current state of + the instance. 
+ format: int64 + minimum: 0 + type: integer + reason: + type: string + description: > + A programmatic identifier indicating the reason for + the condition's last transition. Producers of specific + condition types may define expected values and + meanings for this field, and whether the values are + considered a guaranteed API. The value should be a + CamelCase string. This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: "^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$" + status: + type: string + description: > + Status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - "Unknown" + type: + type: string + description: > + Type of condition in CamelCase or in + foo.example.com/CamelCase. + maxLength: 316 + pattern: "^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$" diff --git a/services/gafaelfawr/crds/service-token.yaml b/services/gafaelfawr/crds/service-token.yaml new file mode 100644 index 0000000000..810904def7 --- /dev/null +++ b/services/gafaelfawr/crds/service-token.yaml @@ -0,0 +1,200 @@ +# The canonical version of this file is in the Gafaelfawr repository: +# https://github.com/lsst-sqre/gafaelfawr/blob/main/crds/service-token.yaml +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + name: gafaelfawrservicetokens.gafaelfawr.lsst.io + labels: + app.kubernetes.io/name: gafaelfawr.lsst.io + app.kubernetes.io/part-of: gafaelfawr + annotations: + helm.sh/hook: crd-install +spec: + group: gafaelfawr.lsst.io + scope: Namespaced + names: + plural: gafaelfawrservicetokens + singular: gafaelfawrservicetoken + kind: GafaelfawrServiceToken + versions: + - name: v1alpha1 + served: true + storage: true + additionalPrinterColumns: + - description: "Service for which to create a token" + jsonPath: ".spec.service" + name: "Service" + type: string + - description: "If the secret was created/updated successfully" + jsonPath: 
.status.create.status + name: "Succeeded" + type: string + - description: "Reason for the current status" + jsonPath: .status.create.reason + name: "Reason" + type: string + - description: "More information about the current status" + jsonPath: .status.create.message + name: "Message" + type: string + - description: "Time when the condition was last updated" + jsonPath: .status.create.lastTransitionTime + name: "Last Transition" + type: date + - description: "Time when the GafaelfawrServiceToken was created" + jsonPath: .metadata.creationTimestamp + name: Age + type: date + subresources: + status: {} + schema: + openAPIV3Schema: + description: >- + GafaelfawrServiceTokenSpec defines the desired state of the + GafaelfawrServiceToken. + type: object + properties: + spec: + type: object + description: "Specification of the token secret to create." + properties: + service: + type: string + description: "Username field of the service token." + scopes: + type: array + description: >- + Array of scopes that should be granted to the service + token. + items: + type: string + pattern: "^[a-zA-Z0-9:._-]+$" + status: + type: object + description: >- + The observed state of the GafaelfawrServiceToken. + x-kubernetes-preserve-unknown-fields: true + properties: + create: + type: object + description: >- + Status of processing of the last creation or update of the + GafaelfawrServiceToken object. + required: + - lastTransitionTime + - message + - reason + - status + - type + properties: + lastTransitionTime: + type: string + format: date-time + description: > + The last time the child Secret status changed. + message: + type: string + description: > + A human readable message indicating details about the + transition. This may be an empty string. + maxLength: 32768 + observedGeneration: + description: > + The .metadata.generation that the condition was set + based upon. 
For instance, if .metadata.generation is + currently 12, but the + .status.create.observedGeneration is 9, the condition + is out of date with respect to the current state of + the instance. + format: int64 + minimum: 0 + type: integer + reason: + type: string + description: > + A programmatic identifier indicating the reason for + the condition's last transition. Producers of specific + condition types may define expected values and + meanings for this field, and whether the values are + considered a guaranteed API. The value should be a + CamelCase string. This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: "^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$" + status: + type: string + description: > + Status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - "Unknown" + type: + type: string + description: > + Type of condition in CamelCase or in + foo.example.com/CamelCase. + maxLength: 316 + pattern: "^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$" + periodic: + type: object + description: >- + Status of the last periodic validation of the Secret for + this GafaelfawrServiceToken object. + required: + - lastTransitionTime + - message + - reason + - status + - type + properties: + lastTransitionTime: + type: string + format: date-time + description: > + The last time the child Secret status changed due to a + periodic revalidation. + message: + type: string + description: > + A human readable message indicating details about the + transition. This may be an empty string. + maxLength: 32768 + observedGeneration: + description: > + The .metadata.generation that the condition was set + based upon. For instance, if .metadata.generation is + currently 12, but the + .status.create.observedGeneration is 9, the condition + is out of date with respect to the current state of + the instance. 
+ format: int64 + minimum: 0 + type: integer + reason: + type: string + description: > + A programmatic identifier indicating the reason for + the condition's last transition. Producers of specific + condition types may define expected values and + meanings for this field, and whether the values are + considered a guaranteed API. The value should be a + CamelCase string. This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: "^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$" + status: + type: string + description: > + Status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - "Unknown" + type: + type: string + description: > + Type of condition in CamelCase or in + foo.example.com/CamelCase. + maxLength: 316 + pattern: "^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$" diff --git a/services/gafaelfawr/templates/_helpers.tpl b/services/gafaelfawr/templates/_helpers.tpl new file mode 100644 index 0000000000..57265a82b9 --- /dev/null +++ b/services/gafaelfawr/templates/_helpers.tpl @@ -0,0 +1,52 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "gafaelfawr.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "gafaelfawr.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "gafaelfawr.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "gafaelfawr.labels" -}} +helm.sh/chart: {{ include "gafaelfawr.chart" . }} +{{ include "gafaelfawr.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "gafaelfawr.selectorLabels" -}} +app.kubernetes.io/name: {{ include "gafaelfawr.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/gafaelfawr/templates/cloudsql-deployment.yaml b/services/gafaelfawr/templates/cloudsql-deployment.yaml new file mode 100644 index 0000000000..a2f5fbf91a --- /dev/null +++ b/services/gafaelfawr/templates/cloudsql-deployment.yaml @@ -0,0 +1,63 @@ +{{- if .Values.cloudsql.enabled -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cloud-sql-proxy + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.cloudsql.replicaCount }} + selector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "cloud-sql-proxy" + template: + metadata: + {{- with .Values.cloudsql.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "gafaelfawr.selectorLabels" . 
| nindent 8 }} + app.kubernetes.io/component: "cloud-sql-proxy" + spec: + serviceAccountName: {{ include "gafaelfawr.fullname" . }} + containers: + - name: "cloud-sql-proxy" + command: + - "/cloud_sql_proxy" + - "-ip_address_types=PRIVATE" + - "-instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:0.0.0.0:5432" + image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}" + imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }} + ports: + - containerPort: 5432 + name: "http" + protocol: "TCP" + {{- with .Values.cloudsql.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + securityContext: + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + {{- with .Values.cloudsql.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.cloudsql.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.cloudsql.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/services/gafaelfawr/templates/cloudsql-networkpolicy.yaml b/services/gafaelfawr/templates/cloudsql-networkpolicy.yaml new file mode 100644 index 0000000000..27d59bad79 --- /dev/null +++ b/services/gafaelfawr/templates/cloudsql-networkpolicy.yaml @@ -0,0 +1,36 @@ +{{- if .Values.cloudsql.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: "cloud-sql-proxy" + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + podSelector: + # This policy controls inbound and outbound access to the Cloud SQL Proxy. + matchLabels: + {{- include "gafaelfawr.selectorLabels" . 
| nindent 6 }} + app.kubernetes.io/component: "cloud-sql-proxy" + policyTypes: + - Ingress + ingress: + # Allow inbound access to the Cloud SQL Proxy from other components except + # the frontend. The frontend, since it's performance-critical and gates + # all access to the cluster, continues running its own sidecar. + - from: + - podSelector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 14 }} + app.kubernetes.io/component: "audit" + - podSelector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 14 }} + app.kubernetes.io/component: "maintenance" + - podSelector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 14 }} + app.kubernetes.io/component: "operator" + ports: + - protocol: "TCP" + port: 5432 +{{- end }} diff --git a/services/gafaelfawr/templates/cloudsql-service.yaml b/services/gafaelfawr/templates/cloudsql-service.yaml new file mode 100644 index 0000000000..5273410788 --- /dev/null +++ b/services/gafaelfawr/templates/cloudsql-service.yaml @@ -0,0 +1,17 @@ +{{- if .Values.cloudsql.enabled -}} +apiVersion: v1 +kind: Service +metadata: + name: "cloud-sql-proxy" + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - protocol: "TCP" + port: 5432 + targetPort: "http" + selector: + {{- include "gafaelfawr.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: "cloud-sql-proxy" +{{- end }} diff --git a/services/gafaelfawr/templates/configmap.yaml b/services/gafaelfawr/templates/configmap.yaml new file mode 100644 index 0000000000..ad772597b3 --- /dev/null +++ b/services/gafaelfawr/templates/configmap.yaml @@ -0,0 +1,202 @@ +{{/* Generate two versions of the ConfigMap, one using the sidecar proxy + and the other using the separate Cloud SQL Proxy service. The second + will be used for CronJobs and other lower-load services, avoiding the + difficulty with coordinating stopping the Cloud SQL Proxy sidecar when + a CronJob ends. 
*/}} +{{- define "gafaelfawr.configMap" }} + realm: {{ required "global.host must be set" .Values.global.host | quote }} + loglevel: {{ .Values.config.loglevel | quote }} + session_secret_file: "/etc/gafaelfawr/secrets/session-secret" + bootstrap_token_file: "/etc/gafaelfawr/secrets/bootstrap-token" + database_password_file: "/etc/gafaelfawr/secrets/database-password" + redis_url: "redis://{{ template "gafaelfawr.fullname" . }}-redis.{{ .Release.Namespace }}:6379/0" + redis_password_file: "/etc/gafaelfawr/secrets/redis-password" + {{- if .Values.config.slackAlerts }} + slack_webhook_file: "/etc/gafaelfawr/secrets/slack-webhook" + {{- end }} + token_lifetime_minutes: {{ .Values.config.tokenLifetimeMinutes }} + {{- if .Values.config.proxies }} + proxies: + {{- range $netblock := .Values.config.proxies }} + - {{ $netblock | quote }} + {{- end }} + {{- end }} + after_logout_url: {{ required "global.baseUrl must be set" .Values.global.baseUrl }} + {{- if .Values.config.errorFooter }} + error_footer: {{ .Values.config.errorFooter | quote }} + {{- end }} + + {{- if .Values.config.github.clientId }} + + github: + client_id: {{ .Values.config.github.clientId | quote }} + client_secret_file: "/etc/gafaelfawr/secrets/github-client-secret" + + {{- else if .Values.config.cilogon.clientId }} + + oidc: + client_id: {{ .Values.config.cilogon.clientId | quote }} + client_secret_file: "/etc/gafaelfawr/secrets/cilogon-client-secret" + {{- if .Values.config.cilogon.test }} + login_url: "https://test.cilogon.org/authorize" + token_url: "https://test.cilogon.org/oauth2/token" + issuer: "https://test.cilogon.org" + {{- else }} + login_url: "https://cilogon.org/authorize" + token_url: "https://cilogon.org/oauth2/token" + issuer: "https://cilogon.org" + {{- end }} + {{- if .Values.config.cilogon.enrollmentUrl }} + enrollment_url: {{ .Values.config.cilogon.enrollmentUrl | quote }} + {{- end }} + {{- if .Values.config.cilogon.loginParams }} + login_params: + {{- range $key, $value := 
.Values.config.cilogon.loginParams }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- end }} + redirect_url: "{{ .Values.global.baseUrl }}/login" + scopes: + - "email" + - "org.cilogon.userinfo" + audience: {{ .Values.config.cilogon.clientId | quote }} + {{- if .Values.config.cilogon.usernameClaim }} + username_claim: {{ .Values.config.cilogon.usernameClaim | quote }} + {{- end }} + {{- if .Values.config.cilogon.uidClaim }} + uid_claim: {{ .Values.config.cilogon.uidClaim | quote }} + {{- end }} + {{- if .Values.config.cilogon.gidClaim }} + gid_claim: {{ .Values.config.cilogon.gidClaim | quote }} + {{- end }} + {{- if .Values.config.cilogon.groupsClaim }} + groups_claim: {{ .Values.config.cilogon.groupsClaim | quote }} + {{- end }} + + {{- else if .Values.config.oidc.clientId }} + + oidc: + client_id: {{ .Values.config.oidc.clientId | quote }} + client_secret_file: "/etc/gafaelfawr/secrets/oidc-client-secret" + {{- if .Values.config.oidc.audience }} + audience: {{ .Values.config.oidc.audience | quote }} + {{- else }} + audience: {{ .Values.config.oidc.clientId | quote }} + {{- end }} + login_url: {{ required "config.oidc.loginUrl must be set" .Values.config.oidc.loginUrl | quote }} + token_url: {{ required "config.oidc.tokenUrl must be set" .Values.config.oidc.tokenUrl | quote }} + {{- if .Values.config.oidc.enrollmentUrl }} + enrollment_url: {{ .Values.config.oidc.enrollmentUrl | quote }} + {{- end }} + issuer: {{ required "config.oidc.issuer must be set" .Values.config.oidc.issuer | quote }} + redirect_url: "{{ .Values.global.baseUrl }}/login" + scopes: + {{- with .Values.config.oidc.scopes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.config.oidc.loginParams }} + login_params: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- if .Values.config.oidc.usernameClaim }} + username_claim: {{ .Values.config.oidc.usernameClaim | quote }} + {{- end }} + {{- if .Values.config.oidc.uidClaim }} + uid_claim: {{ .Values.config.oidc.uidClaim | quote }} + {{- end }} + {{- if .Values.config.oidc.gidClaim }} + gid_claim: {{ .Values.config.oidc.gidClaim | quote }} + {{- end }} + {{- if .Values.config.oidc.groupsClaim }} + groups_claim: {{ .Values.config.oidc.groupsClaim | quote }} + {{- end }} + + {{- end }} + + {{- if .Values.config.firestore.project }} + firestore: + project: {{ .Values.config.firestore.project | quote }} + {{- end }} + + {{- if .Values.config.ldap.url }} + ldap: + url: {{ .Values.config.ldap.url | quote }} + group_base_dn: {{ required "config.ldap.groupBaseDn must be set" .Values.config.ldap.groupBaseDn | quote }} + {{- if .Values.config.ldap.userDn }} + user_dn: {{ .Values.config.ldap.userDn | quote }} + password_file: "/etc/gafaelfawr/secrets/ldap-password" + {{- end }} + group_object_class: {{ .Values.config.ldap.groupObjectClass | quote }} + group_member_attr: {{ .Values.config.ldap.groupMemberAttr | quote }} + {{- if .Values.config.ldap.userBaseDn }} + user_base_dn: {{ .Values.config.ldap.userBaseDn | quote }} + user_search_attr: {{ .Values.config.ldap.userSearchAttr | quote }} + name_attr: {{ .Values.config.ldap.nameAttr | quote }} + email_attr: {{ .Values.config.ldap.emailAttr | quote }} + {{- if .Values.config.ldap.uidAttr }} + uid_attr: {{ .Values.config.ldap.uidAttr | quote }} + {{- end }} + {{- if .Values.config.ldap.gidAttr }} + gid_attr: {{ .Values.config.ldap.gidAttr | quote }} + {{- end }} + {{- end }} + {{- if .Values.config.ldap.addUserGroup }} + add_user_group: true + {{- end }} + {{- end }} + + {{- if .Values.config.oidcServer.enabled }} + oidc_server: + issuer: "https://{{ .Values.global.host }}" + key_id: "gafaelfawr" + audience: "https://{{ .Values.global.host }}" + key_file: "/etc/gafaelfawr/secrets/signing-key" + secrets_file: 
"/etc/gafaelfawr/secrets/oidc-server-secrets" + {{- end }} + + known_scopes: + {{- range $key, $value := .Values.config.knownScopes }} + {{ $key | quote }}: {{ $value | quote }} + {{- end }} + + {{- with .Values.config.groupMapping }} + group_mapping: + {{- toYaml . | nindent 6 }} + {{- end }} + + {{- if .Values.config.initialAdmins }} + initial_admins: + {{- range $admin := .Values.config.initialAdmins }} + - {{ $admin | quote }} + {{- end }} + {{- end }} +{{- end }} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "gafaelfawr.fullname" . }}-config + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +data: + gafaelfawr.yaml: | + {{- if .Values.cloudsql.enabled }} + database_url: "postgresql://gafaelfawr@cloud-sql-proxy/gafaelfawr" + {{- else }} + database_url: {{ required "config.databaseUrl must be set" .Values.config.databaseUrl | quote }} + {{- end }} + {{- template "gafaelfawr.configMap" . }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "gafaelfawr.fullname" . }}-config-sidecar + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +data: + gafaelfawr.yaml: | + {{- if .Values.cloudsql.enabled }} + database_url: "postgresql://gafaelfawr@localhost/gafaelfawr" + {{- else }} + database_url: {{ required "config.databaseUrl must be set" .Values.config.databaseUrl | quote }} + {{- end }} + {{- template "gafaelfawr.configMap" . }} diff --git a/services/gafaelfawr/templates/cronjob-audit.yaml b/services/gafaelfawr/templates/cronjob-audit.yaml new file mode 100644 index 0000000000..690985df58 --- /dev/null +++ b/services/gafaelfawr/templates/cronjob-audit.yaml @@ -0,0 +1,72 @@ +{{- if .Values.config.slackAlerts -}} +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ template "gafaelfawr.fullname" . }}-audit + labels: + {{- include "gafaelfawr.labels" . 
| nindent 4 }} +spec: + schedule: {{ .Values.maintenance.auditSchedule | quote }} + concurrencyPolicy: "Forbid" + jobTemplate: + spec: + template: + metadata: + {{- with .Values.maintenance.podAnnotations }} + annotations: + {{- toYaml . | nindent 12 }} + {{- end }} + labels: + {{- include "gafaelfawr.selectorLabels" . | nindent 12 }} + app.kubernetes.io/component: "audit" + spec: + restartPolicy: "Never" + automountServiceAccountToken: false + containers: + - name: "gafaelfawr" + command: + - "gafaelfawr" + - "audit" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + {{- with .Values.maintenance.resources }} + resources: + {{- toYaml . | nindent 16 }} + {{- end }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + volumeMounts: + - name: "config" + mountPath: "/etc/gafaelfawr" + readOnly: true + - name: "secret" + mountPath: "/etc/gafaelfawr/secrets" + readOnly: true + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + volumes: + - name: "config" + configMap: + name: {{ template "gafaelfawr.fullname" . }}-config + - name: "secret" + secret: + secretName: {{ template "gafaelfawr.fullname" . }}-secret + {{- with .Values.maintenance.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.maintenance.affinity }} + affinity: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.maintenance.tolerations }} + tolerations: + {{- toYaml . | nindent 12 }} + {{- end }} +{{- end }} diff --git a/services/gafaelfawr/templates/cronjob-maintenance.yaml b/services/gafaelfawr/templates/cronjob-maintenance.yaml new file mode 100644 index 0000000000..a45bbf9bc9 --- /dev/null +++ b/services/gafaelfawr/templates/cronjob-maintenance.yaml @@ -0,0 +1,70 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ template "gafaelfawr.fullname" . 
}}-maintenance + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + schedule: {{ .Values.maintenance.maintenanceSchedule | quote }} + concurrencyPolicy: "Forbid" + jobTemplate: + spec: + template: + metadata: + {{- with .Values.maintenance.podAnnotations }} + annotations: + {{- toYaml . | nindent 12 }} + {{- end }} + labels: + {{- include "gafaelfawr.selectorLabels" . | nindent 12 }} + app.kubernetes.io/component: "maintenance" + spec: + restartPolicy: "Never" + automountServiceAccountToken: false + containers: + - name: "gafaelfawr" + command: + - "gafaelfawr" + - "maintenance" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + {{- with .Values.maintenance.resources }} + resources: + {{- toYaml . | nindent 16 }} + {{- end }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + volumeMounts: + - name: "config" + mountPath: "/etc/gafaelfawr" + readOnly: true + - name: "secret" + mountPath: "/etc/gafaelfawr/secrets" + readOnly: true + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + volumes: + - name: "config" + configMap: + name: {{ template "gafaelfawr.fullname" . }}-config + - name: "secret" + secret: + secretName: {{ template "gafaelfawr.fullname" . }}-secret + {{- with .Values.maintenance.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.maintenance.affinity }} + affinity: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.maintenance.tolerations }} + tolerations: + {{- toYaml . 
| nindent 12 }} + {{- end }} diff --git a/services/gafaelfawr/templates/deployment-operator.yaml b/services/gafaelfawr/templates/deployment-operator.yaml new file mode 100644 index 0000000000..19d44aa96c --- /dev/null +++ b/services/gafaelfawr/templates/deployment-operator.yaml @@ -0,0 +1,75 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "gafaelfawr.fullname" . }}-operator + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "operator" + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.operator.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "gafaelfawr.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: "operator" + spec: + serviceAccountName: {{ include "gafaelfawr.fullname" . }}-operator + containers: + - name: "gafaelfawr" + command: + - "kopf" + - "run" + - "-A" + - "--log-format=json" + - "-m" + - "gafaelfawr.operator" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + {{- with .Values.operator.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + volumeMounts: + - name: "config" + mountPath: "/etc/gafaelfawr" + readOnly: true + - name: "secret" + mountPath: "/etc/gafaelfawr/secrets" + readOnly: true + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + volumes: + - name: "config" + configMap: + name: {{ template "gafaelfawr.fullname" . }}-config + - name: "secret" + secret: + secretName: {{ template "gafaelfawr.fullname" . 
}}-secret + {{- with .Values.operator.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.operator.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.operator.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/gafaelfawr/templates/deployment.yaml b/services/gafaelfawr/templates/deployment.yaml new file mode 100644 index 0000000000..565f51ac1e --- /dev/null +++ b/services/gafaelfawr/templates/deployment.yaml @@ -0,0 +1,98 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "gafaelfawr.fullname" . }} + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "frontend" + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "gafaelfawr.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: "frontend" + spec: + {{- if .Values.cloudsql.enabled }} + serviceAccountName: {{ include "gafaelfawr.fullname" . 
}} + {{- else }} + automountServiceAccountToken: false + {{- end }} + containers: + {{- if .Values.cloudsql.enabled }} + - name: "cloud-sql-proxy" + command: + - "/cloud_sql_proxy" + - "-ip_address_types=PRIVATE" + - "-instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432" + image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}" + imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + {{- end }} + - name: "gafaelfawr" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + ports: + - containerPort: 8080 + name: "http" + protocol: "TCP" + readinessProbe: + httpGet: + path: "/" + port: "http" + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + volumeMounts: + - name: "config" + mountPath: "/etc/gafaelfawr" + readOnly: true + - name: "secret" + mountPath: "/etc/gafaelfawr/secrets" + readOnly: true + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + volumes: + - name: "config" + configMap: + name: {{ template "gafaelfawr.fullname" . }}-config-sidecar + - name: "secret" + secret: + secretName: {{ template "gafaelfawr.fullname" . }}-secret + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/services/gafaelfawr/templates/ingress-rewrite.yaml b/services/gafaelfawr/templates/ingress-rewrite.yaml new file mode 100644 index 0000000000..ec7210c162 --- /dev/null +++ b/services/gafaelfawr/templates/ingress-rewrite.yaml @@ -0,0 +1,22 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + annotations: + nginx.ingress.kubernetes.io/rewrite-target: "/auth/tokens/" + nginx.ingress.kubernetes.io/use-regex: "true" + name: {{ template "gafaelfawr.fullname" . }}-rewrite + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + ingressClassName: "nginx" + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/auth/tokens/id/.*" + pathType: "ImplementationSpecific" + backend: + service: + name: {{ template "gafaelfawr.fullname" . }} + port: + number: 8080 diff --git a/services/gafaelfawr/templates/ingress.yaml b/services/gafaelfawr/templates/ingress.yaml new file mode 100644 index 0000000000..18a03df271 --- /dev/null +++ b/services/gafaelfawr/templates/ingress.yaml @@ -0,0 +1,49 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ template "gafaelfawr.fullname" . }} + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + ingressClassName: "nginx" + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/auth" + pathType: Prefix + backend: + service: + name: {{ template "gafaelfawr.fullname" . }} + port: + number: 8080 + - path: "/login" + pathType: Exact + backend: + service: + name: {{ template "gafaelfawr.fullname" . }} + port: + number: 8080 + - path: "/logout" + pathType: Exact + backend: + service: + name: {{ template "gafaelfawr.fullname" . }} + port: + number: 8080 + {{- if .Values.config.oidcServer.enabled }} + - path: "/.well-known/jwks.json" + pathType: Exact + backend: + service: + name: {{ template "gafaelfawr.fullname" . 
}} + port: + number: 8080 + - path: "/.well-known/openid-configuration" + pathType: Exact + backend: + service: + name: {{ template "gafaelfawr.fullname" . }} + port: + number: 8080 + {{- end }} diff --git a/services/gafaelfawr/templates/networkpolicy.yaml b/services/gafaelfawr/templates/networkpolicy.yaml new file mode 100644 index 0000000000..f5104e820d --- /dev/null +++ b/services/gafaelfawr/templates/networkpolicy.yaml @@ -0,0 +1,25 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "gafaelfawr.fullname" . }} + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + podSelector: + # This policy controls inbound access to the frontend component. + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "frontend" + policyTypes: + - Ingress + ingress: + # Allow inbound access to the frontend from pods (in any namespace) + # labeled gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/gafaelfawr/templates/redis-networkpolicy.yaml b/services/gafaelfawr/templates/redis-networkpolicy.yaml new file mode 100644 index 0000000000..3a7adfb52f --- /dev/null +++ b/services/gafaelfawr/templates/redis-networkpolicy.yaml @@ -0,0 +1,38 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "gafaelfawr.fullname" . }}-redis + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + podSelector: + # This policy controls inbound and outbound access to the Redis component. + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "redis" + policyTypes: + - Ingress + # Deny all outbound access; Redis doesn't need to talk to anything. + - Egress + ingress: + # Allow inbound access to Redis from all other components. 
+ - from: + - podSelector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 14 }} + app.kubernetes.io/component: "audit" + - podSelector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 14 }} + app.kubernetes.io/component: "frontend" + - podSelector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 14 }} + app.kubernetes.io/component: "maintenance" + - podSelector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 14 }} + app.kubernetes.io/component: "operator" + ports: + - protocol: "TCP" + port: 6379 diff --git a/services/gafaelfawr/templates/redis-service.yml b/services/gafaelfawr/templates/redis-service.yml new file mode 100644 index 0000000000..8824f01e36 --- /dev/null +++ b/services/gafaelfawr/templates/redis-service.yml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "gafaelfawr.fullname" . }}-redis + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: 6379 + protocol: "TCP" + targetPort: 6379 + selector: + {{- include "gafaelfawr.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: "redis" + sessionAffinity: None diff --git a/services/gafaelfawr/templates/redis-statefulset.yaml b/services/gafaelfawr/templates/redis-statefulset.yaml new file mode 100644 index 0000000000..99fe2a841e --- /dev/null +++ b/services/gafaelfawr/templates/redis-statefulset.yaml @@ -0,0 +1,107 @@ +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ template "gafaelfawr.fullname" . }}-redis + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "gafaelfawr.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "redis" + serviceName: "redis" + template: + metadata: + {{- with .Values.redis.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "gafaelfawr.selectorLabels" . 
| nindent 8 }} + app.kubernetes.io/component: "redis" + spec: + automountServiceAccountToken: false + containers: + - name: "redis" + args: + - "redis-server" + - "--appendonly" + - "yes" + - "--requirepass" + - "$(REDIS_PASSWORD)" + env: + - name: "REDIS_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ template "gafaelfawr.fullname" . }}-secret + key: "redis-password" + image: "{{ .Values.redis.image.repository }}:{{ .Values.redis.image.tag }}" + imagePullPolicy: {{ .Values.redis.image.pullPolicy | quote }} + livenessProbe: + exec: + command: + - "sh" + - "-c" + - "redis-cli -h $(hostname) incr health:counter" + initialDelaySeconds: 15 + periodSeconds: 30 + ports: + - containerPort: 6379 + resources: + limits: + cpu: "1" + requests: + cpu: "100m" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + volumeMounts: + - name: {{ template "gafaelfawr.fullname" . }}-redis-data + mountPath: "/data" + imagePullSecrets: + - name: "pull-secret" + securityContext: + fsGroup: 999 + runAsNonRoot: true + runAsUser: 999 + runAsGroup: 999 + {{- if (not .Values.redis.persistence.enabled) }} + volumes: + - name: {{ template "gafaelfawr.fullname" . }}-redis-data + emptyDir: {} + {{- else if .Values.redis.persistence.volumeClaimName }} + volumes: + - name: {{ template "gafaelfawr.fullname" . }}-redis-data + persistentVolumeClaim: + claimName: {{ .Values.redis.persistence.volumeClaimName | quote }} + {{- end }} + {{- with .Values.redis.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.redis.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.redis.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if (and .Values.redis.persistence.enabled (not .Values.redis.persistence.volumeClaimName)) }} + volumeClaimTemplates: + - metadata: + name: {{ template "gafaelfawr.fullname" . 
}}-redis-data + spec: + accessModes: + - {{ .Values.redis.persistence.accessMode | quote }} + resources: + requests: + storage: {{ .Values.redis.persistence.size | quote }} + {{- if .Values.redis.persistence.storageClass }} + storageClassName: {{ .Values.redis.persistence.storageClass | quote }} + {{- end }} + {{- end }} diff --git a/services/gafaelfawr/templates/service.yaml b/services/gafaelfawr/templates/service.yaml new file mode 100644 index 0000000000..27e6cdb0f0 --- /dev/null +++ b/services/gafaelfawr/templates/service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "gafaelfawr.fullname" . }} + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - protocol: "TCP" + port: 8080 + targetPort: "http" + selector: + {{- include "gafaelfawr.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: "frontend" + sessionAffinity: None diff --git a/services/gafaelfawr/templates/serviceaccount-operator.yaml b/services/gafaelfawr/templates/serviceaccount-operator.yaml new file mode 100644 index 0000000000..e1c96b4e47 --- /dev/null +++ b/services/gafaelfawr/templates/serviceaccount-operator.yaml @@ -0,0 +1,55 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "gafaelfawr.fullname" . }}-operator + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} + annotations: + {{- if .Values.cloudsql.enabled }} + iam.gke.io/gcp-service-account: {{ required "cloudsql.serviceAccount must be set to a valid Google service account" .Values.cloudsql.serviceAccount | quote }} + {{- end }} +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: {{ include "gafaelfawr.fullname" . }}-operator + labels: + {{- include "gafaelfawr.labels" . 
| nindent 4 }} +rules: + - apiGroups: [""] + resources: ["events"] + verbs: ["create"] + - apiGroups: [""] + resources: ["namespaces"] + verbs: ["list", "watch"] + - apiGroups: [""] + resources: ["secrets"] + verbs: ["create", "get", "patch", "update"] + - apiGroups: ["apiextensions.k8s.io"] + resources: ["customresourcedefinitions"] + verbs: ["list", "watch"] + - apiGroups: ["networking.k8s.io"] + resources: ["ingresses"] + verbs: ["create", "get", "patch", "update"] + - apiGroups: ["gafaelfawr.lsst.io"] + resources: + - "gafaelfawringresses" + - "gafaelfawringresses/status" + - "gafaelfawrservicetokens" + - "gafaelfawrservicetokens/status" + verbs: ["get", "list", "patch", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: {{ include "gafaelfawr.fullname" . }}-operator + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +subjects: + - kind: ServiceAccount + name: {{ include "gafaelfawr.fullname" . }}-operator + namespace: {{ .Release.Namespace }} +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: {{ include "gafaelfawr.fullname" . }}-operator diff --git a/services/gafaelfawr/templates/serviceaccount.yaml b/services/gafaelfawr/templates/serviceaccount.yaml new file mode 100644 index 0000000000..770808516a --- /dev/null +++ b/services/gafaelfawr/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.cloudsql.enabled -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "gafaelfawr.fullname" . }} + labels: + {{- include "gafaelfawr.labels" . 
| nindent 4 }} + annotations: + {{- if .Values.cloudsql.enabled }} + iam.gke.io/gcp-service-account: {{ required "cloudsql.serviceAccount must be set to a valid Google service account" .Values.cloudsql.serviceAccount | quote }} + {{- end }} +{{- end }} diff --git a/services/gafaelfawr/templates/vault-secrets.yaml b/services/gafaelfawr/templates/vault-secrets.yaml new file mode 100644 index 0000000000..e0b12539b7 --- /dev/null +++ b/services/gafaelfawr/templates/vault-secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ template "gafaelfawr.fullname" . }}-secret + labels: + {{- include "gafaelfawr.labels" . | nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/gafaelfawr" + type: Opaque diff --git a/services/gafaelfawr/values-base.yaml b/services/gafaelfawr/values-base.yaml index 641e1196a6..5752f7d963 100644 --- a/services/gafaelfawr/values-base.yaml +++ b/services/gafaelfawr/values-base.yaml @@ -1,54 +1,95 @@ -gafaelfawr: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "base-lsp.lsst.codes" - vaultSecretsPath: "secret/k8s_operator/base-lsp.lsst.codes/gafaelfawr" +redis: + persistence: + storageClass: "rook-ceph-block" - # Reset token storage on every Redis restart for now. This should change to - # use persistent volumes once we can coordinate that. - redis: - persistence: - enabled: false +config: + slackAlerts: true + databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" - config: - host: "base-lsp.lsst.codes" - databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" + github: + clientId: "ec88b9b897f302b620d1" - github: - clientId: "ec88b9b897f302b620d1" + # Allow access by GitHub team. 
+ groupMapping: + "admin:provision": + - github: + organization: "lsst-sqre" + team: "square" + "exec:admin": + - github: + organization: "lsst-sqre" + team: "square" + "exec:internal-tools": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "base-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "exec:notebook": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "base-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "exec:portal": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "base-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "read:image": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "base-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "read:tap": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "base-access" + - github: + organization: "rubin-summit" + team: "rsp-access" - # Allow access by GitHub team. 
- groupMapping: - "admin:provision": - - "lsst-sqre-square" - "exec:admin": - - "lsst-sqre-square" - "exec:notebook": - - "lsst-sqre-square" - - "lsst-sqre-friends" - - "lsst-ts-base-access" - - "rubin-summit-rsp-access" - "exec:portal": - - "lsst-sqre-square" - - "lsst-sqre-friends" - - "lsst-ts-base-access" - - "rubin-summit-rsp-access" - "read:tap": - - "lsst-sqre-square" - - "lsst-sqre-friends" - - "lsst-ts-base-access" - - "rubin-summit-rsp-access" - - initialAdmins: - - "afausti" - - "athornton" - - "cbanek" - - "frossie" - - "jonathansick" - - "rra" - - "simonkrughoff" - -pull-secret: - enabled: true - path: "secret/k8s_operator/base-lsp.lsst.codes/pull-secret" + initialAdmins: + - "afausti" + - "athornton" + - "cbanek" + - "frossie" + - "jonathansick" + - "rra" + - "simonkrughoff" diff --git a/services/gafaelfawr/values-ccin2p3.yaml b/services/gafaelfawr/values-ccin2p3.yaml new file mode 100644 index 0000000000..0326a102a3 --- /dev/null +++ b/services/gafaelfawr/values-ccin2p3.yaml @@ -0,0 +1,100 @@ +replicaCount: 2 + +redis: + persistence: + enabled: false + +config: + loglevel: "DEBUG" + databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" + + # Session length and token expiration (in minutes). 
+ issuer: + exp_minutes: 43200 # 30 days + + # github: + # clientId: ae314e45a6af43ea910a + + oidc: + clientId: "lsst_rsp" + loginUrl: "https://login.cc.in2p3.fr/auth/realms/cc.in2p3.fr/protocol/openid-connect/auth" + tokenUrl: "https://login.cc.in2p3.fr/auth/realms/cc.in2p3.fr/protocol/openid-connect/token" + # scopes: + # - "openid" + issuer: "https://login.cc.in2p3.fr/auth/realms/cc.in2p3.fr" + gidClaim: "gid_number" + uidClaim: "uid_number" + groupsClaim: "groups" + usernameClaim: "preferred_username" + + oidcServer: + enabled: false + + initialAdmins: + - "mainetti" + + groupMapping: + "admin:token": "lsst" + "user:token": "lsst" + "exec:admin": "lsst" + "read:all": + - "lsst" + "exec:internal-tools": + - "lsst" + "exec:notebook": + - "lsst" + "exec:portal": + - "lsst" + "read:tap": + - "lsst" + "read:image": + - "lsst" + + # Allow access by GitHub team. + # groupMapping: + # "admin:provision": + # - github: + # organization: "rubin-in2p3" + # team: "admin" + # "exec:admin": + # - github: + # organization: "rubin-in2p3" + # team: "admin" + # "exec:notebook": + # - github: + # organization: "rubin-in2p3" + # team: "admin" + # - github: + # organization: "rubin-in2p3" + # team: "user" + # "exec:portal": + # - github: + # organization: "rubin-in2p3" + # team: "admin" + # - github: + # organization: "rubin-in2p3" + # team: "user" + # "read:tap": + # - github: + # organization: "rubin-in2p3" + # team: "admin" + # - github: + # organization: "rubin-in2p3" + # team: "user" + # - github: + # organization: "rubin-in2p3" + # team: "delegates" + # "read:image": + # - github: + # organization: "rubin-in2p3" + # team: "admin" + # - github: + # organization: "rubin-in2p3" + # team: "user" + # - github: + # organization: "rubin-in2p3" + # team: "delegates" + + # initialAdmins: + # - "mainetti" + # #- "gabrimaine" diff --git a/services/gafaelfawr/values-idfdev.yaml b/services/gafaelfawr/values-idfdev.yaml index c67f1ee830..a9ddcc1469 100644 --- 
a/services/gafaelfawr/values-idfdev.yaml +++ b/services/gafaelfawr/values-idfdev.yaml @@ -1,64 +1,63 @@ -gafaelfawr: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data-dev.lsst.cloud" - vaultSecretsPath: "secret/k8s_operator/data-dev.lsst.cloud/gafaelfawr" - - # Use the CSI storage class so that we can use snapshots. - redis: - persistence: - storageClass: "standard-rwo" - - config: - host: "data-dev.lsst.cloud" - databaseUrl: "postgresql://gafaelfawr@localhost/gafaelfawr" - - # Support OpenID Connect clients like Chronograf. - oidcServer: - enabled: true - - github: - clientId: "f46555b3f4c524e764ac" - - # Allow access by GitHub team. - groupMapping: - "admin:provision": - - "lsst-sqre-square" - "exec:admin": - - "lsst-sqre-square" - "exec:notebook": - - "lsst-sqre-square" - - "lsst-sqre-friends" - "exec:portal": - - "lsst-sqre-square" - - "lsst-sqre-friends" - "read:image": - - "lsst-sqre-square" - - "lsst-sqre-friends" - "read:tap": - - "lsst-sqre-square" - - "lsst-sqre-friends" - - initialAdmins: - - "afausti" - - "athornton" - - "cbanek" - - "frossie" - - "jonathansick" - - "rra" - - "simonkrughoff" - - errorFooter: | - To report problems or ask for help, please open an issue in the - GitHub - rubin-dp0/Support project. - - cloudsql: +# Use the CSI storage class so that we can use snapshots. 
+redis: + persistence: + storageClass: "standard-rwo" + +config: + loglevel: "DEBUG" + slackAlerts: true + + cilogon: + clientId: "cilogon:/client_id/46f9ae932fd30e9fb1b246972a3c0720" + enrollmentUrl: "https://id-dev.lsst.cloud/registry/co_petitions/start/coef:6" + test: true + usernameClaim: "username" + + firestore: + project: "rsp-firestore-dev-31c4" + + ldap: + url: "ldaps://ldap-test.cilogon.org" + userDn: "uid=readonly_user,ou=system,o=LSST,o=CO,dc=lsst_dev,dc=org" + groupBaseDn: "ou=groups,o=LSST,o=CO,dc=lsst_dev,dc=org" + groupObjectClass: "eduMember" + groupMemberAttr: "hasMember" + userBaseDn: "ou=people,o=LSST,o=CO,dc=lsst_dev,dc=org" + userSearchAttr: "voPersonApplicationUID" + addUserGroup: true + + # Support OpenID Connect clients like Chronograf. + oidcServer: enabled: true - instanceConnectionName: "science-platform-dev-7696:us-central1:science-platform-dev-e9e11de2" - serviceAccount: "gafaelfawr@science-platform-dev-7696.iam.gserviceaccount.com" -pull-secret: + groupMapping: + "admin:jupyterlab": + - "g_admins" + "admin:provision": + - "g_admins" + "exec:admin": + - "g_admins" + "exec:internal-tools": + - "g_users" + "exec:notebook": + - "g_users" + "exec:portal": + - "g_users" + "read:image": + - "g_users" + "read:tap": + - "g_users" + + initialAdmins: + - "adam" + - "afausti" + - "cbanek" + - "frossie" + - "jsick" + - "rra" + - "simonkrughoff" + +cloudsql: enabled: true - path: "secret/k8s_operator/data-dev.lsst.cloud/pull-secret" + instanceConnectionName: "science-platform-dev-7696:us-central1:science-platform-dev-e9e11de2" + serviceAccount: "gafaelfawr@science-platform-dev-7696.iam.gserviceaccount.com" diff --git a/services/gafaelfawr/values-idfint.yaml b/services/gafaelfawr/values-idfint.yaml index 175601c301..b1b8c4ba65 100644 --- a/services/gafaelfawr/values-idfint.yaml +++ b/services/gafaelfawr/values-idfint.yaml @@ -1,68 +1,64 @@ -gafaelfawr: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data-int.lsst.cloud" - 
vaultSecretsPath: "secret/k8s_operator/data-int.lsst.cloud/gafaelfawr" +# Use the CSI storage class so that we can use snapshots. +redis: + persistence: + storageClass: "standard-rwo" - # Use the CSI storage class so that we can use snapshots. - redis: - persistence: - storageClass: "standard-rwo" +config: + slackAlerts: true - config: - loglevel: "DEBUG" - host: "data-int.lsst.cloud" - databaseUrl: "postgresql://gafaelfawr@localhost/gafaelfawr" + cilogon: + clientId: "cilogon:/client_id/6b3f86ecfe74f14afa81b73a76be0868" + enrollmentUrl: "https://id-int.lsst.cloud/registry/co_petitions/start/coef:10" + test: true + usernameClaim: "username" - github: - clientId: "0c4cc7eaffc0f89b9ace" + firestore: + project: "rsp-firestore-int-7bfb" - # Allow access by GitHub team. - groupMapping: - "admin:provision": - - "lsst-sqre-square" - "exec:admin": - - "lsst-sqre-square" - "exec:notebook": - - "lsst-ops-panda" - - "lsst-sqre-square" - - "lsst-sqre-friends" - "exec:portal": - - "lsst-ops-panda" - - "lsst-sqre-square" - - "lsst-sqre-friends" - "read:alertdb": - - "lsst-sqre-square" - - "lsst-sqre-friends" - "read:image": - - "lsst-ops-panda" - - "lsst-sqre-square" - - "lsst-sqre-friends" - "read:tap": - - "lsst-ops-panda" - - "lsst-sqre-square" - - "lsst-sqre-friends" + ldap: + url: "ldaps://ldap-test.cilogon.org" + userDn: "uid=readonly_user,ou=system,o=LSST,o=CO,dc=lsst,dc=org" + groupBaseDn: "ou=groups,o=LSST,o=CO,dc=lsst,dc=org" + groupObjectClass: "eduMember" + groupMemberAttr: "hasMember" + userBaseDn: "ou=people,o=LSST,o=CO,dc=lsst,dc=org" + userSearchAttr: "voPersonApplicationUID" + addUserGroup: true - initialAdmins: - - "afausti" - - "athornton" - - "cbanek" - - "frossie" - - "jonathansick" - - "rra" - - "simonkrughoff" + # Support OpenID Connect clients like Chronograf. + oidcServer: + enabled: true - errorFooter: | - To report problems or ask for help, please open an issue in the - GitHub - rubin-dp0/Support project. + # Allow access by GitHub team. 
+ groupMapping: + "admin:provision": + - "g_admins" + "exec:admin": + - "g_admins" + "exec:internal-tools": + - "g_users" + "exec:notebook": + - "g_users" + "exec:portal": + - "g_users" + "read:alertdb": + - "g_admins" + - "g_developers" + "read:image": + - "g_users" + "read:tap": + - "g_users" - cloudsql: - enabled: true - instanceConnectionName: "science-platform-int-dc5d:us-central1:science-platform-int-8f439af2" - serviceAccount: "gafaelfawr@science-platform-int-dc5d.iam.gserviceaccount.com" + initialAdmins: + - "adam" + - "afausti" + - "cbanek" + - "frossie" + - "jsick" + - "rra" + - "simonkrughoff" -pull-secret: +cloudsql: enabled: true - path: "secret/k8s_operator/data-int.lsst.cloud/pull-secret" + instanceConnectionName: "science-platform-int-dc5d:us-central1:science-platform-int-8f439af2" + serviceAccount: "gafaelfawr@science-platform-int-dc5d.iam.gserviceaccount.com" diff --git a/services/gafaelfawr/values-idfprod.yaml b/services/gafaelfawr/values-idfprod.yaml index ec443eb303..46ced602f4 100644 --- a/services/gafaelfawr/values-idfprod.yaml +++ b/services/gafaelfawr/values-idfprod.yaml @@ -1,75 +1,116 @@ -gafaelfawr: - replicaCount: 2 +replicaCount: 2 - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data.lsst.cloud" - vaultSecretsPath: "secret/k8s_operator/data.lsst.cloud/gafaelfawr" +# Use the CSI storage class so that we can use snapshots. +redis: + persistence: + storageClass: "standard-rwo" - # Use the CSI storage class so that we can use snapshots. - redis: - persistence: - storageClass: "standard-rwo" +config: + slackAlerts: true - config: - loglevel: "DEBUG" - host: "data.lsst.cloud" - databaseUrl: "postgresql://gafaelfawr@localhost/gafaelfawr" + github: + clientId: "65b6333a066375091548" - github: - clientId: "65b6333a066375091548" + # Allow access by GitHub team. 
+ groupMapping: + "admin:provision": + - github: + organization: "lsst-sqre" + team: "square" + "exec:admin": + - github: + organization: "lsst-sqre" + team: "square" + "exec:internal-tools": + - github: + organization: "lsst" + team: "data-management" + - github: + organization: "lsst" + team: "ops" + - github: + organization: "lsst-sqre" + team: "square" + "exec:notebook": + - github: + organization: "lsst" + team: "data-management" + - github: + organization: "lsst" + team: "ops" + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "rubin-dp0" + team: "delegates" + - github: + organization: "rubin-dp0" + team: "friends" + "exec:portal": + - github: + organization: "lsst" + team: "data-management" + - github: + organization: "lsst" + team: "ops" + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "rubin-dp0" + team: "delegates" + - github: + organization: "rubin-dp0" + team: "friends" + "read:image": + - github: + organization: "lsst" + team: "data-management" + - github: + organization: "lsst" + team: "ops" + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "rubin-dp0" + team: "delegates" + - github: + organization: "rubin-dp0" + team: "friends" + "read:tap": + - github: + organization: "lsst" + team: "data-management" + - github: + organization: "lsst" + team: "ops" + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "rubin-dp0" + team: "delegates" + - github: + organization: "rubin-dp0" + team: "friends" - # Allow access by GitHub team. 
- groupMapping: - "admin:provision": - - "lsst-sqre-square" - "exec:admin": - - "lsst-sqre-square" - "exec:notebook": - - "lsst-sqre-square" - - "lsst-data-management" - - "lsst-ops" - - "rubin-dp0-delegates" - - "rubin-dp0-friends" - "exec:portal": - - "lsst-sqre-square" - - "lsst-data-management" - - "lsst-ops" - - "rubin-dp0-delegates" - - "rubin-dp0-friends" - "read:image": - - "lsst-sqre-square" - - "lsst-data-management" - - "lsst-ops" - - "rubin-dp0-delegates" - - "rubin-dp0-friends" - "read:tap": - - "lsst-sqre-square" - - "lsst-data-management" - - "lsst-ops" - - "rubin-dp0-delegates" - - "rubin-dp0-friends" + initialAdmins: + - "afausti" + - "athornton" + - "cbanek" + - "frossie" + - "jonathansick" + - "rra" + - "simonkrughoff" - initialAdmins: - - "afausti" - - "athornton" - - "cbanek" - - "frossie" - - "jonathansick" - - "rra" - - "simonkrughoff" + errorFooter: | + To report problems or ask for help, please open an issue in the + GitHub + rubin-dp0/Support project. - errorFooter: | - To report problems or ask for help, please open an issue in the - GitHub - rubin-dp0/Support project. 
- - cloudsql: - enabled: true - instanceConnectionName: "science-platform-stable-6994:us-central1:science-platform-stable-0c29612b" - serviceAccount: "gafaelfawr@science-platform-stable-6994.iam.gserviceaccount.com" - -pull-secret: +cloudsql: enabled: true - path: "secret/k8s_operator/data.lsst.cloud/pull-secret" + instanceConnectionName: "science-platform-stable-6994:us-central1:science-platform-stable-0c29612b" + serviceAccount: "gafaelfawr@science-platform-stable-6994.iam.gserviceaccount.com" diff --git a/services/gafaelfawr/values-int.yaml b/services/gafaelfawr/values-int.yaml deleted file mode 100644 index 7b5e57347a..0000000000 --- a/services/gafaelfawr/values-int.yaml +++ /dev/null @@ -1,49 +0,0 @@ -gafaelfawr: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "lsst-lsp-int.ncsa.illinois.edu" - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/gafaelfawr" - - # Use an existing, manually-managed PVC for Redis. - redis: - persistence: - volumeClaimName: "auth-int-volume-claim" - - config: - host: "lsst-lsp-int.ncsa.illinois.edu" - databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" - - # IP range used by the cluster, used to determine the true client IP for - # logging. - proxies: - - "141.142.181.0/24" - - # Use CILogon authentication. - cilogon: - clientId: "cilogon:/client_id/6ca7b54ac075b65bccb9c885f9ba4a75" - redirectUrl: "https://lsst-lsp-int.ncsa.illinois.edu/oauth2/callback" - test: true - loginParams: - skin: "LSST" - - # Use NCSA groups to determine token scopes. 
- groupMapping: - "admin:provision": ["lsst_int_lsp_admin"] - "exec:admin": ["lsst_int_lsp_admin"] - "exec:notebook": ["lsst_int_lspdev"] - "exec:portal": ["lsst_int_lspdev"] - "read:tap": ["lsst_int_lspdev"] - - initialAdmins: - - "afausti" - - "athornto" - - "cbanek" - - "frossie" - - "jsick" - - "krughoff" - - "rra" - -pull-secret: - enabled: true - path: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret" diff --git a/services/gafaelfawr/values-minikube.yaml b/services/gafaelfawr/values-minikube.yaml index 99a58a4cd4..a40c7713f5 100644 --- a/services/gafaelfawr/values-minikube.yaml +++ b/services/gafaelfawr/values-minikube.yaml @@ -1,47 +1,54 @@ -gafaelfawr: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "minikube.lsst.codes" - vaultSecretsPath: "secret/k8s_operator/minikube.lsst.codes/gafaelfawr" +# Reset token storage on every Redis restart. +redis: + persistence: + enabled: false - # Reset token storage on every Redis restart. - redis: - persistence: - enabled: false +config: + databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" - config: - host: "minikube.lsst.codes" - databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" + # Support OpenID Connect clients like Chronograf. + oidcServer: + enabled: true - # Support OpenID Connect clients like Chronograf. - oidcServer: - enabled: true + github: + clientId: "65b6333a066375091548" - # Use CILogon authentication. - cilogon: - clientId: "cilogon:/client_id/74e865cd71a3a327096d36081166b739" - redirectUrl: "https://minikube.lsst.codes/login" - loginParams: - skin: "LSST" + # Allow access by GitHub team. 
+ groupMapping: + "admin:provision": + - github: + organization: "lsst-sqre" + team: "square" + "exec:admin": + - github: + organization: "lsst-sqre" + team: "square" + "exec:internal-tools": + - github: + organization: "lsst-sqre" + team: "square" + "exec:notebook": + - github: + organization: "lsst-sqre" + team: "square" + "exec:portal": + - github: + organization: "lsst-sqre" + team: "square" + "read:image": + - github: + organization: "lsst-sqre" + team: "square" + "read:tap": + - github: + organization: "lsst-sqre" + team: "square" - # Use NCSA groups to determine token scopes. - groupMapping: - "exec:admin": ["lsst_int_lsp_admin"] - "exec:notebook": ["lsst_int_lspdev"] - "exec:portal": ["lsst_int_lspdev"] - "exec:user": ["lsst_int_lspdev"] - "read:tap": ["lsst_int_lspdev"] - - initialAdmins: - - "afausti" - - "athornton" - - "cbanek" - - "frossie" - - "jonathansick" - - "rra" - - "simonkrughoff" - -pull-secret: - enabled: true - path: "secret/k8s_operator/minikube.lsst.codes/pull-secret" + initialAdmins: + - "afausti" + - "athornton" + - "cbanek" + - "frossie" + - "jonathansick" + - "rra" + - "simonkrughoff" diff --git a/services/gafaelfawr/values-red-five.yaml b/services/gafaelfawr/values-red-five.yaml deleted file mode 100644 index 0cc981eace..0000000000 --- a/services/gafaelfawr/values-red-five.yaml +++ /dev/null @@ -1,46 +0,0 @@ -gafaelfawr: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "red-five.lsst.codes" - vaultSecretsPath: "secret/k8s_operator/red-five.lsst.codes/gafaelfawr" - - # Reset token storage on every Redis restart. - redis: - persistence: - enabled: false - - config: - host: "red-five.lsst.codes" - databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" - - # Disabled but kept so that the client ID is easily accessible. - # github: - # clientId: "a19e79298a352f3e5650" - - # Use CILogon authentication. 
- cilogon: - clientId: "cilogon:/client_id/51ea95a5fac24d5a6f33e658d7d77d2a" - loginParams: - skin: "LSST" - - # Use NCSA groups to determine token scopes. - groupMapping: - "admin:provision": ["lsst_int_lsp_admin"] - "exec:admin": ["lsst_int_lsp_admin"] - "exec:notebook": ["lsst_int_lspdev"] - "exec:portal": ["lsst_int_lspdev"] - "read:tap": ["lsst_int_lspdev"] - - initialAdmins: - - "afausti" - - "athornto" - - "cbanek" - - "frossie" - - "jsick" - - "krughoff" - - "rra" - -pull-secret: - enabled: true - path: "secret/k8s_operator/red-five.lsst.codes/pull-secret" diff --git a/services/gafaelfawr/values-roe.yaml b/services/gafaelfawr/values-roe.yaml index c579149a4c..5f7c2128f4 100644 --- a/services/gafaelfawr/values-roe.yaml +++ b/services/gafaelfawr/values-roe.yaml @@ -1,47 +1,55 @@ -gafaelfawr: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "rsp.lsst.ac.uk" - vaultSecretsPath: "secret/k8s_operator/roe/gafaelfawr" +redis: + persistence: + enabled: false - redis: - persistence: - enabled: false +config: + databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" + github: + clientId: "10172b4db1b67ee31620" - config: - loglevel: "DEBUG" - host: "rsp.lsst.ac.uk" - databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" - github: - clientId: "10172b4db1b67ee31620" + # Allow access by GitHub team. 
+ groupMapping: + "exec:admin": + - github: + organization: "lsp-uk" + team: "dev" + "exec:notebook": + - github: + organization: "lsp-uk" + team: "dev" + "exec:internal-tools": + - github: + organization: "lsp-uk" + team: "dev" + "exec:portal": + - github: + organization: "lsp-uk" + team: "dev" + "exec:user": + - github: + organization: "lsp-uk" + team: "dev" + "read:workspace": + - github: + organization: "lsp-uk" + team: "dev" + "read:workspace/user": + - github: + organization: "lsp-uk" + team: "dev" + "write:workspace/user": + - github: + organization: "lsp-uk" + team: "dev" + "read:image": + - github: + organization: "lsp-uk" + team: "dev" + "read:tap": + - github: + organization: "lsp-uk" + team: "dev" - # Allow access by GitHub team. - groupMapping: - "exec:admin": - - "lsp-uk-dev" - "exec:notebook": - - "lsp-uk-dev" - "read:workspace": - - "lsp-uk-dev" - "read:workspace/user": - - "lsp-uk-dev" - "write:workspace/user": - - "lsp-uk-dev" - "exec:portal": - - "lsp-uk-dev" - "exec:user": - - "lsp-uk-dev" - "read:tap": - - "lsp-uk-dev" - "read:image": - - "lsp-uk-dev" - - initialAdmins: - - "stvoutsin" - - -pull-secret: - enabled: true - path: secret/k8s_operator/roe/pull-secret + initialAdmins: + - "stvoutsin" diff --git a/services/gafaelfawr/values-squash-sandbox.yaml b/services/gafaelfawr/values-squash-sandbox.yaml deleted file mode 100644 index 05d37fec61..0000000000 --- a/services/gafaelfawr/values-squash-sandbox.yaml +++ /dev/null @@ -1,49 +0,0 @@ -gafaelfawr: - ingress: - host: "squash-sandbox.lsst.codes" - vaultSecretsPath: "secret/k8s_operator/squash-sandbox/gafaelfawr" - - # Reset token storage on every Redis restart. - redis: - persistence: - enabled: false - - config: - host: "squash-sandbox.lsst.codes" - databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" - - # Whether to issue tokens for InfluxDB. If set to true, influxdb-secret - # must be set in the Gafaelfawr secret. 
- issuer: - influxdb: - enabled: true - username: "efdreader" - - # Whether to support OpenID Connect clients. If set to true, - # oidc-server-secrets must be set in the Gafaelfawr secret. - oidcServer: - enabled: true - - # Use CILogon authentication. - cilogon: - clientId: "cilogon:/client_id/232eaabf026dab8b26f9c9770873cb7e" - redirectUrl: "https://squash-sandbox.lsst.codes/login" - loginParams: - skin: "LSST" - - # Use NCSA groups to determine token scopes. - groupMapping: - "admin:provision": ["lsst_int_lsp_admin"] - "exec:admin": ["lsst_int_lsp_admin"] - "exec:notebook": ["lsst_int_lspdev"] - "exec:portal": ["lsst_int_lspdev"] - "read:tap": ["lsst_int_lspdev"] - - initialAdmins: - - "afausti" - - "athornto" - - "cbanek" - - "frossie" - - "jsick" - - "krughoff" - - "rra" diff --git a/services/gafaelfawr/values-stable.yaml b/services/gafaelfawr/values-stable.yaml deleted file mode 100644 index 9c19d82f9f..0000000000 --- a/services/gafaelfawr/values-stable.yaml +++ /dev/null @@ -1,50 +0,0 @@ -gafaelfawr: - replicaCount: 2 - - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "lsst-lsp-stable.ncsa.illinois.edu" - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/gafaelfawr" - - # Use an existing, manually-managed PVC for Redis. - redis: - persistence: - volumeClaimName: "auth-redis-volume-claim" - - config: - host: "lsst-lsp-stable.ncsa.illinois.edu" - databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" - - # IP range used by the cluster, used to determine the true client IP for - # logging. - proxies: - - "41.142.182.128/26" - - # Use CILogon authentication. - cilogon: - clientId: "cilogon:/client_id/7ae419868b97e81644ced9886ffbcec" - redirectUrl: "https://lsst-lsp-stable.ncsa.illinois.edu/oauth2/callback" - loginParams: - skin: "LSST" - - # Use NCSA groups to determine token scopes. 
- groupMapping: - "admin:provision": ["lsst_int_lsp_admin"] - "exec:admin": ["lsst_int_lsp_admin"] - "exec:notebook": ["lsst_int_lspdev"] - "exec:portal": ["lsst_int_lspdev"] - "read:tap": ["lsst_int_lspdev"] - - initialAdmins: - - "afausti" - - "athornto" - - "cbanek" - - "frossie" - - "jsick" - - "krughoff" - - "rra" - -pull-secret: - enabled: true - path: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret" diff --git a/services/gafaelfawr/values-summit.yaml b/services/gafaelfawr/values-summit.yaml index 431adad2b3..1798c04d76 100644 --- a/services/gafaelfawr/values-summit.yaml +++ b/services/gafaelfawr/values-summit.yaml @@ -1,55 +1,100 @@ -gafaelfawr: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "summit-lsp.lsst.codes" - vaultSecretsPath: "secret/k8s_operator/summit-lsp.lsst.codes/gafaelfawr" +redis: + persistence: + storageClass: "rook-ceph-block" - # Reset token storage on every Redis restart for now. This should change to - # use persistent volumes once we can coordinate that. - redis: - persistence: - enabled: false +config: + slackAlerts: true + databaseUrl: "postgresql://gafaelfawr@postgresdb01.cp.lsst.org/gafaelfawr" - config: - host: "summit-lsp.lsst.codes" - databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" + github: + clientId: "220d64cbf46f9d2b7873" - # Use GitHub authentication. - github: - clientId: "220d64cbf46f9d2b7873" + # Support OpenID Connect clients like Chronograf. + oidcServer: + enabled: true - # Allow access by GitHub team. 
- groupMapping: - "admin:provision": - - "lsst-sqre-square" - "exec:admin": - - "lsst-sqre-square" - "exec:notebook": - - "lsst-sqre-square" - - "lsst-sqre-friends" - - "lsst-ts-summit-access" - - "rubin-summit-rsp-access" - "exec:portal": - - "lsst-sqre-square" - - "lsst-sqre-friends" - - "lsst-ts-summit-access" - - "rubin-summit-rsp-access" - "read:tap": - - "lsst-sqre-square" - - "lsst-sqre-friends" - - "lsst-ts-summit-access" - - "rubin-summit-rsp-access" + # Allow access by GitHub team. + groupMapping: + "admin:provision": + - github: + organization: "lsst-sqre" + team: "square" + "exec:admin": + - github: + organization: "lsst-sqre" + team: "square" + - "lsst-sqre-square" + "exec:internal-tools": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "summit-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "exec:notebook": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "summit-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "exec:portal": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "summit-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "read:image": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "summit-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "read:tap": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "summit-access" + - github: + organization: "rubin-summit" + team: "rsp-access" - initialAdmins: 
- - "afausti" - - "athornton" - - "cbanek" - - "frossie" - - "jonathansick" - - "rra" - - "simonkrughoff" - -pull-secret: - enabled: true - path: "secret/k8s_operator/summit-lsp.lsst.codes/pull-secret" + initialAdmins: + - "afausti" + - "athornton" + - "cbanek" + - "frossie" + - "jonathansick" + - "rra" + - "simonkrughoff" diff --git a/services/gafaelfawr/values-tucson-teststand.yaml b/services/gafaelfawr/values-tucson-teststand.yaml index f919620bda..2fe8208cb8 100644 --- a/services/gafaelfawr/values-tucson-teststand.yaml +++ b/services/gafaelfawr/values-tucson-teststand.yaml @@ -1,55 +1,101 @@ -gafaelfawr: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "tucson-teststand.lsst.codes" - vaultSecretsPath: "secret/k8s_operator/tucson-teststand.lsst.codes/gafaelfawr" +redis: + persistence: + storageClass: "rook-ceph-block" - # Reset token storage on every Redis restart for now. This should change to - # use persistent volumes once we can coordinate that. - redis: - persistence: - enabled: false +config: + slackAlerts: true + databaseUrl: "postgresql://gafaelfawr@squoint.tu.lsst.org/gafaelfawr" - config: - host: "tucson-teststand.lsst.codes" - databaseUrl: "postgresql://gafaelfawr@postgres.postgres/gafaelfawr" + github: + clientId: "49533cbd8a8079730dcf" - # Use GitHub authentication. - github: - clientId: "49533cbd8a8079730dcf" + # Support OpenID Connect clients like Chronograf. + oidcServer: + enabled: true - # Allow access by GitHub team. - groupMapping: - "admin:provision": - - "lsst-sqre-square" - "exec:admin": - - "lsst-sqre-square" - "exec:notebook": - - "lsst-sqre-square" - - "lsst-sqre-friends" - - "lsst-ts-base-access" - - "rubin-summit-rsp-access" - "exec:portal": - - "lsst-sqre-square" - - "lsst-sqre-friends" - - "lsst-ts-base-access" - - "rubin-summit-rsp-access" - "read:tap": - - "lsst-sqre-square" - - "lsst-sqre-friends" - - "lsst-ts-base-access" - - "rubin-summit-rsp-access" + # Allow access by GitHub team. 
+ groupMapping: + "admin:provision": + - github: + organization: "lsst-sqre" + team: "square" + - "lsst-sqre-square" + "exec:admin": + - github: + organization: "lsst-sqre" + team: "square" + - "lsst-sqre-square" + "exec:internal-tools": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "base-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "exec:notebook": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "base-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "exec:portal": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "base-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "read:image": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "base-access" + - github: + organization: "rubin-summit" + team: "rsp-access" + "read:tap": + - github: + organization: "lsst-sqre" + team: "square" + - github: + organization: "lsst-sqre" + team: "friends" + - github: + organization: "lsst-ts" + team: "base-access" + - github: + organization: "rubin-summit" + team: "rsp-access" - initialAdmins: - - "afausti" - - "athornton" - - "cbanek" - - "frossie" - - "jonathansick" - - "rra" - - "simonkrughoff" - -pull-secret: - enabled: true - path: "secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret" + initialAdmins: + - "afausti" + - "athornton" + - "cbanek" + - "frossie" + - "jonathansick" + - "rra" + - "simonkrughoff" diff --git a/services/gafaelfawr/values.yaml b/services/gafaelfawr/values.yaml new file mode 100644 index 0000000000..7439a72e94 --- /dev/null +++ 
b/services/gafaelfawr/values.yaml @@ -0,0 +1,386 @@ +# Default values for Gafaelfawr. + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# -- Number of web frontend pods to start +replicaCount: 1 + +image: + # -- Gafaelfawr image to use + repository: "ghcr.io/lsst-sqre/gafaelfawr" + + # -- Pull policy for the Gafaelfawr image + pullPolicy: "IfNotPresent" + + # -- Tag of Gafaelfawr image to use + # @default -- The appVersion of the chart + tag: "" + +# -- Resource limits and requests for the Gafaelfawr frontend pod +resources: {} + +# -- Annotations for the Gafaelfawr frontend pod +podAnnotations: {} + +# -- Node selector rules for the Gafaelfawr frontend pod +nodeSelector: {} + +# -- Tolerations for the Gafaelfawr frontend pod +tolerations: [] + +# -- Affinity rules for the Gafaelfawr frontend pod +affinity: {} + +config: + # -- URL for the PostgreSQL database + # @default -- None, must be set if `cloudsql.enabled` is not true + databaseUrl: "" + + # -- Choose from the text form of Python logging levels + loglevel: "INFO" + + # -- Whether to send certain serious alerts to Slack. If `true`, the + # `slack-webhook` secret must also be set. + slackAlerts: false + + # -- Session length and token expiration (in minutes) + # @default -- `43200` (30 days) + tokenLifetimeMinutes: 43200 + + # -- List of netblocks used for internal Kubernetes IP addresses, used to + # determine the true client IP for logging + # @default -- [`10.0.0.0/8`, `172.16.0.0/12`, `192.168.0.0/16`] + proxies: + - "10.0.0.0/8" + - "172.16.0.0/12" + - "192.168.0.0/16" + + cilogon: + # -- CILogon client ID. One and only one of this, + # `config.github.clientId`, or `config.oidc.clientId` must be set. 
+ clientId: "" + + # -- Where to send the user if their username cannot be found in LDAP + # @default -- Login fails with an error + enrollmentUrl: "" + + # -- Whether to use the test instance of CILogon + test: false + + # -- Additional parameters to add + loginParams: + skin: "LSST" + + # -- Claim from which to get the username + # @default -- `"uid"` + usernameClaim: "" + + # -- Claim from which to get the numeric UID (only used if not retrieved + # from LDAP or Firestore) + # @default -- `"uidNumber"` + uidClaim: "" + + # -- Claim from which to get the primary GID (only used if not retrieved + # from LDAP or Firestore) + # @default -- Do not set a primary GID + gidClaim: "" + + # -- Claim from which to get the group membership (only used if not + # retrieved from LDAP) + # @default -- `"isMemberOf"` + groupsClaim: "" + + firestore: + # -- If set, assign UIDs and GIDs using Google Firestore in the given + # project. Cloud SQL must be enabled and the Cloud SQL service account + # must have read/write access to that Firestore instance. + # @default -- Firestore support is disabled + project: "" + + github: + # -- GitHub client ID. One and only one of this, `config.cilogon.clientId`, + # or `config.oidc.clientId` must be set. + clientId: "" + + oidc: + # -- Client ID for generic OpenID Connect support. One and only one of + # this, `config.cilogon.clientId`, or `config.github.clientId` must be set. 
+ clientId: "" + + # -- Audience for the JWT token + # @default -- Value of `config.oidc.clientId` + audience: "" + + # -- URL to which to redirect the user for authorization + # @default -- None, must be set + loginUrl: "" + + # -- Additional parameters to add to the login request + loginParams: {} + + # -- URL from which to retrieve the token for the user + # @default -- None, must be set + tokenUrl: "" + + # -- Where to send the user if their username cannot be found in LDAP + # @default -- Login fails with an error + enrollmentUrl: "" + + # -- Issuer for the JWT token + # @default -- None, must be set + issuer: "" + + # -- Scopes to request from the OpenID Connect provider + scopes: + - "openid" + + # -- Claim from which to get the username + # @default -- `"sub"` + usernameClaim: "" + + # -- Claim from which to get the numeric UID (only used if not retrieved + # from LDAP or Firestore) + # @default -- `"uidNumber"` + uidClaim: "" + + # -- Claim from which to get the primary GID (only used if not retrieved + # from LDAP or Firestore) + # @default -- Do not set a primary GID + gidClaim: "" + + # -- Claim from which to get the group membership (only used if not + # retrieved from LDAP) + # @default -- `"isMemberOf"` + groupsClaim: "" + + ldap: + # -- LDAP server URL from which to retrieve user group information + # @default -- Do not use LDAP + url: "" + + # -- Bind DN for simple bind authentication. If set, `ldap-secret` must be + # set in the Gafaelfawr secret + # @default -- Use anonymous binds + userDn: "" + + # -- Base DN for the LDAP search to find a user's groups + # @default -- None, must be set + groupBaseDn: "" + + # -- Object class containing group information + groupObjectClass: "posixGroup" + + # -- Member attribute of the object class. Values must match the username + # returned in the token from the OpenID Connect authentication server. 
+ groupMemberAttr: "member" + + # -- Base DN for the LDAP search to find a user's entry + # @default -- Get user metadata from the upstream authentication provider + userBaseDn: "" + + # -- Search attribute containing the user's username + userSearchAttr: "uid" + + # -- Attribute containing the user's full name + nameAttr: "displayName" + + # -- Attribute containing the user's email address + emailAttr: "mail" + + # -- Attribute containing the user's UID number (set to `uidNumber` for + # most LDAP servers) + # @default -- Get UID from upstream authentication provider + uidAttr: "" + + # -- Attribute containing the user's primary GID (set to `gidNumber` for + # most LDAP servers) + # @default -- Use GID of user private group + gidAttr: "" + + # -- Whether to synthesize a user private group for each user with a GID + # equal to their UID + addUserGroup: false + + oidcServer: + # -- Whether to support OpenID Connect clients. If set to true, + # `oidc-server-secrets` must be set in the Gafaelfawr secret. + enabled: false + + # -- Names and descriptions of all scopes in use. This is used to populate + # the new token creation page. Only scopes listed here will be options when + # creating a new token. See [DMTN-235](https://dmtn-235.lsst.io/). + # @default -- See the `values.yaml` file + knownScopes: + "admin:jupyterlab": >- + Can create and destroy labs for any user + "admin:token": >- + Can create and modify tokens for any user + "admin:provision": >- + Can perform privileged user provisioning + "exec:admin": >- + Administrative access to all APIs + "exec:internal-tools": >- + Use project-internal tools. 
+ "exec:notebook": >- + Use the Notebook Aspect + "exec:portal": >- + Use the Portal Aspect + "read:alertdb": >- + Retrieve alert packets and schemas from the alert archive database + "read:image": >- + Retrieve images from project datasets + "read:tap": >- + Execute SELECT queries in the TAP interface on project datasets + "user:token": >- + Can create and modify user tokens + + # -- Defines a mapping of scopes to groups that provide that scope. See + # [DMTN-235](https://dmtn-235.lsst.io/) for more details on scopes. + groupMapping: {} + + # -- Usernames to add as administrators when initializing a new database. + # Used only if there are no administrators. + initialAdmins: [] + + # -- HTML footer to add to any login error page (inside a

tag). + errorFooter: "" + +cloudsql: + # -- Enable the Cloud SQL Auth Proxy, used with CloudSQL databases on Google + # Cloud. This will be run as a sidecar for the main Gafaelfawr pods, and as + # a separate service (behind a `NetworkPolicy`) for other, lower-traffic + # services. + enabled: false + + image: + # -- Cloud SQL Auth Proxy image to use + repository: "gcr.io/cloudsql-docker/gce-proxy" + + # -- Cloud SQL Auth Proxy tag to use + tag: "1.33.2" + + # -- Pull policy for Cloud SQL Auth Proxy images + pullPolicy: "IfNotPresent" + + # -- Instance connection name for a CloudSQL PostgreSQL instance + # @default -- None, must be set if Cloud SQL Auth Proxy is enabled + instanceConnectionName: "" + + # -- The Google service account that has an IAM binding to the `gafaelfawr` + # Kubernetes service account and has the `cloudsql.client` role + # @default -- None, must be set if Cloud SQL Auth Proxy is enabled + serviceAccount: "" + + # -- Resource limits and requests for the Cloud SQL Proxy pod + resources: {} + + # -- Annotations for the Cloud SQL Proxy pod + podAnnotations: {} + + # -- Node selection rules for the Cloud SQL Proxy pod + nodeSelector: {} + + # -- Tolerations for the Cloud SQL Proxy pod + tolerations: [] + + # -- Affinity rules for the Cloud SQL Proxy pod + affinity: {} + +maintenance: + # -- Cron schedule string for Gafaelfawr data consistency audit (in UTC) + auditSchedule: "30 3 * * *" + + # -- Cron schedule string for Gafaelfawr periodic maintenance (in UTC) + maintenanceSchedule: "5 * * * *" + + # -- Resource limits and requests for Gafaelfawr maintenance and audit pods + resources: {} + + # -- Annotations for Gafaelfawr maintenance and audit pods + podAnnotations: {} + + # -- Node selection rules for Gafaelfawr maintenance and audit pods + nodeSelector: {} + + # -- Tolerations for Gafaelfawr maintenance and audit pods + tolerations: [] + + # -- Affinity rules for Gafaelfawr maintenance and audit pods + affinity: {} + +operator: + # -- Resource 
limits and requests for the Gafaelfawr Kubernetes operator + resources: {} + + # -- Annotations for the token management pod + podAnnotations: {} + + # -- Node selection rules for the token management pod + nodeSelector: {} + + # -- Tolerations for the token management pod + tolerations: [] + + # -- Affinity rules for the token management pod + affinity: {} + +redis: + image: + # -- Redis image to use + repository: "redis" + + # -- Redis image tag to use + tag: "7.0.8" + + # -- Pull policy for the Redis image + pullPolicy: "IfNotPresent" + + persistence: + # -- Whether to persist Redis storage and thus tokens. Setting this to + # false will use `emptyDir` and reset all tokens on every restart. Only + # use this for a test deployment. + enabled: true + + # -- Amount of persistent storage to request + size: "1Gi" + + # -- Class of storage to request + storageClass: "" + + # -- Access mode of storage to request + accessMode: "ReadWriteOnce" + + # -- Use an existing PVC, not dynamic provisioning. If this is set, the + # size, storageClass, and accessMode settings are ignored. + volumeClaimName: "" + + # -- Pod annotations for the Redis pod + podAnnotations: {} + + # -- Node selection rules for the Redis pod + nodeSelector: {} + + # -- Tolerations for the Redis pod + tolerations: [] + + # -- Affinity rules for the Redis pod + affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/hips/.helmignore b/services/hips/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/services/hips/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. 
+# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/services/hips/Chart.yaml b/services/hips/Chart.yaml new file mode 100644 index 0000000000..53328e149e --- /dev/null +++ b/services/hips/Chart.yaml @@ -0,0 +1,12 @@ +apiVersion: v2 +name: hips +version: 1.0.0 +description: HiPS tile server backed by Google Cloud Storage +sources: + - https://github.com/lsst-sqre/crawlspace +appVersion: 1.0.0 +annotations: + phalanx.lsst.io/docs: | + - id: "DMTN-230" + title: "RSP HiPS service implementation strategy" + url: "https://dmtn-230.lsst.io/" diff --git a/services/hips/README.md b/services/hips/README.md new file mode 100644 index 0000000000..01f98617a6 --- /dev/null +++ b/services/hips/README.md @@ -0,0 +1,32 @@ +# hips + +HiPS tile server backed by Google Cloud Storage + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the hips deployment pod | +| autoscaling.enabled | bool | `false` | Enable autoscaling of hips deployment | +| autoscaling.maxReplicas | int | `100` | Maximum number of hips deployment pods | +| autoscaling.minReplicas | int | `1` | Minimum number of hips deployment pods | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization of hips deployment pods | +| config.gcsBucket | string | None, must be set | Name of Google Cloud Storage bucket holding the HiPS files | +| config.gcsProject | string | None, must be set | Google Cloud project in which the underlying storage is located | +| config.logLevel | string | `"INFO"` | Choose from the text form of Python logging levels | +| config.serviceAccount | string | None, must be set | The 
Google service account that has an IAM binding to the `hips` Kubernetes service account and has access to the storage bucket | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the hips image | +| image.repository | string | `"ghcr.io/lsst-sqre/crawlspace"` | Image to use in the hips deployment | +| image.tag | string | `""` | Overrides the image tag whose default is the chart appVersion. | +| ingress.annotations | object | `{}` | Additional annotations for the ingress | +| nodeSelector | object | `{}` | Node selection rules for the hips deployment pod | +| podAnnotations | object | `{}` | Annotations for the hips deployment pod | +| replicaCount | int | `1` | Number of web deployment pods to start | +| resources | object | `{}` | Resource limits and requests for the hips deployment pod | +| tolerations | list | `[]` | Tolerations for the hips deployment pod | diff --git a/services/hips/templates/_helpers.tpl b/services/hips/templates/_helpers.tpl new file mode 100644 index 0000000000..5a738df28d --- /dev/null +++ b/services/hips/templates/_helpers.tpl @@ -0,0 +1,26 @@ +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "hips.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "hips.labels" -}} +helm.sh/chart: {{ include "hips.chart" . }} +{{ include "hips.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "hips.selectorLabels" -}} +app.kubernetes.io/name: "hips" +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/hips/templates/deployment.yaml b/services/hips/templates/deployment.yaml new file mode 100644 index 0000000000..007849dc3f --- /dev/null +++ b/services/hips/templates/deployment.yaml @@ -0,0 +1,66 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: "hips" + labels: + {{- include "hips.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "hips.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "hips.selectorLabels" . 
| nindent 8 }} + spec: + containers: + - name: {{ .Chart.Name }} + env: + - name: "CRAWLSPACE_PROJECT" + value: {{ required "config.gcsProject must be set" .Values.config.gcsProject | quote }} + - name: "CRAWLSPACE_BUCKET" + value: {{ required "config.gcsBucket must be set" .Values.config.gcsBucket | quote }} + - name: "SAFIR_LOG_LEVEL" + value: {{ .Values.config.logLevel | quote }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: "http" + containerPort: 8080 + protocol: "TCP" + readinessProbe: + httpGet: + path: "/" + port: "http" + resources: + {{- toYaml .Values.resources | nindent 12 }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + serviceAccountName: "hips" + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/hips/templates/hpa.yaml b/services/hips/templates/hpa.yaml new file mode 100644 index 0000000000..0606eb00d9 --- /dev/null +++ b/services/hips/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: "hips" + labels: + {{- include "hips.labels" . 
| nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: "hips" + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: "cpu" + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: "memory" + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/services/hips/templates/ingress.yaml b/services/hips/templates/ingress.yaml new file mode 100644 index 0000000000..78bfe06ee9 --- /dev/null +++ b/services/hips/templates/ingress.yaml @@ -0,0 +1,30 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: "hips" + labels: + {{- include "hips.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "read:image" +template: + metadata: + name: "hips" + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/api/hips" + pathType: "Prefix" + backend: + service: + name: "hips" + port: + number: 8080 diff --git a/services/hips/templates/networkpolicy.yaml b/services/hips/templates/networkpolicy.yaml new file mode 100644 index 0000000000..bbbba68634 --- /dev/null +++ b/services/hips/templates/networkpolicy.yaml @@ -0,0 +1,31 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: "hips" + labels: + {{- include "hips.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "hips.selectorLabels" . 
| nindent 6 }} + policyTypes: + - Ingress + ingress: + - from: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + # Temporarily also allow inbound access from the Portal because the + # current version of the Portal doesn't support passing authentication + # credentials to HiPS requests. + - namespaceSelector: {} + podSelector: + matchLabels: + app.kubernetes.io/instance: "portal" + app.kubernetes.io/component: "firefly" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/hips/templates/service.yaml b/services/hips/templates/service.yaml new file mode 100644 index 0000000000..e5d572b92c --- /dev/null +++ b/services/hips/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: "hips" + labels: + {{- include "hips.labels" . | nindent 4 }} +spec: + type: "ClusterIP" + ports: + - port: 8080 + targetPort: "http" + protocol: "TCP" + name: "http" + selector: + {{- include "hips.selectorLabels" . | nindent 4 }} diff --git a/services/hips/templates/serviceaccount.yaml b/services/hips/templates/serviceaccount.yaml new file mode 100644 index 0000000000..902961623a --- /dev/null +++ b/services/hips/templates/serviceaccount.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: "hips" + labels: + {{- include "hips.labels" . 
| nindent 4 }} + annotations: + iam.gke.io/gcp-service-account: {{ required "config.serviceAccount must be set to a valid Google service account" .Values.config.serviceAccount | quote }} diff --git a/services/hips/values-idfdev.yaml b/services/hips/values-idfdev.yaml new file mode 100644 index 0000000000..dfff5a5b62 --- /dev/null +++ b/services/hips/values-idfdev.yaml @@ -0,0 +1,4 @@ +config: + gcsProject: "data-curation-prod-fbdb" + gcsBucket: "static-us-central1-dp02-hips" + serviceAccount: "crawlspace-hips@science-platform-dev-7696.iam.gserviceaccount.com" diff --git a/services/hips/values-idfint.yaml b/services/hips/values-idfint.yaml new file mode 100644 index 0000000000..54f6619c7a --- /dev/null +++ b/services/hips/values-idfint.yaml @@ -0,0 +1,6 @@ +replicaCount: 4 + +config: + gcsProject: "data-curation-prod-fbdb" + gcsBucket: "static-us-central1-dp02-hips" + serviceAccount: "crawlspace-hips@science-platform-int-dc5d.iam.gserviceaccount.com" diff --git a/services/hips/values-idfprod.yaml b/services/hips/values-idfprod.yaml new file mode 100644 index 0000000000..a3710b46f8 --- /dev/null +++ b/services/hips/values-idfprod.yaml @@ -0,0 +1,6 @@ +replicaCount: 4 + +config: + gcsProject: "data-curation-prod-fbdb" + gcsBucket: "static-us-central1-dp02-hips" + serviceAccount: "crawlspace-hips@science-platform-stable-6994.iam.gserviceaccount.com" diff --git a/services/hips/values-minikube.yaml b/services/hips/values-minikube.yaml new file mode 100644 index 0000000000..44e7bb33bc --- /dev/null +++ b/services/hips/values-minikube.yaml @@ -0,0 +1,4 @@ +config: + gcsProject: "bogus" + gcsBucket: "bogus" + serviceAccount: "bogus" diff --git a/services/hips/values.yaml b/services/hips/values.yaml new file mode 100644 index 0000000000..aa4a305bd1 --- /dev/null +++ b/services/hips/values.yaml @@ -0,0 +1,77 @@ +# Default values for hips. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +# -- Number of web deployment pods to start +replicaCount: 1 + +config: + # -- Google Cloud project in which the underlying storage is located + # @default -- None, must be set + gcsProject: "" + + # -- Name of Google Cloud Storage bucket holding the HiPS files + # @default -- None, must be set + gcsBucket: "" + + # -- Choose from the text form of Python logging levels + logLevel: "INFO" + + # -- The Google service account that has an IAM binding to the `hips` + # Kubernetes service account and has access to the storage bucket + # @default -- None, must be set + serviceAccount: "" + +image: + # -- Image to use in the hips deployment + repository: "ghcr.io/lsst-sqre/crawlspace" + + # -- Pull policy for the hips image + pullPolicy: "IfNotPresent" + + # -- Overrides the image tag whose default is the chart appVersion. + tag: "" + +ingress: + # -- Additional annotations for the ingress + annotations: {} + +autoscaling: + # -- Enable autoscaling of hips deployment + enabled: false + + # -- Minimum number of hips deployment pods + minReplicas: 1 + + # -- Maximum number of hips deployment pods + maxReplicas: 100 + + # -- Target CPU utilization of hips deployment pods + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# -- Annotations for the hips deployment pod +podAnnotations: {} + +# -- Resource limits and requests for the hips deployment pod +resources: {} + +# -- Node selection rules for the hips deployment pod +nodeSelector: {} + +# -- Tolerations for the hips deployment pod +tolerations: [] + +# -- Affinity rules for the hips deployment pod +affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. 
+global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" diff --git a/services/ingress-nginx/Chart.yaml b/services/ingress-nginx/Chart.yaml index d86ced117e..c797f1c90a 100644 --- a/services/ingress-nginx/Chart.yaml +++ b/services/ingress-nginx/Chart.yaml @@ -1,10 +1,11 @@ apiVersion: v2 name: ingress-nginx version: 1.0.0 +description: Ingress controller +home: https://kubernetes.github.io/ingress-nginx/ +sources: + - https://github.com/kubernetes/ingress-nginx dependencies: - name: ingress-nginx - version: 4.0.17 + version: 4.4.2 repository: https://kubernetes.github.io/ingress-nginx - - name: pull-secret - version: ">=0.1.2" - repository: https://lsst-sqre.github.io/charts/ diff --git a/services/ingress-nginx/README.md b/services/ingress-nginx/README.md new file mode 100644 index 0000000000..c695a75304 --- /dev/null +++ b/services/ingress-nginx/README.md @@ -0,0 +1,24 @@ +# ingress-nginx + +Ingress controller + +**Homepage:** + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| ingress-nginx.controller.config.compute-full-forwarded-for | string | `"true"` | Put the complete path in `X-Forwarded-For`, not just the last hop, so that the client IP will be exposed to Gafaelfawr | +| ingress-nginx.controller.config.proxy-body-size | string | `"100m"` | Maximum size of the client request body (needs to be large enough to allow table uploads) | +| ingress-nginx.controller.config.server-snippet | string | See `values.yaml` | Add additional configuration used by Gafaelfawr to report errors from the authorization layer | +| ingress-nginx.controller.config.ssl-redirect | string | `"true"` | Redirect all non-SSL access to SSL. 
| +| ingress-nginx.controller.config.use-forwarded-headers | string | `"true"` | Enable the `X-Forwarded-For` processing | +| ingress-nginx.controller.metrics.enabled | bool | `true` | Enable metrics reporting via Prometheus | +| ingress-nginx.controller.podLabels | object | See `values.yaml` | Add labels used by `NetworkPolicy` objects to restrict access to the ingress and thus ensure that auth subrequest handlers run | +| ingress-nginx.controller.service.externalTrafficPolicy | string | `"Local"` | Force traffic routing policy to Local so that the external IP in `X-Forwarded-For` will be correct | +| vaultCertificate.enabled | bool | `false` | Whether to store ingress TLS certificate via vault-secrets-operator. Typically "squareone" owns it instead in an RSP. | diff --git a/services/ingress-nginx/templates/vault-certificate.yaml b/services/ingress-nginx/templates/vault-secrets.yaml similarity index 56% rename from services/ingress-nginx/templates/vault-certificate.yaml rename to services/ingress-nginx/templates/vault-secrets.yaml index cc2a615207..61adfb0147 100644 --- a/services/ingress-nginx/templates/vault-certificate.yaml +++ b/services/ingress-nginx/templates/vault-secrets.yaml @@ -1,9 +1,9 @@ -{{ if .Values.vault_certificate.enabled }} +{{ if .Values.vaultCertificate.enabled }} apiVersion: ricoberger.de/v1alpha1 kind: VaultSecret metadata: name: ingress-certificate spec: - path: {{ .Values.vault_certificate.path }} + path: "{{ .Values.global.vaultSecretsPath }}/ingress-nginx" type: kubernetes.io/tls {{ end }} diff --git a/services/ingress-nginx/values-base.yaml b/services/ingress-nginx/values-base.yaml index 4e1e66cb7f..430a3efeed 100644 --- a/services/ingress-nginx/values-base.yaml +++ b/services/ingress-nginx/values-base.yaml @@ -1,22 +1,7 @@ ingress-nginx: controller: config: - compute-full-forwarded-for: "true" large-client-header-buffers: "4 64k" - proxy-body-size: "100m" proxy-buffer-size: "64k" - ssl-redirect: "true" - use-forwarded-headers: "true" 
service: - externalTrafficPolicy: Local loadBalancerIP: "139.229.146.150" - podLabels: - gafaelfawr.lsst.io/ingress: "true" - hub.jupyter.org/network-access-proxy-http: "true" - -vault_certificate: - enabled: false - -pull-secret: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/pull-secret diff --git a/services/ingress-nginx/values-ccin2p3.yaml b/services/ingress-nginx/values-ccin2p3.yaml new file mode 100644 index 0000000000..25731ebefd --- /dev/null +++ b/services/ingress-nginx/values-ccin2p3.yaml @@ -0,0 +1,27 @@ +ingress-nginx: + controller: + nodeSelector: + kubernetes.io/hostname: "ccqserv202" + + tolerations: + - key: "dedicated" + operator: "Equal" + value: "qserv" + effect: "NoSchedule" + + config: + large-client-header-buffers: "4 64k" + proxy-buffer-size: "64k" + service: + externalIPs: + - 134.158.237.2 + type: NodePort + admissionWebhooks: + enabled: false + extraArgs: + default-ssl-certificate: ingress-nginx/ingress-certificate + # podLabels: + # hub.jupyter.org/network-access-proxy-http: "true" + +vaultCertificate: + enabled: true diff --git a/services/ingress-nginx/values-idfdev.yaml b/services/ingress-nginx/values-idfdev.yaml index 61568d9ad8..ce9e5c39ca 100644 --- a/services/ingress-nginx/values-idfdev.yaml +++ b/services/ingress-nginx/values-idfdev.yaml @@ -1,22 +1,4 @@ ingress-nginx: controller: - config: - compute-full-forwarded-for: "true" - large-client-header-buffers: "4 64k" - proxy-body-size: "100m" - proxy-buffer-size: "64k" - ssl-redirect: "true" - use-forwarded-headers: "true" service: - externalTrafficPolicy: Local loadBalancerIP: "35.225.112.77" - podLabels: - gafaelfawr.lsst.io/ingress: "true" - hub.jupyter.org/network-access-proxy-http: "true" - -vault_certificate: - enabled: false - -pull-secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/pull-secret diff --git a/services/ingress-nginx/values-idfint.yaml b/services/ingress-nginx/values-idfint.yaml index 9030d9fd6a..233a5c9f91 100644 --- 
a/services/ingress-nginx/values-idfint.yaml +++ b/services/ingress-nginx/values-idfint.yaml @@ -1,22 +1,7 @@ ingress-nginx: controller: config: - compute-full-forwarded-for: "true" large-client-header-buffers: "4 64k" - proxy-body-size: "100m" proxy-buffer-size: "64k" - ssl-redirect: "true" - use-forwarded-headers: "true" service: - externalTrafficPolicy: Local loadBalancerIP: "35.238.192.49" - podLabels: - gafaelfawr.lsst.io/ingress: "true" - hub.jupyter.org/network-access-proxy-http: "true" - -vault_certificate: - enabled: false - -pull-secret: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/pull-secret diff --git a/services/ingress-nginx/values-idfprod.yaml b/services/ingress-nginx/values-idfprod.yaml index 4ef58c27fc..a1289fb904 100644 --- a/services/ingress-nginx/values-idfprod.yaml +++ b/services/ingress-nginx/values-idfprod.yaml @@ -1,22 +1,7 @@ ingress-nginx: controller: config: - compute-full-forwarded-for: "true" large-client-header-buffers: "4 64k" - proxy-body-size: "100m" proxy-buffer-size: "64k" - ssl-redirect: "true" - use-forwarded-headers: "true" service: - externalTrafficPolicy: Local loadBalancerIP: "35.202.181.164" - podLabels: - gafaelfawr.lsst.io/ingress: "true" - hub.jupyter.org/network-access-proxy-http: "true" - -vault_certificate: - enabled: false - -pull-secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/pull-secret diff --git a/services/ingress-nginx/values-minikube.yaml b/services/ingress-nginx/values-minikube.yaml index 984f65f210..ae315d0392 100644 --- a/services/ingress-nginx/values-minikube.yaml +++ b/services/ingress-nginx/values-minikube.yaml @@ -1,13 +1,7 @@ ingress-nginx: controller: - config: - compute-full-forwarded-for: "true" - large-client-header-buffers: "4 64k" - proxy-body-size: "100m" - proxy-buffer-size: "64k" - ssl-redirect: "true" - use-forwarded-headers: "true" service: + externalTrafficPolicy: null type: ClusterIP hostNetwork: true dnsPolicy: ClusterFirstWithHostNet @@ -15,20 +9,6 
@@ ingress-nginx: enabled: false extraArgs: default-ssl-certificate: ingress-nginx/ingress-certificate - podLabels: - gafaelfawr.lsst.io/ingress: "true" - hub.jupyter.org/network-access-proxy-http: "true" - metrics: - enabled: true - service: - annotations: - prometheus.io/port: "10254" - prometheus.io/scrape: "true" -vault_certificate: +vaultCertificate: enabled: true - path: secret/k8s_operator/minikube.lsst.codes/ingress-nginx - -pull-secret: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/pull-secret diff --git a/services/ingress-nginx/values-red-five.yaml b/services/ingress-nginx/values-red-five.yaml deleted file mode 100644 index 87187b9389..0000000000 --- a/services/ingress-nginx/values-red-five.yaml +++ /dev/null @@ -1,21 +0,0 @@ -ingress-nginx: - controller: - config: - compute-full-forwarded-for: "true" - large-client-header-buffers: "4 64k" - proxy-body-size: "100m" - proxy-buffer-size: "64k" - ssl-redirect: "true" - use-forwarded-headers: "true" - service: - externalTrafficPolicy: Local - podLabels: - gafaelfawr.lsst.io/ingress: "true" - hub.jupyter.org/network-access-proxy-http: "true" - -vault_certificate: - enabled: false - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git a/services/ingress-nginx/values-roe.yaml b/services/ingress-nginx/values-roe.yaml index 6dda2488f7..b5e4203299 100644 --- a/services/ingress-nginx/values-roe.yaml +++ b/services/ingress-nginx/values-roe.yaml @@ -1,33 +1,25 @@ ingress-nginx: controller: config: - compute-full-forwarded-for: "true" large-client-header-buffers: "4 64k" - proxy-body-size: "100m" proxy-buffer-size: "64k" - ssl-redirect: "true" - use-forwarded-headers: "true" service: + externalTrafficPolicy: null type: ClusterIP - dnsPolicy: ClusterFirstWithHostNet - hostNetwork: true affinity: nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: node-role.kubernetes.io/etcd - operator: 
Exists - + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: + matchExpressions: + - key: nodetype + operator: In + values: + - public + dnsPolicy: ClusterFirstWithHostNet + hostNetwork: true extraArgs: default-ssl-certificate: ingress-nginx/ingress-certificate - podLabels: - hub.jupyter.org/network-access-proxy-http: "true" - -vault_certificate: - enabled: true - path: secret/k8s_operator/roe/ingress-nginx -pull-secret: +vaultCertificate: enabled: true - path: secret/k8s_operator/roe/pull-secret diff --git a/services/ingress-nginx/values-squash-sandbox.yaml b/services/ingress-nginx/values-squash-sandbox.yaml deleted file mode 100644 index 35d6c67e93..0000000000 --- a/services/ingress-nginx/values-squash-sandbox.yaml +++ /dev/null @@ -1,17 +0,0 @@ -ingress-nginx: - controller: - config: - compute-full-forwarded-for: "true" - large-client-header-buffers: "4 64k" - proxy-body-size: "100m" - proxy-buffer-size: "64k" - ssl-redirect: "true" - use-forwarded-headers: "true" - service: - externalTrafficPolicy: Local - podLabels: - gafaelfawr.lsst.io/ingress: "true" - hub.jupyter.org/network-access-proxy-http: "true" - -vault_certificate: - enabled: false diff --git a/services/ingress-nginx/values-summit.yaml b/services/ingress-nginx/values-summit.yaml index 403af54254..489f86ec1a 100644 --- a/services/ingress-nginx/values-summit.yaml +++ b/services/ingress-nginx/values-summit.yaml @@ -1,22 +1,7 @@ ingress-nginx: controller: config: - compute-full-forwarded-for: "true" large-client-header-buffers: "4 64k" - proxy-body-size: "100m" proxy-buffer-size: "64k" - ssl-redirect: "true" - use-forwarded-headers: "true" service: - externalTrafficPolicy: Local loadBalancerIP: "139.229.160.150" - podLabels: - gafaelfawr.lsst.io/ingress: "true" - hub.jupyter.org/network-access-proxy-http: "true" - -vault_certificate: - enabled: false - -pull-secret: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/pull-secret diff --git 
a/services/ingress-nginx/values-tucson-teststand.yaml b/services/ingress-nginx/values-tucson-teststand.yaml index e94cb0eb8f..32b357a265 100644 --- a/services/ingress-nginx/values-tucson-teststand.yaml +++ b/services/ingress-nginx/values-tucson-teststand.yaml @@ -1,22 +1,7 @@ ingress-nginx: controller: config: - compute-full-forwarded-for: "true" large-client-header-buffers: "4 64k" - proxy-body-size: "100m" proxy-buffer-size: "64k" - ssl-redirect: "true" - use-forwarded-headers: "true" service: - externalTrafficPolicy: Local loadBalancerIP: "140.252.146.50" - podLabels: - gafaelfawr.lsst.io/ingress: "true" - hub.jupyter.org/network-access-proxy-http: "true" - -vault_certificate: - enabled: false - -pull-secret: - enabled: true - path: secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret diff --git a/services/ingress-nginx/values.yaml b/services/ingress-nginx/values.yaml new file mode 100644 index 0000000000..132daa5e69 --- /dev/null +++ b/services/ingress-nginx/values.yaml @@ -0,0 +1,63 @@ +# Ingress configuration +# https://github.com/kubernetes/ingress-nginx/blob/main/charts/ingress-nginx/values.yaml +ingress-nginx: + controller: + config: + # -- Put the complete path in `X-Forwarded-For`, not just the last hop, + # so that the client IP will be exposed to Gafaelfawr + compute-full-forwarded-for: "true" + + # -- Maximum size of the client request body (needs to be large enough + # to allow table uploads) + proxy-body-size: "100m" + + # -- Redirect all non-SSL access to SSL. 
+ ssl-redirect: "true" + + # -- Enable the `X-Forwarded-For` processing + use-forwarded-headers: "true" + + # -- Add additional configuration used by Gafaelfawr to report errors + # from the authorization layer + # @default -- See `values.yaml` + server-snippet: | + location @autherror { + default_type application/json; + if ($auth_status = 400) { + add_header Cache-Control "no-cache, must-revalidate" always; + add_header WWW-Authenticate $auth_www_authenticate always; + return 400 $auth_error_body; + } + add_header Cache-Control "no-cache, must-revalidate" always; + add_header WWW-Authenticate $auth_www_authenticate always; + return 403; + } + + service: + # -- Force traffic routing policy to Local so that the external IP in + # `X-Forwarded-For` will be correct + externalTrafficPolicy: Local + + # -- Add labels used by `NetworkPolicy` objects to restrict access to the + # ingress and thus ensure that auth subrequest handlers run + # @default -- See `values.yaml` + podLabels: + gafaelfawr.lsst.io/ingress: "true" + hub.jupyter.org/network-access-proxy-http: "true" + + metrics: + # -- Enable metrics reporting via Prometheus + enabled: true + +vaultCertificate: + # -- Whether to store ingress TLS certificate via + # vault-secrets-operator. Typically "squareone" owns it instead in an + # RSP. + enabled: false + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. 
+global: + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/linters/Chart.yaml b/services/linters/Chart.yaml new file mode 100644 index 0000000000..5bb2e14e5e --- /dev/null +++ b/services/linters/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: linters +version: 1.0.0 +description: Linters running for operational reasons +sources: + - https://github.com/lsst-sqre/ops-linters +appVersion: 0.1.6 diff --git a/services/linters/README.md b/services/linters/README.md new file mode 100644 index 0000000000..facd3c1375 --- /dev/null +++ b/services/linters/README.md @@ -0,0 +1,27 @@ +# linters + +Linters running for operational reasons + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the linter pod | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the linter image | +| image.repository | string | `"ghcr.io/lsst-sqre/ops-linters"` | linter image to use | +| image.tag | string | The appVersion of the chart | Tag of linter image to use | +| linterSchedule | string | `"0 0 * * *"` | Cron schedule string for linter checking (in UTC) | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the linter pod | +| podAnnotations | object | `{}` | Annotations for the linter pod | +| replicaCount | int | `1` | Number of web frontend pods to start | +| resources | object | `{}` | Resource limits and requests for the linter pod | +| tolerations | list | `[]` | Tolerations for the linter pod | 
diff --git a/services/linters/templates/_helpers.tpl b/services/linters/templates/_helpers.tpl new file mode 100644 index 0000000000..cdbd80f67b --- /dev/null +++ b/services/linters/templates/_helpers.tpl @@ -0,0 +1,52 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "linters.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "linters.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "linters.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "linters.labels" -}} +helm.sh/chart: {{ include "linters.chart" . }} +{{ include "linters.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "linters.selectorLabels" -}} +app.kubernetes.io/name: {{ include "linters.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/linters/templates/cronjob-dns-linter.yaml b/services/linters/templates/cronjob-dns-linter.yaml new file mode 100644 index 0000000000..47c98a62fb --- /dev/null +++ b/services/linters/templates/cronjob-dns-linter.yaml @@ -0,0 +1,62 @@ +apiVersion: batch/v1 +kind: CronJob +metadata: + name: {{ template "linters.fullname" . }}-maintenance + labels: + {{- include "linters.labels" . | nindent 4 }} +spec: + schedule: {{ .Values.linterSchedule | quote }} + concurrencyPolicy: "Forbid" + jobTemplate: + spec: + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 12 }} + {{- end }} + labels: + {{- include "linters.selectorLabels" . | nindent 12 }} + app.kubernetes.io/component: "linter" + spec: + restartPolicy: "Never" + automountServiceAccountToken: false + serviceAccountName: {{ include "linters.fullname" . }} + containers: + - name: "linters" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 16 }} + {{- end }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + volumeMounts: + - name: "secret" + mountPath: "/etc/linters/secrets" + readOnly: true + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + volumes: + - name: "secret" + secret: + secretName: {{ template "linters.fullname" . }}-secret + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 12 }} + {{- end }} diff --git a/services/linters/templates/serviceaccount.yaml b/services/linters/templates/serviceaccount.yaml new file mode 100644 index 0000000000..6e817d7759 --- /dev/null +++ b/services/linters/templates/serviceaccount.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "linters.fullname" . }} + labels: + {{- include "linters.labels" . | nindent 4 }} + annotations: + iam.gke.io/gcp-service-account: {{ required ".Values.serviceAccount must be set to a valid Google service account" .Values.serviceAccount | quote }} diff --git a/services/linters/templates/vault-secrets.yaml b/services/linters/templates/vault-secrets.yaml new file mode 100644 index 0000000000..ae5c0892a9 --- /dev/null +++ b/services/linters/templates/vault-secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ template "linters.fullname" . }}-secret + labels: + {{- include "linters.labels" . | nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/linters" + type: Opaque diff --git a/services/linters/values-idfint.yaml b/services/linters/values-idfint.yaml new file mode 100644 index 0000000000..6de1a37299 --- /dev/null +++ b/services/linters/values-idfint.yaml @@ -0,0 +1 @@ +serviceAccount: "dns-validator-wi@science-platform-int-dc5d.iam.gserviceaccount.com" diff --git a/services/linters/values.yaml b/services/linters/values.yaml new file mode 100644 index 0000000000..9261041e4f --- /dev/null +++ b/services/linters/values.yaml @@ -0,0 +1,54 @@ +# Default values for linter. 
+ +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# -- Number of web frontend pods to start +replicaCount: 1 + +image: + # -- linter image to use + repository: "ghcr.io/lsst-sqre/ops-linters" + + # -- Pull policy for the linter image + pullPolicy: "IfNotPresent" + + # -- Tag of linter image to use + # @default -- The appVersion of the chart + tag: "" + +# -- Resource limits and requests for the linter pod +resources: {} + +# -- Annotations for the linter pod +podAnnotations: {} + +# -- Node selector rules for the linter pod +nodeSelector: {} + +# -- Tolerations for the linter pod +tolerations: [] + +# -- Affinity rules for the linter pod +affinity: {} + +# -- Cron schedule string for linter checking (in UTC) +linterSchedule: "0 0 * * *" + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/mobu/Chart.yaml b/services/mobu/Chart.yaml index 8eae3417d8..f1047ca194 100644 --- a/services/mobu/Chart.yaml +++ b/services/mobu/Chart.yaml @@ -1,10 +1,7 @@ apiVersion: v2 name: mobu version: 1.0.0 -dependencies: - - name: mobu - version: ">=3.0.0" - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +description: Continuous integration testing +sources: + - https://github.com/lsst-sqre/mobu +appVersion: 4.5.0 diff --git a/services/mobu/README.md b/services/mobu/README.md new file mode 100644 index 0000000000..2bc6059b1f --- /dev/null +++ b/services/mobu/README.md @@ -0,0 +1,28 @@ +# mobu + +Continuous integration testing + +## Source 
Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the mobu frontend pod | +| autostart | list | `[]` | Autostart specification. Must be a list of mobu flock specifications. Each flock listed will be automatically started when mobu is started. | +| cachemachineImagePolicy | string | `"available"` | Cachemachine image policy. Must be one of `desired` or `available`. Determines whether cachemachine reports the images it has or the ones it wants. Should be `desired` in environments with image streaming enabled (e.g. IDF). | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the mobu image | +| image.repository | string | `"ghcr.io/lsst-sqre/mobu"` | mobu image to use | +| image.tag | string | The appVersion of the chart | Tag of mobu image to use | +| ingress.annotations | object | `{}` | Additional annotations to add to the ingress | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the mobu frontend pod | +| podAnnotations | object | `{}` | Annotations for the mobu frontend pod | +| resources | object | `{}` | Resource limits and requests for the mobu frontend pod | +| tolerations | list | `[]` | Tolerations for the mobu frontend pod | diff --git a/services/mobu/templates/_helpers.tpl b/services/mobu/templates/_helpers.tpl new file mode 100644 index 0000000000..b28af543ea --- /dev/null +++ b/services/mobu/templates/_helpers.tpl @@ -0,0 +1,53 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. 
+*/}} +{{- define "mobu.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "mobu.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "mobu.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "mobu.labels" -}} +app.kubernetes.io/name: {{ include "mobu.name" . }} +helm.sh/chart: {{ include "mobu.chart" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "mobu.selectorLabels" -}} +app.kubernetes.io/name: {{ include "mobu.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/mobu/templates/configmap-autostart.yaml b/services/mobu/templates/configmap-autostart.yaml new file mode 100644 index 0000000000..06a1949ff8 --- /dev/null +++ b/services/mobu/templates/configmap-autostart.yaml @@ -0,0 +1,11 @@ +{{- if .Values.autostart -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "mobu.fullname" . }}-autostart + labels: + {{- include "mobu.labels" . 
| nindent 4 }} +data: + autostart.yaml: | + {{- toYaml .Values.autostart | nindent 4 }} +{{- end }} diff --git a/services/mobu/templates/deployment.yaml b/services/mobu/templates/deployment.yaml new file mode 100644 index 0000000000..efb73dbff8 --- /dev/null +++ b/services/mobu/templates/deployment.yaml @@ -0,0 +1,97 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "mobu.fullname" . }} + labels: + {{- include "mobu.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "mobu.selectorLabels" . | nindent 6 }} + strategy: + type: "Recreate" + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "mobu.selectorLabels" . | nindent 8 }} + spec: + automountServiceAccountToken: false + containers: + - name: {{ .Chart.Name }} + env: + - name: "ALERT_HOOK" + valueFrom: + secretKeyRef: + name: {{ template "mobu.fullname" . }}-secret + key: "ALERT_HOOK" + {{- if .Values.autostart }} + - name: "AUTOSTART" + value: "/etc/mobu/autostart.yaml" + {{- end }} + - name: "CACHEMACHINE_IMAGE_POLICY" + value: {{ .Values.cachemachineImagePolicy }} + - name: "ENVIRONMENT_URL" + value: {{ .Values.global.baseUrl }} + - name: "GAFAELFAWR_TOKEN" + valueFrom: + secretKeyRef: + name: {{ template "mobu.fullname" . }}-gafaelfawr-token + key: "token" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + ports: + - name: "http" + containerPort: 8080 + protocol: "TCP" + readinessProbe: + httpGet: + path: "/mobu/flocks" + port: "http" + timeoutSeconds: 10 + {{- with .Values.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + volumeMounts: + {{- if .Values.autostart }} + - name: "autostart" + mountPath: "/etc/mobu" + readOnly: true + {{- end }} + - name: "tmp" + mountPath: "/tmp" + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + volumes: + {{- if .Values.autostart }} + - name: "autostart" + configMap: + name: {{ include "mobu.fullname" . }}-autostart + {{- end }} + - name: "tmp" + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/mobu/templates/gafaelfawr-token.yaml b/services/mobu/templates/gafaelfawr-token.yaml new file mode 100644 index 0000000000..c50b1ceb39 --- /dev/null +++ b/services/mobu/templates/gafaelfawr-token.yaml @@ -0,0 +1,11 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrServiceToken +metadata: + name: {{ include "mobu.fullname" . }}-gafaelfawr-token + labels: + {{- include "mobu.labels" . | nindent 4 }} +spec: + service: "bot-mobu" + scopes: + - "admin:token" + - "exec:admin" diff --git a/services/mobu/templates/ingress.yaml b/services/mobu/templates/ingress.yaml new file mode 100644 index 0000000000..42a41c00ed --- /dev/null +++ b/services/mobu/templates/ingress.yaml @@ -0,0 +1,31 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "mobu.fullname" . }} + labels: + {{- include "mobu.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "exec:admin" + loginRedirect: true +template: + metadata: + name: {{ template "mobu.fullname" . }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . 
| nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/mobu" + pathType: "Prefix" + backend: + service: + name: {{ template "mobu.fullname" . }} + port: + number: 8080 diff --git a/services/mobu/templates/networkpolicy.yaml b/services/mobu/templates/networkpolicy.yaml new file mode 100644 index 0000000000..9ac98c6c65 --- /dev/null +++ b/services/mobu/templates/networkpolicy.yaml @@ -0,0 +1,21 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "mobu.fullname" . }} +spec: + podSelector: + matchLabels: + {{- include "mobu.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/mobu/templates/service.yaml b/services/mobu/templates/service.yaml new file mode 100644 index 0000000000..7402fd5a95 --- /dev/null +++ b/services/mobu/templates/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "mobu.fullname" . }} + labels: + {{- include "mobu.labels" . | nindent 4 }} +spec: + type: "ClusterIP" + ports: + - port: 8080 + targetPort: "http" + protocol: "TCP" + selector: + {{- include "mobu.selectorLabels" . | nindent 4 }} diff --git a/services/mobu/templates/vault-secrets.yaml b/services/mobu/templates/vault-secrets.yaml new file mode 100644 index 0000000000..050d8fbadc --- /dev/null +++ b/services/mobu/templates/vault-secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ template "mobu.fullname" . }}-secret + labels: + {{- include "mobu.labels" . 
| nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/mobu" + type: "Opaque" diff --git a/services/mobu/values-idfdev.yaml b/services/mobu/values-idfdev.yaml index a10bf962e3..b256a1954a 100644 --- a/services/mobu/values-idfdev.yaml +++ b/services/mobu/values-idfdev.yaml @@ -1,26 +1,23 @@ -mobu: - imagePullSecrets: - - name: "pull-secret" +cachemachineImagePolicy: "desired" - ingress: - host: "data-dev.lsst.cloud" - - environmentUrl: "https://data-dev.lsst.cloud" - vaultSecretsPath: "secret/k8s_operator/data-dev.lsst.cloud/mobu" - - autostart: - - name: "python" - count: 1 - users: - - username: "systemtest01" - uidnumber: 74768 - scopes: ["exec:notebook"] - business: "JupyterPythonLoop" - options: - jupyter: - image_size: "Small" - restart: true - -pull-secret: - enabled: true - path: "secret/k8s_operator/data-dev.lsst.cloud/pull-secret" +autostart: + - name: "python" + count: 1 + users: + - username: "bot-mobu-user" + scopes: ["exec:notebook"] + business: "JupyterPythonLoop" + options: + jupyter: + image_size: "Small" + restart: true + - name: "tap" + count: 1 + users: + - username: "bot-mobu-tap" + scopes: ["read:tap"] + business: "TAPQueryRunner" + restart: true + options: + tap_sync: true + tap_query_set: "dp0.2" diff --git a/services/mobu/values-idfint.yaml b/services/mobu/values-idfint.yaml index b7a9497ed5..a1d5b30fea 100644 --- a/services/mobu/values-idfint.yaml +++ b/services/mobu/values-idfint.yaml @@ -1,40 +1,44 @@ -mobu: - imagePullSecrets: - - name: "pull-secret" +cachemachineImagePolicy: "desired" - ingress: - host: "data-int.lsst.cloud" - - environmentUrl: "https://data-int.lsst.cloud" - vaultSecretsPath: "secret/k8s_operator/data-int.lsst.cloud/mobu" - - autostart: - - name: "firefighter" - count: 1 - users: - - username: "systemtest01" - uidnumber: 74768 - scopes: ["exec:notebook", "exec:portal", "read:tap"] - business: "NotebookRunner" - options: - repo_url: "https://github.com/SimonKrughoff/system-test.git" - repo_branch: "prod" - 
max_executions: 1 - restart: true - - name: "weekly" - count: 1 - users: - - username: "systemtest02" - uidnumber: 74769 - scopes: ["exec:notebook", "exec:portal", "read:tap"] - business: "NotebookRunner" - options: - jupyter: - image_class: "latest-weekly" - repo_url: "https://github.com/lsst-sqre/system-test.git" - repo_branch: "prod" - restart: true - -pull-secret: - enabled: true - path: "secret/k8s_operator/data-int.lsst.cloud/pull-secret" +autostart: + - name: "firefighter" + count: 1 + users: + - username: "bot-mobu-recommended" + scopes: + - "exec:notebook" + - "exec:portal" + - "read:image" + - "read:tap" + business: "NotebookRunner" + options: + repo_url: "https://github.com/lsst-sqre/system-test.git" + repo_branch: "prod" + max_executions: 1 + restart: true + - name: "weekly" + count: 1 + users: + - username: "bot-mobu-weekly" + scopes: + - "exec:notebook" + - "exec:portal" + - "read:image" + - "read:tap" + business: "NotebookRunner" + options: + jupyter: + image_class: "latest-weekly" + repo_url: "https://github.com/lsst-sqre/system-test.git" + repo_branch: "prod" + restart: true + - name: "tap" + count: 1 + users: + - username: "bot-mobu-tap" + scopes: ["read:tap"] + business: "TAPQueryRunner" + restart: true + options: + tap_sync: false + tap_query_set: "dp0.2" diff --git a/services/mobu/values-idfprod.yaml b/services/mobu/values-idfprod.yaml index e102dc85ee..056a48df9c 100644 --- a/services/mobu/values-idfprod.yaml +++ b/services/mobu/values-idfprod.yaml @@ -1,71 +1,68 @@ -mobu: - imagePullSecrets: - - name: "pull-secret" +cachemachineImagePolicy: "desired" - ingress: - host: "data.lsst.cloud" - - environmentUrl: "https://data.lsst.cloud" - vaultSecretsPath: "secret/k8s_operator/data.lsst.cloud/mobu" - - autostart: - - name: "firefighter" - count: 5 - users: - - username: "systemtest01" - uidnumber: 74768 - - username: "systemtest02" - uidnumber: 74769 - - username: "systemtest03" - uidnumber: 74770 - - username: "systemtest04" - uidnumber: 74771 - 
- username: "systemtest05" - uidnumber: 74772 - scopes: ["exec:notebook", "exec:portal", "read:tap"] - business: "NotebookRunner" - options: - repo_url: "https://github.com/lsst-sqre/system-test.git" - repo_branch: "prod" - max_executions: 1 - restart: true - - name: "quickbeam" - count: 1 - users: - - username: "systemtest06" - uidnumber: 74773 - scopes: ["exec:notebook", "exec:portal", "read:tap"] - business: "NotebookRunner" - options: - jupyter_options_form: - image: "registry.hub.docker.com/lsstsqre/sciplat-lab:recommended" - image_list: "registry.hub.docker.com/lsstsqre/sciplat-lab:recommended|Recommended|" - image_dropdown: "use_image_from_dropdown" - size: "Small" - repo_url: "https://github.com/lsst-sqre/system-test.git" - repo_branch: "prod" - idle_time: 900 - delete_lab: false - restart: true - - name: "tutorial" - count: 1 - users: - - username: "systemtest07" - uidnumber: 74774 - scopes: ["exec:notebook", "exec:portal", "read:tap"] - business: "NotebookRunner" - options: - jupyter_options_form: - image: "registry.hub.docker.com/lsstsqre/sciplat-lab:recommended" - image_list: "registry.hub.docker.com/lsstsqre/sciplat-lab:recommended|Recommended|" - image_dropdown: "use_image_from_dropdown" - size: "Large" - repo_url: "https://github.com/rubin-dp0/tutorial-notebooks.git" - repo_branch: "prod" - max_executions: 1 - working_directory: "notebooks/tutorial-notebooks" - restart: true - -pull-secret: - enabled: true - path: "secret/k8s_operator/data.lsst.cloud/pull-secret" +autostart: + - name: "firefighter" + count: 5 + user_spec: + username_prefix: "bot-mobu-recommended" + uid_start: 74768 + gid_start: 74768 + scopes: + - "exec:notebook" + - "exec:portal" + - "read:image" + - "read:tap" + business: "NotebookRunner" + options: + repo_url: "https://github.com/lsst-sqre/system-test.git" + repo_branch: "prod" + max_executions: 1 + restart: true + - name: "quickbeam" + count: 1 + users: + - username: "bot-mobu-persistent" + uidnumber: 74773 + gidnumber: 74773 + 
scopes: + - "exec:notebook" + - "exec:portal" + - "read:image" + - "read:tap" + business: "NotebookRunner" + options: + repo_url: "https://github.com/lsst-sqre/system-test.git" + repo_branch: "prod" + idle_time: 900 + delete_lab: false + restart: true + - name: "tutorial" + count: 1 + users: + - username: "bot-mobu-tutorial" + uidnumber: 74774 + gidnumber: 74774 + scopes: + - "exec:notebook" + - "exec:portal" + - "read:image" + - "read:tap" + business: "NotebookRunner" + options: + repo_url: "https://github.com/rubin-dp0/tutorial-notebooks.git" + repo_branch: "prod" + max_executions: 1 + working_directory: "notebooks/tutorial-notebooks" + restart: true + - name: "tap" + count: 1 + users: + - username: "bot-mobu-tap" + uidnumber: 74775 + gidnumber: 74775 + scopes: ["read:tap"] + business: "TAPQueryRunner" + options: + tap_sync: true + tap_query_set: "dp0.2" + restart: true diff --git a/services/mobu/values-int.yaml b/services/mobu/values-int.yaml deleted file mode 100644 index b23efcb4da..0000000000 --- a/services/mobu/values-int.yaml +++ /dev/null @@ -1,29 +0,0 @@ -mobu: - imagePullSecrets: - - name: "pull-secret" - - ingress: - annotations: - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-int.ncsa.illinois.edu/auth?scope=exec:admin" - host: "lsst-lsp-int.ncsa.illinois.edu" - - environmentUrl: "https://lsst-lsp-int.ncsa.illinois.edu" - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/mobu" - - autostart: - - name: "firefighter" - count: 1 - users: - - username: "lsptestuser01" - uidnumber: 60181 - scopes: ["exec:notebook", "exec:portal", "read:tap"] - business: "NotebookRunner" - options: - repo_url: "https://github.com/lsst-sqre/system-test.git" - repo_branch: "NCSA-prod" - max_executions: 1 - restart: true - -pull-secret: - enabled: true - path: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret" diff --git a/services/mobu/values-minikube.yaml b/services/mobu/values-minikube.yaml index adac4a2270..e69de29bb2 100644 --- 
a/services/mobu/values-minikube.yaml +++ b/services/mobu/values-minikube.yaml @@ -1,13 +0,0 @@ -mobu: - imagePullSecrets: - - name: "pull-secret" - - ingress: - host: "minikube.lsst.codes" - - environmentUrl: "https://minikube.lsst.codes" - vaultSecretsPath: "secret/k8s_operator/minikube.lsst.codes/mobu" - -pull-secret: - enabled: true - path: "secret/k8s_operator/minikube.lsst.codes/pull-secret" diff --git a/services/mobu/values-red-five.yaml b/services/mobu/values-red-five.yaml deleted file mode 100644 index 23a4ffc80a..0000000000 --- a/services/mobu/values-red-five.yaml +++ /dev/null @@ -1,13 +0,0 @@ -mobu: - imagePullSecrets: - - name: "pull-secret" - - ingress: - host: "red-five.lsst.codes" - - environmentUrl: "https://red-five.lsst.codes" - vaultSecretsPath: "secret/k8s_operator/red-five.lsst.codes/mobu" - -pull-secret: - enabled: true - path: "secret/k8s_operator/red-five.lsst.codes/pull-secret" diff --git a/services/mobu/values-roe.yaml b/services/mobu/values-roe.yaml index 6a73f25bf1..11c34784b5 100644 --- a/services/mobu/values-roe.yaml +++ b/services/mobu/values-roe.yaml @@ -1,40 +1,28 @@ -mobu: - imagePullSecrets: - - name: "pull-secret" - - ingress: - host: "rsp.lsst.ac.uk" - - environmentUrl: "https://rsp.lsst.ac.uk" - vaultSecretsPath: "secret/k8s_operator/roe/mobu" - - autostart: - - name: "firefighter" - count: 1 - users: - - username: "systemtest01" - uidnumber: 74768 - scopes: ["exec:notebook", "exec:portal", "read:tap"] - business: "NotebookRunner" - options: - repo_url: "https://github.com/SimonKrughoff/system-test.git" - repo_branch: "prod" - max_executions: 1 - restart: true - - name: "weekly" - count: 1 - users: - - username: "systemtest02" - uidnumber: 74769 - scopes: ["exec:notebook", "exec:portal", "read:tap"] - business: "NotebookRunner" - options: - jupyter: - image_class: "latest-weekly" - repo_url: "https://github.com/lsst-sqre/system-test.git" - repo_branch: "prod" - restart: true - -pull-secret: - enabled: true - path: 
"secret/k8s_operator/roe/pull-secret" +autostart: + - name: "firefighter" + count: 1 + users: + - username: "bot-mobu-recommended" + uidnumber: 74768 + gidnumber: 74768 + scopes: ["exec:notebook", "exec:portal", "read:image", "read:tap"] + business: "NotebookRunner" + options: + repo_url: "https://github.com/SimonKrughoff/system-test.git" + repo_branch: "prod" + max_executions: 1 + restart: true + - name: "weekly" + count: 1 + users: + - username: "bot-mobu-weekly" + uidnumber: 74769 + gidnumber: 74769 + scopes: ["exec:notebook", "exec:portal", "read:image", "read:tap"] + business: "NotebookRunner" + options: + jupyter: + image_class: "latest-weekly" + repo_url: "https://github.com/lsst-sqre/system-test.git" + repo_branch: "prod" + restart: true diff --git a/services/mobu/values-stable.yaml b/services/mobu/values-stable.yaml deleted file mode 100644 index 26a8be819e..0000000000 --- a/services/mobu/values-stable.yaml +++ /dev/null @@ -1,37 +0,0 @@ -mobu: - imagePullSecrets: - - name: "pull-secret" - - ingress: - annotations: - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-stable.ncsa.illinois.edu/auth?scope=exec:admin" - host: "lsst-lsp-stable.ncsa.illinois.edu" - - environmentUrl: "https://lsst-lsp-stable.ncsa.illinois.edu" - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/mobu" - - autostart: - - name: "firefighter" - count: 5 - users: - - username: "lsptestuser01" - uidnumber: 60181 - - username: "lsptestuser02" - uidnumber: 60182 - - username: "lsptestuser03" - uidnumber: 60183 - - username: "lsptestuser04" - uidnumber: 60184 - - username: "lsptestuser05" - uidnumber: 60185 - scopes: ["exec:notebook", "exec:portal", "read:tap"] - business: "NotebookRunner" - options: - repo_url: "https://github.com/lsst-sqre/system-test.git" - repo_branch: "NCSA-prod" - max_executions: 1 - restart: true - -pull-secret: - enabled: true - path: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret" diff --git 
a/services/mobu/values.yaml b/services/mobu/values.yaml new file mode 100644 index 0000000000..9173f69c8a --- /dev/null +++ b/services/mobu/values.yaml @@ -0,0 +1,62 @@ +# Default values for mobu. + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# -- Autostart specification. Must be a list of mobu flock specifications. +# Each flock listed will be automatically started when mobu is started. +autostart: [] + +# -- Cachemachine image policy. Must be one of `desired` or +# `available`. Determines whether cachemachine reports the images it +# has or the ones it wants. Should be `desired` in environments with +# image streaming enabled (e.g. IDF). +cachemachineImagePolicy: "available" + +image: + # -- mobu image to use + repository: "ghcr.io/lsst-sqre/mobu" + + # -- Pull policy for the mobu image + pullPolicy: "IfNotPresent" + + # -- Tag of mobu image to use + # @default -- The appVersion of the chart + tag: "" + +ingress: + # -- Additional annotations to add to the ingress + annotations: {} + +# -- Resource limits and requests for the mobu frontend pod +resources: {} + +# -- Annotations for the mobu frontend pod +podAnnotations: {} + +# -- Node selector rules for the mobu frontend pod +nodeSelector: {} + +# -- Tolerations for the mobu frontend pod +tolerations: [] + +# -- Affinity rules for the mobu frontend pod +affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. 
+global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/moneypenny/Chart.yaml b/services/moneypenny/Chart.yaml index 4838f807ed..9c0ba6863a 100644 --- a/services/moneypenny/Chart.yaml +++ b/services/moneypenny/Chart.yaml @@ -1,10 +1,15 @@ apiVersion: v2 +appVersion: "1.0.0" name: moneypenny -version: 1.0.0 -dependencies: - - name: moneypenny - version: 1.0.2 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +description: User provisioning actions +sources: + - https://github.com/lsst-sqre/moneypenny + - https://github.com/lsst-sqre/farthing + - https://github.com/lsst-sqre/inituserhome +version: 1.0.2 +annotations: + phalanx.lsst.io/docs: | + - id: "SQR-052" + title: >- + Proposal for privilege separation in RSP Notebook Aspect containers + url: "https://sqr-052.lsst.io/" diff --git a/services/moneypenny/README.md b/services/moneypenny/README.md new file mode 100644 index 0000000000..1cfedae207 --- /dev/null +++ b/services/moneypenny/README.md @@ -0,0 +1,34 @@ +# moneypenny + +User provisioning actions + +## Source Code + +* +* +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the vo-cutouts frontend pod | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the moneypenny image | +| image.repository | string | 
`"lsstsqre/moneypenny"` | moneypenny image to use | +| image.tag | string | The appVersion of the chart | Tag of moneypenny image to use | +| ingress.annotations | object | `{}` | Additional annotations to add to the ingress | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the vo-cutouts frontend pod | +| orders.commission | list | `[{"image":"lsstsqre/farthing","name":"farthing","securityContext":{"allowPrivilegeEscalation":false,"runAsNonRootUser":true,"runAsUser":1000}}]` | List of specifications for containers to run to commission a new user. Each member of the list should set a container `name`, `image`, and `securityContext` and may contain `volumeMounts`. | +| orders.retire | list | `[{"image":"lsstsqre/farthing","name":"farthing","securityContext":{"allowPrivilegeEscalation":false,"runAsNonRootUser":true,"runAsUser":1000}}]` | List of specifications for containers to run to retire a user. Each member of the list should set a container `name`, `image`, and `securityContext` and may contain `volumeMounts`. | +| orders.volumes | list | `[]` | Additional volumes to mount when commissioning or retiring users. 
| +| podAnnotations | object | `{}` | Annotations for the vo-cutouts frontend pod | +| quips | string | A small selection | Moneypenny quotes | +| replicaCount | int | `1` | Number of pods to start | +| resources | object | `{}` | Resource limits and requests for the vo-cutouts frontend pod | +| serviceAccount.name | string | Name based on the fullname template | Name of the service account to use | +| tolerations | list | `[]` | Tolerations for the vo-cutouts frontend pod | diff --git a/services/moneypenny/templates/_helpers.tpl b/services/moneypenny/templates/_helpers.tpl new file mode 100644 index 0000000000..ff1f0f98a7 --- /dev/null +++ b/services/moneypenny/templates/_helpers.tpl @@ -0,0 +1,60 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "moneypenny.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "moneypenny.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "moneypenny.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "moneypenny.labels" -}} +app.kubernetes.io/name: {{ include "moneypenny.name" . }} +helm.sh/chart: {{ include "moneypenny.chart" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "moneypenny.selectorLabels" -}} +app.kubernetes.io/name: {{ include "moneypenny.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "moneypenny.serviceAccountName" -}} +{{ default (include "moneypenny.fullname" .) .Values.serviceAccount.name }} +{{- end -}} diff --git a/services/moneypenny/templates/cm-m-config.yaml b/services/moneypenny/templates/cm-m-config.yaml new file mode 100644 index 0000000000..5dedc2a46d --- /dev/null +++ b/services/moneypenny/templates/cm-m-config.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "moneypenny.fullname" . }}-m-config + labels: + {{- include "moneypenny.labels" . | nindent 4 }} +data: + m.yaml: | + {{- toYaml .Values.orders | nindent 4 }} diff --git a/services/moneypenny/templates/cm-quips.yaml b/services/moneypenny/templates/cm-quips.yaml new file mode 100644 index 0000000000..a0e9f928ba --- /dev/null +++ b/services/moneypenny/templates/cm-quips.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "moneypenny.fullname" . }}-quips + labels: + {{- include "moneypenny.labels" . | nindent 4 }} +data: + quips.txt: | + {{- .Values.quips | nindent 4 }} diff --git a/services/moneypenny/templates/configmap.yaml b/services/moneypenny/templates/configmap.yaml new file mode 100644 index 0000000000..646d1c8042 --- /dev/null +++ b/services/moneypenny/templates/configmap.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "moneypenny.fullname" .}} + labels: + {{- include "moneypenny.labels" . 
| nindent 4 }} +data: + SAFIR_NAME: "moneypenny" + SAFIR_PROFILE: "production" + SAFIR_LOGGER: "moneypenny" + SAFIR_LOG_LEVEL: "INFO" + DOCKER_SECRET_NAME: "pull-secret" diff --git a/services/moneypenny/templates/deployment.yaml b/services/moneypenny/templates/deployment.yaml new file mode 100644 index 0000000000..2684cf8eea --- /dev/null +++ b/services/moneypenny/templates/deployment.yaml @@ -0,0 +1,96 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "moneypenny.fullname" . }} + labels: + {{- include "moneypenny.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "moneypenny.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + checksum/config-m: {{ include (print $.Template.BasePath "/cm-m-config.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "moneypenny.selectorLabels" . | nindent 8 }} + spec: + imagePullSecrets: + - name: "pull-secret" + serviceAccountName: {{ include "moneypenny.serviceAccountName" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + - name: "moneypenny" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + envFrom: + - configMapRef: + name: {{ template "moneypenny.fullname" . }} + ports: + - name: "http" + containerPort: 8080 + protocol: "TCP" + livenessProbe: + httpGet: + path: "/" + port: "http" + readinessProbe: + httpGet: + path: "/" + port: "http" + {{- with .Values.resources }} + resources: + {{- toYaml . 
| nindent 12 }} + {{- end }} + volumeMounts: + - name: "m-config" + mountPath: "/opt/lsst/software/moneypenny/config/M" + readOnly: true + - name: "quips" + mountPath: "/opt/lsst/software/moneypenny/config/quips" + readOnly: true + - name: "podinfo" + mountPath: "/etc/podinfo" + readOnly: true + volumes: + - name: "m-config" + configMap: + name: {{ template "moneypenny.fullname" . }}-m-config + - name: "quips" + configMap: + name: {{ template "moneypenny.fullname" . }}-quips + - name: "podinfo" + downwardAPI: + items: + - path: "name" + fieldRef: + fieldPath: "metadata.name" + - path: "uid" + fieldRef: + fieldPath: "metadata.uid" + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/moneypenny/templates/ingress.yaml b/services/moneypenny/templates/ingress.yaml new file mode 100644 index 0000000000..566f195cd8 --- /dev/null +++ b/services/moneypenny/templates/ingress.yaml @@ -0,0 +1,31 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "moneypenny.fullname" . }} + labels: + {{- include "moneypenny.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "admin:provision" +template: + metadata: + name: {{ template "moneypenny.fullname" . }} + annotations: + nginx.ingress.kubernetes.io/proxy-read-timeout: "310" + {{- with .Values.ingress.annotations }} + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/moneypenny" + pathType: Prefix + backend: + service: + name: {{ include "moneypenny.fullname" . 
}} + port: + number: 8080 diff --git a/services/moneypenny/templates/networkpolicy.yaml b/services/moneypenny/templates/networkpolicy.yaml new file mode 100644 index 0000000000..850f72ad2a --- /dev/null +++ b/services/moneypenny/templates/networkpolicy.yaml @@ -0,0 +1,23 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "moneypenny.fullname" . }} + labels: + {{- include "moneypenny.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + {{- include "moneypenny.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/moneypenny/templates/role.yaml b/services/moneypenny/templates/role.yaml new file mode 100644 index 0000000000..0e730dd5fa --- /dev/null +++ b/services/moneypenny/templates/role.yaml @@ -0,0 +1,21 @@ +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "moneypenny.serviceAccountName" . }} + labels: + {{- include "moneypenny.labels" . | nindent 4 }} +rules: + - apiGroups: [""] + resources: + - "pods" + verbs: + - "create" + - "delete" + - "get" + - "list" + - "watch" + - apiGroups: [""] + resources: ["configmaps"] + verbs: + - "create" + - "delete" diff --git a/services/moneypenny/templates/rolebinding.yaml b/services/moneypenny/templates/rolebinding.yaml new file mode 100644 index 0000000000..169978eeaf --- /dev/null +++ b/services/moneypenny/templates/rolebinding.yaml @@ -0,0 +1,13 @@ +kind: RoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ include "moneypenny.serviceAccountName" . }} + labels: + {{- include "moneypenny.labels" . | nindent 4 }} +subjects: + - kind: ServiceAccount + name: {{ include "moneypenny.serviceAccountName" . 
}} +roleRef: + kind: Role + name: {{ include "moneypenny.serviceAccountName" . }} + apiGroup: rbac.authorization.k8s.io diff --git a/services/moneypenny/templates/service.yaml b/services/moneypenny/templates/service.yaml new file mode 100644 index 0000000000..2b7d9b8da7 --- /dev/null +++ b/services/moneypenny/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "moneypenny.fullname" . }} + labels: + {{- include "moneypenny.labels" . | nindent 4 }} +spec: + type: "ClusterIP" + ports: + - name: "http" + protocol: "TCP" + port: 8080 + targetPort: "http" + selector: + {{- include "moneypenny.selectorLabels" . | nindent 4 }} diff --git a/services/moneypenny/templates/serviceaccount.yaml b/services/moneypenny/templates/serviceaccount.yaml new file mode 100644 index 0000000000..963cbe100d --- /dev/null +++ b/services/moneypenny/templates/serviceaccount.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "moneypenny.serviceAccountName" . }} + labels: + {{- include "moneypenny.labels" . | nindent 4 }} diff --git a/services/moneypenny/templates/vault-secrets.yaml b/services/moneypenny/templates/vault-secrets.yaml new file mode 100644 index 0000000000..3be6ea057e --- /dev/null +++ b/services/moneypenny/templates/vault-secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret + labels: + {{- include "moneypenny.labels" . 
| nindent 4 }} +spec: + path: "{{- .Values.global.vaultSecretsPath }}/pull-secret" + type: kubernetes.io/dockerconfigjson diff --git a/services/moneypenny/values-base.yaml b/services/moneypenny/values-base.yaml index b400c84e39..3f44c7f597 100644 --- a/services/moneypenny/values-base.yaml +++ b/services/moneypenny/values-base.yaml @@ -1,25 +1,15 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "base-lsp.lsst.codes" - - orders: - commission: - - name: initcommission - image: lsstsqre/inituserhome - securityContext: - runAsUser: 0 - runAsNonRootUser: false - volumeMounts: - - mountPath: /homedirs - name: homedirs - volumes: - - name: homedirs - nfs: - server: ddn-nfs.ls.lsst.org - path: /lsstdata/user/staff/jhome - -pull-secret: - enabled: true - path: "secret/k8s_operator/base-lsp.lsst.codes/pull-secret" +orders: + commission: + - name: initcommission + image: lsstsqre/inituserhome + securityContext: + runAsUser: 0 + runAsNonRootUser: false + volumeMounts: + - mountPath: /homedirs + name: homedirs + volumes: + - name: homedirs + nfs: + server: ddn-nfs.ls.lsst.org + path: /lsstdata/user/staff/jhome diff --git a/services/moneypenny/values-ccin2p3.yaml b/services/moneypenny/values-ccin2p3.yaml new file mode 100644 index 0000000000..e653e165c2 --- /dev/null +++ b/services/moneypenny/values-ccin2p3.yaml @@ -0,0 +1,15 @@ +orders: + commission: + - name: initcommission + image: lsstsqre/inituserhome + securityContext: + runAsUser: 0 + runAsNonRootUser: false + volumeMounts: + - mountPath: /homedirs + name: homedirs + volumes: + - name: homedirs + hostPath: + path: /data/rsp/home + type: Directory diff --git a/services/moneypenny/values-idfdev.yaml b/services/moneypenny/values-idfdev.yaml index f01b6aa93b..77b96cbe69 100644 --- a/services/moneypenny/values-idfdev.yaml +++ b/services/moneypenny/values-idfdev.yaml @@ -1,25 +1,15 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data-dev.lsst.cloud" - - orders: - 
commission: - - name: initcommission - image: lsstsqre/inituserhome - securityContext: - runAsUser: 0 - runAsNonRootUser: false - volumeMounts: - - mountPath: /homedirs - name: homedirs - volumes: - - name: homedirs - nfs: - server: 10.87.86.26 - path: /share1/home - -pull-secret: - enabled: true - path: "secret/k8s_operator/data-dev.lsst.cloud/pull-secret" +orders: + commission: + - name: initcommission + image: lsstsqre/inituserhome + securityContext: + runAsUser: 0 + runAsNonRootUser: false + volumeMounts: + - mountPath: /homedirs + name: homedirs + volumes: + - name: homedirs + nfs: + server: 10.87.86.26 + path: /share1/home diff --git a/services/moneypenny/values-idfint.yaml b/services/moneypenny/values-idfint.yaml index 3836d840e3..bf3fa84444 100644 --- a/services/moneypenny/values-idfint.yaml +++ b/services/moneypenny/values-idfint.yaml @@ -1,25 +1,15 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data-int.lsst.cloud" - - orders: - commission: - - name: initcommission - image: lsstsqre/inituserhome - securityContext: - runAsUser: 0 - runAsNonRootUser: false - volumeMounts: - - mountPath: /homedirs - name: homedirs - volumes: - - name: homedirs - nfs: - server: 10.22.240.130 - path: /share1/home - -pull-secret: - enabled: true - path: "secret/k8s_operator/data-int.lsst.cloud/pull-secret" +orders: + commission: + - name: initcommission + image: lsstsqre/inituserhome + securityContext: + runAsUser: 0 + runAsNonRootUser: false + volumeMounts: + - mountPath: /homedirs + name: homedirs + volumes: + - name: homedirs + nfs: + server: 10.22.240.130 + path: /share1/home diff --git a/services/moneypenny/values-idfprod.yaml b/services/moneypenny/values-idfprod.yaml index 2115b65fa8..e8821fa71b 100644 --- a/services/moneypenny/values-idfprod.yaml +++ b/services/moneypenny/values-idfprod.yaml @@ -1,25 +1,15 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data.lsst.cloud" - - orders: - commission: - - name: 
initcommission - image: lsstsqre/inituserhome - securityContext: - runAsUser: 0 - runAsNonRootUser: false - volumeMounts: - - mountPath: /homedirs - name: homedirs - volumes: - - name: homedirs - nfs: - server: 10.13.105.122 - path: /share1/home - -pull-secret: - enabled: true - path: "secret/k8s_operator/data.lsst.cloud/pull-secret" +orders: + commission: + - name: initcommission + image: lsstsqre/inituserhome + securityContext: + runAsUser: 0 + runAsNonRootUser: false + volumeMounts: + - mountPath: /homedirs + name: homedirs + volumes: + - name: homedirs + nfs: + server: 10.13.105.122 + path: /share1/home diff --git a/services/moneypenny/values-int.yaml b/services/moneypenny/values-int.yaml deleted file mode 100644 index 82000daa5e..0000000000 --- a/services/moneypenny/values-int.yaml +++ /dev/null @@ -1,11 +0,0 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "lsst-lsp-int.ncsa.illinois.edu" - annotations: - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-int.ncsa.illinois.edu/auth?scope=admin:provision" - -pull-secret: - enabled: true - path: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret" diff --git a/services/moneypenny/values-minikube.yaml b/services/moneypenny/values-minikube.yaml index 86ff8d7412..e69de29bb2 100644 --- a/services/moneypenny/values-minikube.yaml +++ b/services/moneypenny/values-minikube.yaml @@ -1,9 +0,0 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "minikube.lsst.codes" - -pull-secret: - enabled: true - path: "secret/k8s_operator/minikube.lsst.codes/pull-secret" diff --git a/services/moneypenny/values-red-five.yaml b/services/moneypenny/values-red-five.yaml deleted file mode 100644 index 491617f1c0..0000000000 --- a/services/moneypenny/values-red-five.yaml +++ /dev/null @@ -1,9 +0,0 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "red-five.lsst.codes" - -pull-secret: - enabled: true - path: 
"secret/k8s_operator/red-five.lsst.codes/pull-secret" diff --git a/services/moneypenny/values-roe.yaml b/services/moneypenny/values-roe.yaml index e540bb7797..a16c891ca2 100644 --- a/services/moneypenny/values-roe.yaml +++ b/services/moneypenny/values-roe.yaml @@ -1,20 +1,15 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "rsp.lsst.ac.uk" - - orders: - commission: - - name: initcommission - image: lsstsqre/inituserhome - securityContext: - runAsUser: 0 - runAsNonRootUser: false - volumeMounts: - - mountPath: /homedirs - name: homedirs - -pull-secret: - enabled: true - path: "secret/k8s_operator/roe/pull-secret" +orders: + commission: + - name: initcommission + image: lsstsqre/inituserhome + securityContext: + runAsUser: 0 + runAsNonRootUser: false + volumeMounts: + - mountPath: /homedirs + name: homedirs + volumes: + - name: homedirs + nfs: + server: 10.72.0.23 + path: /jhome diff --git a/services/moneypenny/values-stable.yaml b/services/moneypenny/values-stable.yaml deleted file mode 100644 index 3f0d6a0bf1..0000000000 --- a/services/moneypenny/values-stable.yaml +++ /dev/null @@ -1,11 +0,0 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "lsst-lsp-stable.ncsa.illinois.edu" - annotations: - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-stable.ncsa.illinois.edu/auth?scope=admin:provision" - -pull-secret: - enabled: true - path: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret" diff --git a/services/moneypenny/values-summit.yaml b/services/moneypenny/values-summit.yaml index 1a2b978e0d..1436234dbd 100644 --- a/services/moneypenny/values-summit.yaml +++ b/services/moneypenny/values-summit.yaml @@ -1,25 +1,15 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "summit-lsp.lsst.codes" - - orders: - commission: - - name: initcommission - image: lsstsqre/inituserhome - securityContext: - runAsUser: 0 - runAsNonRootUser: false - volumeMounts: - - mountPath: 
/homedirs - name: homedirs - volumes: - - name: homedirs - nfs: - server: nfs1.cp.lsst.org - path: /jhome - -pull-secret: - enabled: true - path: "secret/k8s_operator/summit-lsp.lsst.codes/pull-secret" +orders: + commission: + - name: initcommission + image: lsstsqre/inituserhome + securityContext: + runAsUser: 0 + runAsNonRootUser: false + volumeMounts: + - mountPath: /homedirs + name: homedirs + volumes: + - name: homedirs + nfs: + server: nfs1.cp.lsst.org + path: /jhome diff --git a/services/moneypenny/values-tucson-teststand.yaml b/services/moneypenny/values-tucson-teststand.yaml index e6860d48be..845233c931 100644 --- a/services/moneypenny/values-tucson-teststand.yaml +++ b/services/moneypenny/values-tucson-teststand.yaml @@ -1,25 +1,15 @@ -moneypenny: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "tucson-teststand.lsst.codes" - - orders: - commission: - - name: initcommission - image: lsstsqre/inituserhome - securityContext: - runAsUser: 0 - runAsNonRootUser: false - volumeMounts: - - mountPath: /homedirs - name: homedirs - volumes: - - name: homedirs - nfs: - server: nfs-jhome.tu.lsst.org - path: /jhome - -pull-secret: - enabled: true - path: "secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret" +orders: + commission: + - name: initcommission + image: lsstsqre/inituserhome + securityContext: + runAsUser: 0 + runAsNonRootUser: false + volumeMounts: + - mountPath: /homedirs + name: homedirs + volumes: + - name: homedirs + nfs: + server: nfs-jhome.tu.lsst.org + path: /jhome diff --git a/services/moneypenny/values.yaml b/services/moneypenny/values.yaml new file mode 100644 index 0000000000..743e2bc0e9 --- /dev/null +++ b/services/moneypenny/values.yaml @@ -0,0 +1,118 @@ +# Default values for moneypenny. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# -- Number of pods to start +replicaCount: 1 + +image: + # -- moneypenny image to use + repository: "lsstsqre/moneypenny" + + # -- Pull policy for the moneypenny image + pullPolicy: "IfNotPresent" + + # -- Tag of moneypenny image to use + # @default -- The appVersion of the chart + tag: "" + +serviceAccount: + # -- Name of the service account to use + # @default -- Name based on the fullname template + name: "" + +ingress: + # -- Additional annotations to add to the ingress + annotations: {} + +orders: + # -- List of specifications for containers to run to commission a new user. + # Each member of the list should set a container `name`, `image`, and + # `securityContext` and may contain `volumeMounts`. + commission: + - name: farthing + image: lsstsqre/farthing + securityContext: + runAsUser: 1000 + runAsNonRootUser: true + allowPrivilegeEscalation: false + + # -- List of specifications for containers to run to retire a user. Each + # member of the list should set a container `name`, `image`, and + # `securityContext` and may contain `volumeMounts`. + retire: + - name: farthing + image: lsstsqre/farthing + securityContext: + runAsUser: 1000 + runAsNonRootUser: true + allowPrivilegeEscalation: false + + # -- Additional volumes to mount when commissioning or retiring users. + volumes: [] + +# -- Resource limits and requests for the vo-cutouts frontend pod +resources: {} + +# -- Annotations for the vo-cutouts frontend pod +podAnnotations: {} + +# -- Node selector rules for the vo-cutouts frontend pod +nodeSelector: {} + +# -- Tolerations for the vo-cutouts frontend pod +tolerations: [] + +# -- Affinity rules for the vo-cutouts frontend pod +affinity: {} + +# -- Moneypenny quotes +# @default -- A small selection +quips: | + Flattery will get you nowhere... but don't stop trying. 
+ % + You never take me to dinner looking like this, James. You never take me to dinner, period. + % + M: (on intercom) Miss Moneypenny, give 007 the password we've agreed + with Japanese SIS. + Moneypenny: Yes, Sir. We tried to think of something that you wouldn't + forget. + Bond: Yes? + Moneypenny: I... love... you. Repeat it please, to make sure you get it. + Bond: Don't worry, I get it. Sayonara. + % + My problem is, James, you never do anything with me. + % + I didn't know you were a music lover. Any time you want to come over and hear my Barry Manilow collection... + % + Someday you'll have to make good on your innuendos. + % + You always were a cunning linguist, James. + % + Bond: (about getting shot) In your defense, a moving target is harder to hit. + Moneypenny: Then you'd better keep moving. + % + Moneypenny: Cut-throat razor. How very traditional. + Bond: Well, I like to do some things the old-fashioned way. + Moneypenny: Sometimes the old ways are best. + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/narrativelog/Chart.yaml b/services/narrativelog/Chart.yaml index 3c96dd8481..dcddc58a34 100644 --- a/services/narrativelog/Chart.yaml +++ b/services/narrativelog/Chart.yaml @@ -1,10 +1,15 @@ apiVersion: v2 name: narrativelog -version: 0.0.1 -dependencies: - - name: narrativelog - version: ">=0.0.1" - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +type: application +description: Narrative log service +sources: + - https://github.com/lsst-sqre/narrativelog + +# The chart version. 
SQuaRE convention is to use 1.0.0 +version: 1.0.0 + +# This is the version number of the application being deployed. This version +# number should be incremented each time you make changes to the +# application. Versions are not expected to follow Semantic Versioning. They +# should reflect the version the application is using. +appVersion: 0.4.0 diff --git a/services/narrativelog/README.md b/services/narrativelog/README.md new file mode 100644 index 0000000000..281d4e69db --- /dev/null +++ b/services/narrativelog/README.md @@ -0,0 +1,41 @@ +# narrativelog + +Narrative log service + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the narrativelog pod | +| autoscaling | object | `{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80,"targetMemoryUtilizationPercentage":80}` | Narrativelog autoscaling settings | +| autoscaling.enabled | bool | false | enable narrativelog autoscaling | +| autoscaling.maxReplicas | int | `100` | maximum number of narrativelog replicas | +| autoscaling.minReplicas | int | `1` | minimum number of narrativelog replicas | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization for narrativelog pod autoscale calculations | +| autoscaling.targetMemoryUtilizationPercentage | int | `80` | Target memory utilization for narrativelog pod autoscale calculations | +| config | object | `{"site_id":""}` | Application-specific configuration | +| config.site_id | string | `""` | Site ID; a non-empty string of up to 16 characters. This should be different for each non-sandbox deployment. Sandboxes should use `test`. 
| +| db.database | string | `"narrativelog"` | database name | +| db.host | string | `"postgres.postgres"` | database host | +| db.port | int | `5432` | database port | +| db.user | string | `"narrativelog"` | database user | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"Always"` | Pull policy for the narrativelog image | +| image.repository | string | `"lsstsqre/narrativelog"` | narrativelog image to use | +| image.tag | string | The appVersion of the chart | Tag of exposure image to use | +| ingress.gafaelfawrAuthQuery | string | `""` | Gafaelfawr auth query string | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the narrativelog pod | +| podAnnotations | object | `{}` | Annotations for the narrativelog pod | +| podSecurityContext | object | `{}` | Security context for the narrativelog pod | +| replicaCount | int | `1` | Number of narrativelog replicas to run | +| resources | object | `{}` | Resource limits and requests for the narrativelog pod | +| securityContext | object | `{}` | Security context for the narrativelog deployment | +| tolerations | list | `[]` | Tolerations for the narrativelog pod | diff --git a/services/narrativelog/templates/_helpers.tpl b/services/narrativelog/templates/_helpers.tpl new file mode 100644 index 0000000000..fdd165f0a6 --- /dev/null +++ b/services/narrativelog/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "narrativelog.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. 
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "narrativelog.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "narrativelog.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "narrativelog.labels" -}} +helm.sh/chart: {{ include "narrativelog.chart" . }} +{{ include "narrativelog.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "narrativelog.selectorLabels" -}} +app.kubernetes.io/name: {{ include "narrativelog.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/narrativelog/templates/deployment.yaml b/services/narrativelog/templates/deployment.yaml new file mode 100644 index 0000000000..3284cd27a7 --- /dev/null +++ b/services/narrativelog/templates/deployment.yaml @@ -0,0 +1,80 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "narrativelog.fullname" . }} + labels: + {{- include "narrativelog.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "narrativelog.selectorLabels" . 
| nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "narrativelog.selectorLabels" . | nindent 8 }} + spec: + imagePullSecrets: + - name: "pull-secret" + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + - name: {{ .Chart.Name }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - all + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: 8080 + protocol: TCP + livenessProbe: + httpGet: + path: /narrativelog + port: http + readinessProbe: + httpGet: + path: /narrativelog + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + env: + - name: NARRATIVELOG_DB_USER + value: {{ .Values.db.user | quote }} + - name: NARRATIVELOG_DB_PASSWORD + valueFrom: + secretKeyRef: + name: narrativelog + key: narrativelog_password + - name: NARRATIVELOG_DB_HOST + value: {{ .Values.db.host | quote }} + - name: NARRATIVELOG_DB_PORT + value: {{ .Values.db.port | quote }} + - name: NARRATIVELOG_DB_DATABASE + value: {{ .Values.db.database | quote }} + - name: SITE_ID + value: {{ .Values.config.site_id | quote }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/services/narrativelog/templates/hpa.yaml b/services/narrativelog/templates/hpa.yaml new file mode 100644 index 0000000000..f6f914dabb --- /dev/null +++ b/services/narrativelog/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "narrativelog.fullname" . }} + labels: + {{- include "narrativelog.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "narrativelog.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/services/narrativelog/templates/ingress.yaml b/services/narrativelog/templates/ingress.yaml new file mode 100644 index 0000000000..cdf8f56d85 --- /dev/null +++ b/services/narrativelog/templates/ingress.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ template "narrativelog.fullname" . }} + labels: + {{- include "narrativelog.labels" . 
| nindent 4 }} + annotations: + {{- if .Values.ingress.gafaelfawrAuthQuery }} + nginx.ingress.kubernetes.io/auth-method: "GET" + nginx.ingress.kubernetes.io/auth-response-headers: "X-Auth-Request-User,X-Auth-Request-Email,X-Auth-Request-Token" + nginx.ingress.kubernetes.io/auth-signin: "{{ .Values.global.baseUrl }}/login" + nginx.ingress.kubernetes.io/auth-url: "{{ .Values.global.baseUrl }}/auth?{{ .Values.ingress.gafaelfawrAuthQuery }}" + {{- end }} + {{- with .Values.ingress.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ingressClassName: "nginx" + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: /narrativelog + pathType: Prefix + backend: + service: + name: {{ include "narrativelog.fullname" . }} + port: + number: 8080 diff --git a/services/narrativelog/templates/networkpolicy.yaml b/services/narrativelog/templates/networkpolicy.yaml new file mode 100644 index 0000000000..7afc9f8f78 --- /dev/null +++ b/services/narrativelog/templates/networkpolicy.yaml @@ -0,0 +1,21 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "narrativelog.fullname" . }} +spec: + podSelector: + matchLabels: + {{- include "narrativelog.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/narrativelog/templates/service.yaml b/services/narrativelog/templates/service.yaml new file mode 100644 index 0000000000..b955aaad6b --- /dev/null +++ b/services/narrativelog/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "narrativelog.fullname" . }} + labels: + {{- include "narrativelog.labels" . 
| nindent 4 }} +spec: + type: "ClusterIP" + ports: + - port: 8080 + targetPort: http + protocol: TCP + name: http + selector: + {{- include "narrativelog.selectorLabels" . | nindent 4 }} diff --git a/services/narrativelog/templates/tests/test-connection.yaml b/services/narrativelog/templates/tests/test-connection.yaml new file mode 100644 index 0000000000..770b0e6aa4 --- /dev/null +++ b/services/narrativelog/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "narrativelog.fullname" . }}-test-connection" + labels: + {{- include "narrativelog.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "narrativelog.fullname" . }}:8080'] + restartPolicy: Never diff --git a/services/narrativelog/templates/vault-secrets.yaml b/services/narrativelog/templates/vault-secrets.yaml index 52f0d5d817..dc13a9537a 100644 --- a/services/narrativelog/templates/vault-secrets.yaml +++ b/services/narrativelog/templates/vault-secrets.yaml @@ -1,8 +1,18 @@ apiVersion: ricoberger.de/v1alpha1 kind: VaultSecret metadata: - name: postgres + name: narrativelog namespace: narrativelog spec: - path: {{ .Values.vault_path }} + path: "{{- .Values.global.vaultSecretsPath }}/narrativelog" type: Opaque +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret + labels: + {{- include "narrativelog.labels" . 
| nindent 4 }} +spec: + path: "{{- .Values.global.vaultSecretsPath }}/pull-secret" + type: kubernetes.io/dockerconfigjson diff --git a/services/narrativelog/values-base.yaml b/services/narrativelog/values-base.yaml index 836cc179fa..36f59a4d75 100644 --- a/services/narrativelog/values-base.yaml +++ b/services/narrativelog/values-base.yaml @@ -1,14 +1,2 @@ -narrativelog: - imagePullSecrets: - - name: pull-secret - ingress: - enabled: true - host: base-lsp.lsst.codes - +config: site_id: base - -vault_path: secret/k8s_operator/base-lsp.lsst.codes/postgres - -pull-secret: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/pull-secret diff --git a/services/narrativelog/values-minikube.yaml b/services/narrativelog/values-minikube.yaml new file mode 100644 index 0000000000..45d77ff9ce --- /dev/null +++ b/services/narrativelog/values-minikube.yaml @@ -0,0 +1,2 @@ +config: + site_id: minikube diff --git a/services/narrativelog/values-summit.yaml b/services/narrativelog/values-summit.yaml index 6822c91375..04d1372bef 100644 --- a/services/narrativelog/values-summit.yaml +++ b/services/narrativelog/values-summit.yaml @@ -1,14 +1,4 @@ -narrativelog: - imagePullSecrets: - - name: pull-secret - ingress: - enabled: true - host: summit-lsp.lsst.codes - +config: site_id: summit - -vault_path: secret/k8s_operator/summit-lsp.lsst.codes/postgres - -pull-secret: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/pull-secret +db: + host: postgresdb01.cp.lsst.org diff --git a/services/narrativelog/values-tucson-teststand.yaml b/services/narrativelog/values-tucson-teststand.yaml index ede618f4cf..1350506e76 100644 --- a/services/narrativelog/values-tucson-teststand.yaml +++ b/services/narrativelog/values-tucson-teststand.yaml @@ -1,14 +1,4 @@ -narrativelog: - imagePullSecrets: - - name: pull-secret - ingress: - enabled: true - host: tucson-teststand.lsst.codes - +config: site_id: tucson - -vault_path: secret/k8s_operator/tucson-teststand.lsst.codes/postgres - 
-pull-secret: - enabled: true - path: secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret +db: + host: squoint.tu.lsst.org diff --git a/services/narrativelog/values.yaml b/services/narrativelog/values.yaml new file mode 100644 index 0000000000..cd16ffa3a6 --- /dev/null +++ b/services/narrativelog/values.yaml @@ -0,0 +1,109 @@ +# Default values for narrativelog. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# -- Number of narrativelog replicas to run +replicaCount: 1 + +image: + # -- narrativelog image to use + repository: lsstsqre/narrativelog + # -- Pull policy for the narrativelog image + pullPolicy: Always + # -- Tag of exposure image to use + # @default -- The appVersion of the chart + tag: "" + +db: + # -- database host + host: postgres.postgres + # -- database port + port: 5432 + # -- database user + user: narrativelog + # -- database name + database: narrativelog + +ingress: + # -- Gafaelfawr auth query string + gafaelfawrAuthQuery: "" + +# -- Application-specific configuration +config: + # -- Site ID; a non-empty string of up to 16 characters. + # This should be different for each non-sandbox deployment. + # Sandboxes should use `test`. + site_id: "" + +# -- Annotations for the narrativelog pod +podAnnotations: {} + +# -- Security context for the narrativelog pod +podSecurityContext: {} + # fsGroup: 2000 + +# -- Security context for the narrativelog deployment +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +# -- Resource limits and requests for the narrativelog pod +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. 
This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# -- Narrativelog autoscaling settings +autoscaling: + # -- enable narrativelog autoscaling + # @default -- false + enabled: false + # -- minimum number of narrativelog replicas + minReplicas: 1 + # -- maximum number of narrativelog replicas + maxReplicas: 100 + # -- Target CPU utilization for narrativelog pod autoscale calculations + targetCPUUtilizationPercentage: 80 + # -- Target memory utilization for narrativelog pod autoscale calculations + targetMemoryUtilizationPercentage: 80 + +# -- Node selector rules for the narrativelog pod +nodeSelector: {} + +# -- Tolerations for the narrativelog pod +tolerations: [] + +# -- Affinity rules for the narrativelog pod +affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/noteburst/Chart.yaml b/services/noteburst/Chart.yaml index fe22ca0114..c6b06801f8 100644 --- a/services/noteburst/Chart.yaml +++ b/services/noteburst/Chart.yaml @@ -1,10 +1,26 @@ apiVersion: v2 name: noteburst version: 1.0.0 +appVersion: "0.5.0" +description: Noteburst is a notebook execution service for the Rubin Science Platform. 
+type: application +home: https://noteburst.lsst.io/ +sources: + - https://github.com/lsst-sqre/noteburst +maintainers: + - name: jonathansick + url: https://github.com/jonathansick + dependencies: - - name: noteburst - version: 0.2.0-alpha.3 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 + - name: redis + version: 0.1.4 repository: https://lsst-sqre.github.io/charts/ + +annotations: + phalanx.lsst.io/docs: | + - id: "SQR-065" + title: "Design of Noteburst, a programatic JupyterLab notebook execution service for the Rubin Science Platform" + url: "https://sqr-065.lsst.io/" + - id: "SQR-062" + title: "The Times Square service for publishing parameterized Jupyter Notebooks in the Rubin Science platform" + url: "https://sqr-062.lsst.io/" diff --git a/services/noteburst/README.md b/services/noteburst/README.md new file mode 100644 index 0000000000..446b7626a7 --- /dev/null +++ b/services/noteburst/README.md @@ -0,0 +1,57 @@ +# noteburst + +Noteburst is a notebook execution service for the Rubin Science Platform. + +**Homepage:** + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | | +| autoscaling.enabled | bool | `false` | | +| autoscaling.maxReplicas | int | `100` | | +| autoscaling.minReplicas | int | `1` | | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | | +| config.logLevel | string | `"INFO"` | Logging level: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL" | +| config.worker.identities | list | `[]` | Science Platform user identities that workers can acquire. 
Each item is an object with username and uuid keys | +| config.worker.imageReference | string | `""` | Nublado image reference, applicable when imageSelector is "reference" | +| config.worker.imageSelector | string | `"weekly"` | Nublado image stream to select: "recommended", "weekly" or "reference" | +| config.worker.jobTimeout | int | `300` | The default notebook execution timeout, in seconds. | +| config.worker.keepAlive | string | `"normal"` | Worker keep alive mode: "normal", "fast", "disabled" | +| config.worker.tokenLifetime | string | `"2419200"` | Worker token lifetime, in seconds. | +| config.worker.tokenScopes | string | `"exec:notebook,read:image,read:tap,read:alertdb"` | Nublado2 worker account's token scopes as a comma-separated list. | +| config.worker.workerCount | int | `1` | Number of workers to run | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | +| image.repository | string | `"ghcr.io/lsst-sqre/noteburst"` | Noteburst image repository | +| image.tag | string | The appVersion of the chart | Tag of the image | +| imagePullSecrets | list | `[]` | Secret names to use for all Docker pulls | +| ingress.annotations | object | `{}` | Additional annotations to add to the ingress | +| ingress.path | string | `"/noteburst"` | Path prefix where noteburst is hosted | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | | +| podAnnotations | object | `{}` | Annotations for API and worker pods | +| redis.affinity | object | `{}` | Affinity rules for the Redis pod | +| redis.nodeSelector | object | `{}` | Node selection rules for the Redis pod | +| redis.persistence.enabled | bool | `true` | Whether to persist Redis storage and thus tokens. 
Setting this to false will use `emptyDir` and reset all tokens on every restart. Only use this for a test deployment. | +| redis.persistence.size | string | `"8Gi"` | Amount of persistent storage to request | +| redis.persistence.storageClass | string | `""` | Class of storage to request | +| redis.persistence.volumeClaimName | string | `""` | Use an existing PVC, not dynamic provisioning. If this is set, the size, storageClass, and accessMode settings are ignored. | +| redis.podAnnotations | object | `{}` | Pod annotations for the Redis pod | +| redis.resources | object | See `values.yaml` | Resource limits and requests for the Redis pod | +| redis.tolerations | list | `[]` | Tolerations for the Redis pod | +| replicaCount | int | `1` | Number of API pods to run | +| resources | object | `{}` | | +| service.port | int | `80` | Port of the service to create and map to the ingress | +| service.type | string | `"ClusterIP"` | Type of service to create | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account | +| serviceAccount.create | bool | `true` | Specifies whether a service account should be created | +| serviceAccount.name | string | `""` | | +| tolerations | list | `[]` | | diff --git a/services/noteburst/templates/NOTES.txt b/services/noteburst/templates/NOTES.txt new file mode 100644 index 0000000000..4040d0b8b2 --- /dev/null +++ b/services/noteburst/templates/NOTES.txt @@ -0,0 +1,22 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "noteburst.fullname" . 
}}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "noteburst.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "noteburst.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "noteburst.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/services/noteburst/templates/_helpers.tpl b/services/noteburst/templates/_helpers.tpl new file mode 100644 index 0000000000..b4ce5c66f4 --- /dev/null +++ b/services/noteburst/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "noteburst.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "noteburst.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "noteburst.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "noteburst.labels" -}} +helm.sh/chart: {{ include "noteburst.chart" . }} +{{ include "noteburst.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "noteburst.selectorLabels" -}} +app.kubernetes.io/name: {{ include "noteburst.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "noteburst.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "noteburst.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/services/noteburst/templates/configmap.yaml b/services/noteburst/templates/configmap.yaml new file mode 100644 index 0000000000..cf31ac9fce --- /dev/null +++ b/services/noteburst/templates/configmap.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "noteburst.fullname" . }} + labels: + {{- include "noteburst.labels" . 
| nindent 4 }} +data: + SAFIR_LOG_LEVEL: {{ .Values.config.logLevel | quote }} + NOTEBURST_PATH_PREFIX: {{ .Values.ingress.path | quote }} + NOTEBURST_ENVIRONMENT_URL: {{ .Values.global.baseUrl | quote }} + NOTEBURST_REDIS_URL: "redis://{{ include "noteburst.fullname" . }}-redis.{{ .Release.Namespace }}:6379/0" diff --git a/services/noteburst/templates/deployment.yaml b/services/noteburst/templates/deployment.yaml new file mode 100644 index 0000000000..375d5befa2 --- /dev/null +++ b/services/noteburst/templates/deployment.yaml @@ -0,0 +1,78 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "noteburst.fullname" . }} + labels: + {{- include "noteburst.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "noteburst.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "noteburst.selectorLabels" . | nindent 8 }} + noteburst-redis-client: "true" + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "noteburst.serviceAccountName" . }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: 8080 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "noteburst.fullname" . 
}} + env: + - name: "NOTEBURST_GAFAELFAWR_TOKEN" + valueFrom: + secretKeyRef: + name: {{ template "noteburst.fullname" . }}-gafaelfawr-token + key: "token" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/noteburst/templates/gafaelfawrtoken.yaml b/services/noteburst/templates/gafaelfawrtoken.yaml new file mode 100644 index 0000000000..0b42e8cebf --- /dev/null +++ b/services/noteburst/templates/gafaelfawrtoken.yaml @@ -0,0 +1,11 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrServiceToken +metadata: + name: {{ include "noteburst.fullname" . }}-gafaelfawr-token + labels: + {{- include "noteburst.labels" . | nindent 4 }} +spec: + service: "bot-noteburst" + scopes: + - "admin:token" + - "exec:admin" diff --git a/services/noteburst/templates/hpa.yaml b/services/noteburst/templates/hpa.yaml new file mode 100644 index 0000000000..6aa4c907a0 --- /dev/null +++ b/services/noteburst/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "noteburst.fullname" . }} + labels: + {{- include "noteburst.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "noteburst.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/services/noteburst/templates/ingress.yaml b/services/noteburst/templates/ingress.yaml new file mode 100644 index 0000000000..2fef313df5 --- /dev/null +++ b/services/noteburst/templates/ingress.yaml @@ -0,0 +1,31 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "noteburst.fullname" . }} + labels: + {{- include "noteburst.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "exec:admin" + loginRedirect: true +template: + metadata: + name: {{ template "noteburst.fullname" . }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: {{ .Values.ingress.path | quote }} + pathType: "Prefix" + backend: + service: + name: {{ template "noteburst.fullname" . }} + port: + number: {{ .Values.service.port }} diff --git a/services/noteburst/templates/service.yaml b/services/noteburst/templates/service.yaml new file mode 100644 index 0000000000..7a2c392379 --- /dev/null +++ b/services/noteburst/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "noteburst.fullname" . }} + labels: + {{- include "noteburst.labels" . 
| nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "noteburst.selectorLabels" . | nindent 4 }} diff --git a/services/noteburst/templates/serviceaccount.yaml b/services/noteburst/templates/serviceaccount.yaml new file mode 100644 index 0000000000..5035d4622b --- /dev/null +++ b/services/noteburst/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "noteburst.serviceAccountName" . }} + labels: + {{- include "noteburst.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/services/noteburst/templates/tests/test-connection.yaml b/services/noteburst/templates/tests/test-connection.yaml new file mode 100644 index 0000000000..c83e8c28ff --- /dev/null +++ b/services/noteburst/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "noteburst.fullname" . }}-test-connection" + labels: + {{- include "noteburst.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "noteburst.fullname" . }}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/services/noteburst/templates/worker-configmap.yaml b/services/noteburst/templates/worker-configmap.yaml new file mode 100644 index 0000000000..1f47d46fdd --- /dev/null +++ b/services/noteburst/templates/worker-configmap.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "noteburst.fullname" . }}-worker + labels: + {{- include "noteburst.labels" . 
| nindent 4 }} +data: + SAFIR_LOG_LEVEL: {{ .Values.config.logLevel | quote }} + NOTEBURST_ENVIRONMENT_URL: {{ .Values.global.baseUrl | quote }} + NOTEBURST_REDIS_URL: "redis://{{ include "noteburst.fullname" . }}-redis.{{ .Release.Namespace }}:6379/0" + NOTEBURST_WORKER_LOCK_REDIS_URL: "redis://{{ include "noteburst.fullname" . }}-redis.{{ .Release.Namespace }}:6379/1" + NOTEBURST_WORKER_JOB_TIMEOUT: {{ .Values.config.worker.jobTimeout | quote }} + NOTEBURST_WORKER_TOKEN_LIFETIME: {{ .Values.config.worker.tokenLifetime | quote }} + NOTEBURST_WORKER_IMAGE_SELECTOR: {{ .Values.config.worker.imageSelector | quote }} + NOTEBURST_WORKER_IMAGE_REFERENCE: {{ .Values.config.worker.imageReference | quote }} + NOTEBURST_WORKER_TOKEN_SCOPES: {{ .Values.config.worker.tokenScopes | quote }} + NOTEBURST_WORKER_KEEPALIVE: {{ .Values.config.worker.keepAlive | quote }} diff --git a/services/noteburst/templates/worker-deployment.yaml b/services/noteburst/templates/worker-deployment.yaml new file mode 100644 index 0000000000..12b75fb4fa --- /dev/null +++ b/services/noteburst/templates/worker-deployment.yaml @@ -0,0 +1,80 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "noteburst.fullname" . }}-worker + labels: + {{- include "noteburst.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.config.worker.workerCount }} + {{- end }} + selector: + matchLabels: + {{- include "noteburst.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/worker-configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "noteburst.selectorLabels" . | nindent 8 }} + noteburst-redis-client: "true" + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "noteburst.serviceAccountName" . 
}} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + containers: + - name: {{ .Chart.Name }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["arq"] + args: ["noteburst.worker.main.WorkerSettings"] + livenessProbe: + exec: + command: + - "arq" + - "--check" + - "noteburst.worker.main.WorkerSettings" + initialDelaySeconds: 360 + periodSeconds: 15 + resources: + {{- toYaml .Values.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "noteburst.fullname" . }}-worker + env: + - name: "NOTEBURST_GAFAELFAWR_TOKEN" + valueFrom: + secretKeyRef: + name: {{ template "noteburst.fullname" . }}-gafaelfawr-token + key: "token" + - name: "NOTEBURST_WORKER_IDENTITIES_PATH" + value: "/etc/noteburst/identities.yaml" + volumeMounts: + - name: "identities" + mountPath: "/etc/noteburst" + readOnly: true + volumes: + - name: "identities" + configMap: + name: {{ include "noteburst.fullname" . }}-worker-identities + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/noteburst/templates/worker-identities-configmap.yaml b/services/noteburst/templates/worker-identities-configmap.yaml new file mode 100644 index 0000000000..cff6f95d8a --- /dev/null +++ b/services/noteburst/templates/worker-identities-configmap.yaml @@ -0,0 +1,9 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "noteburst.fullname" . }}-worker-identities + labels: + {{- include "noteburst.labels" . 
| nindent 4 }} +data: + identities.yaml: | + {{- toYaml .Values.config.worker.identities | nindent 4 }} diff --git a/services/noteburst/values-base.yaml b/services/noteburst/values-base.yaml deleted file mode 100644 index 4257454135..0000000000 --- a/services/noteburst/values-base.yaml +++ /dev/null @@ -1,17 +0,0 @@ -noteburst: - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "base-lsp.lsst.codes" - paths: - - path: "/noteburst" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/base-lsp.lsst.codes/noteburst" - -pull-secret: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/pull-secret diff --git a/services/noteburst/values-idfdev.yaml b/services/noteburst/values-idfdev.yaml index 79aa8df89c..2226548fca 100644 --- a/services/noteburst/values-idfdev.yaml +++ b/services/noteburst/values-idfdev.yaml @@ -1,27 +1,20 @@ -noteburst: - image: - pullPolicy: Always - tag: tickets-DM-33025 - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "data-dev.lsst.cloud" - paths: - - path: "/noteburst" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/data-dev.lsst.cloud/noteburst" - config: - environmentUrl: "https://data-dev.lsst.cloud" - worker: - workerCount: 1 - identities: - - uuid: 90000 - username: "noteburst90000" +image: + pullPolicy: Always + # tag: tickets-DM-33025 -pull-secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/pull-secret +config: + logLevel: "DEBUG" + worker: + workerCount: 1 + identities: + - username: "bot-noteburst90000" + - username: "bot-noteburst90001" + - username: "bot-noteburst90002" + - username: "bot-noteburst90003" + - username: "bot-noteburst90004" + - username: "bot-noteburst90005" + +# Use SSD for Redis storage. 
+redis: + persistence: + storageClass: "premium-rwo" diff --git a/services/noteburst/values-idfint.yaml b/services/noteburst/values-idfint.yaml deleted file mode 100644 index 74bd2e783a..0000000000 --- a/services/noteburst/values-idfint.yaml +++ /dev/null @@ -1,17 +0,0 @@ -noteburst: - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "data-int.lsst.cloud" - paths: - - path: "/noteburst" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/data-int.lsst.cloud/noteburst" - -pull-secret: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/pull-secret diff --git a/services/noteburst/values-idfprod.yaml b/services/noteburst/values-idfprod.yaml deleted file mode 100644 index 4a705c588a..0000000000 --- a/services/noteburst/values-idfprod.yaml +++ /dev/null @@ -1,17 +0,0 @@ -noteburst: - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "data.lsst.cloud" - paths: - - path: "/noteburst" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/data.lsst.cloud/noteburst" - -pull-secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/pull-secret diff --git a/services/noteburst/values-int.yaml b/services/noteburst/values-int.yaml deleted file mode 100644 index 1186b1186f..0000000000 --- a/services/noteburst/values-int.yaml +++ /dev/null @@ -1,17 +0,0 @@ -noteburst: - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "lsst-lsp-int.ncsa.illinois.edu" - paths: - - path: "/noteburst" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/noteburst" - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret diff --git a/services/noteburst/values-minikube.yaml b/services/noteburst/values-minikube.yaml 
index 4c309318e2..0e9052d8a5 100644 --- a/services/noteburst/values-minikube.yaml +++ b/services/noteburst/values-minikube.yaml @@ -1,27 +1,6 @@ -noteburst: - image: - pullPolicy: Always - tag: tickets-DM-33025 - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "minikube.lsst.codes" - paths: - - path: "/noteburst" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/minikube.lsst.codes/noteburst" - config: - environmentUrl: "https://minikube.lsst.cloud" - worker: - workerCount: 1 - identities: - - uuid: 90000 - username: "noteburst90000" - -pull-secret: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/pull-secret +config: + worker: + workerCount: 0 + identities: + - uid: 90000 + username: "noteburst90000" diff --git a/services/noteburst/values-red-five.yaml b/services/noteburst/values-red-five.yaml deleted file mode 100644 index 166bea6aad..0000000000 --- a/services/noteburst/values-red-five.yaml +++ /dev/null @@ -1,17 +0,0 @@ -noteburst: - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "red-five.lsst.codes" - paths: - - path: "/noteburst" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/red-five.lsst.codes/noteburst" - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git a/services/noteburst/values-stable.yaml b/services/noteburst/values-stable.yaml deleted file mode 100644 index 7ae818681c..0000000000 --- a/services/noteburst/values-stable.yaml +++ /dev/null @@ -1,17 +0,0 @@ -noteburst: - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "lsst-lsp-stable.ncsa.illinois.edu" - paths: - - path: "/noteburst" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/noteburst" - 
-pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret diff --git a/services/noteburst/values-summit.yaml b/services/noteburst/values-summit.yaml deleted file mode 100644 index 5515a94d7f..0000000000 --- a/services/noteburst/values-summit.yaml +++ /dev/null @@ -1,17 +0,0 @@ -noteburst: - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "summit-lsp.lsst.codes" - paths: - - path: "/noteburst" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/summit-lsp.lsst.codes/noteburst" - -pull-secret: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/pull-secret diff --git a/services/noteburst/values-tucson-teststand.yaml b/services/noteburst/values-tucson-teststand.yaml deleted file mode 100644 index b1322b5547..0000000000 --- a/services/noteburst/values-tucson-teststand.yaml +++ /dev/null @@ -1,17 +0,0 @@ -noteburst: - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "tucson-teststand.lsst.codes" - paths: - - path: "/noteburst" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/tucson-teststand.lsst.codes/noteburst" - -pull-secret: - enabled: true - path: secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret diff --git a/services/noteburst/values.yaml b/services/noteburst/values.yaml new file mode 100644 index 0000000000..b6ae64c659 --- /dev/null +++ b/services/noteburst/values.yaml @@ -0,0 +1,155 @@ +# Default values for noteburst. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. +# +# Global parameters will be set by parameters injected by Argo CD and should +# not be set in the individual environment values files. 
+global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + +# -- Number of API pods to run +replicaCount: 1 + +image: + # -- Noteburst image repository + repository: ghcr.io/lsst-sqre/noteburst + + # -- Image pull policy + pullPolicy: IfNotPresent + + # -- Tag of the image + # @default -- The appVersion of the chart + tag: "" + +# -- Secret names to use for all Docker pulls +imagePullSecrets: [] + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +serviceAccount: + # -- Specifies whether a service account should be created + create: true + + # -- Annotations to add to the service account + annotations: {} + + # The name of the service account to use. + # @default -- Generated using the fullname template + name: "" + +# -- Annotations for API and worker pods +podAnnotations: {} + +service: + # -- Type of service to create + type: ClusterIP + + # -- Port of the service to create and map to the ingress + port: 80 + +ingress: + # -- Additional annotations to add to the ingress + annotations: {} + + # -- Path prefix where noteburst is hosted + path: "/noteburst" + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +config: + # -- Logging level: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL" + logLevel: "INFO" + + worker: + # -- Science Platform user identities that workers can acquire. Each item + # is an object with username and uuid keys + identities: [] + + # -- Number of workers to run + workerCount: 1 + + # -- The default notebook execution timeout, in seconds. + jobTimeout: 300 + + # -- Worker token lifetime, in seconds. + tokenLifetime: "2419200" + + # -- Nublado2 worker account's token scopes as a comma-separated list. + tokenScopes: "exec:notebook,read:image,read:tap,read:alertdb" + + # -- Nublado image stream to select: "recommended", "weekly" or "reference" + imageSelector: "weekly" + + # -- Nublado image reference, applicable when imageSelector is "reference" + imageReference: "" + + # -- Worker keep alive mode: "normal", "fast", "disabled" + keepAlive: "normal" + +redis: + persistence: + # -- Whether to persist Redis storage and thus tokens. Setting this to + # false will use `emptyDir` and reset all tokens on every restart. Only + # use this for a test deployment. + enabled: true + + # -- Amount of persistent storage to request + size: "8Gi" + + # -- Class of storage to request + storageClass: "" + + # -- Use an existing PVC, not dynamic provisioning. If this is set, the + # size, storageClass, and accessMode settings are ignored. 
+ volumeClaimName: "" + + # -- Resource limits and requests for the Redis pod + # @default -- See `values.yaml` + resources: + limits: + cpu: "1" + + # -- Pod annotations for the Redis pod + podAnnotations: {} + + # -- Node selection rules for the Redis pod + nodeSelector: {} + + # -- Tolerations for the Redis pod + tolerations: [] + + # -- Affinity rules for the Redis pod + affinity: {} diff --git a/services/nublado2/Chart.yaml b/services/nublado2/Chart.yaml index 543a7c9157..d758b2bf68 100644 --- a/services/nublado2/Chart.yaml +++ b/services/nublado2/Chart.yaml @@ -1,10 +1,24 @@ apiVersion: v2 name: nublado2 version: 1.0.0 +description: JupyterHub for the Rubin Science Platform +home: https://github.com/lsst-sqre/nublado2 +sources: + - https://github.com/lsst-sqre/nublado2 +# This version is not used directly. Also update the tag in values.yaml. +appVersion: "2.6.1" + +# Match the jupyterhub Helm chart for kubeVersion +kubeVersion: ">=1.20.0-0" dependencies: - - name: nublado2 - version: 0.7.0 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ + - name: jupyterhub + # This is the Zero To Jupyterhub version, *not* the version of the + # Jupyterhub package itself. + version: "2.0.0" + repository: https://jupyterhub.github.io/helm-chart/ + +annotations: + phalanx.lsst.io/docs: | + - id: "DMTN-164" + title: "Nublado v2 Architecture" + url: "https://dmtn-164.lsst.io/" diff --git a/services/nublado2/README.md b/services/nublado2/README.md new file mode 100644 index 0000000000..9caffa00d5 --- /dev/null +++ b/services/nublado2/README.md @@ -0,0 +1,118 @@ +# nublado2 + +JupyterHub for the Rubin Science Platform + +**Homepage:** + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| config.base_url | string | `""` | base_url must be set in each instantiation of this chart to the URL of the primary ingress. 
It's used to construct API requests to the authentication service (which should go through the ingress). | +| config.butler_secret_path | string | `""` | butler_secret_path must be set here, because it's passed through to the lab rather than being part of the Hub configuration. | +| config.cachemachine_image_policy | string | `"available"` | Cachemachine image policy: "available" or "desired". Use "desired" at instances with streaming image support. | +| config.lab_environment | object | See `values.yaml` | Environment variables to set in spawned lab containers. Each value will be expanded using Jinja 2 templating. | +| config.pinned_images | list | `[]` | images to pin to spawner menu | +| config.pull_secret_path | string | `""` | pull_secret_path must also be set here; it specifies resources in the lab namespace | +| config.shutdown_on_logout | bool | `true` | shut down user pods on logout. Superfluous, because our LogoutHandler enforces this in any event, but nice to make explicit. | +| config.sizes | list | `[{"cpu":1,"name":"Small","ram":"3072M"},{"cpu":2,"name":"Medium","ram":"6144M"},{"cpu":4,"name":"Large","ram":"12288M"}]` | definitions of Lab sizes available in a given instance | +| config.user_resources_template | string | See `values.yaml` | Templates for the user resources to create for each lab spawn. This is a string that can be templated and then loaded as YAML to generate a list of Kubernetes objects to create. 
| +| config.volume_mounts | list | `[]` | Where to mount volumes for a particular instance | +| config.volumes | list | `[]` | Volumes to use for a particular instance | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| jupyterhub.cull.enabled | bool | `true` | | +| jupyterhub.cull.every | int | `600` | | +| jupyterhub.cull.maxAge | int | `5184000` | | +| jupyterhub.cull.removeNamedServers | bool | `true` | | +| jupyterhub.cull.timeout | int | `2592000` | | +| jupyterhub.cull.users | bool | `true` | | +| jupyterhub.hub.authenticatePrometheus | bool | `false` | | +| jupyterhub.hub.baseUrl | string | `"/nb"` | | +| jupyterhub.hub.config.Authenticator.enable_auth_state | bool | `true` | | +| jupyterhub.hub.config.JupyterHub.authenticator_class | string | `"nublado2.auth.GafaelfawrAuthenticator"` | | +| jupyterhub.hub.config.ServerApp.shutdown_no_activity_timeout | int | `604800` | | +| jupyterhub.hub.containerSecurityContext.allowPrivilegeEscalation | bool | `false` | | +| jupyterhub.hub.containerSecurityContext.runAsGroup | int | `768` | | +| jupyterhub.hub.containerSecurityContext.runAsUser | int | `768` | | +| jupyterhub.hub.db.password | string | `"true"` | | +| jupyterhub.hub.db.type | string | `"postgres"` | | +| jupyterhub.hub.db.url | string | `"postgresql://jovyan@postgres.postgres/jupyterhub"` | | +| jupyterhub.hub.existingSecret | string | `"nublado2-secret"` | | +| jupyterhub.hub.extraConfig."nublado.py" | string | `"import nublado2.hub_config\nnublado2.hub_config.HubConfig().configure(c)\n"` | | +| jupyterhub.hub.extraVolumeMounts[0].mountPath | string | `"/etc/jupyterhub/nublado_config.yaml"` | | +| jupyterhub.hub.extraVolumeMounts[0].name | string | `"nublado-config"` | | +| jupyterhub.hub.extraVolumeMounts[0].subPath | string | `"nublado_config.yaml"` | | +| jupyterhub.hub.extraVolumeMounts[1].mountPath | string | `"/etc/keys/gafaelfawr-token"` | | +| jupyterhub.hub.extraVolumeMounts[1].name | string | 
`"nublado-gafaelfawr"` | | +| jupyterhub.hub.extraVolumeMounts[1].subPath | string | `"token"` | | +| jupyterhub.hub.extraVolumes[0].configMap.name | string | `"nublado-config"` | | +| jupyterhub.hub.extraVolumes[0].name | string | `"nublado-config"` | | +| jupyterhub.hub.extraVolumes[1].name | string | `"nublado-gafaelfawr"` | | +| jupyterhub.hub.extraVolumes[1].secret.secretName | string | `"gafaelfawr-token"` | | +| jupyterhub.hub.image.name | string | `"lsstsqre/nublado2"` | | +| jupyterhub.hub.image.tag | string | `"2.6.1"` | | +| jupyterhub.hub.loadRoles.self.scopes[0] | string | `"admin:servers!user"` | | +| jupyterhub.hub.loadRoles.self.scopes[1] | string | `"read:metrics"` | | +| jupyterhub.hub.loadRoles.server.scopes[0] | string | `"inherit"` | | +| jupyterhub.hub.networkPolicy.enabled | bool | `false` | | +| jupyterhub.hub.resources.limits.cpu | string | `"900m"` | | +| jupyterhub.hub.resources.limits.memory | string | `"1Gi"` | | +| jupyterhub.imagePullSecrets[0].name | string | `"pull-secret"` | | +| jupyterhub.ingress.annotations | object | See `values.yaml` | Extra annotations to add to the ingress | +| jupyterhub.ingress.enabled | bool | `true` | | +| jupyterhub.ingress.ingressClassName | string | `"nginx"` | | +| jupyterhub.ingress.pathSuffix | string | `"*"` | | +| jupyterhub.prePuller.continuous.enabled | bool | `false` | | +| jupyterhub.prePuller.hook.enabled | bool | `false` | | +| jupyterhub.proxy.chp.networkPolicy.interNamespaceAccessLabels | string | `"accept"` | | +| jupyterhub.proxy.service.type | string | `"ClusterIP"` | | +| jupyterhub.scheduling.userPlaceholder.enabled | bool | `false` | | +| jupyterhub.scheduling.userScheduler.enabled | bool | `false` | | +| jupyterhub.singleuser.cloudMetadata.blockWithIptables | bool | `false` | | +| jupyterhub.singleuser.cmd | string | `"/opt/lsst/software/jupyterlab/runlab.sh"` | | +| jupyterhub.singleuser.defaultUrl | string | `"/lab"` | | +| 
jupyterhub.singleuser.extraAnnotations."argocd.argoproj.io/compare-options" | string | `"IgnoreExtraneous"` | | +| jupyterhub.singleuser.extraAnnotations."argocd.argoproj.io/sync-options" | string | `"Prune=false"` | | +| jupyterhub.singleuser.extraLabels."argocd.argoproj.io/instance" | string | `"nublado-users"` | | +| jupyterhub.singleuser.extraLabels."hub.jupyter.org/network-access-hub" | string | `"true"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[0].mountPath | string | `"/etc/dask"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[0].name | string | `"dask"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[1].mountPath | string | `"/opt/lsst/software/jupyterlab/panda"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[1].name | string | `"idds-config"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[2].mountPath | string | `"/tmp"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[2].name | string | `"tmp"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[3].mountPath | string | `"/opt/lsst/software/jupyterlab/butler-secret"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[3].name | string | `"butler-secret"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[4].mountPath | string | `"/opt/lsst/software/jupyterlab/environment"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[4].name | string | `"lab-environment"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[5].mountPath | string | `"/etc/passwd"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[5].name | string | `"passwd"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[5].readOnly | bool | `true` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[5].subPath | string | `"passwd"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[6].mountPath | string | `"/etc/group"` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[6].name | string | `"group"` | | +| 
jupyterhub.singleuser.storage.extraVolumeMounts[6].readOnly | bool | `true` | | +| jupyterhub.singleuser.storage.extraVolumeMounts[6].subPath | string | `"group"` | | +| jupyterhub.singleuser.storage.extraVolumes[0].configMap.name | string | `"dask"` | | +| jupyterhub.singleuser.storage.extraVolumes[0].name | string | `"dask"` | | +| jupyterhub.singleuser.storage.extraVolumes[1].configMap.name | string | `"idds-config"` | | +| jupyterhub.singleuser.storage.extraVolumes[1].name | string | `"idds-config"` | | +| jupyterhub.singleuser.storage.extraVolumes[2].emptyDir | object | `{}` | | +| jupyterhub.singleuser.storage.extraVolumes[2].name | string | `"tmp"` | | +| jupyterhub.singleuser.storage.extraVolumes[3].name | string | `"butler-secret"` | | +| jupyterhub.singleuser.storage.extraVolumes[3].secret.secretName | string | `"butler-secret"` | | +| jupyterhub.singleuser.storage.extraVolumes[4].configMap.defaultMode | int | `420` | | +| jupyterhub.singleuser.storage.extraVolumes[4].configMap.name | string | `"lab-environment"` | | +| jupyterhub.singleuser.storage.extraVolumes[4].name | string | `"lab-environment"` | | +| jupyterhub.singleuser.storage.extraVolumes[5].configMap.defaultMode | int | `420` | | +| jupyterhub.singleuser.storage.extraVolumes[5].configMap.name | string | `"passwd"` | | +| jupyterhub.singleuser.storage.extraVolumes[5].name | string | `"passwd"` | | +| jupyterhub.singleuser.storage.extraVolumes[6].configMap.defaultMode | int | `420` | | +| jupyterhub.singleuser.storage.extraVolumes[6].configMap.name | string | `"group"` | | +| jupyterhub.singleuser.storage.extraVolumes[6].name | string | `"group"` | | +| jupyterhub.singleuser.storage.type | string | `"none"` | | +| network_policy.enabled | bool | `true` | | diff --git a/services/nublado2/templates/_helpers.tpl b/services/nublado2/templates/_helpers.tpl new file mode 100644 index 0000000000..7b318e97f0 --- /dev/null +++ b/services/nublado2/templates/_helpers.tpl @@ -0,0 +1,56 @@ +{{/* vim: set 
filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "nublado2.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "nublado2.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "nublado2.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "nublado2.labels" -}} +app.kubernetes.io/name: {{ include "nublado2.name" . }} +helm.sh/chart: {{ include "nublado2.chart" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Create the name of the service account to use +*/}} +{{- define "nublado2.serviceAccountName" -}} +{{- if .Values.serviceAccount.create -}} + {{ default (include "nublado2.fullname" .) 
.Values.serviceAccount.name }} +{{- else -}} + {{ default "default" .Values.serviceAccount.name }} +{{- end -}} +{{- end -}} diff --git a/services/nublado2/templates/clusterrole.yaml b/services/nublado2/templates/clusterrole.yaml new file mode 100644 index 0000000000..cc8a8b5e99 --- /dev/null +++ b/services/nublado2/templates/clusterrole.yaml @@ -0,0 +1,28 @@ +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "nublado2.fullname" . }}-hub +rules: +- apiGroups: [""] + resources: ["pods","events", "namespaces", "serviceaccounts", "services", + "persistentvolumeclaims", "persistentvolumes", "resourcequotas", + "configmaps", "pods/log", "pods/exec"] + verbs: ["get", "list", "create", "watch", "delete", "update", "patch"] +- apiGroups: [""] + resources: ["secrets"] + verbs: ["get", "create", "delete"] +- apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["create", "delete", "get", "list", "watch"] +- apiGroups: ["rbac.authorization.k8s.io"] + resources: ["roles", "rolebindings"] + verbs: ["get", "list", "create", "delete"] +- apiGroups: ["argoproj.io"] + resources: ["workflows", "workflows/finalizers"] + verbs: ["get", "list", "create", "watch", "delete", "update", "patch"] +- apiGroups: ["argoproj.io"] + resources: ["workflowtemplates", "workflowtemplates/finalizers"] + verbs: ["get", "list", "watch"] +- apiGroups: ["ricoberger.de"] + resources: ["vaultsecrets"] + verbs: ["get", "create", "delete", "list"] diff --git a/services/nublado2/templates/clusterrolebinding.yaml b/services/nublado2/templates/clusterrolebinding.yaml new file mode 100644 index 0000000000..cdb0c5fd53 --- /dev/null +++ b/services/nublado2/templates/clusterrolebinding.yaml @@ -0,0 +1,13 @@ +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "nublado2.fullname" . 
}}-hub +subjects: + # Note: this service account is created by the jupyterhub subchart + - kind: ServiceAccount + name: hub + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ template "nublado2.fullname" . }}-hub + apiGroup: rbac.authorization.k8s.io diff --git a/services/nublado2/templates/gafaelfawr-token.yaml b/services/nublado2/templates/gafaelfawr-token.yaml new file mode 100644 index 0000000000..06a9822b82 --- /dev/null +++ b/services/nublado2/templates/gafaelfawr-token.yaml @@ -0,0 +1,10 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrServiceToken +metadata: + name: "gafaelfawr-token" + labels: + {{- include "nublado2.labels" . | nindent 4 }} +spec: + service: "bot-nublado2" + scopes: + - "admin:provision" diff --git a/services/nublado2/templates/netpol.yaml b/services/nublado2/templates/netpol.yaml new file mode 100644 index 0000000000..91da074252 --- /dev/null +++ b/services/nublado2/templates/netpol.yaml @@ -0,0 +1,27 @@ +{{- if .Values.network_policy.enabled }} +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: hub + labels: + {{- include "nublado2.labels" . | nindent 4 }} +spec: + podSelector: + matchLabels: + app: jupyterhub + component: hub + release: {{ .Release.Name }} + policyTypes: + - Ingress + + ingress: + # allowed pods (hub.jupyter.org/network-access-hub) --> hub + - ports: + - port: http + - port: 8081 + from: + - podSelector: + matchLabels: + hub.jupyter.org/network-access-hub: "true" + namespaceSelector: {} +{{- end }} diff --git a/services/nublado2/templates/nublado-config.yaml b/services/nublado2/templates/nublado-config.yaml new file mode 100644 index 0000000000..fbc234d394 --- /dev/null +++ b/services/nublado2/templates/nublado-config.yaml @@ -0,0 +1,9 @@ +kind: ConfigMap +apiVersion: v1 +metadata: + name: nublado-config + labels: + {{- include "nublado2.labels" . 
| nindent 4 }} +data: + nublado_config.yaml: | + {{- toYaml .Values.config | nindent 4 }} diff --git a/services/nublado2/templates/vault-secrets.yaml b/services/nublado2/templates/vault-secrets.yaml new file mode 100644 index 0000000000..962d6c1896 --- /dev/null +++ b/services/nublado2/templates/vault-secrets.yaml @@ -0,0 +1,33 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: "nublado2-secret" +spec: + path: "{{- .Values.global.vaultSecretsPath }}/nublado2" + type: Opaque + + templates: + {{- /* dump in values.yaml for jupyterhub, without changing it */}} + {{- /* this is copied from the zero-to-jupyterhub chart where it does this */}} + {{- $values := merge dict .Values.jupyterhub }} + {{- /* passthrough subset of Chart / Release */}} + {{- $_ := set $values "Chart" (dict "Name" .Chart.Name "Version" .Chart.Version) }} + {{- $_ := set $values "Release" (pick .Release "Name" "Namespace" "Service") }} + values.yaml: {{ $values | toYaml | quote }} + + {{- /* dump in the rest of the keys in this path and their values */}} + {{- /* this uses the templating provided by vault-secrets-operator */}} + hub.db.password: "{% .Secrets.hub_db_password %}" + hub.config.JupyterHub.cookie_secret: "{% .Secrets.crypto_key %}" + hub.config.CryptKeeper.keys: "{% .Secrets.cryptkeeper_key %}" + hub.config.ConfigurableHTTPProxy.auth_token: "{% .Secrets.proxy_token %}" +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret + labels: + {{- include "nublado2.labels" . 
| nindent 4 }} +spec: + path: "{{- .Values.global.vaultSecretsPath }}/pull-secret" + type: kubernetes.io/dockerconfigjson diff --git a/services/nublado2/values-base.yaml b/services/nublado2/values-base.yaml index 14d95ee162..4eea1c4f87 100644 --- a/services/nublado2/values-base.yaml +++ b/services/nublado2/values-base.yaml @@ -1,52 +1,46 @@ -nublado2: - jupyterhub: - ingress: - hosts: ["base-lsp.lsst.codes"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: "https://base-lsp.lsst.codes/login" +jupyterhub: + ingress: + hosts: ["base-lsp.lsst.codes"] + annotations: + nginx.ingress.kubernetes.io/auth-signin: "https://base-lsp.lsst.codes/login" - singleuser: - extraAnnotations: - k8s.v1.cni.cncf.io/networks: "kube-system/macvlan-conf" - initContainers: - - name: "multus-init" - image: "lsstit/ddsnet4u:latest" - securityContext: - privileged: true + singleuser: + extraAnnotations: + k8s.v1.cni.cncf.io/networks: "kube-system/macvlan-conf" + initContainers: + - name: "multus-init" + image: "lsstit/ddsnet4u:latest" + securityContext: + privileged: true - config: - base_url: "https://base-lsp.lsst.codes" - butler_secret_path: "secret/k8s_operator/base-lsp.lsst.codes/butler-secret" - pull_secret_path: "secret/k8s_operator/base-lsp.lsst.codes/pull-secret" - lab_environment: - AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" - AUTO_REPO_BRANCH: "prod" - AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" - LSST_DDS_INTERFACE: net1 - LSST_DDS_PARTITION_PREFIX: base - volumes: - - name: home - nfs: - path: /lsstdata/user/staff/jhome - server: ddn-nfs.ls.lsst.org - - name: project - nfs: - path: /lsstdata/user/staff/project - server: ddn-nfs.ls.lsst.org - - name: scratch - nfs: - path: /lsstdata/user/staff/scratch - server: ddn-nfs.ls.lsst.org - volume_mounts: - - name: home - mountPath: /home - - name: project - mountPath: /project - - name: scratch - mountPath: /scratch - - vault_secret_path: "secret/k8s_operator/base-lsp.lsst.codes/nublado2" - 
-pull-secret: - enabled: true - path: "secret/k8s_operator/base-lsp.lsst.codes/pull-secret" +config: + base_url: "https://base-lsp.lsst.codes" + butler_secret_path: "secret/k8s_operator/base-lsp.lsst.codes/butler-secret" + pull_secret_path: "secret/k8s_operator/base-lsp.lsst.codes/pull-secret" + lab_environment: + AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" + AUTO_REPO_BRANCH: "prod" + AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" + LSST_DDS_INTERFACE: net1 + LSST_DDS_PARTITION_PREFIX: base + LSST_SITE: base + volumes: + - name: home + nfs: + path: /lsstdata/user/staff/jhome + server: ddn-nfs.ls.lsst.org + - name: project + nfs: + path: /lsstdata/user/staff/project + server: ddn-nfs.ls.lsst.org + - name: scratch + nfs: + path: /lsstdata/user/staff/scratch + server: ddn-nfs.ls.lsst.org + volume_mounts: + - name: home + mountPath: /home + - name: project + mountPath: /project + - name: scratch + mountPath: /scratch diff --git a/services/nublado2/values-ccin2p3.yaml b/services/nublado2/values-ccin2p3.yaml new file mode 100644 index 0000000000..33e2c594ba --- /dev/null +++ b/services/nublado2/values-ccin2p3.yaml @@ -0,0 +1,209 @@ +jupyterhub: + debug: + enabled: true + hub: + db: + upgrade: true + cull: + enabled: true + users: false + removeNamedServers: false + timeout: 432000 + every: 300 + maxAge: 2160000 + + ingress: + hosts: ["data-dev.lsst.eu"] + annotations: + nginx.ingress.kubernetes.io/auth-signin: "https://data-dev.lsst.eu/login" + nginx.ingress.kubernetes.io/auth-url: "https://data-dev.lsst.eu/auth?scope=exec:notebook&notebook=true" + nginx.ingress.kubernetes.io/proxy-connect-timeout: "50s" + nginx.ingress.kubernetes.io/proxy-read-timeout: "50s" + nginx.ingress.kubernetes.io/client-max-body-size: "50m" + nginx.ingress.kubernetes.io/proxy-body-size: "50m" + +config: + base_url: "https://data-dev.lsst.eu" + butler_secret_path: "secret/k8s_operator/rsp-cc/butler-secret" + pull_secret_path: "secret/k8s_operator/rsp-cc/pull-secret"
+ lab_environment: + AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" + AUTO_REPO_BRANCH: "prod" + AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" + NO_ACTIVITY_TIMEOUT: "432000" + CULL_KERNEL_IDLE_TIMEOUT: "432000" + CULL_KERNEL_CONNECTED: "True" + CULL_KERNEL_INTERVAL: "300" + CULL_TERMINAL_INACTIVE_TIMEOUT: "432000" + CULL_TERMINAL_INTERVAL: "300" + pinned_images: + - image_url: registry.hub.docker.com/lsstsqre/sciplat-lab:recommended + name: Recommended + volumes: + - name: home + hostPath: + path: /pbs/home + + volume_mounts: + - name: home + mountPath: /home + + user_resources_template: | + - apiVersion: v1 + kind: Namespace + metadata: + name: "{{ user_namespace }}" + - apiVersion: v1 + kind: ConfigMap + metadata: + name: group + namespace: "{{ user_namespace }}" + data: + group: | + root:x:0: + bin:x:1: + daemon:x:2: + sys:x:3: + adm:x:4: + tty:x:5: + disk:x:6: + lp:x:7: + mem:x:8: + kmem:x:9: + wheel:x:10: + cdrom:x:11: + mail:x:12: + man:x:15: + dialout:x:18: + floppy:x:19: + games:x:20: + tape:x:33: + video:x:39: + ftp:x:50: + lock:x:54: + audio:x:63: + nobody:x:99: + users:x:100: + utmp:x:22: + utempter:x:35: + input:x:999: + systemd-journal:x:190: + systemd-network:x:192: + dbus:x:81: + ssh_keys:x:998: + lsst_lcl:x:1000:{{ user }} + tss:x:59: + cgred:x:997: + screen:x:84: + jovyan:x:768:{{ user }}{% for g in groups %} + {{ g.name }}:x:{{ g.id }}:{{ user if g.id != gid else "" }}{% endfor %} + - apiVersion: v1 + kind: ConfigMap + metadata: + name: passwd + namespace: "{{ user_namespace }}" + data: + passwd: | + root:x:0:0:root:/root:/bin/bash + bin:x:1:1:bin:/bin:/sbin/nologin + daemon:x:2:2:daemon:/sbin:/sbin/nologin + adm:x:3:4:adm:/var/adm:/sbin/nologin + lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin + sync:x:5:0:sync:/sbin:/bin/sync + shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown + halt:x:7:0:halt:/sbin:/sbin/halt + mail:x:8:12:mail:/var/spool/mail:/sbin/nologin + operator:x:11:0:operator:/root:/sbin/nologin + 
games:x:12:100:games:/usr/games:/sbin/nologin + ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin + nobody:x:99:99:Nobody:/:/sbin/nologin + systemd-network:x:192:192:systemd Network Management:/:/sbin/nologin + dbus:x:81:81:System message bus:/:/sbin/nologin + tss:x:59:59:Account used by the trousers package to sandbox the tcsd daemon:/dev/null:/sbin/nologin + {{ user }}:x:{{ uid }}:{{ gid if gid else uid }}::/home/{{ user[0] }}/{{ user }}/rsp_home:/bin/bash + - apiVersion: v1 + kind: ConfigMap + metadata: + name: dask + namespace: "{{ user_namespace }}" + data: + dask_worker.yml: | + {{ dask_yaml | indent(6) }} + # When we break out the resources we should make this per-instance + # configurable. + - apiVersion: v1 + kind: ConfigMap + metadata: + name: idds-config + namespace: "{{ user_namespace }}" + data: + idds_cfg.client.template: | + # Licensed under the Apache License, Version 2.0 (the "License"); + # You may not use this file except in compliance with the License. + # You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + # + # Authors: + # - Wen Guan, , 2020 + [common] + # if logdir is configured, idds will write to idds.log in this directory. + # else idds will go to stdout/stderr. + # With supervisord, it's good to write to stdout/stderr, then supervisord can manage and rotate logs. 
+ # logdir = /var/log/idds + loglevel = INFO + [rest] + host = https://iddsserver.cern.ch:443/idds + #url_prefix = /idds + #cacher_dir = /tmp + cacher_dir = /data/idds + - apiVersion: v1 + kind: ServiceAccount + metadata: + name: "{{ user }}-serviceaccount" + namespace: "{{ user_namespace }}" + imagePullSecrets: + - name: pull-secret + - apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + name: "{{ user }}-role" + namespace: "{{ user_namespace }}" + rules: + # cf https://kubernetes.dask.org/en/latest/kubecluster.html + - apiGroups: [""] + resources: ["pods", "services"] + verbs: ["create", "delete", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get","list"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["create", "delete", "get", "list", "watch"] + - apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: "{{ user }}-rolebinding" + namespace: "{{ user_namespace }}" + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: "{{ user }}-role" + subjects: + - kind: ServiceAccount + name: "{{ user }}-serviceaccount" + namespace: "{{ user_namespace }}" + - apiVersion: ricoberger.de/v1alpha1 + kind: VaultSecret + metadata: + name: butler-secret + namespace: "{{ user_namespace }}" + spec: + path: "{{ butler_secret_path }}" + type: Opaque + - apiVersion: ricoberger.de/v1alpha1 + kind: VaultSecret + metadata: + name: pull-secret + namespace: "{{ user_namespace }}" + spec: + path: "{{ pull_secret_path }}" + type: kubernetes.io/dockerconfigjson diff --git a/services/nublado2/values-idfdev.yaml b/services/nublado2/values-idfdev.yaml index a7736eb5c3..6ef89aafea 100644 --- a/services/nublado2/values-idfdev.yaml +++ b/services/nublado2/values-idfdev.yaml @@ -1,49 +1,61 @@ -nublado2: - jupyterhub: - hub: - resources: - requests: - cpu: 300m - memory: 512Mi - ingress: - hosts: ["data-dev.lsst.cloud"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: 
"https://data-dev.lsst.cloud/login" +jupyterhub: + hub: + config: + ServerApp: + shutdown_no_activity_timeout: 432000 + db: + upgrade: true - config: - base_url: "https://data-dev.lsst.cloud" - butler_secret_path: "secret/k8s_operator/data-dev.lsst.cloud/butler-secret" - pull_secret_path: "secret/k8s_operator/data-dev.lsst.cloud/pull-secret" - lab_environment: - PGPASSFILE: "/opt/lsst/software/jupyterlab/butler-secret/postgres-credentials.txt" - AWS_SHARED_CREDENTIALS_FILE: "/opt/lsst/software/jupyterlab/butler-secret/aws-credentials.ini" - S3_ENDPOINT_URL: "https://storage.googleapis.com" - AUTO_REPO_URLS: https://github.com/lsst-sqre/system-test,https://github.com/rubin-dp0/tutorial-notebooks - AUTO_REPO_BRANCH: prod - AUTO_REPO_SPECS: https://github.com/lsst-sqre/system-test@prod,https://github.com/rubin-dp0/tutorial-notebooks@prod - volumes: - - name: home - nfs: - path: /share1/home - server: 10.87.86.26 - - name: project - nfs: - path: /share1/project - server: 10.87.86.26 - - name: scratch - nfs: - path: /share1/scratch - server: 10.87.86.26 - volume_mounts: - - name: home - mountPath: /home - - name: project - mountPath: /project - - name: scratch - mountPath: /scratch + cull: + enabled: true + users: false + removeNamedServers: false + timeout: 432000 + every: 300 + maxAge: 2160000 - vault_secret_path: "secret/k8s_operator/data-dev.lsst.cloud/nublado2" + ingress: + hosts: ["data-dev.lsst.cloud"] + annotations: + nginx.ingress.kubernetes.io/auth-signin: "https://data-dev.lsst.cloud/login" -pull-secret: - enabled: true - path: "secret/k8s_operator/data-dev.lsst.cloud/pull-secret" +config: + base_url: "https://data-dev.lsst.cloud" + butler_secret_path: "secret/k8s_operator/data-dev.lsst.cloud/butler-secret" + pull_secret_path: "secret/k8s_operator/data-dev.lsst.cloud/pull-secret" + cachemachine_image_policy: "desired" + lab_environment: + PGPASSFILE: "/opt/lsst/software/jupyterlab/butler-secret/postgres-credentials.txt" + AWS_SHARED_CREDENTIALS_FILE: 
"/opt/lsst/software/jupyterlab/butler-secret/aws-credentials.ini" + S3_ENDPOINT_URL: "https://storage.googleapis.com" + GOOGLE_APPLICATION_CREDENTIALS: "/opt/lsst/software/jupyterlab/butler-secret/butler-gcs-idf-creds.json" + DAF_BUTLER_REPOSITORY_INDEX: "s3://butler-us-central1-repo-locations/data-int-repos.yaml" + AUTO_REPO_URLS: https://github.com/lsst-sqre/system-test,https://github.com/rubin-dp0/tutorial-notebooks + AUTO_REPO_BRANCH: prod + AUTO_REPO_SPECS: https://github.com/lsst-sqre/system-test@prod,https://github.com/rubin-dp0/tutorial-notebooks@prod + NO_ACTIVITY_TIMEOUT: "432000" + CULL_KERNEL_IDLE_TIMEOUT: "432000" + CULL_KERNEL_CONNECTED: "True" + CULL_KERNEL_INTERVAL: "300" + CULL_TERMINAL_INACTIVE_TIMEOUT: "432000" + CULL_TERMINAL_INTERVAL: "300" + volumes: + - name: home + nfs: + path: /share1/home + server: 10.87.86.26 + - name: project + nfs: + path: /share1/project + server: 10.87.86.26 + - name: scratch + nfs: + path: /share1/scratch + server: 10.87.86.26 + volume_mounts: + - name: home + mountPath: /home + - name: project + mountPath: /project + - name: scratch + mountPath: /scratch diff --git a/services/nublado2/values-idfint.yaml b/services/nublado2/values-idfint.yaml index cf4cbe7d22..a86b9072eb 100644 --- a/services/nublado2/values-idfint.yaml +++ b/services/nublado2/values-idfint.yaml @@ -1,313 +1,248 @@ -nublado2: - jupyterhub: - hub: - resources: - requests: - cpu: "1" - memory: 3Gi - ingress: - hosts: ["data-int.lsst.cloud"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: "https://data-int.lsst.cloud/login" - config: - base_url: "https://data-int.lsst.cloud" - butler_secret_path: "secret/k8s_operator/data-int.lsst.cloud/butler-secret" - pull_secret_path: "secret/k8s_operator/data-int.lsst.cloud/pull-secret" - lab_environment: - PGPASSFILE: "/opt/lsst/software/jupyterlab/butler-secret/postgres-credentials.txt" - AWS_SHARED_CREDENTIALS_FILE: "/opt/lsst/software/jupyterlab/butler-secret/aws-credentials.ini" - S3_ENDPOINT_URL: 
"https://storage.googleapis.com" - AUTO_REPO_URLS: https://github.com/lsst-sqre/system-test,https://github.com/rubin-dp0/tutorial-notebooks - AUTO_REPO_BRANCH: prod - AUTO_REPO_SPECS: https://github.com/lsst-sqre/system-test@prod,https://github.com/rubin-dp0/tutorial-notebooks@prod - PANDA_AUTH: oidc - PANDA_VERIFY_HOST: "off" - PANDA_AUTH_VO: Rubin - PANDA_URL_SSL: https://pandaserver-doma.cern.ch:25443/server/panda - PANDA_URL: http://pandaserver-doma.cern.ch:25080/server/panda - IDDS_CONFIG: /opt/lsst/software/jupyterlab/panda/idds.cfg.client.template - PANDA_CONFIG_ROOT: "~" - sizes: - - name: Small - cpu: 1 - ram: 3072M - - name: Medium - cpu: 2 - ram: 6144M - - name: Large - cpu: 4 - ram: 12288M - - name: Huge - cpu: 8 - ram: 24576M - volumes: - - name: home - nfs: - path: /share1/home - server: 10.22.240.130 - - name: project - nfs: - path: /share1/project - server: 10.22.240.130 - - name: scratch - nfs: - path: /share1/scratch - server: 10.22.240.130 - volume_mounts: - - name: home - mountPath: /home - - name: project - mountPath: /project - - name: scratch - mountPath: /scratch - # Workaround to impose resource quotas at IDF - user_resources_template: | - - apiVersion: v1 - kind: Namespace - metadata: - name: "{{ user_namespace }}" - - apiVersion: v1 - kind: ConfigMap - metadata: - name: group - namespace: "{{ user_namespace }}" - data: - group: | - root:x:0: - bin:x:1: - daemon:x:2: - sys:x:3: - adm:x:4: - tty:x:5: - disk:x:6: - lp:x:7: - mem:x:8: - kmem:x:9: - wheel:x:10: - cdrom:x:11: - mail:x:12: - man:x:15: - dialout:x:18: - floppy:x:19: - games:x:20: - tape:x:33: - video:x:39: - ftp:x:50: - lock:x:54: - audio:x:63: - nobody:x:99: - users:x:100: - utmp:x:22: - utempter:x:35: - input:x:999: - systemd-journal:x:190: - systemd-network:x:192: - dbus:x:81: - ssh_keys:x:998: - lsst_lcl:x:1000:{{ user }} - tss:x:59: - cgred:x:997: - screen:x:84: - jovyan:x:768:{{ user }} - provisionator:x:769: - {{user}}:x:{{uid}}:{% for group in groups %} - {{ group.name 
}}:x:{{ group.id }}:{{ user }}{% endfor %} - - apiVersion: v1 - kind: ConfigMap - metadata: - name: gshadow - namespace: "{{ user_namespace }}" - data: - gshadow: | - root:!:: - bin:!:: - daemon:!:: - sys:!:: - adm:!:: - tty:!:: - disk:!:: - lp:!:: - mem:!:: - kmem:!:: - wheel:!:: - cdrom:!:: - mail:!:: - man:!:: - dialout:!:: - floppy:!:: - games:!:: - tape:!:: - video:!:: - ftp:!:: - lock:!:: - audio:!:: - nobody:!:: - users:!:: - utmp:!:: - utempter:!:: - input:!:: - systemd-journal:!:: - systemd-network:!:: - dbus:!:: - ssh_keys:!:: - lsst_lcl:!::{{ user }} - tss:!:: - cgred:!:: - screen:!:: - jovyan:!::{{ user }} - provisionator:!:: - {{ user }}:!::{% for g in groups %} - {{ g.name }}:!::{{ user }}{% endfor %} - - apiVersion: v1 - kind: ConfigMap - metadata: - name: passwd - namespace: "{{ user_namespace }}" - data: - passwd: | - root:x:0:0:root:/root:/bin/bash - bin:x:1:1:bin:/bin:/sbin/nologin - daemon:x:2:2:daemon:/sbin:/sbin/nologin - adm:x:3:4:adm:/var/adm:/sbin/nologin - lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin - sync:x:5:0:sync:/sbin:/bin/sync - shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown - halt:x:7:0:halt:/sbin:/sbin/halt - mail:x:8:12:mail:/var/spool/mail:/sbin/nologin - operator:x:11:0:operator:/root:/sbin/nologin - games:x:12:100:games:/usr/games:/sbin/nologin - ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin - nobody:x:99:99:Nobody:/:/sbin/nologin - systemd-network:x:192:192:systemd Network Management:/:/sbin/nologin - dbus:x:81:81:System message bus:/:/sbin/nologin - lsst_lcl:x:1000:1000::/home/lsst_lcl:/bin/bash - tss:x:59:59:Account used by the trousers package to sandbox the tcsd daemon:/dev/null:/sbin/nologin - provisionator:x:769:769:Lab provisioning user:/home/provisionator:/bin/bash - {{ user }}:x:{{ uid }}:{{ uid }}::/home/{{ user }}:/bin/bash - - apiVersion: v1 - kind: ConfigMap - metadata: - name: shadow - namespace: "{{ user_namespace }}" - data: - shadow: | - root:*:18000:0:99999:7::: - bin:*:18000:0:99999:7::: - 
daemon:*:18000:0:99999:7::: - adm:*:18000:0:99999:7::: - lp:*:18000:0:99999:7::: - sync:*:18000:0:99999:7::: - shutdown:*:18000:0:99999:7::: - halt:*:18000:0:99999:7::: - mail:*:18000:0:99999:7::: - operator:*:18000:0:99999:7::: - games:*:18000:0:99999:7::: - ftp:*:18000:0:99999:7::: - nobody:*:18000:0:99999:7::: - systemd-network:*:18000:0:99999:7::: - dbus:*:18000:0:99999:7::: - lsst_lcl:*:18000:0:99999:7::: - tss:*:18000:0:99999:7::: - provisionator:*:18000:0:99999:7::: - {{user}}:*:18000:0:99999:7::: - - apiVersion: v1 - kind: ConfigMap - metadata: - name: dask - namespace: "{{ user_namespace }}" - data: - dask_worker.yml: | - {{ dask_yaml | indent(6) }} - # When we break out the resources we should make this per-instance - # configurable. - - apiVersion: v1 - kind: ConfigMap - metadata: - name: idds-config - namespace: "{{ user_namespace }}" - data: - idds.cfg.client.template: | - # Licensed under the Apache License, Version 2.0 (the "License"); - # You may not use this file except in compliance with the License. - # You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 - # - # Authors: - # - Wen Guan, , 2020 - [common] - # if logdir is configured, idds will write to idds.log in this directory. - # else idds will go to stdout/stderr. - # With supervisord, it's good to write to stdout/stderr, then supervisord can manage and rotate logs. 
- # logdir = /var/log/idds - loglevel = INFO - [rest] - host = https://iddsserver.cern.ch:443/idds - #url_prefix = /idds - #cacher_dir = /tmp - cacher_dir = /data/idds - - apiVersion: v1 - kind: ServiceAccount - metadata: - name: "{{ user }}-serviceaccount" - namespace: "{{ user_namespace }}" - imagePullSecrets: - - name: pull-secret - - apiVersion: rbac.authorization.k8s.io/v1 - kind: Role - metadata: - name: "{{ user }}-role" - namespace: "{{ user_namespace }}" - rules: - # cf https://kubernetes.dask.org/en/latest/kubecluster.html - - apiGroups: [""] - resources: ["pods", "services"] - verbs: ["create", "delete", "get", "list", "watch"] - - apiGroups: [""] - resources: ["pods/log"] - verbs: ["get","list"] - - apiGroups: ["policy"] - resources: ["poddisruptionbudgets"] - verbs: ["create", "delete", "get", "list", "watch"] - - apiVersion: rbac.authorization.k8s.io/v1 - kind: RoleBinding - metadata: - name: "{{ user }}-rolebinding" - namespace: "{{ user_namespace }}" - roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: "{{ user }}-role" - subjects: - - kind: ServiceAccount - name: "{{ user }}-serviceaccount" - namespace: "{{ user_namespace }}" - - apiVersion: ricoberger.de/v1alpha1 - kind: VaultSecret - metadata: - name: butler-secret - namespace: "{{ user_namespace }}" - spec: - path: "{{ butler_secret_path }}" - type: Opaque - - apiVersion: ricoberger.de/v1alpha1 - kind: VaultSecret - metadata: - name: pull-secret - namespace: "{{ user_namespace }}" - spec: - path: "{{ pull_secret_path }}" - type: kubernetes.io/dockerconfigjson - - apiVersion: v1 - kind: ResourceQuota - metadata: - name: user-quota - namespace: "{{ user_namespace }}" - spec: - hard: - limits.cpu: 9 - limits.memory: 27Gi +jupyterhub: + hub: + config: + ServerApp: + shutdown_no_activity_timeout: 432000 - vault_secret_path: "secret/k8s_operator/data-int.lsst.cloud/nublado2" + cull: + enabled: true + users: false + removeNamedServers: false + timeout: 432000 + every: 300 + maxAge: 
2160000 -pull-secret: - enabled: true - path: "secret/k8s_operator/data-int.lsst.cloud/pull-secret" + ingress: + hosts: ["data-int.lsst.cloud"] + annotations: + nginx.ingress.kubernetes.io/auth-signin: "https://data-int.lsst.cloud/login" +config: + base_url: "https://data-int.lsst.cloud" + butler_secret_path: "secret/k8s_operator/data-int.lsst.cloud/butler-secret" + pull_secret_path: "secret/k8s_operator/data-int.lsst.cloud/pull-secret" + cachemachine_image_policy: "desired" + lab_environment: + PGPASSFILE: "/opt/lsst/software/jupyterlab/butler-secret/postgres-credentials.txt" + AWS_SHARED_CREDENTIALS_FILE: "/opt/lsst/software/jupyterlab/butler-secret/aws-credentials.ini" + S3_ENDPOINT_URL: "https://storage.googleapis.com" + GOOGLE_APPLICATION_CREDENTIALS: "/opt/lsst/software/jupyterlab/butler-secret/butler-gcs-idf-creds.json" + DAF_BUTLER_REPOSITORY_INDEX: "s3://butler-us-central1-repo-locations/data-int-repos.yaml" + AUTO_REPO_URLS: https://github.com/lsst-sqre/system-test,https://github.com/rubin-dp0/tutorial-notebooks + AUTO_REPO_BRANCH: prod + AUTO_REPO_SPECS: https://github.com/lsst-sqre/system-test@prod,https://github.com/rubin-dp0/tutorial-notebooks@prod + PANDA_AUTH: oidc + PANDA_VERIFY_HOST: "off" + PANDA_AUTH_VO: Rubin + PANDA_URL_SSL: https://pandaserver-doma.cern.ch:25443/server/panda + PANDA_URL: http://pandaserver-doma.cern.ch:25080/server/panda + IDDS_CONFIG: /opt/lsst/software/jupyterlab/panda/idds.cfg.client.template + PANDA_CONFIG_ROOT: "~" + NO_ACTIVITY_TIMEOUT: "432000" + CULL_KERNEL_IDLE_TIMEOUT: "432000" + CULL_KERNEL_CONNECTED: "True" + CULL_KERNEL_INTERVAL: "300" + CULL_TERMINAL_INACTIVE_TIMEOUT: "432000" + CULL_TERMINAL_INTERVAL: "300" + sizes: + - name: Small + cpu: 1 + ram: 3072M + - name: Medium + cpu: 2 + ram: 6144M + - name: Large + cpu: 4 + ram: 12288M + - name: Huge + cpu: 8 + ram: 24576M + volumes: + - name: home + nfs: + path: /share1/home + server: 10.22.240.130 + - name: project + nfs: + path: /share1/project + server: 
10.22.240.130 + - name: scratch + nfs: + path: /share1/scratch + server: 10.22.240.130 + volume_mounts: + - name: home + mountPath: /home + - name: project + mountPath: /project + - name: scratch + mountPath: /scratch + # Workaround to impose resource quotas at IDF + user_resources_template: | + - apiVersion: v1 + kind: Namespace + metadata: + name: "{{ user_namespace }}" + - apiVersion: v1 + kind: ConfigMap + metadata: + name: group + namespace: "{{ user_namespace }}" + data: + group: | + root:x:0: + bin:x:1: + daemon:x:2: + sys:x:3: + adm:x:4: + tty:x:5: + disk:x:6: + lp:x:7: + mem:x:8: + kmem:x:9: + wheel:x:10: + cdrom:x:11: + mail:x:12: + man:x:15: + dialout:x:18: + floppy:x:19: + games:x:20: + tape:x:33: + video:x:39: + ftp:x:50: + lock:x:54: + audio:x:63: + nobody:x:99: + users:x:100: + utmp:x:22: + utempter:x:35: + input:x:999: + systemd-journal:x:190: + systemd-network:x:192: + dbus:x:81: + ssh_keys:x:998: + lsst_lcl:x:1000:{{ user }} + tss:x:59: + cgred:x:997: + screen:x:84: + jovyan:x:768:{{ user }}{% for g in groups %} + {{ g.name }}:x:{{ g.id }}:{{ user if g.id != gid else "" }}{% endfor %} + - apiVersion: v1 + kind: ConfigMap + metadata: + name: passwd + namespace: "{{ user_namespace }}" + data: + passwd: | + root:x:0:0:root:/root:/bin/bash + bin:x:1:1:bin:/bin:/sbin/nologin + daemon:x:2:2:daemon:/sbin:/sbin/nologin + adm:x:3:4:adm:/var/adm:/sbin/nologin + lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin + sync:x:5:0:sync:/sbin:/bin/sync + shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown + halt:x:7:0:halt:/sbin:/sbin/halt + mail:x:8:12:mail:/var/spool/mail:/sbin/nologin + operator:x:11:0:operator:/root:/sbin/nologin + games:x:12:100:games:/usr/games:/sbin/nologin + ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin + nobody:x:99:99:Nobody:/:/sbin/nologin + systemd-network:x:192:192:systemd Network Management:/:/sbin/nologin + dbus:x:81:81:System message bus:/:/sbin/nologin + lsst_lcl:x:1000:1000::/home/lsst_lcl:/bin/bash + tss:x:59:59:Account used by the trousers package 
to sandbox the tcsd daemon:/dev/null:/sbin/nologin + {{ user }}:x:{{ uid }}:{{ gid if gid else uid }}::/home/{{ user }}:/bin/bash + - apiVersion: v1 + kind: ConfigMap + metadata: + name: dask + namespace: "{{ user_namespace }}" + data: + dask_worker.yml: | + {{ dask_yaml | indent(6) }} + # When we break out the resources we should make this per-instance + # configurable. + - apiVersion: v1 + kind: ConfigMap + metadata: + name: idds-config + namespace: "{{ user_namespace }}" + data: + idds.cfg.client.template: | + # Licensed under the Apache License, Version 2.0 (the "License"); + # You may not use this file except in compliance with the License. + # You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + # + # Authors: + # - Wen Guan, , 2020 + [common] + # if logdir is configured, idds will write to idds.log in this directory. + # else idds will go to stdout/stderr. + # With supervisord, it's good to write to stdout/stderr, then supervisord can manage and rotate logs. 
+ # logdir = /var/log/idds + loglevel = INFO + [rest] + host = https://iddsserver.cern.ch:443/idds + #url_prefix = /idds + #cacher_dir = /tmp + cacher_dir = /data/idds + - apiVersion: v1 + kind: ServiceAccount + metadata: + name: "{{ user }}-serviceaccount" + namespace: "{{ user_namespace }}" + imagePullSecrets: + - name: pull-secret + - apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + name: "{{ user }}-role" + namespace: "{{ user_namespace }}" + rules: + # cf https://kubernetes.dask.org/en/latest/kubecluster.html + - apiGroups: [""] + resources: ["pods", "services"] + verbs: ["create", "delete", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get","list"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["create", "delete", "get", "list", "watch"] + - apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: "{{ user }}-rolebinding" + namespace: "{{ user_namespace }}" + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: "{{ user }}-role" + subjects: + - kind: ServiceAccount + name: "{{ user }}-serviceaccount" + namespace: "{{ user_namespace }}" + - apiVersion: ricoberger.de/v1alpha1 + kind: VaultSecret + metadata: + name: butler-secret + namespace: "{{ user_namespace }}" + spec: + path: "{{ butler_secret_path }}" + type: Opaque + - apiVersion: ricoberger.de/v1alpha1 + kind: VaultSecret + metadata: + name: pull-secret + namespace: "{{ user_namespace }}" + spec: + path: "{{ pull_secret_path }}" + type: kubernetes.io/dockerconfigjson + - apiVersion: v1 + kind: ResourceQuota + metadata: + name: user-quota + namespace: "{{ user_namespace }}" + spec: + hard: + limits.cpu: 9 + limits.memory: 27Gi diff --git a/services/nublado2/values-idfprod.yaml b/services/nublado2/values-idfprod.yaml index a6252cf53d..ff3cb92991 100644 --- a/services/nublado2/values-idfprod.yaml +++ b/services/nublado2/values-idfprod.yaml @@ -1,293 +1,228 @@ -nublado2: - jupyterhub: - 
hub: - resources: - requests: - cpu: "1" - memory: 3Gi - ingress: - hosts: ["data.lsst.cloud"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: "https://data.lsst.cloud/login" - config: - base_url: "https://data.lsst.cloud" - butler_secret_path: "secret/k8s_operator/data.lsst.cloud/butler-secret" - pull_secret_path: "secret/k8s_operator/data.lsst.cloud/pull-secret" - lab_environment: - PGPASSFILE: "/opt/lsst/software/jupyterlab/butler-secret/postgres-credentials.txt" - AWS_SHARED_CREDENTIALS_FILE: "/opt/lsst/software/jupyterlab/butler-secret/aws-credentials.ini" - S3_ENDPOINT_URL: "https://storage.googleapis.com" - AUTO_REPO_URLS: https://github.com/lsst-sqre/system-test,https://github.com/rubin-dp0/tutorial-notebooks - AUTO_REPO_BRANCH: prod - AUTO_REPO_SPECS: https://github.com/lsst-sqre/system-test@prod,https://github.com/rubin-dp0/tutorial-notebooks@prod - volumes: - - name: home - nfs: - path: /share1/home - server: 10.13.105.122 - - name: project - nfs: - path: /share1/project - server: 10.13.105.122 - - name: scratch - nfs: - path: /share1/scratch - server: 10.13.105.122 - volume_mounts: - - name: home - mountPath: /home - - name: project - mountPath: /project - - name: scratch - mountPath: /scratch - # Workaround to impose resource quotas at IDF - user_resources_template: | - - apiVersion: v1 - kind: Namespace - metadata: - name: "{{ user_namespace }}" - - apiVersion: v1 - kind: ConfigMap - metadata: - name: group - namespace: "{{ user_namespace }}" - data: - group: | - root:x:0: - bin:x:1: - daemon:x:2: - sys:x:3: - adm:x:4: - tty:x:5: - disk:x:6: - lp:x:7: - mem:x:8: - kmem:x:9: - wheel:x:10: - cdrom:x:11: - mail:x:12: - man:x:15: - dialout:x:18: - floppy:x:19: - games:x:20: - tape:x:33: - video:x:39: - ftp:x:50: - lock:x:54: - audio:x:63: - nobody:x:99: - users:x:100: - utmp:x:22: - utempter:x:35: - input:x:999: - systemd-journal:x:190: - systemd-network:x:192: - dbus:x:81: - ssh_keys:x:998: - lsst_lcl:x:1000:{{ user }} - tss:x:59: - 
cgred:x:997: - screen:x:84: - jovyan:x:768:{{ user }} - provisionator:x:769: - {{user}}:x:{{uid}}:{% for group in groups %} - {{ group.name }}:x:{{ group.id }}:{{ user }}{% endfor %} - - apiVersion: v1 - kind: ConfigMap - metadata: - name: gshadow - namespace: "{{ user_namespace }}" - data: - gshadow: | - root:!:: - bin:!:: - daemon:!:: - sys:!:: - adm:!:: - tty:!:: - disk:!:: - lp:!:: - mem:!:: - kmem:!:: - wheel:!:: - cdrom:!:: - mail:!:: - man:!:: - dialout:!:: - floppy:!:: - games:!:: - tape:!:: - video:!:: - ftp:!:: - lock:!:: - audio:!:: - nobody:!:: - users:!:: - utmp:!:: - utempter:!:: - input:!:: - systemd-journal:!:: - systemd-network:!:: - dbus:!:: - ssh_keys:!:: - lsst_lcl:!::{{ user }} - tss:!:: - cgred:!:: - screen:!:: - jovyan:!::{{ user }} - provisionator:!:: - {{ user }}:!::{% for g in groups %} - {{ g.name }}:!::{{ user }}{% endfor %} - - apiVersion: v1 - kind: ConfigMap - metadata: - name: passwd - namespace: "{{ user_namespace }}" - data: - passwd: | - root:x:0:0:root:/root:/bin/bash - bin:x:1:1:bin:/bin:/sbin/nologin - daemon:x:2:2:daemon:/sbin:/sbin/nologin - adm:x:3:4:adm:/var/adm:/sbin/nologin - lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin - sync:x:5:0:sync:/sbin:/bin/sync - shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown - halt:x:7:0:halt:/sbin:/sbin/halt - mail:x:8:12:mail:/var/spool/mail:/sbin/nologin - operator:x:11:0:operator:/root:/sbin/nologin - games:x:12:100:games:/usr/games:/sbin/nologin - ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin - nobody:x:99:99:Nobody:/:/sbin/nologin - systemd-network:x:192:192:systemd Network Management:/:/sbin/nologin - dbus:x:81:81:System message bus:/:/sbin/nologin - lsst_lcl:x:1000:1000::/home/lsst_lcl:/bin/bash - tss:x:59:59:Account used by the trousers package to sandbox the tcsd daemon:/dev/null:/sbin/nologin - provisionator:x:769:769:Lab provisioning user:/home/provisionator:/bin/bash - {{ user }}:x:{{ uid }}:{{ uid }}::/home/{{ user }}:/bin/bash - - apiVersion: v1 - kind: ConfigMap - metadata: - name: 
shadow - namespace: "{{ user_namespace }}" - data: - shadow: | - root:*:18000:0:99999:7::: - bin:*:18000:0:99999:7::: - daemon:*:18000:0:99999:7::: - adm:*:18000:0:99999:7::: - lp:*:18000:0:99999:7::: - sync:*:18000:0:99999:7::: - shutdown:*:18000:0:99999:7::: - halt:*:18000:0:99999:7::: - mail:*:18000:0:99999:7::: - operator:*:18000:0:99999:7::: - games:*:18000:0:99999:7::: - ftp:*:18000:0:99999:7::: - nobody:*:18000:0:99999:7::: - systemd-network:*:18000:0:99999:7::: - dbus:*:18000:0:99999:7::: - lsst_lcl:*:18000:0:99999:7::: - tss:*:18000:0:99999:7::: - provisionator:*:18000:0:99999:7::: - {{user}}:*:18000:0:99999:7::: - - apiVersion: v1 - kind: ConfigMap - metadata: - name: dask - namespace: "{{ user_namespace }}" - data: - dask_worker.yml: | - {{ dask_yaml | indent(6) }} - # When we break out the resources we should make this per-instance - # configurable. - - apiVersion: v1 - kind: ConfigMap - metadata: - name: idds-config - namespace: "{{ user_namespace }}" - data: - idds_cfg.client.template: | - # Licensed under the Apache License, Version 2.0 (the "License"); - # You may not use this file except in compliance with the License. - # You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 - # - # Authors: - # - Wen Guan, , 2020 - [common] - # if logdir is configured, idds will write to idds.log in this directory. - # else idds will go to stdout/stderr. - # With supervisord, it's good to write to stdout/stderr, then supervisord can manage and rotate logs. 
- # logdir = /var/log/idds - loglevel = INFO - [rest] - host = https://iddsserver.cern.ch:443/idds - #url_prefix = /idds - #cacher_dir = /tmp - cacher_dir = /data/idds - - apiVersion: v1 - kind: ServiceAccount - metadata: - name: "{{ user }}-serviceaccount" - namespace: "{{ user_namespace }}" - imagePullSecrets: - - name: pull-secret - - apiVersion: rbac.authorization.k8s.io/v1 - kind: Role - metadata: - name: "{{ user }}-role" - namespace: "{{ user_namespace }}" - rules: - # cf https://kubernetes.dask.org/en/latest/kubecluster.html - - apiGroups: [""] - resources: ["pods", "services"] - verbs: ["create", "delete", "get", "list", "watch"] - - apiGroups: [""] - resources: ["pods/log"] - verbs: ["get","list"] - - apiGroups: ["policy"] - resources: ["poddisruptionbudgets"] - verbs: ["create", "delete", "get", "list", "watch"] - - apiVersion: rbac.authorization.k8s.io/v1 - kind: RoleBinding - metadata: - name: "{{ user }}-rolebinding" - namespace: "{{ user_namespace }}" - roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: "{{ user }}-role" - subjects: - - kind: ServiceAccount - name: "{{ user }}-serviceaccount" - namespace: "{{ user_namespace }}" - - apiVersion: ricoberger.de/v1alpha1 - kind: VaultSecret - metadata: - name: butler-secret - namespace: "{{ user_namespace }}" - spec: - path: "{{ butler_secret_path }}" - type: Opaque - - apiVersion: ricoberger.de/v1alpha1 - kind: VaultSecret - metadata: - name: pull-secret - namespace: "{{ user_namespace }}" - spec: - path: "{{ pull_secret_path }}" - type: kubernetes.io/dockerconfigjson - - apiVersion: v1 - kind: ResourceQuota - metadata: - name: user-quota - namespace: "{{ user_namespace }}" - spec: - hard: - limits.cpu: 9 - limits.memory: 27Gi +jupyterhub: + hub: + config: + ServerApp: + shutdown_no_activity_timeout: 432000 - vault_secret_path: "secret/k8s_operator/data.lsst.cloud/nublado2" + cull: + enabled: true + users: false + removeNamedServers: false + timeout: 432000 + every: 300 + maxAge: 2160000 
-pull-secret: - enabled: true - path: "secret/k8s_operator/data.lsst.cloud/pull-secret" + ingress: + hosts: ["data.lsst.cloud"] + annotations: + nginx.ingress.kubernetes.io/auth-signin: "https://data.lsst.cloud/login" +config: + base_url: "https://data.lsst.cloud" + butler_secret_path: "secret/k8s_operator/data.lsst.cloud/butler-secret" + pull_secret_path: "secret/k8s_operator/data.lsst.cloud/pull-secret" + cachemachine_image_policy: "desired" + lab_environment: + PGPASSFILE: "/opt/lsst/software/jupyterlab/butler-secret/postgres-credentials.txt" + AWS_SHARED_CREDENTIALS_FILE: "/opt/lsst/software/jupyterlab/butler-secret/aws-credentials.ini" + GOOGLE_APPLICATION_CREDENTIALS: "/opt/lsst/software/jupyterlab/butler-secret/butler-gcs-idf-creds.json" + DAF_BUTLER_REPOSITORY_INDEX: "s3://butler-us-central1-repo-locations/data-repos.yaml" + S3_ENDPOINT_URL: "https://storage.googleapis.com" + AUTO_REPO_URLS: https://github.com/lsst-sqre/system-test,https://github.com/rubin-dp0/tutorial-notebooks + AUTO_REPO_BRANCH: prod + AUTO_REPO_SPECS: https://github.com/lsst-sqre/system-test@prod,https://github.com/rubin-dp0/tutorial-notebooks@prod + NO_ACTIVITY_TIMEOUT: "432000" + CULL_KERNEL_IDLE_TIMEOUT: "432000" + CULL_KERNEL_CONNECTED: "True" + CULL_KERNEL_INTERVAL: "300" + CULL_TERMINAL_INACTIVE_TIMEOUT: "432000" + CULL_TERMINAL_INTERVAL: "300" + volumes: + - name: home + nfs: + path: /share1/home + server: 10.13.105.122 + - name: project + nfs: + path: /share1/project + server: 10.13.105.122 + - name: scratch + nfs: + path: /share1/scratch + server: 10.13.105.122 + volume_mounts: + - name: home + mountPath: /home + - name: project + mountPath: /project + - name: scratch + mountPath: /scratch + # Workaround to impose resource quotas at IDF + user_resources_template: | + - apiVersion: v1 + kind: Namespace + metadata: + name: "{{ user_namespace }}" + - apiVersion: v1 + kind: ConfigMap + metadata: + name: group + namespace: "{{ user_namespace }}" + data: + group: | + root:x:0: + 
bin:x:1: + daemon:x:2: + sys:x:3: + adm:x:4: + tty:x:5: + disk:x:6: + lp:x:7: + mem:x:8: + kmem:x:9: + wheel:x:10: + cdrom:x:11: + mail:x:12: + man:x:15: + dialout:x:18: + floppy:x:19: + games:x:20: + tape:x:33: + video:x:39: + ftp:x:50: + lock:x:54: + audio:x:63: + nobody:x:99: + users:x:100: + utmp:x:22: + utempter:x:35: + input:x:999: + systemd-journal:x:190: + systemd-network:x:192: + dbus:x:81: + ssh_keys:x:998: + lsst_lcl:x:1000:{{ user }} + tss:x:59: + cgred:x:997: + screen:x:84: + jovyan:x:768:{{ user }}{% for g in groups %} + {{ g.name }}:x:{{ g.id }}:{{ user if g.id != gid else "" }}{% endfor %} + - apiVersion: v1 + kind: ConfigMap + metadata: + name: passwd + namespace: "{{ user_namespace }}" + data: + passwd: | + root:x:0:0:root:/root:/bin/bash + bin:x:1:1:bin:/bin:/sbin/nologin + daemon:x:2:2:daemon:/sbin:/sbin/nologin + adm:x:3:4:adm:/var/adm:/sbin/nologin + lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin + sync:x:5:0:sync:/sbin:/bin/sync + shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown + halt:x:7:0:halt:/sbin:/sbin/halt + mail:x:8:12:mail:/var/spool/mail:/sbin/nologin + operator:x:11:0:operator:/root:/sbin/nologin + games:x:12:100:games:/usr/games:/sbin/nologin + ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin + nobody:x:99:99:Nobody:/:/sbin/nologin + systemd-network:x:192:192:systemd Network Management:/:/sbin/nologin + dbus:x:81:81:System message bus:/:/sbin/nologin + lsst_lcl:x:1000:1000::/home/lsst_lcl:/bin/bash + tss:x:59:59:Account used by the trousers package to sandbox the tcsd daemon:/dev/null:/sbin/nologin + {{ user }}:x:{{ uid }}:{{ gid if gid else uid }}::/home/{{ user }}:/bin/bash + - apiVersion: v1 + kind: ConfigMap + metadata: + name: dask + namespace: "{{ user_namespace }}" + data: + dask_worker.yml: | + {{ dask_yaml | indent(6) }} + # When we break out the resources we should make this per-instance + # configurable. 
+ - apiVersion: v1 + kind: ConfigMap + metadata: + name: idds-config + namespace: "{{ user_namespace }}" + data: + idds_cfg.client.template: | + # Licensed under the Apache License, Version 2.0 (the "License"); + # You may not use this file except in compliance with the License. + # You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + # + # Authors: + # - Wen Guan, , 2020 + [common] + # if logdir is configured, idds will write to idds.log in this directory. + # else idds will go to stdout/stderr. + # With supervisord, it's good to write to stdout/stderr, then supervisord can manage and rotate logs. + # logdir = /var/log/idds + loglevel = INFO + [rest] + host = https://iddsserver.cern.ch:443/idds + #url_prefix = /idds + #cacher_dir = /tmp + cacher_dir = /data/idds + - apiVersion: v1 + kind: ServiceAccount + metadata: + name: "{{ user }}-serviceaccount" + namespace: "{{ user_namespace }}" + imagePullSecrets: + - name: pull-secret + - apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + name: "{{ user }}-role" + namespace: "{{ user_namespace }}" + rules: + # cf https://kubernetes.dask.org/en/latest/kubecluster.html + - apiGroups: [""] + resources: ["pods", "services"] + verbs: ["create", "delete", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get","list"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["create", "delete", "get", "list", "watch"] + - apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: "{{ user }}-rolebinding" + namespace: "{{ user_namespace }}" + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: "{{ user }}-role" + subjects: + - kind: ServiceAccount + name: "{{ user }}-serviceaccount" + namespace: "{{ user_namespace }}" + - apiVersion: ricoberger.de/v1alpha1 + kind: VaultSecret + metadata: + name: butler-secret + namespace: "{{ user_namespace }}" + spec: + path: "{{ butler_secret_path }}" + type: 
Opaque + - apiVersion: ricoberger.de/v1alpha1 + kind: VaultSecret + metadata: + name: pull-secret + namespace: "{{ user_namespace }}" + spec: + path: "{{ pull_secret_path }}" + type: kubernetes.io/dockerconfigjson + - apiVersion: v1 + kind: ResourceQuota + metadata: + name: user-quota + namespace: "{{ user_namespace }}" + spec: + hard: + limits.cpu: 9 + limits.memory: 27Gi diff --git a/services/nublado2/values-int.yaml b/services/nublado2/values-int.yaml deleted file mode 100644 index 2dd300ecd6..0000000000 --- a/services/nublado2/values-int.yaml +++ /dev/null @@ -1,47 +0,0 @@ -nublado2: - jupyterhub: - ingress: - hosts: ["lsst-lsp-int.ncsa.illinois.edu"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: "https://lsst-lsp-int.ncsa.illinois.edu/login" - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-int.ncsa.illinois.edu/auth?scope=exec:notebook¬ebook=true" - - config: - base_url: "https://lsst-lsp-int.ncsa.illinois.edu" - butler_secret_path: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/butler-secret" - pull_secret_path: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret" - lab_environment: - AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" - AUTO_REPO_BRANCH: "NCSA-prod" - AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@NCSA-prod" - pinned_images: - - image_url: registry.hub.docker.com/lsstsqre/sciplat-lab:recommended - name: Recommended - volumes: - - name: datasets - hostPath: - path: /lsstdata/user/precursor_data/datasets - - name: home - hostPath: - path: /lsstdata/user/staff/jhome - - name: project - hostPath: - path: /lsstdata/user/staff/project - - name: scratch - hostPath: - path: /lsstdata/user/staff/scratch - volume_mounts: - - name: datasets - mountPath: /datasets - - name: home - mountPath: /home - - name: project - mountPath: /project - - name: scratch - mountPath: /scratch - - vault_secret_path: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/nublado2" - -pull-secret: - enabled: true - 
path: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret" diff --git a/services/nublado2/values-minikube.yaml b/services/nublado2/values-minikube.yaml index a824676526..ddb9f8c155 100644 --- a/services/nublado2/values-minikube.yaml +++ b/services/nublado2/values-minikube.yaml @@ -1,28 +1,21 @@ -nublado2: - jupyterhub: - debug: - enabled: true - ingress: - hosts: ["minikube.lsst.codes"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: "https://minikube.lsst.codes/login" - config: - base_url: "https://minikube.lsst.codes" - butler_secret_path: "secret/k8s_operator/minikube.lsst.codes/butler-secret" - pull_secret_path: "secret/k8s_operator/minikube.lsst.codes/pull-secret" - lab_environment: - AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" - AUTO_REPO_BRANCH: "prod" - AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" - volumes: - - name: home - emptyDir: {} - volume_mounts: - - name: home - mountPath: /home - - vault_secret_path: "secret/k8s_operator/minikube.lsst.codes/nublado2" - -pull-secret: - enabled: true - path: "secret/k8s_operator/minikube.lsst.codes/pull-secret" +jupyterhub: + debug: + enabled: true + ingress: + hosts: ["minikube.lsst.codes"] + annotations: + nginx.ingress.kubernetes.io/auth-signin: "https://minikube.lsst.codes/login" +config: + base_url: "https://minikube.lsst.codes" + butler_secret_path: "secret/k8s_operator/minikube.lsst.codes/butler-secret" + pull_secret_path: "secret/k8s_operator/minikube.lsst.codes/pull-secret" + lab_environment: + AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" + AUTO_REPO_BRANCH: "prod" + AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" + volumes: + - name: home + emptyDir: {} + volume_mounts: + - name: home + mountPath: /home diff --git a/services/nublado2/values-red-five.yaml b/services/nublado2/values-red-five.yaml deleted file mode 100644 index b406f4eeff..0000000000 --- a/services/nublado2/values-red-five.yaml +++ /dev/null @@ -1,50 +0,0 
@@ -nublado2: - jupyterhub: - debug: - enabled: true - - ingress: - hosts: ["red-five.lsst.codes"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: "https://red-five.lsst.codes/login" - - config: - base_url: "https://red-five.lsst.codes" - butler_secret_path: "secret/k8s_operator/red-five.lsst.codes/butler-secret" - pull_secret_path: "secret/k8s_operator/red-five.lsst.codes/pull-secret" - lab_environment: - AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" - AUTO_REPO_BRANCH: "prod" - AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" - volumes: - - name: home - nfs: - path: /exports/home - server: 10.128.0.49 - - name: datasets - nfs: - path: /exports/datasets - server: 10.128.0.49 - - name: project - nfs: - path: /exports/project - server: 10.128.0.49 - - name: scratch - nfs: - path: /exports/scratch - server: 10.128.0.49 - volume_mounts: - - name: home - mountPath: /home - - name: datasets - mountPath: /datasets - - name: project - mountPath: /project - - name: scratch - mountPath: /scratch - - vault_secret_path: "secret/k8s_operator/red-five.lsst.codes/nublado2" - -pull-secret: - enabled: true - path: "secret/k8s_operator/red-five.lsst.codes/pull-secret" diff --git a/services/nublado2/values-roe.yaml b/services/nublado2/values-roe.yaml index ce312639c7..186887deb5 100644 --- a/services/nublado2/values-roe.yaml +++ b/services/nublado2/values-roe.yaml @@ -1,46 +1,43 @@ -nublado2: - jupyterhub: - ingress: - hosts: ["rsp.lsst.ac.uk"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: "https://rsp.lsst.ac.uk/login" - nginx.ingress.kubernetes.io/auth-url: "https://rsp.lsst.ac.uk/auth?scope=exec:notebook¬ebook=true" +jupyterhub: + ingress: + hosts: ["rsp.lsst.ac.uk"] + annotations: + nginx.ingress.kubernetes.io/auth-signin: "https://rsp.lsst.ac.uk/login" + nginx.ingress.kubernetes.io/auth-url: "https://rsp.lsst.ac.uk/auth?scope=exec:notebook¬ebook=true" - config: - base_url: "https://rsp.lsst.ac.uk" - butler_secret_path: 
"secret/k8s_operator/roe/butler-secret" - pull_secret_path: "secret/k8s_operator/roe/pull-secret" - lab_environment: - AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" - AUTO_REPO_BRANCH: "prod" - AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" - pinned_images: - - image_url: registry.hub.docker.com/lsstsqre/sciplat-lab:recommended - name: Recommended - volumes: - - name: datasets - hostPath: - path: /lsstdata/user/precursor_data/datasets - - name: home - hostPath: - path: /lsstdata/user/staff/jhome - - name: project - hostPath: - path: /lsstdata/user/staff/project - - name: scratch - hostPath: - path: /lsstdata/user/staff/scratch - volume_mounts: - - name: datasets - mountPath: /datasets - - name: home - mountPath: /home - - name: project - mountPath: /project - - name: scratch - mountPath: /scratch +config: + base_url: "https://rsp.lsst.ac.uk" + butler_secret_path: "secret/k8s_operator/roe/butler-secret" + pull_secret_path: "secret/k8s_operator/roe/pull-secret" + lab_environment: + AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" + AUTO_REPO_BRANCH: "prod" + AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" + pinned_images: + - image_url: registry.hub.docker.com/lsstsqre/sciplat-lab:recommended + name: Recommended + volumes: + - name: data + nfs: + path: /data + server: 10.72.0.23 + - name: home + nfs: + path: /jhome + server: 10.72.0.23 + - name: datasets + nfs: + path: /datasets + server: 10.72.0.23 + volume_mounts: + - name: data + mountPath: /data + - name: home + mountPath: /home + - name: datasets + mountPath: /datasets - vault_secret_path: "secret/k8s_operator/roe/nublado2" +vault_secret_path: "secret/k8s_operator/roe/nublado2" pull-secret: enabled: true diff --git a/services/nublado2/values-stable.yaml b/services/nublado2/values-stable.yaml deleted file mode 100644 index 676ad84d35..0000000000 --- a/services/nublado2/values-stable.yaml +++ /dev/null @@ -1,61 +0,0 @@ -nublado2: - jupyterhub: - ingress: - 
hosts: ["lsst-lsp-stable.ncsa.illinois.edu"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: "https://lsst-lsp-stable.ncsa.illinois.edu/login" - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-stable.ncsa.illinois.edu/auth?scope=exec:notebook¬ebook=true" - - config: - base_url: "https://lsst-lsp-stable.ncsa.illinois.edu" - butler_secret_path: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/butler-secret" - pull_secret_path: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret" - lab_environment: - AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" - AUTO_REPO_BRANCH: "NCSA-prod" - AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@NCSA-prod" - volumes: - - name: datasets - hostPath: - path: /lsstdata/user/precursor_data/datasets - - name: home - hostPath: - path: /lsstdata/user/staff/jhome - - name: project - hostPath: - path: /lsstdata/user/staff/project - - name: scratch - hostPath: - path: /lsstdata/user/staff/scratch - - name: teststand - hostPath: - path: /lsstdata/offline/teststand - - name: instrument - hostPath: - path: /lsstdata/offline/instrument - - name: repo - hostPath: - path: /repo - volume_mounts: - - name: datasets - mountPath: /datasets - - name: home - mountPath: /home - - name: project - mountPath: /project - - name: scratch - mountPath: /scratch - - name: teststand - mountPath: /lsstdata/offline/teststand - readOnly: true - - name: instrument - mountPath: /lsstdata/offline/instrument - readOnly: true - - name: repo - mountPath: /repo - - vault_secret_path: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/nublado2" - -pull-secret: - enabled: true - path: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret" diff --git a/services/nublado2/values-summit.yaml b/services/nublado2/values-summit.yaml index 9038db69df..de9404621d 100644 --- a/services/nublado2/values-summit.yaml +++ b/services/nublado2/values-summit.yaml @@ -1,104 +1,104 @@ -nublado2: - jupyterhub: - ingress: - 
hosts: ["summit-lsp.lsst.codes"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: "https://summit-lsp.lsst.codes/login" +jupyterhub: + ingress: + hosts: ["summit-lsp.lsst.codes"] + annotations: + nginx.ingress.kubernetes.io/auth-signin: "https://summit-lsp.lsst.codes/login" + hub: + db: + upgrade: true + url: "postgresql://jovyan@postgresdb01.cp.lsst.org/jupyterhub" + singleuser: + extraAnnotations: + k8s.v1.cni.cncf.io/networks: "kube-system/macvlan-conf" + initContainers: + - name: "multus-init" + image: "lsstit/ddsnet4u:latest" + securityContext: + privileged: true - singleuser: - extraAnnotations: - k8s.v1.cni.cncf.io/networks: "kube-system/macvlan-conf" - initContainers: - - name: "multus-init" - image: "lsstit/ddsnet4u:latest" - securityContext: - privileged: true - - config: - base_url: "https://summit-lsp.lsst.codes" - butler_secret_path: "secret/k8s_operator/summit-lsp.lsst.codes/butler-secret" - pull_secret_path: "secret/k8s_operator/summit-lsp.lsst.codes/pull-secret" - lab_environment: - AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" - AUTO_REPO_BRANCH: "prod" - AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" - LSST_DDS_INTERFACE: net1 - LSST_DDS_PARTITION_PREFIX: summit - volumes: - - name: home - nfs: - path: /jhome - server: nfs1.cp.lsst.org - - name: project - nfs: - path: /project - server: nfs1.cp.lsst.org - - name: scratch - nfs: - path: /scratch - server: nfs1.cp.lsst.org - - name: auxtel - nfs: - path: /lsstdata - server: atarchiver.cp.lsst.org - readOnly: true - - name: comcam - nfs: - path: /lsstdata - server: comcam-arctl01.cp.lsst.org - readOnly: true - - name: other - nfs: - path: /lsstdata - server: nfs1.cp.lsst.org - readOnly: true - - name: latiss - nfs: - path: /repo/LATISS - server: atarchiver.cp.lsst.org - - name: base-auxtel - nfs: - path: /lsstdata/base/auxtel - server: atarchiver.cp.lsst.org - readOnly: true - - name: lsstcomcam - nfs: - path: /repo/LSSTComCam - server: 
comcam-arctl01.cp.lsst.org - - name: base-comcam - nfs: - path: /lsstdata/base/comcam - server: comcam-arctl01.cp.lsst.org - readOnly: true - volume_mounts: - - name: home - mountPath: /home - - name: project - mountPath: /project - - name: scratch - mountPath: /scratch - - name: auxtel - mountPath: /readonly/lsstdata/auxtel +config: + base_url: "https://summit-lsp.lsst.codes" + butler_secret_path: "secret/k8s_operator/summit-lsp.lsst.codes/butler-secret" + pull_secret_path: "secret/k8s_operator/summit-lsp.lsst.codes/pull-secret" + lab_environment: + AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" + AUTO_REPO_BRANCH: "prod" + AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" + DAF_BUTLER_REPOSITORY_INDEX: "/project/data-repos.yaml" + LSST_DDS_INTERFACE: net1 + LSST_DDS_PARTITION_PREFIX: summit + LSST_SITE: summit + PGPASSFILE: "/opt/lsst/software/jupyterlab/butler-secret/postgres-credentials.txt" + PGUSER: "oods" + volumes: + - name: home + nfs: + path: /jhome + server: nfs1.cp.lsst.org + - name: project + nfs: + path: /project + server: nfs1.cp.lsst.org + - name: scratch + nfs: + path: /scratch + server: nfs1.cp.lsst.org + - name: auxtel + nfs: + path: /lsstdata + server: auxtel-archiver.cp.lsst.org readOnly: true - - name: comcam - mountPath: /readonly/lsstdata/comcam + - name: comcam + nfs: + path: /lsstdata + server: comcam-archiver.cp.lsst.org readOnly: true - - name: other - mountPath: /readonly/lsstdata/other + - name: other + nfs: + path: /lsstdata + server: nfs1.cp.lsst.org readOnly: true - - name: latiss - mountPath: /repo/LATISS - - name: base-auxtel - mountPath: /data/lsstdata/base/auxtel + - name: latiss + nfs: + path: /repo/LATISS + server: auxtel-archiver.cp.lsst.org + - name: base-auxtel + nfs: + path: /lsstdata/base/auxtel + server: auxtel-archiver.cp.lsst.org readOnly: true - - name: lsstcomcam - mountPath: /repo/LSSTComCam - - name: base-comcam - mountPath: /data/lsstdata/base/comcam + - name: lsstcomcam + nfs: + path: 
/repo/LSSTComCam + server: comcam-archiver.cp.lsst.org + - name: base-comcam + nfs: + path: /lsstdata/base/comcam + server: comcam-archiver.cp.lsst.org readOnly: true - - vault_secret_path: "secret/k8s_operator/summit-lsp.lsst.codes/nublado2" - -pull-secret: - enabled: true - path: "secret/k8s_operator/summit-lsp.lsst.codes/pull-secret" + volume_mounts: + - name: home + mountPath: /home + - name: project + mountPath: /project + - name: scratch + mountPath: /scratch + - name: auxtel + mountPath: /readonly/lsstdata/auxtel + readOnly: true + - name: comcam + mountPath: /readonly/lsstdata/comcam + readOnly: true + - name: other + mountPath: /readonly/lsstdata/other + readOnly: true + - name: latiss + mountPath: /repo/LATISS + - name: base-auxtel + mountPath: /data/lsstdata/base/auxtel + readOnly: true + - name: lsstcomcam + mountPath: /repo/LSSTComCam + - name: base-comcam + mountPath: /data/lsstdata/base/comcam + readOnly: true diff --git a/services/nublado2/values-tucson-teststand.yaml b/services/nublado2/values-tucson-teststand.yaml index 17423aeae8..6c19a72b7e 100644 --- a/services/nublado2/values-tucson-teststand.yaml +++ b/services/nublado2/values-tucson-teststand.yaml @@ -1,86 +1,94 @@ -nublado2: - jupyterhub: - ingress: - hosts: ["tucson-teststand.lsst.codes"] - annotations: - nginx.ingress.kubernetes.io/auth-signin: "https://tucson-teststand.lsst.codes/login" +jupyterhub: + cull: + enabled: true + users: false + removeNamedServers: false + timeout: 432000 + every: 300 + maxAge: 2160000 + ingress: + hosts: ["tucson-teststand.lsst.codes"] + annotations: + nginx.ingress.kubernetes.io/auth-signin: "https://tucson-teststand.lsst.codes/login" + hub: + db: + upgrade: true + url: "postgresql://jovyan@squoint.tu.lsst.org/jupyterhub" - singleuser: - extraAnnotations: - k8s.v1.cni.cncf.io/networks: "kube-system/misc-dds" - initContainers: - - name: "multus-init" - image: "lsstit/ddsnet4u:latest" - securityContext: - privileged: true + singleuser: + extraAnnotations: + 
k8s.v1.cni.cncf.io/networks: "kube-system/misc-dds" + initContainers: + - name: "multus-init" + image: "lsstit/ddsnet4u:latest" + securityContext: + privileged: true - config: - base_url: "https://tucson-teststand.lsst.codes" - butler_secret_path: "secret/k8s_operator/tucson-teststand.lsst.codes/butler-secret" - pull_secret_path: "secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret" - lab_environment: - AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" - AUTO_REPO_BRANCH: "prod" - AUTO_REPO_SPECS: "https://github.com/lsst-sqre/system-test@prod" - LSST_DDS_INTERFACE: net1 - LSST_DDS_PARTITION_PREFIX: tucson - volumes: - - name: home - nfs: - path: /jhome - server: nfs-jhome.tu.lsst.org - - name: project - nfs: - path: /project - server: nfs-project.tu.lsst.org - - name: scratch - nfs: - path: /scratch - server: nfs-scratch.tu.lsst.org - - name: datasets - nfs: - path: /lsstdata - server: nfs-lsstdata.tu.lsst.org - - name: auxtel-butler - nfs: - path: /repo/LATISS - server: auxtel-archiver.tu.lsst.org - - name: auxtel-oods - nfs: - path: /lsstdata/TTS/auxtel - server: auxtel-archiver.tu.lsst.org - readOnly: true - - name: comcam-butler - nfs: - path: /repo/LSSTComCam - server: comcam-archiver.tu.lsst.org - - name: comcam-oods - nfs: - path: /lsstdata/TTS/comcam - server: comcam-archiver.tu.lsst.org - readOnly: true - volume_mounts: - - name: home - mountPath: /home - - name: datasets - mountPath: /datasets - - name: project - mountPath: /project - - name: scratch - mountPath: /scratch - - name: auxtel-butler - mountPath: /repo/LATISS - - name: auxtel-oods - mountPath: /data/lsstdata/TTS/auxtel +config: + base_url: "https://tucson-teststand.lsst.codes" + butler_secret_path: "secret/k8s_operator/tucson-teststand.lsst.codes/butler-secret" + pull_secret_path: "secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret" + lab_environment: + AUTO_REPO_URLS: "https://github.com/lsst-sqre/system-test" + AUTO_REPO_BRANCH: "prod" + AUTO_REPO_SPECS: 
"https://github.com/lsst-sqre/system-test@prod" + DAF_BUTLER_REPOSITORY_INDEX: "/project/data-repos.yaml" + LSST_DDS_INTERFACE: net1 + LSST_DDS_PARTITION_PREFIX: tucson + LSST_SITE: tucson + PGPASSFILE: "/opt/lsst/software/jupyterlab/butler-secret/postgres-credentials.txt" + PGUSER: "oods" + volumes: + - name: home + nfs: + path: /jhome + server: nfs-jhome.tu.lsst.org + - name: project + nfs: + path: /project + server: nfs-project.tu.lsst.org + - name: scratch + nfs: + path: /scratch + server: nfs-scratch.tu.lsst.org + - name: datasets + nfs: + path: /lsstdata + server: nfs-lsstdata.tu.lsst.org + - name: auxtel-butler + nfs: + path: /repo/LATISS + server: auxtel-archiver.tu.lsst.org + - name: auxtel-oods + nfs: + path: /lsstdata/TTS/auxtel + server: auxtel-archiver.tu.lsst.org readOnly: true - - name: comcam-butler - mountPath: /repo/LSSTComCam - - name: comcam-oods - mountPath: /data/lsstdata/TTS/comcam + - name: comcam-butler + nfs: + path: /repo/LSSTComCam + server: comcam-archiver.tu.lsst.org + - name: comcam-oods + nfs: + path: /lsstdata/TTS/comcam + server: comcam-archiver.tu.lsst.org readOnly: true - - vault_secret_path: "secret/k8s_operator/tucson-teststand.lsst.codes/nublado2" - -pull-secret: - enabled: true - path: "secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret" + volume_mounts: + - name: home + mountPath: /home + - name: datasets + mountPath: /datasets + - name: project + mountPath: /project + - name: scratch + mountPath: /scratch + - name: auxtel-butler + mountPath: /repo/LATISS + - name: auxtel-oods + mountPath: /data/lsstdata/TTS/auxtel + readOnly: true + - name: comcam-butler + mountPath: /repo/LSSTComCam + - name: comcam-oods + mountPath: /data/lsstdata/TTS/comcam + readOnly: true diff --git a/services/nublado2/values.yaml b/services/nublado2/values.yaml new file mode 100644 index 0000000000..80f478df35 --- /dev/null +++ b/services/nublado2/values.yaml @@ -0,0 +1,414 @@ +# Default values for nublado2. +# This is a YAML-formatted file. 
+# Declare variables to be passed into your templates. + +jupyterhub: + hub: + authenticatePrometheus: false + image: + name: lsstsqre/nublado2 + tag: "2.6.1" + resources: + limits: + cpu: 900m + memory: 1Gi # Should support about 200 users + config: + Authenticator: + enable_auth_state: true + JupyterHub: + authenticator_class: nublado2.auth.GafaelfawrAuthenticator + ServerApp: + shutdown_no_activity_timeout: 604800 # one week + db: + # Password comes from the nublado2-secret. + type: "postgres" + password: "true" + url: "postgresql://jovyan@postgres.postgres/jupyterhub" + containerSecurityContext: + runAsUser: 768 + runAsGroup: 768 + allowPrivilegeEscalation: false + baseUrl: "/nb" + # Note: this has to match up with the kubernetes secret created by the + # vault secret, and since you can't put templating in a values file, I'm + # just setting the name here. + existingSecret: "nublado2-secret" + extraConfig: + nublado.py: | + import nublado2.hub_config + nublado2.hub_config.HubConfig().configure(c) + extraVolumes: + - name: nublado-config + configMap: + name: nublado-config + - name: nublado-gafaelfawr + secret: + secretName: gafaelfawr-token + extraVolumeMounts: + - name: nublado-config + mountPath: /etc/jupyterhub/nublado_config.yaml + subPath: nublado_config.yaml + - name: nublado-gafaelfawr + mountPath: /etc/keys/gafaelfawr-token + subPath: token + # We still have to use our own, enabled at the top level, which is + # similar but not identical. This one still doesn't work, even if + # you explicitly enable port 8081 so the labs can talk to the Hub. 
+ networkPolicy: + enabled: false + loadRoles: + self: + scopes: ['admin:servers!user', 'read:metrics'] + server: + scopes: ['inherit'] # Let server use API like user + + prePuller: + continuous: + enabled: false + hook: + enabled: false + + singleuser: + cloudMetadata: + blockWithIptables: false + cmd: "/opt/lsst/software/jupyterlab/runlab.sh" + defaultUrl: "/lab" + extraAnnotations: + argocd.argoproj.io/compare-options: 'IgnoreExtraneous' + argocd.argoproj.io/sync-options: 'Prune=false' + extraLabels: + hub.jupyter.org/network-access-hub: 'true' + argocd.argoproj.io/instance: 'nublado-users' + storage: + extraVolumes: + - name: dask + configMap: + name: dask + - name: idds-config + configMap: + name: idds-config + - name: tmp + emptyDir: {} + - name: butler-secret + secret: + secretName: butler-secret + - name: lab-environment + configMap: + defaultMode: 420 + name: lab-environment + - name: passwd + configMap: + defaultMode: 420 + name: passwd + - name: group + configMap: + defaultMode: 420 + name: group + extraVolumeMounts: + - name: dask + mountPath: /etc/dask + - name: idds-config + mountPath: /opt/lsst/software/jupyterlab/panda + - name: tmp + mountPath: /tmp + - name: butler-secret + mountPath: /opt/lsst/software/jupyterlab/butler-secret + - name: lab-environment + mountPath: /opt/lsst/software/jupyterlab/environment + - name: passwd + mountPath: /etc/passwd + readOnly: true + subPath: passwd + - name: group + mountPath: /etc/group + readOnly: true + subPath: group + type: none + + proxy: + service: + type: ClusterIP + chp: + networkPolicy: + interNamespaceAccessLabels: accept + # This currently causes Minikube deployment in GH-actions to fail. 
+ # We want it sometime but it's not critical; it will help with + # scale-down + # pdb: + # enabled: true + # minAvailable: 1 + + # Any instantiation of this chart must also set ingress.hosts and add + # the nginx.ingress.kubernetes.io/auth-signin annotation pointing to the + # appropriate fully-qualified URLs for the Gafaelfawr /login route. + ingress: + enabled: true + + # -- Extra annotations to add to the ingress + # @default -- See `values.yaml` + annotations: + nginx.ingress.kubernetes.io/auth-method: "GET" + nginx.ingress.kubernetes.io/auth-response-headers: "Authorization,Cookie,X-Auth-Request-Email,X-Auth-Request-User,X-Auth-Request-Token" + nginx.ingress.kubernetes.io/auth-url: "http://gafaelfawr.gafaelfawr.svc.cluster.local:8080/auth?scope=exec:notebook&notebook=true&minimum_lifetime=2160000" + nginx.ingress.kubernetes.io/configuration-snippet: | + auth_request_set $auth_www_authenticate $upstream_http_www_authenticate; + auth_request_set $auth_status $upstream_http_x_error_status; + auth_request_set $auth_error_body $upstream_http_x_error_body; + error_page 403 = @autherror; + ingressClassName: "nginx" + pathSuffix: "*" + + cull: + enabled: true + timeout: 2592000 # 30 days -- shorten later + every: 600 # Check every ten minutes + users: true # log out user when we cull + removeNamedServers: true # Post-stop hook may already do this + maxAge: 5184000 # 60 days -- shorten later + + imagePullSecrets: + - name: pull-secret + + scheduling: + userScheduler: + enabled: false + userPlaceholder: + enabled: false + +config: + # -- base_url must be set in each instantiation of this chart to the URL of + # the primary ingress. It's used to construct API requests to the + # authentication service (which should go through the ingress). + base_url: "" + # -- butler_secret_path must be set here, because it's passed through to + # the lab rather than being part of the Hub configuration.
+ butler_secret_path: "" + # -- pull_secret_path must also be set here; it specifies resources in + # the lab namespace + pull_secret_path: "" + # -- images to pin to spawner menu + pinned_images: [] + # -- Cachemachine image policy: "available" or "desired". Use + # "desired" at instances with streaming image support. + cachemachine_image_policy: "available" + # -- shut down user pods on logout. Superfluous, because our + # LogoutHandler enforces this in any event, but nice to make explicit. + shutdown_on_logout: true + # -- definitions of Lab sizes available in a given instance + sizes: + - name: Small + cpu: 1 + ram: 3072M + - name: Medium + cpu: 2 + ram: 6144M + - name: Large + cpu: 4 + ram: 12288M + # -- Volumes to use for a particular instance + volumes: [] + # -- Where to mount volumes for a particular instance + volume_mounts: [] + + # -- Environment variables to set in spawned lab containers. Each value will + # be expanded using Jinja 2 templating. + # @default -- See `values.yaml` + lab_environment: + EXTERNAL_INSTANCE_URL: "{{ base_url }}" + FIREFLY_ROUTE: /portal/app + HUB_ROUTE: "{{ nublado_base_url }}" + JS9_ROUTE: /js9 + API_ROUTE: /api + TAP_ROUTE: /api/tap + SODA_ROUTE: /api/image/soda + WORKFLOW_ROUTE: /wf + AUTO_REPO_URLS: https://github.com/lsst-sqre/notebook-demo + NO_SUDO: "TRUE" + EXTERNAL_GID: "{{ gid if gid else uid }}" + EXTERNAL_GROUPS: "{{ external_groups }}" + EXTERNAL_UID: "{{ uid }}" + ACCESS_TOKEN: "{{ token }}" + IMAGE_DIGEST: "{{ options.image_info.digest }}" + IMAGE_DESCRIPTION: "{{ options.image_info.display_name }}" + RESET_USER_ENV: "{{ options.reset_user_env }}" + # We need to set CLEAR_DOTLOCAL until all images that didn't know + # about RESET_USER_ENV have aged out (late 2022) + CLEAR_DOTLOCAL: "{{ options.reset_user_env }}" + DEBUG: "{{ options.debug }}" + + # -- Templates for the user resources to create for each lab spawn. 
This is + # a string that can be templated and then loaded as YAML to generate a list + # of Kubernetes objects to create. + # @default -- See `values.yaml` + user_resources_template: | + - apiVersion: v1 + kind: Namespace + metadata: + name: "{{ user_namespace }}" + - apiVersion: v1 + kind: ConfigMap + metadata: + name: group + namespace: "{{ user_namespace }}" + data: + group: | + root:x:0: + bin:x:1: + daemon:x:2: + sys:x:3: + adm:x:4: + tty:x:5: + disk:x:6: + lp:x:7: + mem:x:8: + kmem:x:9: + wheel:x:10: + cdrom:x:11: + mail:x:12: + man:x:15: + dialout:x:18: + floppy:x:19: + games:x:20: + tape:x:33: + video:x:39: + ftp:x:50: + lock:x:54: + audio:x:63: + nobody:x:99: + users:x:100: + utmp:x:22: + utempter:x:35: + input:x:999: + systemd-journal:x:190: + systemd-network:x:192: + dbus:x:81: + ssh_keys:x:998: + lsst_lcl:x:1000:{{ user }} + tss:x:59: + cgred:x:997: + screen:x:84: + jovyan:x:768:{{ user }}{% for g in groups %} + {{ g.name }}:x:{{ g.id }}:{{ user if g.id != gid else "" }}{% endfor %} + - apiVersion: v1 + kind: ConfigMap + metadata: + name: passwd + namespace: "{{ user_namespace }}" + data: + passwd: | + root:x:0:0:root:/root:/bin/bash + bin:x:1:1:bin:/bin:/sbin/nologin + daemon:x:2:2:daemon:/sbin:/sbin/nologin + adm:x:3:4:adm:/var/adm:/sbin/nologin + lp:x:4:7:lp:/var/spool/lpd:/sbin/nologin + sync:x:5:0:sync:/sbin:/bin/sync + shutdown:x:6:0:shutdown:/sbin:/sbin/shutdown + halt:x:7:0:halt:/sbin:/sbin/halt + mail:x:8:12:mail:/var/spool/mail:/sbin/nologin + operator:x:11:0:operator:/root:/sbin/nologin + games:x:12:100:games:/usr/games:/sbin/nologin + ftp:x:14:50:FTP User:/var/ftp:/sbin/nologin + nobody:x:99:99:Nobody:/:/sbin/nologin + systemd-network:x:192:192:systemd Network Management:/:/sbin/nologin + dbus:x:81:81:System message bus:/:/sbin/nologin + lsst_lcl:x:1000:1000::/home/lsst_lcl:/bin/bash + tss:x:59:59:Account used by the trousers package to sandbox the tcsd daemon:/dev/null:/sbin/nologin + {{ user }}:x:{{ uid }}:{{ gid if gid else uid 
}}::/home/{{ user }}:/bin/bash + - apiVersion: v1 + kind: ConfigMap + metadata: + name: dask + namespace: "{{ user_namespace }}" + data: + dask_worker.yml: | + {{ dask_yaml | indent(6) }} + # When we break out the resources we should make this per-instance + # configurable. + - apiVersion: v1 + kind: ConfigMap + metadata: + name: idds-config + namespace: "{{ user_namespace }}" + data: + idds_cfg.client.template: | + # Licensed under the Apache License, Version 2.0 (the "License"); + # You may not use this file except in compliance with the License. + # You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 + # + # Authors: + # - Wen Guan, , 2020 + [common] + # if logdir is configured, idds will write to idds.log in this directory. + # else idds will go to stdout/stderr. + # With supervisord, it's good to write to stdout/stderr, then supervisord can manage and rotate logs. + # logdir = /var/log/idds + loglevel = INFO + [rest] + host = https://iddsserver.cern.ch:443/idds + #url_prefix = /idds + #cacher_dir = /tmp + cacher_dir = /data/idds + - apiVersion: v1 + kind: ServiceAccount + metadata: + name: "{{ user }}-serviceaccount" + namespace: "{{ user_namespace }}" + imagePullSecrets: + - name: pull-secret + - apiVersion: rbac.authorization.k8s.io/v1 + kind: Role + metadata: + name: "{{ user }}-role" + namespace: "{{ user_namespace }}" + rules: + # cf https://kubernetes.dask.org/en/latest/kubecluster.html + - apiGroups: [""] + resources: ["pods", "services"] + verbs: ["create", "delete", "get", "list", "watch"] + - apiGroups: [""] + resources: ["pods/log"] + verbs: ["get","list"] + - apiGroups: ["policy"] + resources: ["poddisruptionbudgets"] + verbs: ["create", "delete", "get", "list", "watch"] + - apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: "{{ user }}-rolebinding" + namespace: "{{ user_namespace }}" + roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: "{{ user }}-role" + subjects: + - 
kind: ServiceAccount + name: "{{ user }}-serviceaccount" + namespace: "{{ user_namespace }}" + - apiVersion: ricoberger.de/v1alpha1 + kind: VaultSecret + metadata: + name: butler-secret + namespace: "{{ user_namespace }}" + spec: + path: "{{ butler_secret_path }}" + type: Opaque + - apiVersion: ricoberger.de/v1alpha1 + kind: VaultSecret + metadata: + name: pull-secret + namespace: "{{ user_namespace }}" + spec: + path: "{{ pull_secret_path }}" + type: kubernetes.io/dockerconfigjson + +# Built-in network policy doesn't quite work (Labs can't talk to Hub, +# even with port 8081 explicitly enabled), so let's use our own for now. +network_policy: + enabled: true + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/obstap/Chart.yaml b/services/obstap/Chart.yaml deleted file mode 100644 index c07bcc16a6..0000000000 --- a/services/obstap/Chart.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v2 -name: obstap -version: 1.0.0 -dependencies: -- name: cadc-tap-postgres - version: ">=0.1.0" - repository: https://lsst-sqre.github.io/charts/ -- name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ diff --git a/services/obstap/values-idfdev.yaml b/services/obstap/values-idfdev.yaml deleted file mode 100644 index 66559b5f23..0000000000 --- a/services/obstap/values-idfdev.yaml +++ /dev/null @@ -1,18 +0,0 @@ -cadc-tap-postgres: - pull_secret: 'pull-secret' - tag: "1.1" - host: "data-dev.lsst.cloud" - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: 'secret/k8s_operator/data-dev.lsst.cloud/tap' - - gcs_bucket: 'async-results.lsst.codes' - gcs_bucket_url: 'http://async-results.lsst.codes' - -pull-secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/pull-secret diff --git a/services/obstap/values-idfint.yaml 
b/services/obstap/values-idfint.yaml deleted file mode 100644 index befb9f9557..0000000000 --- a/services/obstap/values-idfint.yaml +++ /dev/null @@ -1,18 +0,0 @@ -cadc-tap-postgres: - pull_secret: 'pull-secret' - tag: "1.1" - host: "data-int.lsst.cloud" - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: 'secret/k8s_operator/data-int.lsst.cloud/tap' - - gcs_bucket: 'async-results.lsst.codes' - gcs_bucket_url: 'http://async-results.lsst.codes' - -pull-secret: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/pull-secret diff --git a/services/obstap/values-idfprod.yaml b/services/obstap/values-idfprod.yaml deleted file mode 100644 index 5d7994882b..0000000000 --- a/services/obstap/values-idfprod.yaml +++ /dev/null @@ -1,18 +0,0 @@ -cadc-tap-postgres: - pull_secret: 'pull-secret' - tag: "1.1" - host: "data.lsst.cloud" - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: 'secret/k8s_operator/data.lsst.cloud/tap' - - gcs_bucket: 'async-results.lsst.codes' - gcs_bucket_url: 'http://async-results.lsst.codes' - -pull-secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/pull-secret diff --git a/services/obstap/values-int.yaml b/services/obstap/values-int.yaml deleted file mode 100644 index 277a485009..0000000000 --- a/services/obstap/values-int.yaml +++ /dev/null @@ -1,42 +0,0 @@ -cadc-tap-postgres: - pull_secret: 'pull-secret' - tag: "1.1" - host: "lsst-lsp-int.ncsa.illinois.edu" - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: 'secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/tap' - - ingress: - authenticated_annotations: - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-Uid, X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-int.ncsa.illinois.edu/auth?scope=read:tap&auth_type=basic" - nginx.ingress.kubernetes.io/configuration-snippet: | - auth_request_set $auth_token $upstream_http_x_auth_request_token; - proxy_set_header 
Authorization "Bearer $auth_token"; - - resources: - requests: - cpu: 2.0 - memory: 2G - limits: - cpu: 8.0 - memory: 16G - - aux_resources: - requests: - cpu: 0.25 - memory: 1G - limits: - cpu: 2.0 - memory: 4G - - gcs_bucket: 'async-results.lsst.codes' - gcs_bucket_url: 'http://async-results.lsst.codes' - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret diff --git a/services/obstap/values-minikube.yaml b/services/obstap/values-minikube.yaml deleted file mode 100644 index 0488f65eab..0000000000 --- a/services/obstap/values-minikube.yaml +++ /dev/null @@ -1,18 +0,0 @@ -cadc-tap-postgres: - pull_secret: 'pull-secret' - tag: "1.1" - host: "minikube.lsst.codes" - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: 'secret/k8s_operator/minikube.lsst.codes/tap' - - gcs_bucket: 'async-results.lsst.codes' - gcs_bucket_url: 'http://async-results.lsst.codes' - -pull-secret: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/pull-secret diff --git a/services/obstap/values-red-five.yaml b/services/obstap/values-red-five.yaml deleted file mode 100644 index 5c17bf4dd1..0000000000 --- a/services/obstap/values-red-five.yaml +++ /dev/null @@ -1,18 +0,0 @@ -cadc-tap-postgres: - pull_secret: 'pull-secret' - tag: "1.1" - host: "red-five.lsst.codes" - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: 'secret/k8s_operator/red-five.lsst.codes/tap' - - gcs_bucket: 'async-results.lsst.codes' - gcs_bucket_url: 'http://async-results.lsst.codes' - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git a/services/obstap/values-roe.yaml b/services/obstap/values-roe.yaml deleted file mode 100644 index cac61a205c..0000000000 --- a/services/obstap/values-roe.yaml +++ /dev/null @@ -1,15 +0,0 @@ -cadc-tap-postgres: - pull_secret: 'pull-secret' - tag: "1.1" - host: "rsp.lsst.ac.uk" - - secrets: - enabled: false - - vault_secrets: - enabled: true - 
path: 'secret/k8s_operator/roe/tap' - -pull-secret: - enabled: true - path: secret/k8s_operator/roe/pull-secret diff --git a/services/obstap/values-stable.yaml b/services/obstap/values-stable.yaml deleted file mode 100644 index ffb30de8c5..0000000000 --- a/services/obstap/values-stable.yaml +++ /dev/null @@ -1,42 +0,0 @@ -cadc-tap-postgres: - pull_secret: 'pull-secret' - tag: "1.1" - host: "lsst-lsp-stable.ncsa.illinois.edu" - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: 'secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/tap' - - ingress: - authenticated_annotations: - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-Uid, X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-stable.ncsa.illinois.edu/auth?scope=read:tap&auth_type=basic" - nginx.ingress.kubernetes.io/configuration-snippet: | - auth_request_set $auth_token $upstream_http_x_auth_request_token; - proxy_set_header Authorization "Bearer $auth_token"; - - resources: - requests: - cpu: 2.0 - memory: 2G - limits: - cpu: 8.0 - memory: 16G - - aux_resources: - requests: - cpu: 0.25 - memory: 1G - limits: - cpu: 2.0 - memory: 4G - - gcs_bucket: 'async-results.lsst.codes' - gcs_bucket_url: 'http://async-results.lsst.codes' - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret diff --git a/services/plot-navigator/Chart.yaml b/services/plot-navigator/Chart.yaml index eee10bee88..9b0c3b7dd3 100644 --- a/services/plot-navigator/Chart.yaml +++ b/services/plot-navigator/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v2 name: plot-navigator -version: 1.0.0 -dependencies: -- name: plot-navigator - version: "=1.6.1" - repository: https://lsst-sqre.github.io/charts/ +description: Panel-based plot viewer +version: 1.6.1 +sources: + - https://github.com/lsst-dm/pipetask-plot-navigator +appVersion: 0.6.1 diff --git a/services/plot-navigator/README.md b/services/plot-navigator/README.md new file mode 100644 
index 0000000000..68c1660ee7 --- /dev/null +++ b/services/plot-navigator/README.md @@ -0,0 +1,19 @@ +# plot-navigator + +Panel-based plot viewer + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| environment | object | `{}` | Environment variables (e.g. butler configuration/auth parms) for panel | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.repository | string | `"lsstdm/pipetask-plot-navigator"` | plot-navigator image to use | +| image.tag | string | `""` | | +| ingress.annotations | object | `{}` | Additional annotations to add to the ingress | diff --git a/services/plot-navigator/templates/_helpers.tpl b/services/plot-navigator/templates/_helpers.tpl new file mode 100644 index 0000000000..7a48c59875 --- /dev/null +++ b/services/plot-navigator/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "plot-navigator.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "plot-navigator.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "plot-navigator.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "plot-navigator.labels" -}} +helm.sh/chart: {{ include "plot-navigator.chart" . }} +{{ include "plot-navigator.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "plot-navigator.selectorLabels" -}} +app.kubernetes.io/name: {{ include "plot-navigator.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/plot-navigator/templates/deployment.yaml b/services/plot-navigator/templates/deployment.yaml new file mode 100644 index 0000000000..3fd6d69bae --- /dev/null +++ b/services/plot-navigator/templates/deployment.yaml @@ -0,0 +1,65 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: plot-navigator + labels: + {{- include "plot-navigator.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "plot-navigator.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "plot-navigator.selectorLabels" . | nindent 8 }} + spec: + imagePullSecrets: + - name: "pull-secret" + volumes: + # butler-secrets-raw is the secrets we get from vault + - name: "butler-secrets-raw" + secret: + secretName: "butler-secret" + # butler-secrets are the copied and chmoded versions + - name: "butler-secrets" + emptyDir: {} + # Have to fix permissions on the pgpass file. + # init container pattern borrowed from vo-cutouts. 
+ initContainers: + - name: fix-secret-permissions + image: {{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }} + imagePullPolicy: Always + command: + - "/bin/bash" + - "-c" + - | + cp -RL /home/worker/secrets-raw/* /home/worker/.lsst/ + chown worker:worker /home/worker/.lsst/* + chmod 0400 /home/worker/.lsst/* + securityContext: + runAsNonRoot: false + runAsUser: 0 + runAsGroup: 0 + volumeMounts: + - name: "butler-secrets" + mountPath: "/home/worker/.lsst/" + - name: "butler-secrets-raw" + mountPath: "/home/worker/secrets-raw/" + readOnly: true + containers: + - name: plot-navigator + image: {{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion}} + imagePullPolicy: Always + env: +{{- range $key, $value := .Values.environment }} + - name: {{ $key | quote }} + value: {{ $value | quote }} +{{- end }} + volumeMounts: + - name: butler-secrets + mountPath: "/home/worker/.lsst/" + command: + - /bin/bash + - -c + - panel serve dashboard_gen3.py --port 8080 --prefix /plot-navigator --allow-websocket-origin {{ .Values.global.host }} --static-dirs assets=./assets diff --git a/services/plot-navigator/templates/ingress.yaml b/services/plot-navigator/templates/ingress.yaml new file mode 100644 index 0000000000..081d11c983 --- /dev/null +++ b/services/plot-navigator/templates/ingress.yaml @@ -0,0 +1,35 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: "plot-navigator" + labels: + {{- include "plot-navigator.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "exec:portal" + loginRedirect: true + delegate: + internal: + scopes: [] + service: "plot-navigator" +template: + metadata: + name: "plot-navigator" + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . 
| nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/plot-navigator" + pathType: "Prefix" + backend: + service: + name: "plot-navigator" + port: + number: 80 diff --git a/services/plot-navigator/templates/service.yaml b/services/plot-navigator/templates/service.yaml new file mode 100644 index 0000000000..ba648bdc01 --- /dev/null +++ b/services/plot-navigator/templates/service.yaml @@ -0,0 +1,13 @@ +apiVersion: v1 +kind: Service +metadata: + name: plot-navigator + labels: + {{- include "plot-navigator.labels" . | nindent 4 }} +spec: + selector: + {{- include "plot-navigator.selectorLabels" . | nindent 4 }} + ports: + - port: 80 + protocol: TCP + targetPort: 8080 diff --git a/services/plot-navigator/templates/vault-secrets.yaml b/services/plot-navigator/templates/vault-secrets.yaml new file mode 100644 index 0000000000..c189eb29c7 --- /dev/null +++ b/services/plot-navigator/templates/vault-secrets.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: butler-secret + labels: + {{- include "plot-navigator.labels" . | nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/butler-secret" + type: Opaque +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret + labels: + {{- include "plot-navigator.labels" . 
| nindent 4 }} +spec: + path: "{{- .Values.global.vaultSecretsPath }}/pull-secret" + type: kubernetes.io/dockerconfigjson diff --git a/services/plot-navigator/values-idfint.yaml b/services/plot-navigator/values-idfint.yaml index 8e8b09f2dc..4dc30dc478 100644 --- a/services/plot-navigator/values-idfint.yaml +++ b/services/plot-navigator/values-idfint.yaml @@ -1,16 +1,5 @@ -plot-navigator: - hostname: "data-int.lsst.cloud" - basePath: "/plot-navigator" - butler_secret_path: "secret/k8s_operator/data-int.lsst.cloud/butler-secret" - environment: - BUTLER_URI: "s3://butler-us-central1-panda-dev/dc2/butler-external.yaml" - PGPASSFILE: "/home/worker/.lsst/postgres-credentials.txt" - AWS_SHARED_CREDENTIALS_FILE: "/home/worker/.lsst/aws-credentials.ini" - S3_ENDPOINT_URL: "https://storage.googleapis.com" - ingress: - host: "data-int.lsst.cloud" - path: "/plot-navigator" - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-signin: "https://data-int.lsst.cloud/login" - nginx.ingress.kubernetes.io/auth-url: "https://data-int.lsst.cloud/auth?scope=exec:portal&delegate_to=plotnavigator" +environment: + BUTLER_URI: "s3://butler-us-central1-panda-dev/dc2/butler-external.yaml" + PGPASSFILE: "/home/worker/.lsst/postgres-credentials.txt" + AWS_SHARED_CREDENTIALS_FILE: "/home/worker/.lsst/aws-credentials.ini" + S3_ENDPOINT_URL: "https://storage.googleapis.com" diff --git a/services/plot-navigator/values-minikube.yaml b/services/plot-navigator/values-minikube.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/plot-navigator/values.yaml b/services/plot-navigator/values.yaml new file mode 100644 index 0000000000..71f20f907f --- /dev/null +++ b/services/plot-navigator/values.yaml @@ -0,0 +1,26 @@ +image: + # -- plot-navigator image to use + repository: lsstdm/pipetask-plot-navigator + tag: "" + +# -- Environment variables (e.g. 
butler configuration/auth parms) for panel +environment: {} + +ingress: + # -- Additional annotations to add to the ingress + annotations: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/portal/.helmignore b/services/portal/.helmignore new file mode 100644 index 0000000000..50af031725 --- /dev/null +++ b/services/portal/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/services/portal/Chart.yaml b/services/portal/Chart.yaml index 3902c631fb..a2b7f4cdcc 100644 --- a/services/portal/Chart.yaml +++ b/services/portal/Chart.yaml @@ -1,10 +1,19 @@ apiVersion: v2 name: portal version: 1.0.0 +description: Rubin Science Platform Portal Aspect +sources: + - https://github.com/lsst/suit + - https://github.com/Caltech-IPAC/firefly +appVersion: "suit-2022.5.5" + dependencies: - - name: firefly - version: 0.3.7 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 + - name: redis + version: 0.1.4 repository: https://lsst-sqre.github.io/charts/ + +annotations: + phalanx.lsst.io/docs: | + - id: "DMTN-136" + title: "LSST Science Platform Portal Aspect Design and Maintenance Manual" + url: "https://dmtn-136.lsst.io/" diff --git a/services/portal/README.md b/services/portal/README.md new file mode 100644 index 0000000000..ba46958eba --- 
/dev/null +++ b/services/portal/README.md @@ -0,0 +1,45 @@ +# portal + +Rubin Science Platform Portal Aspect + +## Source Code + +* +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the Portal pod | +| config.cleanupInterval | string | `"36h"` | How long results should be retained before being deleted | +| config.debug | string | `"FALSE"` | Set to `TRUE` to enable service debugging | +| config.hipsUrl | string | `/api/hips/images/color_gri` in the local Science Platform | URL for default HiPS service | +| config.visualizeFitsSearchPath | string | `"/datasets"` | Search path for FITS files | +| config.volumes.configHostPath | string | Use an `emptyDir` | hostPath to mount as configuration. Set either this of `configNfs`, not both. | +| config.volumes.configNfs | object | Use an `emptyDir` | NFS information for a configuration. If set, must have keys for path and server, Set either this of `configHostPath`, not both. | +| config.volumes.workareaHostPath | string | Use an `emptyDir` | hostPath to mount as a shared work area. Set either this or `workareaNfs`, not both. | +| config.volumes.workareaNfs | object | Use an `emptyDir` | NFS information for a shared work area. If set, must have keys for path and server. Set either this or `workareaHostPath`, not both. 
| +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the Portal image | +| image.repository | string | `"ipac/suit"` | Portal image to use | +| image.tag | string | The appVersion of the chart | Tag of Portal image to use | +| ingress.annotations | object | `{}` | Additional annotations to add to the ingress | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the Portal pod | +| podAnnotations | object | `{}` | Annotations for the Portal pod | +| redis.affinity | object | `{}` | Affinity rules for the Redis pod | +| redis.config.secretKey | string | `"ADMIN_PASSWORD"` | Key inside secret from which to get the Redis password (do not change) | +| redis.config.secretName | string | `"portal-secret"` | Name of secret containing Redis password (may require changing if fullnameOverride is set) | +| redis.nodeSelector | object | `{}` | Node selection rules for the Redis pod | +| redis.persistence.enabled | bool | `false` | Whether to persist Redis storage. Setting this to false will use `emptyDir` and reset all data on every restart. | +| redis.podAnnotations | object | `{}` | Pod annotations for the Redis pod | +| redis.resources | object | See `values.yaml` | Resource limits and requests for the Redis pod | +| redis.tolerations | list | `[]` | Tolerations for the Redis pod | +| replicaCount | int | `1` | Number of pods to start | +| resources | object | `{"limits":{"cpu":2,"memory":"6Gi"}}` | Resource limits and requests. The Portal will use (by default) 93% of container RAM. 
This is a smallish Portal; tweak it as you need to in instance definitions in Phalanx. | +| securityContext | object | `{}` | Security context for the Portal pod | +| tolerations | list | `[]` | Tolerations for the Portal pod | diff --git a/services/portal/templates/_helpers.tpl b/services/portal/templates/_helpers.tpl new file mode 100644 index 0000000000..114b6681fe --- /dev/null +++ b/services/portal/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "portal.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "portal.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "portal.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "portal.labels" -}} +helm.sh/chart: {{ include "portal.chart" . }} +{{ include "portal.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "portal.selectorLabels" -}} +app.kubernetes.io/name: {{ include "portal.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/portal/templates/deployment.yaml b/services/portal/templates/deployment.yaml new file mode 100644 index 0000000000..60f6147ba1 --- /dev/null +++ b/services/portal/templates/deployment.yaml @@ -0,0 +1,160 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "portal.fullname" . }} + labels: + {{- include "portal.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "portal.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "firefly" + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "portal.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: "firefly" + portal-redis-client: "true" + spec: + automountServiceAccountToken: false + containers: + - name: "firefly" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: "MANAGER" + value: "TRUE" + - name: "ADMIN_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ include "portal.fullname" . }}-secret + key: "ADMIN_PASSWORD" + - name: "USE_ADMIN_AUTH" + value: "false" + - name: "REDIS_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ include "portal.fullname" . }}-secret + key: "ADMIN_PASSWORD" + - name: "PROPS_redis__host" + value: {{ include "portal.fullname" . 
}}-redis + - name: "PROPS_sso__req__auth__hosts" + value: {{ .Values.global.host | quote }} + - name: "PROPS_lsst__hips__masterUrl" + value: "{{ .Values.global.baseUrl }}/api/hips/list" + - name: "PROPS_FIREFLY_OPTIONS" + value: >- + $'{ + "coverage": { + {{- if .Values.config.hipsUrl }} + "hipsSourceURL" : "{{ .Values.config.hipsUrl }}", + "hipsSource360URL" : "{{ .Values.config.hipsUrl }}" + {{- else }} + "hipsSourceURL" : "{{ .Values.global.baseUrl }}/api/hips/images/color_gri", + "hipsSource360URL" : "{{ .Values.global.baseUrl }}/api/hips/images/color_gri" + {{- end }} + }, + "tap" : { + "additional": { + "services": [ { + "label": "LSST RSP", + "value": "{{ .Values.global.baseUrl }}/api/tap", + {{- if .Values.config.hipsUrl }} + "hipsUrl": "{{ .Values.config.hipsUrl }}", + {{- else }} + "hipsUrl": "{{ .Values.global.baseUrl }}/api/hips/images/color_gri", + {{- end }} + "centerWP": "62;-37;EQ_J2000", + "fovDeg": 10 + } ] + } + }, + "hips": { + "defHipsSources": {"source": "lsst", "label": "Rubin Featured"}, + "adhocMocSource": { + "sources": [ + "temp://lsst/dp02_dc2/hips/images/color_gri", + "temp://lsst/dp02_dc2/hips/images/band_u", + "temp://lsst/dp02_dc2/hips/images/band_g", + "temp://lsst/dp02_dc2/hips/images/band_r", + "temp://lsst/dp02_dc2/hips/images/band_i", + "temp://lsst/dp02_dc2/hips/images/band_z", + "temp://lsst/dp02_dc2/hips/images/band_y" + ], + "label": "Rubin Featured MOC" + } + }, + "searchActionsCmdMask": [ + "tableHiPS", "tapRadius", "tapArea", "tableTapRadius", + "HiPS", "lsstObsCoreTap", "lsstTruthSummaryRadius", "lsstTruthSummaryArea", + "lsstObsCoreTapTable", "lsstTruthSummaryRadiusTable" + ] + }' + - name: "SERVER_CONFIG_DIR" + value: "/firefly/config" + - name: "CLEANUP_INTERVAL" + value: "{{ .Values.config.cleanupInterval }}" + - name: VISUALIZE_FITS_SEARCH_PATH + value: "{{ .Values.config.visualizeFitsSearchPath }}" + - name: DEBUG + value: "{{ .Values.config.debug }}" + ports: + - containerPort: 8080 + {{- with .Values.resources }} 
+ resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - mountPath: "/firefly/shared-workarea" + name: "firefly-shared-workarea" + - mountPath: "/firefly/config" + name: "firefly-config" + readOnly: true + imagePullSecrets: + - name: "pull-secret" + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + volumes: + - name: "firefly-shared-workarea" + {{- if .Values.config.volumes.workareaHostPath }} + hostPath: + path: {{ .Values.config.volumes.workareaHostPath | quote }} + type: "Directory" + {{- else if .Values.config.volumes.workareaNfs }} + nfs: + path: {{ .Values.config.volumes.workareaNfs.path | quote }} + server: {{ .Values.config.volumes.workareaNfs.server | quote }} + {{- else }} + emptyDir: {} + {{- end }} + - name: "firefly-config" + {{- if .Values.config.volumes.configHostPath }} + hostPath: + path: {{ .Values.config.volumes.configHostPath | quote }} + type: "Directory" + {{- else if .Values.config.volumes.configNfs }} + nfs: + path: {{ .Values.config.volumes.configNfs.path | quote }} + server: {{ .Values.config.volumes.configNfs.server | quote }} + {{- else }} + emptyDir: {} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/portal/templates/ingress-admin.yaml b/services/portal/templates/ingress-admin.yaml new file mode 100644 index 0000000000..2a107ab1f5 --- /dev/null +++ b/services/portal/templates/ingress-admin.yaml @@ -0,0 +1,47 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ include "portal.fullname" . }}-admin + labels: + {{- include "portal.labels" . 
| nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "exec:admin" + loginRedirect: true +template: + metadata: + name: {{ include "portal.fullname" . }}-admin + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/affinity: "cookie" + nginx.ingress.kubernetes.io/session-cookie-change-on-failure: "true" + nginx.ingress.kubernetes.io/proxy-body-size: "0m" + nginx.ingress.kubernetes.io/proxy-buffer-size: "24k" + nginx.ingress.kubernetes.io/client-header-buffer-size: "24k" + nginx.ingress.kubernetes.io/rewrite-target: "/suit$1" + nginx.ingress.kubernetes.io/proxy-redirect-from: "/suit/" + nginx.ingress.kubernetes.io/proxy-redirect-to: "/portal/app/" + nginx.ingress.kubernetes.io/proxy-cookie-path: "/suit /portal/app" + nginx.ingress.kubernetes.io/session-cookie-path: "/portal/app" + nginx.ingress.kubernetes.io/configuration-snippet: | + proxy_set_header X-Original-URI $request_uri; + proxy_set_header X-Forwarded-Proto https; + proxy_set_header X-Forwarded-Port 443; + proxy_set_header X-Forwarded-Path /portal/app; + {{- with .Values.ingress.annotations }} + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/portal/app(/admin.*)" + pathType: "ImplementationSpecific" + backend: + service: + name: {{ include "portal.fullname" . }} + port: + number: 8080 diff --git a/services/portal/templates/ingress.yaml b/services/portal/templates/ingress.yaml new file mode 100644 index 0000000000..547fa5b484 --- /dev/null +++ b/services/portal/templates/ingress.yaml @@ -0,0 +1,53 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ include "portal.fullname" . }} + labels: + {{- include "portal.labels" . 
| nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "exec:portal" + loginRedirect: true + delegate: + internal: + service: "portal" + scopes: + - "read:image" + - "read:tap" +template: + metadata: + name: {{ include "portal.fullname" . }} + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/affinity: "cookie" + nginx.ingress.kubernetes.io/session-cookie-change-on-failure: "true" + nginx.ingress.kubernetes.io/proxy-body-size: "0m" + nginx.ingress.kubernetes.io/proxy-buffer-size: "24k" + nginx.ingress.kubernetes.io/client-header-buffer-size: "24k" + nginx.ingress.kubernetes.io/rewrite-target: "/suit$1$2" + nginx.ingress.kubernetes.io/proxy-redirect-from: "/suit/" + nginx.ingress.kubernetes.io/proxy-redirect-to: "/portal/app/" + nginx.ingress.kubernetes.io/proxy-cookie-path: "/suit /portal/app" + nginx.ingress.kubernetes.io/session-cookie-path: "/portal/app" + nginx.ingress.kubernetes.io/configuration-snippet: | + proxy_set_header X-Original-URI $request_uri; + proxy_set_header X-Forwarded-Proto https; + proxy_set_header X-Forwarded-Port 443; + proxy_set_header X-Forwarded-Path /portal/app; + {{- with .Values.ingress.annotations }} + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/portal/app(/|$)(.*)" + pathType: "ImplementationSpecific" + backend: + service: + name: {{ include "portal.fullname" . }} + port: + number: 8080 diff --git a/services/portal/templates/networkpolicy.yaml b/services/portal/templates/networkpolicy.yaml new file mode 100644 index 0000000000..c0b85c662c --- /dev/null +++ b/services/portal/templates/networkpolicy.yaml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "portal.fullname" . }} +spec: + podSelector: + matchLabels: + {{- include "portal.selectorLabels" . 
| nindent 6 }} + app.kubernetes.io/component: "firefly" + policyTypes: + - "Ingress" + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 + # Allow all traffic between the Portal pods. They talk to each other on + # random ports to synchronize requests. + - from: + - podSelector: + matchLabels: + {{- include "portal.selectorLabels" . | nindent 14 }} + app.kubernetes.io/component: "firefly" diff --git a/services/portal/templates/service.yaml b/services/portal/templates/service.yaml new file mode 100644 index 0000000000..c4ac3dedbf --- /dev/null +++ b/services/portal/templates/service.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "portal.fullname" . }} + labels: + {{- include "portal.labels" . | nindent 4 }} +spec: + type: "ClusterIP" + ports: + - protocol: "TCP" + port: 8080 + targetPort: 8080 + selector: + {{- include "portal.selectorLabels" . | nindent 4 }} diff --git a/services/portal/templates/vault-secrets.yaml b/services/portal/templates/vault-secrets.yaml new file mode 100644 index 0000000000..c3bbbb8046 --- /dev/null +++ b/services/portal/templates/vault-secrets.yaml @@ -0,0 +1,19 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ template "portal.fullname" . }}-secret + labels: + {{- include "portal.labels" . | nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/portal" + type: "Opaque" +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: "pull-secret" + labels: + {{- include "portal.labels" . 
| nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/pull-secret" + type: "kubernetes.io/dockerconfigjson" diff --git a/services/portal/values-base.yaml b/services/portal/values-base.yaml index c6fa254c00..30b83cac99 100644 --- a/services/portal/values-base.yaml +++ b/services/portal/values-base.yaml @@ -1,35 +1,3 @@ -firefly: - pull_secret: 'pull-secret' - - ingress: - host: 'base-lsp.lsst.codes' - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-Uid, X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-signin: "https://base-lsp.lsst.codes/login" - nginx.ingress.kubernetes.io/auth-url: "https://base-lsp.lsst.codes/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; - - resources: - limits: - memory: 32Gi - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/portal - - redis: - resources: - limits: - memory: 20Mi - -pull-secret: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/pull-secret +resources: + limits: + memory: "32Gi" diff --git a/services/portal/values-ccin2p3.yaml b/services/portal/values-ccin2p3.yaml new file mode 100644 index 0000000000..fa6a1ef9bc --- /dev/null +++ b/services/portal/values-ccin2p3.yaml @@ -0,0 +1,8 @@ +replicaCount: 2 + +resources: + limits: + memory: "24Gi" + +config: + hipsUrl: "http://alasky.cds.unistra.fr/DSS/DSSColor" diff --git a/services/portal/values-idfdev.yaml b/services/portal/values-idfdev.yaml index d188933b54..b8b3ea0bd2 100644 --- a/services/portal/values-idfdev.yaml +++ b/services/portal/values-idfdev.yaml @@ -1,35 +1,14 @@ -firefly: - pull_secret: 'pull-secret' +replicaCount: 2 - ingress: - host: 
"data-dev.lsst.cloud" - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-User,X-Auth-Request-Email,X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-signin: "https://data-dev.lsst.cloud/login" - nginx.ingress.kubernetes.io/auth-url: "https://data-dev.lsst.cloud/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; +image: + tag: "suit-2022.6.0" - resources: - limits: - memory: 8Gi +config: + volumes: + workareaNfs: + path: "/share1/home/firefly/shared-workarea" + server: "10.87.86.26" - secrets: - enabled: false - - vault_secrets: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/portal - - redis: - resources: - limits: - memory: 20Mi - -pull-secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/pull-secret +resources: + limits: + memory: "2Gi" diff --git a/services/portal/values-idfint.yaml b/services/portal/values-idfint.yaml index a55b28aefe..5098a43654 100644 --- a/services/portal/values-idfint.yaml +++ b/services/portal/values-idfint.yaml @@ -1,43 +1,14 @@ -firefly: - pull_secret: 'pull-secret' - replicaCount: 4 +replicaCount: 4 - volumes: - firefly_shared_workarea_nfs: - path: /share1/home/firefly/shared-workarea - server: 10.22.240.130 - - ingress: - host: "data-int.lsst.cloud" - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-User,X-Auth-Request-Email,X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-signin: "https://data-int.lsst.cloud/login" - nginx.ingress.kubernetes.io/auth-url: "https://data-int.lsst.cloud/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - 
nginx.ingress.kubernetes.io/proxy-read-timeout: "600" - nginx.ingress.kubernetes.io/proxy-send-timeout: "600" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; - - resources: - limits: - memory: 30Gi +image: + tag: "suit-2022.6.0" - secrets: - enabled: false - - vault_secrets: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/portal - - redis: - resources: - limits: - memory: 20Mi +config: + volumes: + workareaNfs: + path: "/share1/home/firefly/shared-workarea" + server: "10.22.240.130" -pull-secret: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/pull-secret +resources: + limits: + memory: "30Gi" diff --git a/services/portal/values-idfprod.yaml b/services/portal/values-idfprod.yaml index 98e0f2c392..d3325ec38f 100644 --- a/services/portal/values-idfprod.yaml +++ b/services/portal/values-idfprod.yaml @@ -1,43 +1,11 @@ -firefly: - pull_secret: 'pull-secret' - replicaCount: 4 +replicaCount: 4 +config: volumes: - firefly_shared_workarea_nfs: - path: /share1/home/firefly/shared-workarea - server: 10.13.105.122 + workareaNfs: + path: "/share1/home/firefly/shared-workarea" + server: "10.13.105.122" - ingress: - host: "data.lsst.cloud" - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-User,X-Auth-Request-Email,X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-signin: "https://data.lsst.cloud/login" - nginx.ingress.kubernetes.io/auth-url: "https://data.lsst.cloud/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - nginx.ingress.kubernetes.io/proxy-read-timeout: "600" - nginx.ingress.kubernetes.io/proxy-send-timeout: "600" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto 
https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; - - resources: - limits: - memory: 30Gi - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/portal - - redis: - resources: - limits: - memory: 20Mi - -pull-secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/pull-secret +resources: + limits: + memory: "30Gi" diff --git a/services/portal/values-int.yaml b/services/portal/values-int.yaml deleted file mode 100644 index 54dc374ed7..0000000000 --- a/services/portal/values-int.yaml +++ /dev/null @@ -1,53 +0,0 @@ -firefly: - pull_secret: 'pull-secret' - replicaCount: 2 - - ingress: - host: 'lsst-lsp-int.ncsa.illinois.edu' - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-User,X-Auth-Request-Email,X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-signin: "https://lsst-lsp-int.ncsa.illinois.edu/login" - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-int.ncsa.illinois.edu/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: 'secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/portal' - - redis: - resources: - limits: - memory: 20Mi - - nodeSelector: - environment: portal-int - - tolerations: - - effect: NoSchedule - key: dedicated - operator: Equal - value: portal - - resources: - limits: - memory: 24Gi - - securityContext: - runAsUser: 101 - runAsGroup: 102 - - volumes: - firefly_shared_workarea_hostpath: /sui/firefly/workarea - firefly_config_hostpath: /sui/firefly/config - -pull-secret: - enabled: true - path: 
secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret diff --git a/services/portal/values-minikube.yaml b/services/portal/values-minikube.yaml index 8ba2d28b82..2592098cb3 100644 --- a/services/portal/values-minikube.yaml +++ b/services/portal/values-minikube.yaml @@ -1,36 +1,4 @@ -firefly: - pull_secret: 'pull-secret' - - ingress: - host: "minikube.lsst.codes" - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-Uid, X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-signin: "https://minikube.lsst.codes/login" - nginx.ingress.kubernetes.io/auth-url: "http://gafaelfawr.gafaelfawr.svc.cluster.local:8080/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; - - resources: - limits: - cpu: 0.3 - memory: 2Gi - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/portal - - redis: - resources: - limits: - memory: 20Mi - -pull-secret: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/pull-secret +resources: + limits: + cpu: 0.3 + memory: "2Gi" diff --git a/services/portal/values-red-five.yaml b/services/portal/values-red-five.yaml deleted file mode 100644 index 74b371a943..0000000000 --- a/services/portal/values-red-five.yaml +++ /dev/null @@ -1,35 +0,0 @@ -firefly: - pull_secret: 'pull-secret' - - ingress: - host: "red-five.lsst.codes" - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-Uid, X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-signin: "https://red-five.lsst.codes/login" - nginx.ingress.kubernetes.io/auth-url: 
"https://red-five.lsst.codes/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; - - resources: - limits: - memory: 8Gi - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/portal - - redis: - resources: - limits: - memory: 20Mi - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git a/services/portal/values-roe.yaml b/services/portal/values-roe.yaml index 626f2456a9..2451c233b2 100644 --- a/services/portal/values-roe.yaml +++ b/services/portal/values-roe.yaml @@ -1,37 +1,3 @@ -firefly: - pull_secret: 'pull-secret' - - ingress: - host: "rsp.lsst.ac.uk" - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-User,X-Auth-Request-Email,X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-signin: "https://rsp.lsst.ac.uk/login" - nginx.ingress.kubernetes.io/auth-url: "https://rsp.lsst.ac.uk/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; - - max_jvm_size: "7G" - - resources: - limits: - memory: 8Gi - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: secret/k8s_operator/roe/portal - - redis: - resources: - limits: - memory: 20Mi - -pull-secret: - enabled: true - path: secret/k8s_operator/roe/pull-secret +resources: + limits: + memory: "8Gi" diff --git a/services/portal/values-stable.yaml b/services/portal/values-stable.yaml deleted file mode 100644 index 
87a50f9dd9..0000000000 --- a/services/portal/values-stable.yaml +++ /dev/null @@ -1,53 +0,0 @@ -firefly: - pull_secret: 'pull-secret' - replicaCount: 2 - - ingress: - host: 'lsst-lsp-stable.ncsa.illinois.edu' - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-User,X-Auth-Request-Email,X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-signin: "https://lsst-lsp-stable.ncsa.illinois.edu/login" - nginx.ingress.kubernetes.io/auth-url: "https://lsst-lsp-stable.ncsa.illinois.edu/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: 'secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/portal' - - redis: - resources: - limits: - memory: 20Mi - - nodeSelector: - environment: portal-stable - - tolerations: - - effect: NoSchedule - key: dedicated - operator: Equal - value: portal - - resources: - limits: - memory: 24Gi - - securityContext: - runAsUser: 101 - runAsGroup: 102 - - volumes: - firefly_shared_workarea_hostpath: /sui/firefly/workarea - firefly_config_hostpath: /sui/firefly/config - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret diff --git a/services/portal/values-summit.yaml b/services/portal/values-summit.yaml index 889a1ff376..30b83cac99 100644 --- a/services/portal/values-summit.yaml +++ b/services/portal/values-summit.yaml @@ -1,35 +1,3 @@ -firefly: - pull_secret: 'pull-secret' - - ingress: - host: 'summit-lsp.lsst.codes' - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-Uid, X-Auth-Request-Token - 
nginx.ingress.kubernetes.io/auth-signin: "https://summit-lsp.lsst.codes/login" - nginx.ingress.kubernetes.io/auth-url: "https://summit-lsp.lsst.codes/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; - - resources: - limits: - memory: 32Gi - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/portal - - redis: - resources: - limits: - memory: 20Mi - -pull-secret: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/pull-secret +resources: + limits: + memory: "32Gi" diff --git a/services/portal/values-tucson-teststand.yaml b/services/portal/values-tucson-teststand.yaml index c30e046e68..30b83cac99 100644 --- a/services/portal/values-tucson-teststand.yaml +++ b/services/portal/values-tucson-teststand.yaml @@ -1,35 +1,3 @@ -firefly: - pull_secret: 'pull-secret' - - ingress: - host: 'tucson-teststand.lsst.codes' - annotations: - nginx.ingress.kubernetes.io/auth-method: GET - nginx.ingress.kubernetes.io/auth-response-headers: X-Auth-Request-Uid, X-Auth-Request-Token - nginx.ingress.kubernetes.io/auth-signin: "https://tucson-teststand.lsst.codes/login" - nginx.ingress.kubernetes.io/auth-url: "https://tucson-teststand.lsst.codes/auth?scope=exec:portal&delegate_to=portal&delegate_scope=read:tap" - nginx.ingress.kubernetes.io/configuration-snippet: | - proxy_set_header X-Original-URI $request_uri; - proxy_set_header X-Forwarded-Proto https; - proxy_set_header X-Forwarded-Port 443; - proxy_set_header X-Forwarded-Path /portal/app; - - resources: - limits: - memory: 32Gi - - secrets: - enabled: false - - vault_secrets: - enabled: true - path: secret/k8s_operator/tucson-teststand.lsst.codes/portal - - redis: - resources: - limits: - memory: 20Mi - 
-pull-secret: - enabled: true - path: secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret +resources: + limits: + memory: "32Gi" diff --git a/services/portal/values.yaml b/services/portal/values.yaml new file mode 100644 index 0000000000..a369100e8c --- /dev/null +++ b/services/portal/values.yaml @@ -0,0 +1,132 @@ +# Default values for the Portal Aspect. + +# -- Number of pods to start +replicaCount: 1 + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +image: + # -- Portal image to use + repository: "ipac/suit" + + # -- Pull policy for the Portal image + pullPolicy: "IfNotPresent" + + # -- Tag of Portal image to use + # @default -- The appVersion of the chart + tag: "" + +ingress: + # -- Additional annotations to add to the ingress + annotations: {} + +# -- Resource limits and requests. The Portal will use (by default) 93% of +# container RAM. This is a smallish Portal; tweak it as you need to in +# instance definitions in Phalanx. +resources: + limits: + cpu: 2.0 + memory: "6Gi" + +# -- Annotations for the Portal pod +podAnnotations: {} + +# -- Node selector rules for the Portal pod +nodeSelector: {} + +# -- Tolerations for the Portal pod +tolerations: [] + +# -- Affinity rules for the Portal pod +affinity: {} + +# -- Security context for the Portal pod +securityContext: {} + +config: + # -- Set to `TRUE` to enable service debugging + debug: "FALSE" + + # -- How long results should be retained before being deleted + cleanupInterval: "36h" + + # -- URL for default HiPS service + # @default -- `/api/hips/images/color_gri` in the local Science Platform + hipsUrl: "" + + # -- Search path for FITS files + visualizeFitsSearchPath: "/datasets" + + volumes: + # -- hostPath to mount as a shared work area. Set either this or + # `workareaNfs`, not both. + # @default -- Use an `emptyDir` + workareaHostPath: "" + + # -- NFS information for a shared work area. 
If set, must have keys for + # path and server. Set either this or `workareaHostPath`, not both. + # @default -- Use an `emptyDir` + workareaNfs: {} + + # -- hostPath to mount as configuration. Set either this or + # `configNfs`, not both. + # @default -- Use an `emptyDir` + configHostPath: "" + + # -- NFS information for a configuration. If set, must have keys for path + # and server. Set either this or `configHostPath`, not both. + # @default -- Use an `emptyDir` + configNfs: {} + +redis: + config: + # -- Name of secret containing Redis password (may require changing if + # fullnameOverride is set) + secretName: "portal-secret" + + # -- Key inside secret from which to get the Redis password (do not + # change) + secretKey: "ADMIN_PASSWORD" + + persistence: + # -- Whether to persist Redis storage. Setting this to false will use + # `emptyDir` and reset all data on every restart. + enabled: false + + # -- Resource limits and requests for the Redis pod + # @default -- See `values.yaml` + resources: + limits: + cpu: "1" + memory: "20Mi" + + # -- Pod annotations for the Redis pod + podAnnotations: {} + + # -- Node selection rules for the Redis pod + nodeSelector: {} + + # -- Tolerations for the Redis pod + tolerations: [] + + # -- Affinity rules for the Redis pod + affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/postgres/.helmignore b/services/postgres/.helmignore new file mode 100644 index 0000000000..50af031725 --- /dev/null +++ b/services/postgres/.helmignore @@ -0,0 +1,22 @@ +# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/services/postgres/Chart.yaml b/services/postgres/Chart.yaml index a23d84f815..56fc1e9b2b 100644 --- a/services/postgres/Chart.yaml +++ b/services/postgres/Chart.yaml @@ -1,10 +1,7 @@ apiVersion: v2 name: postgres version: 1.0.0 -dependencies: -- name: postgres - version: ">=0.1.1" - repository: https://lsst-sqre.github.io/charts/ -- name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +description: Postgres RDBMS for LSP +sources: + - https://github.com/lsst-sqre/rsp-postgres +appVersion: 0.0.5 diff --git a/services/postgres/README.md b/services/postgres/README.md new file mode 100644 index 0000000000..25d2e60e20 --- /dev/null +++ b/services/postgres/README.md @@ -0,0 +1,20 @@ +# postgres + +Postgres RDBMS for LSP + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| debug | string | `""` | Set to non-empty to enable debugging output | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the postgres image | +| image.repository | string | `"lsstsqre/lsp-postgres"` | postgres image to use | +| image.tag | string | The appVersion of the chart | Tag of postgres image to use | +| postgresStorageClass | string | `"standard"` | Storage class for postgres volume. 
Set to appropriate value for your deployment: at GKE, "standard" (if you want SSD, "premium-rwo", but if you want a good database maybe it's better to use a cloud database?), on Rubin Observatory Rancher, "rook-ceph-block", elsewhere probably "standard" | +| postgresVolumeSize | string | `"1Gi"` | Volume size for postgres. It can generally be very small | +| volumeName | string | `""` | Volume name for postgres, if you use an existing volume that isn't automatically created from the PVC by the storage driver. | diff --git a/services/postgres/templates/_helpers.tpl b/services/postgres/templates/_helpers.tpl new file mode 100644 index 0000000000..9d24248a39 --- /dev/null +++ b/services/postgres/templates/_helpers.tpl @@ -0,0 +1,53 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "postgres.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "postgres.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "postgres.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "postgres.labels" -}} +app.kubernetes.io/name: {{ include "postgres.name" . }} +helm.sh/chart: {{ include "postgres.chart" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "postgres.selectorLabels" -}} +app.kubernetes.io/name: {{ include "postgres.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/postgres/templates/deployment.yaml b/services/postgres/templates/deployment.yaml new file mode 100644 index 0000000000..fcba77bf1a --- /dev/null +++ b/services/postgres/templates/deployment.yaml @@ -0,0 +1,104 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "postgres.fullname" . }} + labels: + {{- include "postgres.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "postgres.selectorLabels" . | nindent 6 }} + strategy: + type: "Recreate" + template: + metadata: + labels: + {{- include "postgres.selectorLabels" . | nindent 8 }} + spec: + containers: + - name: {{ template "postgres.fullname" . }} + args: + - "-c" + - "tcp_keepalives_idle=600" + - "-c" + - "tcp_keepalives_interval=30" + - "-c" + - "tcp_keepalives_count=10" + env: + - name: "DEBUG" + value: {{ .Values.debug | quote }} + - name: "POSTGRES_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ template "postgres.fullname" . 
}} + key: "root_password" + {{- with .Values.jupyterhub_db }} + - name: "VRO_DB_JUPYTERHUB_USER" + value: {{ .user | quote }} + - name: "VRO_DB_JUPYTERHUB_DB" + value: {{ .db | quote }} + - name: "VRO_DB_JUPYTERHUB_PASSWORD" + valueFrom: + secretKeyRef: + name: "postgres" + key: "jupyterhub_password" + {{- end }} + {{- with .Values.lovelog_db }} + - name: "VRO_DB_LOVELOG_USER" + value: {{ .user | quote }} + - name: "VRO_DB_LOVELOG_DB" + value: {{ .db | quote }} + - name: "VRO_DB_LOVELOG_PASSWORD" + valueFrom: + secretKeyRef: + name: "postgres" + key: "lovelog_password" + {{- end }} + {{- with .Values.narrativelog_db }} + - name: "VRO_DB_NARRATIVELOG_USER" + value: {{ .user | quote }} + - name: "VRO_DB_NARRATIVELOG_DB" + value: {{ .db | quote }} + - name: "VRO_DB_NARRATIVELOG_PASSWORD" + valueFrom: + secretKeyRef: + name: "postgres" + key: "narrativelog_password" + {{- end }} + {{- with .Values.exposurelog_db }} + - name: "VRO_DB_EXPOSURELOG_USER" + value: {{ .user | quote }} + - name: "VRO_DB_EXPOSURELOG_DB" + value: {{ .db | quote }} + - name: "VRO_DB_EXPOSURELOG_PASSWORD" + valueFrom: + secretKeyRef: + name: "postgres" + key: "exposurelog_password" + {{- end }} + {{- with .Values.gafaelfawr_db }} + - name: "VRO_DB_GAFAELFAWR_USER" + value: {{ .user | quote }} + - name: "VRO_DB_GAFAELFAWR_DB" + value: {{ .db | quote }} + - name: "VRO_DB_GAFAELFAWR_PASSWORD" + valueFrom: + secretKeyRef: + name: "postgres" + key: "gafaelfawr_password" + {{- end }} + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + ports: + - name: "postgres" + containerPort: 5432 + volumeMounts: + - name: "storage" + mountPath: "/var/lib/postgresql" + imagePullSecrets: + - name: "pull-secret" + volumes: + - name: storage + persistentVolumeClaim: + claimName: {{ template "postgres.fullname" . 
}}-physpvc diff --git a/services/postgres/templates/physpvc.yaml b/services/postgres/templates/physpvc.yaml new file mode 100644 index 0000000000..c32fdac4ef --- /dev/null +++ b/services/postgres/templates/physpvc.yaml @@ -0,0 +1,15 @@ +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: {{ template "postgres.fullname" . }}-physpvc + labels: {{- include "postgres.labels" . | nindent 4 }} +spec: + accessModes: + - "ReadWriteOnce" + resources: + requests: + storage: {{.Values.postgresVolumeSize}} + storageClassName: {{.Values.postgresStorageClass}} + {{- if .Values.volumeName }} + volumeName: {{ .Values.volumeName }} + {{- end }} diff --git a/services/postgres/templates/service.yaml b/services/postgres/templates/service.yaml new file mode 100644 index 0000000000..a7995d695b --- /dev/null +++ b/services/postgres/templates/service.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "postgres.fullname" . }} + labels: {{- include "postgres.labels" . | nindent 4 }} +spec: + ports: + - targetPort: "postgres" + protocol: "TCP" + port: 5432 + selector: + {{- include "postgres.selectorLabels" . | nindent 4 }} diff --git a/services/postgres/templates/vault-secrets.yaml b/services/postgres/templates/vault-secrets.yaml new file mode 100644 index 0000000000..5d1a67dfc5 --- /dev/null +++ b/services/postgres/templates/vault-secrets.yaml @@ -0,0 +1,20 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: postgres + labels: + app: {{ template "postgres.fullname" . }} +{{ include "postgres.labels" . | indent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/postgres" + type: Opaque +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret + labels: + {{- include "postgres.labels" . 
| nindent 4 }} +spec: +  path: "{{- .Values.global.vaultSecretsPath }}/pull-secret" +  type: kubernetes.io/dockerconfigjson diff --git a/services/postgres/values-base.yaml b/services/postgres/values-base.yaml index 10aa88f953..ec730cc9e5 100644 --- a/services/postgres/values-base.yaml +++ b/services/postgres/values-base.yaml @@ -1,25 +1,16 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/base-lsp.lsst.codes/postgres' - debug: 'true' - jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - lovelog_db: - user: 'lovelog' - db: 'lovelog' - exposurelog_db: - user: 'exposurelog' - db: 'exposurelog' - gafaelfawr_db: - user: 'gafaelfawr' - db: 'gafaelfawr' - narrativelog_db: - user: 'narrativelog' - db: 'narrativelog' - postgres_storage_class: 'rook-ceph-block' - -pull-secret: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/pull-secret +jupyterhub_db: + user: "jovyan" + db: "jupyterhub" +lovelog_db: + user: "lovelog" + db: "lovelog" +exposurelog_db: + user: "exposurelog" + db: "exposurelog" +gafaelfawr_db: + user: "gafaelfawr" + db: "gafaelfawr" +narrativelog_db: + user: "narrativelog" + db: "narrativelog" +postgresStorageClass: "rook-ceph-block" diff --git a/services/postgres/values-ccin2p3.yaml b/services/postgres/values-ccin2p3.yaml new file mode 100644 index 0000000000..52b36aac05 --- /dev/null +++ b/services/postgres/values-ccin2p3.yaml @@ -0,0 +1,9 @@ +jupyterhub_db: + user: "jovyan" + db: "jupyterhub" +gafaelfawr_db: + user: "gafaelfawr" + db: "gafaelfawr" + +postgresStorageClass: "rsp-local-storage" +volumeName: "postgres-data-rsp-ccqserv219" diff --git a/services/postgres/values-idfdev.yaml b/services/postgres/values-idfdev.yaml index d62df2fb11..5a77f93b71 100644 --- a/services/postgres/values-idfdev.yaml +++ b/services/postgres/values-idfdev.yaml @@ -1,12 +1,3 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/data-dev.lsst.cloud/postgres' - debug: 'true' - 
jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - -pull-secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/pull-secret +jupyterhub_db: + user: "jovyan" + db: "jupyterhub" diff --git a/services/postgres/values-idfint.yaml b/services/postgres/values-idfint.yaml index 4cc83a5042..5a77f93b71 100644 --- a/services/postgres/values-idfint.yaml +++ b/services/postgres/values-idfint.yaml @@ -1,12 +1,3 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/data-int.lsst.cloud/postgres' - debug: 'true' - jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - -pull-secret: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/pull-secret +jupyterhub_db: + user: "jovyan" + db: "jupyterhub" diff --git a/services/postgres/values-idfprod.yaml b/services/postgres/values-idfprod.yaml index a78ee0fa44..5a77f93b71 100644 --- a/services/postgres/values-idfprod.yaml +++ b/services/postgres/values-idfprod.yaml @@ -1,12 +1,3 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/data.lsst.cloud/postgres' - debug: 'true' - jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - -pull-secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/pull-secret +jupyterhub_db: + user: "jovyan" + db: "jupyterhub" diff --git a/services/postgres/values-int.yaml b/services/postgres/values-int.yaml deleted file mode 100644 index fcd7931499..0000000000 --- a/services/postgres/values-int.yaml +++ /dev/null @@ -1,19 +0,0 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/postgres' - debug: 'true' - jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - gafaelfawr_db: - user: 'gafaelfawr' - db: 'gafaelfawr' - postgres_storage_class: 'manual' - volume_name: 'postgres-data-volume' - image: - tag: '0.0.3' - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret diff --git 
a/services/postgres/values-minikube.yaml b/services/postgres/values-minikube.yaml index 425df8c97b..1dc388a3ea 100644 --- a/services/postgres/values-minikube.yaml +++ b/services/postgres/values-minikube.yaml @@ -1,24 +1,14 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/minikube.lsst.codes/postgres' - debug: 'true' - jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - exposurelog_db: - user: 'exposurelog' - db: 'exposurelog' - gafaelfawr_db: - user: 'gafaelfawr' - db: 'gafaelfawr' - narrativelog_db: - user: 'narrativelog' - db: 'narrativelog' - image: - tag: '0.0.2' - postgres_storage_class: 'standard' - -pull-secret: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/pull-secret +debug: "true" +jupyterhub_db: + user: "jovyan" + db: "jupyterhub" +exposurelog_db: + user: "exposurelog" + db: "exposurelog" +gafaelfawr_db: + user: "gafaelfawr" + db: "gafaelfawr" +narrativelog_db: + user: "narrativelog" + db: "narrativelog" +postgresStorageClass: "standard" diff --git a/services/postgres/values-red-five.yaml b/services/postgres/values-red-five.yaml deleted file mode 100644 index dc712f1b48..0000000000 --- a/services/postgres/values-red-five.yaml +++ /dev/null @@ -1,17 +0,0 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/red-five.lsst.codes/postgres' - debug: 'true' - jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - gafaelfawr_db: - user: 'gafaelfawr' - db: 'gafaelfawr' - image: - tag: '0.0.2' - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git a/services/postgres/values-roe.yaml b/services/postgres/values-roe.yaml index efdbab6e34..8f053fb744 100644 --- a/services/postgres/values-roe.yaml +++ b/services/postgres/values-roe.yaml @@ -1,18 +1,7 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/roe/postgres' - debug: 'true' - jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - 
gafaelfawr_db: - user: 'gafaelfawr' - db: 'gafaelfawr' - image: - tag: '0.0.5' - postgres_storage_class: 'standard' - -pull-secret: - enabled: true - path: secret/k8s_operator/roe/pull-secret +jupyterhub_db: + user: "jovyan" + db: "jupyterhub" +gafaelfawr_db: + user: "gafaelfawr" + db: "gafaelfawr" +postgresStorageClass: "standard" diff --git a/services/postgres/values-squash-sandbox.yaml b/services/postgres/values-squash-sandbox.yaml deleted file mode 100644 index b5bc8486a4..0000000000 --- a/services/postgres/values-squash-sandbox.yaml +++ /dev/null @@ -1,12 +0,0 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/squash-sandbox.lsst.codes/postgres' - debug: 'true' - gafaelfawr_db: - user: 'gafaelfawr' - db: 'gafaelfawr' - -pull-secret: - enabled: true - path: secret/k8s_operator/squash-sandbox.lsst.codes/pull-secret diff --git a/services/postgres/values-stable.yaml b/services/postgres/values-stable.yaml deleted file mode 100644 index 8b57a2246e..0000000000 --- a/services/postgres/values-stable.yaml +++ /dev/null @@ -1,19 +0,0 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/postgres' - debug: 'true' - jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - gafaelfawr_db: - user: 'gafaelfawr' - db: 'gafaelfawr' - postgres_storage_class: 'manual' - volume_name: 'postgres-data-volume' - image: - tag: '0.0.3' - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret diff --git a/services/postgres/values-summit.yaml b/services/postgres/values-summit.yaml index b79bc3c378..1cf382a0d9 100644 --- a/services/postgres/values-summit.yaml +++ b/services/postgres/values-summit.yaml @@ -1,22 +1,13 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/summit-lsp.lsst.codes/postgres' - debug: 'true' - jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - exposurelog_db: - user: 'exposurelog' 
- db: 'exposurelog' - gafaelfawr_db: - user: 'gafaelfawr' - db: 'gafaelfawr' - narrativelog_db: - user: 'narrativelog' - db: 'narrativelog' - postgres_storage_class: 'rook-ceph-block' - -pull-secret: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/pull-secret +jupyterhub_db: + user: "jovyan" + db: "jupyterhub" +exposurelog_db: + user: "exposurelog" + db: "exposurelog" +gafaelfawr_db: + user: "gafaelfawr" + db: "gafaelfawr" +narrativelog_db: + user: "narrativelog" + db: "narrativelog" +postgresStorageClass: "rook-ceph-block" diff --git a/services/postgres/values-tucson-teststand.yaml b/services/postgres/values-tucson-teststand.yaml index 5e09cccdda..1cf382a0d9 100644 --- a/services/postgres/values-tucson-teststand.yaml +++ b/services/postgres/values-tucson-teststand.yaml @@ -1,22 +1,13 @@ -postgres: - pull_secret: 'pull-secret' - vault_secrets: - path: 'secret/k8s_operator/tucson-teststand.lsst.codes/postgres' - debug: 'true' - jupyterhub_db: - user: 'jovyan' - db: 'jupyterhub' - exposurelog_db: - user: 'exposurelog' - db: 'exposurelog' - gafaelfawr_db: - user: 'gafaelfawr' - db: 'gafaelfawr' - narrativelog_db: - user: 'narrativelog' - db: 'narrativelog' - postgres_storage_class: 'rook-ceph-block' - -pull-secret: - enabled: true - path: secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret +jupyterhub_db: + user: "jovyan" + db: "jupyterhub" +exposurelog_db: + user: "exposurelog" + db: "exposurelog" +gafaelfawr_db: + user: "gafaelfawr" + db: "gafaelfawr" +narrativelog_db: + user: "narrativelog" + db: "narrativelog" +postgresStorageClass: "rook-ceph-block" diff --git a/services/postgres/values.yaml b/services/postgres/values.yaml new file mode 100644 index 0000000000..ded4248a51 --- /dev/null +++ b/services/postgres/values.yaml @@ -0,0 +1,37 @@ +# Default values for fileserver. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. 
+ +# -- Set to non-empty to enable debugging output +debug: "" + +image: + # -- postgres image to use + repository: "lsstsqre/lsp-postgres" + + # -- Pull policy for the postgres image + pullPolicy: "IfNotPresent" + + # -- Tag of postgres image to use + # @default -- The appVersion of the chart + tag: "" + +# -- Volume size for postgres. It can generally be very small +postgresVolumeSize: "1Gi" + +# -- Storage class for postgres volume. Set to appropriate value for your +# deployment: at GKE, "standard" (if you want SSD, "premium-rwo", but if you +# want a good database maybe it's better to use a cloud database?), on Rubin +# Observatory Rancher, "rook-ceph-block", elsewhere probably "standard" +postgresStorageClass: "standard" + +# -- Volume name for postgres, if you use an existing volume that isn't +# automatically created from the PVC by the storage driver. +volumeName: "" + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/production-tools/Chart.yaml b/services/production-tools/Chart.yaml new file mode 100644 index 0000000000..95add46d37 --- /dev/null +++ b/services/production-tools/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: production-tools +version: 1.0.0 +description: A collection of utility pages for monitoring data processing. +sources: + - https://github.com/lsst-dm/production_tools +appVersion: 0.0.17 diff --git a/services/production-tools/README.md b/services/production-tools/README.md new file mode 100644 index 0000000000..cb7fa475cb --- /dev/null +++ b/services/production-tools/README.md @@ -0,0 +1,28 @@ +# production-tools +
+ +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the production-tools deployment pod | +| environment | object | `{}` | | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the production-tools image | +| image.repository | string | `"lsstdm/production_tools"` | Image to use in the production-tools deployment | +| image.tag | string | `""` | Overrides the image tag whose default is the chart appVersion. | +| ingress.annotations | object | `{}` | Additional annotations for the ingress rule | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selection rules for the production-tools deployment pod | +| podAnnotations | object | `{}` | Annotations for the production-tools deployment pod | +| replicaCount | int | `1` | Number of web deployment pods to start | +| resources | object | `{}` | Resource limits and requests for the production-tools deployment pod | +| tolerations | list | `[]` | Tolerations for the production-tools deployment pod | diff --git a/services/production-tools/templates/_helpers.tpl b/services/production-tools/templates/_helpers.tpl new file mode 100644 index 0000000000..43cdb33e8c --- /dev/null +++ b/services/production-tools/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "production-tools.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. 
+We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "production-tools.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "production-tools.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "production-tools.labels" -}} +helm.sh/chart: {{ include "production-tools.chart" . }} +{{ include "production-tools.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "production-tools.selectorLabels" -}} +app.kubernetes.io/name: {{ include "production-tools.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/production-tools/templates/deployment.yaml b/services/production-tools/templates/deployment.yaml new file mode 100644 index 0000000000..af46c2995e --- /dev/null +++ b/services/production-tools/templates/deployment.yaml @@ -0,0 +1,116 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "production-tools.fullname" . }} + labels: + {{- include "production-tools.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "production-tools.selectorLabels" . 
| nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "production-tools.selectorLabels" . | nindent 8 }} + spec: + automountServiceAccountToken: false + imagePullSecrets: + - name: "pull-secret" + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + volumes: + # butler-secrets-raw is the secrets we get from vault + - name: "butler-secrets-raw" + secret: + secretName: "butler-secret" + # butler-secrets are the copied and chmoded versions + - name: "butler-secrets" + emptyDir: {} + - name: "cache-dir" + emptyDir: {} + - name: "tmp" + emptyDir: {} + # Have to fix permissions on the pgpass file. + # init container pattern borrowed from vo-cutouts. + initContainers: + - name: fix-secret-permissions + image: {{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }} + imagePullPolicy: Always + command: + - "/bin/bash" + - "-c" + - | + cp -RL /home/worker/secrets-raw/* /home/worker/.lsst/ + chmod 0400 /home/worker/.lsst/* + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + volumeMounts: + - name: "butler-secrets" + mountPath: "/home/worker/.lsst" + - name: "butler-secrets-raw" + mountPath: "/home/worker/secrets-raw" + readOnly: true + containers: + - name: {{ .Chart.Name }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - all + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: 8080 + protocol: TCP + readinessProbe: + httpGet: + path: /production-tools + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + - name: butler-secrets + mountPath: "/home/worker/.lsst/" + - name: "cache-dir" + mountPath: "/home/worker/cache" + - name: "tmp" + mountPath: "/tmp" + env: 
+ - name: "LOG_CACHE_DIR" + value: "/home/worker/cache" + - name: "PGPASSFILE" + value: "/home/worker/.lsst/postgres-credentials.txt" + - name: "AWS_SHARED_CREDENTIALS_FILE" + value: "/home/worker/.lsst/aws-credentials.ini" + - name: "S3_ENDPOINT_URL" + value: "https://storage.googleapis.com" + - name: "SCRIPT_NAME" + value: "/production-tools" +{{- range $key, $value := .Values.environment }} + - name: {{ $key | quote }} + value: {{ $value | quote }} +{{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/production-tools/templates/ingress.yaml b/services/production-tools/templates/ingress.yaml new file mode 100644 index 0000000000..fbf1fb3bde --- /dev/null +++ b/services/production-tools/templates/ingress.yaml @@ -0,0 +1,31 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "production-tools.fullname" . }} + labels: + {{- include "production-tools.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "exec:portal" + loginRedirect: true +template: + metadata: + name: {{ template "production-tools.fullname" . }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required ".Values.global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/production-tools" + pathType: "Prefix" + backend: + service: + name: {{ template "production-tools.fullname" . 
}} + port: + number: 8080 diff --git a/services/production-tools/templates/networkpolicy.yaml b/services/production-tools/templates/networkpolicy.yaml new file mode 100644 index 0000000000..f96da8d5f9 --- /dev/null +++ b/services/production-tools/templates/networkpolicy.yaml @@ -0,0 +1,21 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "production-tools.fullname" . }} +spec: + podSelector: + matchLabels: + {{- include "production-tools.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/production-tools/templates/service.yaml b/services/production-tools/templates/service.yaml new file mode 100644 index 0000000000..fb56f55e0e --- /dev/null +++ b/services/production-tools/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "production-tools.fullname" . }} + labels: + {{- include "production-tools.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: http + protocol: TCP + name: http + selector: + {{- include "production-tools.selectorLabels" . | nindent 4 }} diff --git a/services/production-tools/templates/vault-secrets.yaml b/services/production-tools/templates/vault-secrets.yaml new file mode 100644 index 0000000000..0b90cc3b7a --- /dev/null +++ b/services/production-tools/templates/vault-secrets.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: butler-secret + labels: + {{- include "production-tools.labels" . 
| nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/butler-secret" + type: Opaque +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: "pull-secret" + labels: + {{- include "production-tools.labels" . | nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/pull-secret" + type: "kubernetes.io/dockerconfigjson" diff --git a/services/production-tools/values-idfint.yaml b/services/production-tools/values-idfint.yaml new file mode 100644 index 0000000000..b89176b204 --- /dev/null +++ b/services/production-tools/values-idfint.yaml @@ -0,0 +1,5 @@ +environment: + BUTLER_URI: "s3://butler-us-central1-panda-dev/dc2/butler-external.yaml" + LOG_BUCKET: "drp-us-central1-logging" + LOG_PREFIX: "Panda-RubinLog" + WEB_CONCURRENCY: "4" diff --git a/services/production-tools/values.yaml b/services/production-tools/values.yaml new file mode 100644 index 0000000000..d0196e8401 --- /dev/null +++ b/services/production-tools/values.yaml @@ -0,0 +1,59 @@ +# Default values for production-tools. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Number of web deployment pods to start +replicaCount: 1 + +image: + # -- Image to use in the production-tools deployment + repository: lsstdm/production_tools + + # -- Pull policy for the production-tools image + pullPolicy: IfNotPresent + + # -- Overrides the image tag whose default is the chart appVersion. 
+ tag: "" + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# -- Annotations for the production-tools deployment pod +podAnnotations: {} + +# Environment variables passed to container +environment: {} + +ingress: + # -- Additional annotations for the ingress rule + annotations: {} + +# -- Resource limits and requests for the production-tools deployment pod +resources: {} + +# -- Node selection rules for the production-tools deployment pod +nodeSelector: {} + +# -- Tolerations for the production-tools deployment pod +tolerations: [] + +# -- Affinity rules for the production-tools deployment pod +affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/sasquatch/Chart.yaml b/services/sasquatch/Chart.yaml index eb1493fd8e..ed7fc56ce6 100644 --- a/services/sasquatch/Chart.yaml +++ b/services/sasquatch/Chart.yaml @@ -1,21 +1,42 @@ apiVersion: v2 name: sasquatch version: 1.0.0 -description: SQuaRE telemetry data service. +description: Rubin Observatory's telemetry service. 
+appVersion: 0.1.0 + dependencies: - - name: "strimzi-kafka" + - name: strimzi-kafka version: 1.0.0 - name: strimzi-registry-operator - version: 1.2.0 + version: 2.1.0 repository: https://lsst-sqre.github.io/charts/ - name: influxdb - version: 4.10.6 + version: 4.12.0 + repository: https://helm.influxdata.com/ + - name: influxdb2 + condition: influxdb2.enabled + version: 2.1.1 repository: https://helm.influxdata.com/ - name: kafka-connect-manager version: 1.0.0 - name: chronograf - version: 1.2.3 + version: 1.2.5 repository: https://helm.influxdata.com/ - name: kapacitor - version: 1.4.3 + version: 1.4.6 repository: https://helm.influxdata.com/ + - name: kafdrop + version: 1.0.0 + - name: telegraf-kafka-consumer + condition: influxdb2.enabled + version: 1.0.0 + - name: rest-proxy + condition: rest-proxy.enabled + version: 1.0.0 + + +annotations: + phalanx.lsst.io/docs: | + - id: "SQR-068" + title: "Sasquatch: beyond the EFD" + url: "https://sqr-068.lsst.io/" diff --git a/services/sasquatch/README.md b/services/sasquatch/README.md index c55c460dac..82828b3073 100644 --- a/services/sasquatch/README.md +++ b/services/sasquatch/README.md @@ -1,41 +1,73 @@ # sasquatch -SQuaRE telemetry data service. - -## Requirements - -| Repository | Name | Version | -|------------|------|---------| -| | kafka-connect-manager | 1.0.0 | -| | strimzi-kafka | 1.0.0 | -| https://helm.influxdata.com/ | chronograf | 1.2.3 | -| https://helm.influxdata.com/ | influxdb | 4.10.6 | -| https://helm.influxdata.com/ | kapacitor | 1.4.3 | -| https://lsst-sqre.github.io/charts/ | strimzi-registry-operator | 1.2.0 | +Rubin Observatory's telemetry service. 
## Values | Key | Type | Default | Description | |-----|------|---------|-------------| +| bucketmapper.image | object | `{"repository":"ghcr.io/lsst-sqre/rubin-influx-tools","tag":"0.1.23"}` | image for monitoring-related cronjobs | +| bucketmapper.image.repository | string | `"ghcr.io/lsst-sqre/rubin-influx-tools"` | repository for rubin-influx-tools | +| bucketmapper.image.tag | string | `"0.1.23"` | tag for rubin-influx-tools | | chronograf.env | object | `{"BASE_PATH":"/chronograf","CUSTOM_AUTO_REFRESH":"1s=1000","HOST_PAGE_DISABLED":true}` | Chronograf environment variables. | | chronograf.envFromSecret | string | `"sasquatch"` | Chronograf secrets, expected keys generic_client_id, generic_client_secret and token_secret. | -| chronograf.image | object | `{"repository":"quay.io/influxdb/chronograf","tag":"1.9.3"}` | Chronograf image tag. | +| chronograf.image | object | `{"repository":"quay.io/influxdb/chronograf","tag":"1.9.4"}` | Chronograf image tag. | | chronograf.ingress | object | disabled | Chronograf ingress configuration. | -| chronograf.persistence | object | `{"enabled":true,"size":"16Gi"}` | Chronograf data persistence configuration. | -| influxdb.config | object | `{"continuous_queries":{"enabled":false},"coordinator":{"log_queries_after":"15s","max_concurrent_queries":10,"query_timeout":"900s","write_timeout":"60s"},"data":{"cache_max_memory_size":0,"trace_logging_enabled":true,"wal_fsync_delay":"100ms"},"http":{"auth_enabled":true,"enabled":true,"max_row_limit":0}}` | Override InfluxDB configuration. See https://docs.influxdata.com/influxdb/v1.8/administration/config | +| chronograf.persistence | object | `{"enabled":true,"size":"100Gi"}` | Chronograf data persistence configuration. 
| +| chronograf.resources.limits.cpu | int | `4` | | +| chronograf.resources.limits.memory | string | `"16Gi"` | | +| chronograf.resources.requests.cpu | int | `1` | | +| chronograf.resources.requests.memory | string | `"1Gi"` | | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| influxdb.config | object | `{"continuous_queries":{"enabled":false},"coordinator":{"log-queries-after":"15s","max-concurrent-queries":0,"query-timeout":"0s","write-timeout":"1h"},"data":{"cache-max-memory-size":0,"trace-logging-enabled":true,"wal-fsync-delay":"100ms"},"http":{"auth-enabled":true,"enabled":true,"flux-enabled":true,"max-row-limit":0},"logging":{"level":"debug"}}` | Override InfluxDB configuration. See https://docs.influxdata.com/influxdb/v1.8/administration/config | | influxdb.image | object | `{"tag":"1.8.10"}` | InfluxDB image tag. | | influxdb.ingress | object | disabled | InfluxDB ingress configuration. | -| influxdb.initScripts | object | `{"enabled":true,"scripts":{"init.iql":"CREATE DATABASE \"telegraf\" WITH DURATION 30d REPLICATION 1 NAME \"rp_30d\"\n\n"}}` | InfluxDB Custom initialization scripts. | +| influxdb.initScripts.enabled | bool | `false` | Enable InfluxDB custom initialization script. | +| influxdb.persistence.enabled | bool | `true` | Enable persistent volume claim. By default storageClass is undefined choosing the default provisioner (standard on GKE). | +| influxdb.persistence.size | string | `"1Ti"` | Persistent volume size. 
@default 1Ti for teststand deployments | +| influxdb.resources.limits.cpu | int | `8` | | +| influxdb.resources.limits.memory | string | `"96Gi"` | | +| influxdb.resources.requests.cpu | int | `1` | | +| influxdb.resources.requests.memory | string | `"1Gi"` | | | influxdb.setDefaultUser | object | `{"enabled":true,"user":{"existingSecret":"sasquatch"}}` | Default InfluxDB user, use influxb-user and influxdb-password keys from secret. | -| kafka-connect-manager | object | `{}` | Override strimzi-kafka configuration. | +| influxdb2.adminUser.bucket | string | `"default"` | Admin default bucket. | +| influxdb2.adminUser.existingSecret | string | `"sasquatch"` | Get admin-password/admin-token keys from secret. | +| influxdb2.adminUser.organization | string | `"default"` | Admin default organization. | +| influxdb2.enabled | bool | `false` | | +| influxdb2.env[0].name | string | `"INFLUXD_STORAGE_WAL_FSYNC_DELAY"` | | +| influxdb2.env[0].value | string | `"100ms"` | | +| influxdb2.env[1].name | string | `"INFLUXD_HTTP_IDLE_TIMEOUT"` | | +| influxdb2.env[1].value | string | `"0"` | | +| influxdb2.env[2].name | string | `"INFLUXD_FLUX_LOG_ENABLED"` | | +| influxdb2.env[2].value | string | `"true"` | | +| influxdb2.env[3].name | string | `"INFLUXD_LOG_LEVEL"` | | +| influxdb2.env[3].value | string | `"debug"` | | +| influxdb2.ingress.annotations."nginx.ingress.kubernetes.io/rewrite-target" | string | `"/api/v2/$2"` | | +| influxdb2.ingress.className | string | `"nginx"` | | +| influxdb2.ingress.enabled | bool | `false` | InfluxDB2 ingress configuration | +| influxdb2.ingress.hostname | string | `""` | | +| influxdb2.ingress.path | string | `"/influxdb2(/|$)(.*)"` | | +| influxdb2.initScripts.enabled | bool | `true` | InfluxDB2 initialization scripts | +| influxdb2.initScripts.scripts."init.sh" | string | `"#!/bin/bash\ninflux bucket create --name telegra-kafka-consumer --org default\n"` | | +| influxdb2.persistence.enabled | bool | `true` | Enable persistent volume claim. 
By default storageClass is undefined choosing the default provisioner (standard on GKE). | +| influxdb2.persistence.size | string | `"1Ti"` | Persistent volume size. @default 1Ti for teststand deployments. | +| influxdb2.resources.limits.cpu | int | `8` | | +| influxdb2.resources.limits.memory | string | `"96Gi"` | | +| influxdb2.resources.requests.cpu | int | `1` | | +| influxdb2.resources.requests.memory | string | `"1Gi"` | | +| kafka-connect-manager | object | `{}` | Override kafka-connect-manager configuration. | | kapacitor.envVars | object | `{"KAPACITOR_SLACK_ENABLED":true}` | Kapacitor environment variables. | | kapacitor.existingSecret | string | `"sasquatch"` | InfluxDB credentials, use influxdb-user and influxdb-password keys from secret. | -| kapacitor.image | object | `{"repository":"kapacitor","tag":"1.6.3"}` | Kapacitor image tag. | -| kapacitor.influxURL | string | `"http://sasquatch.influxdb:8086"` | InfluxDB connection URL. | -| kapacitor.persistence | object | `{"enabled":true,"size":"16Gi"}` | Chronograf data persistence configuration. | +| kapacitor.image | object | `{"repository":"kapacitor","tag":"1.6.5"}` | Kapacitor image tag. | +| kapacitor.influxURL | string | `"http://sasquatch-influxdb.sasquatch:8086"` | InfluxDB connection URL. | +| kapacitor.persistence | object | `{"enabled":true,"size":"100Gi"}` | Chronograf data persistence configuration. | +| kapacitor.resources.limits.cpu | int | `4` | | +| kapacitor.resources.limits.memory | string | `"16Gi"` | | +| kapacitor.resources.requests.cpu | int | `1` | | +| kapacitor.resources.requests.memory | string | `"1Gi"` | | +| rest-proxy | object | `{"enabled":false}` | Override rest-proxy configuration. | | strimzi-kafka | object | `{}` | Override strimzi-kafka configuration. | -| strimzi-registry-operator | object | `{"clusterName":"sasquatch","operatorNamespace":"sasquatch","watchNamespace":"sasquatch"}` | strimzi-registry-operator configuration. 
| -| vaultSecretsPath | string | None, must be set | Path to the Vault secrets (`secret/k8s_operator//sasquatch`) | - ----------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.6.0](https://github.com/norwoodj/helm-docs/releases/v1.6.0) +| strimzi-registry-operator | object | `{"clusterName":"sasquatch","clusterNamespace":"sasquatch","operatorNamespace":"sasquatch"}` | strimzi-registry-operator configuration. | +| telegraf-kafka-consumer | object | `{}` | Override telegraf-kafka-consumer configuration. | diff --git a/services/sasquatch/README.md.gotmpl b/services/sasquatch/README.md.gotmpl deleted file mode 100644 index 4531459bbb..0000000000 --- a/services/sasquatch/README.md.gotmpl +++ /dev/null @@ -1,9 +0,0 @@ -{{ template "chart.header" . }} - -{{ template "chart.description" . }} - -{{ template "chart.requirementsSection" . }} - -{{ template "chart.valuesSection" . }} - -{{ template "helm-docs.versionFooter" . }} diff --git a/services/sasquatch/charts/kafdrop/Chart.yaml b/services/sasquatch/charts/kafdrop/Chart.yaml new file mode 100644 index 0000000000..09bb251780 --- /dev/null +++ b/services/sasquatch/charts/kafdrop/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: kafdrop +version: 1.0.0 +description: A subchart to deploy the Kafdrop UI for Sasquatch. +sources: + - https://github.com/obsidiandynamics/kafdrop +appVersion: 3.30.0 diff --git a/services/sasquatch/charts/kafdrop/README.md b/services/sasquatch/charts/kafdrop/README.md new file mode 100644 index 0000000000..8a1751d804 --- /dev/null +++ b/services/sasquatch/charts/kafdrop/README.md @@ -0,0 +1,39 @@ +# kafdrop + +A subchart to deploy the Kafdrop UI for Sasquatch. + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity configuration. 
| +| cmdArgs | string | `"--message.format=AVRO --topic.deleteEnabled=false --topic.createEnabled=false"` | Command line arguments to Kafdrop. | +| existingSecret | string | `""` | Existing k8s secret used to set kafdrop environment variables. Set SCHEMAREGISTRY_AUTH for basic auth credentials in the form username:password | +| host | string | Defaults to localhost. | The hostname to report for the RMI registry (used for JMX). | +| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy. | +| image.repository | string | `"obsidiandynamics/kafdrop"` | Kafdrop Docker image repository. | +| image.tag | string | `"3.30.0"` | Kafdrop image version. | +| ingress.annotations | object | `{}` | Ingress annotations. | +| ingress.enabled | bool | `false` | Enable Ingress. This should be true to create an ingress rule for the application. | +| ingress.hostname | string | `""` | Ingress hostname. | +| ingress.path | string | `"/kafdrop"` | Ingress path. | +| jmx.port | int | Defaults to 8686 | Port to use for JMX. If unspecified, JMX will not be exposed. | +| jvm.opts | string | `""` | JVM options. | +| kafka.broker | string | `"sasquatch-kafka-bootstrap.sasquatch:9092"` | Bootstrap list of Kafka host/port pairs | +| nodeSelector | object | `{}` | Node selector configuration. | +| podAnnotations | object | `{}` | Pod annotations. | +| replicaCount | int | `1` | Number of kafdrop pods to run in the deployment. | +| resources.limits.cpu | int | `2` | | +| resources.limits.memory | string | `"4Gi"` | | +| resources.requests.cpu | int | `1` | | +| resources.requests.memory | string | `"200Mi"` | | +| schemaregistry | string | `"http://sasquatch-schema-registry.sasquatch:8081"` | The endpoint of Schema Registry | +| server.port | int | Defaults to 9000. | The web server port to listen on. | +| server.servlet | object | Defaults to /. | The context path to serve requests on (must end with a /). 
| +| service.annotations | object | `{}` | Service annotations | +| service.port | int | `9000` | Service port | +| tolerations | list | `[]` | Tolerations configuration. | diff --git a/services/sasquatch/charts/kafdrop/templates/NOTES.txt b/services/sasquatch/charts/kafdrop/templates/NOTES.txt new file mode 100644 index 0000000000..b6244d54c8 --- /dev/null +++ b/services/sasquatch/charts/kafdrop/templates/NOTES.txt @@ -0,0 +1,9 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range .Values.ingress.hosts }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ . }}{{ $.Values.ingress.path }} +{{- end }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "chart.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl port-forward $POD_NAME 8080:80 +{{- end }} diff --git a/services/sasquatch/charts/kafdrop/templates/_helpers.tpl b/services/sasquatch/charts/kafdrop/templates/_helpers.tpl new file mode 100644 index 0000000000..ffeac36252 --- /dev/null +++ b/services/sasquatch/charts/kafdrop/templates/_helpers.tpl @@ -0,0 +1,52 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "chart.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "chart.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "chart.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "kafdrop.labels" -}} +helm.sh/chart: {{ include "chart.name" . }} +{{ include "kafdrop.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "kafdrop.selectorLabels" -}} +app.kubernetes.io/name: {{ include "chart.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/sasquatch/charts/kafdrop/templates/deployment.yaml b/services/sasquatch/charts/kafdrop/templates/deployment.yaml new file mode 100644 index 0000000000..cbd51dedc4 --- /dev/null +++ b/services/sasquatch/charts/kafdrop/templates/deployment.yaml @@ -0,0 +1,95 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "chart.fullname" . }} + labels: + {{- include "kafdrop.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "kafdrop.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "kafdrop.selectorLabels" . | nindent 8 }} + annotations: + {{- with .Values.podAnnotations }} + {{ toYaml . 
| indent 8 }} + {{- end }} + spec: + automountServiceAccountToken: false + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + {{- if .Values.existingSecret -}} + envFrom: + - secretRef: + name: {{ .Values.existingSecret }} + {{- end }} + env: + - name: KAFKA_BROKERCONNECT + value: {{ .Values.kafka.broker | quote }} + - name: JVM_OPTS + value: {{ .Values.jvm.opts | quote }} + - name: HOST + value: {{ .Values.host | quote }} + - name: JMX_PORT + value: {{ .Values.jmx.port | quote }} + - name: SERVER_SERVLET_CONTEXTPATH + value: {{ .Values.server.servlet.contextPath | trimSuffix "/" | quote }} + - name: SERVER_PORT + value: {{ .Values.server.port | quote }} + - name: CMD_ARGS + value: {{ .Values.cmdArgs | quote }} + - name: SCHEMAREGISTRY_CONNECT + value: {{ .Values.schemaregistry | quote }} + - name: KAFKA_PROPERTIES_FILE + value: "/tmp/kafka.properties" + - name: KAFKA_PROPERTIES + valueFrom: + secretKeyRef: + name: sasquatch + key: kafdrop-kafka-properties + ports: + - name: http + containerPort: {{ .Values.server.port }} + protocol: TCP + livenessProbe: + httpGet: + path: "{{ .Values.server.servlet.contextPath | trimSuffix "/" }}/actuator/health" + port: http + initialDelaySeconds: 180 + periodSeconds: 30 + timeoutSeconds: 10 + readinessProbe: + httpGet: + path: "{{ .Values.server.servlet.contextPath | trimSuffix "/" }}/actuator/health" + port: http + initialDelaySeconds: 20 + periodSeconds: 5 + timeoutSeconds: 10 + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{ toYaml . | indent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{ toYaml . 
| indent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{ toYaml . | indent 8 }} + {{- end }} diff --git a/services/sasquatch/charts/kafdrop/templates/ingress.yaml b/services/sasquatch/charts/kafdrop/templates/ingress.yaml new file mode 100644 index 0000000000..7305fbf063 --- /dev/null +++ b/services/sasquatch/charts/kafdrop/templates/ingress.yaml @@ -0,0 +1,27 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "chart.fullname" . -}} +{{- $ingressPath := .Values.ingress.path -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "kafdrop.labels" . | nindent 4 }} + annotations: + {{- with .Values.ingress.annotations }} + {{ toYaml . | indent 4 }} + {{- end }} +spec: + ingressClassName: "nginx" + rules: + - host: {{ .Values.ingress.hostname | quote }} + http: + paths: + - path: {{ $ingressPath }} + pathType: Prefix + backend: + service: + name: {{ $fullName }} + port: + number: {{ .Values.service.port }} +{{- end }} diff --git a/services/sasquatch/charts/kafdrop/templates/service.yaml b/services/sasquatch/charts/kafdrop/templates/service.yaml new file mode 100644 index 0000000000..720eb65e0d --- /dev/null +++ b/services/sasquatch/charts/kafdrop/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "chart.fullname" . }} + labels: + {{- include "kafdrop.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "kafdrop.selectorLabels" . | nindent 4 }} diff --git a/services/sasquatch/charts/kafdrop/values.yaml b/services/sasquatch/charts/kafdrop/values.yaml new file mode 100644 index 0000000000..e944c3762d --- /dev/null +++ b/services/sasquatch/charts/kafdrop/values.yaml @@ -0,0 +1,84 @@ +# Default values for Kafdrop + +# -- Number of kafdrop pods to run in the deployment. 
+replicaCount: 1 + +image: + # -- Kafdrop Docker image repository. + repository: obsidiandynamics/kafdrop + # -- Image pull policy. + pullPolicy: IfNotPresent + # -- Kafdrop image version. + tag: 3.30.0 + +kafka: + # -- Bootstrap list of Kafka host/port pairs + broker: "sasquatch-kafka-bootstrap.sasquatch:9092" + +jvm: + # -- JVM options. + opts: "" + +# -- The hostname to report for the RMI registry (used for JMX). +# @default -- Defaults to localhost. +host: localhost + +jmx: + # -- Port to use for JMX. If unspecified, JMX will not be exposed. + # @default -- Defaults to 8686 + port: 8686 + +server: + # -- The context path to serve requests on (must end with a /). + # @default -- Defaults to /. + servlet: + contextPath: /kafdrop + # -- The web server port to listen on. + # @default -- Defaults to 9000. + port: 9000 + +# -- The endpoint of Schema Registry +schemaregistry: "http://sasquatch-schema-registry.sasquatch:8081" + +# -- Existing k8s secrect use to set kafdrop environment variables. +# Set SCHEMAREGISTRY_AUTH for basic auth credentials in the form username:password +existingSecret: "" + +# -- Command line arguments to Kafdrop. +cmdArgs: "--message.format=AVRO --topic.deleteEnabled=false --topic.createEnabled=false" + +service: + # -- Service annotations + annotations: {} + # -- Service port + port: 9000 + +ingress: + # -- Enable Ingress. This should be true to create an ingress rule for the application. + enabled: false + # -- Ingress annotations. + annotations: {} + # -- Ingress hostname. + hostname: "" + # -- Ingress path. + path: /kafdrop + +resources: + requests: + memory: 200Mi + cpu: 1 + limits: + memory: 4Gi + cpu: 2 + +# -- Node selector configuration. +nodeSelector: {} + +# -- Tolerations configuration. +tolerations: [] + +# -- Affinity configuration. +affinity: {} + +# -- Pod annotations. 
+podAnnotations: {} diff --git a/services/sasquatch/charts/kafka-connect-manager/Chart.yaml b/services/sasquatch/charts/kafka-connect-manager/Chart.yaml index 93cbc20ede..a81ef857bc 100644 --- a/services/sasquatch/charts/kafka-connect-manager/Chart.yaml +++ b/services/sasquatch/charts/kafka-connect-manager/Chart.yaml @@ -1,4 +1,5 @@ apiVersion: v2 name: kafka-connect-manager version: 1.0.0 -description: A sub chart to deploy the Kafka connectors used by Sasquatch. +description: A subchart to deploy the Kafka connectors used by Sasquatch. +appVersion: 0.9.3 diff --git a/services/sasquatch/charts/kafka-connect-manager/README.md b/services/sasquatch/charts/kafka-connect-manager/README.md index 41b0cfe86f..f03f7b4344 100644 --- a/services/sasquatch/charts/kafka-connect-manager/README.md +++ b/services/sasquatch/charts/kafka-connect-manager/README.md @@ -1,6 +1,6 @@ # kafka-connect-manager -A sub chart to deploy the Kafka connectors used by Sasquatch. +A subchart to deploy the Kafka connectors used by Sasquatch. ## Values @@ -8,23 +8,24 @@ A sub chart to deploy the Kafka connectors used by Sasquatch. |-----|------|---------|-------------| | env.kafkaBrokerUrl | string | `"sasquatch-kafka-bootstrap.sasquatch:9092"` | Kafka broker URL. | | env.kafkaConnectUrl | string | `"http://sasquatch-connect-api.sasquatch:8083"` | Kafka connnect URL. | -| image.pullPolicy | string | `"Always"` | | +| env.kafkaUsername | string | `"kafka-connect-manager"` | Username for SASL authentication. | +| image.pullPolicy | string | `"IfNotPresent"` | | | image.repository | string | `"lsstsqre/kafkaconnect"` | | -| image.tag | string | `"0.9.3"` | | -| influxdbSink.influxdb-sink.autoUpdate | bool | `true` | If autoUpdate is enabled, check for new kafka topics. | -| influxdbSink.influxdb-sink.checkInterval | string | `"15000"` | The interval, in milliseconds, to check for new topics and update the connector. 
| -| influxdbSink.influxdb-sink.connectInfluxDb | string | `"efd"` | InfluxDB database to write to. | -| influxdbSink.influxdb-sink.connectInfluxErrorPolicy | string | `"THROW"` | Error policy. | -| influxdbSink.influxdb-sink.connectInfluxMaxRetries | string | `"10"` | The maximum number of times a message is retried. | -| influxdbSink.influxdb-sink.connectInfluxRetryInterval | string | `"60000"` | The interval, in milliseconds, between retries. Only valid when the connectInfluxErrorPolicy is set to `RETRY`. | -| influxdbSink.influxdb-sink.connectInfluxUrl | string | `"http://sasquatch.influxdb:8086"` | InfluxDB URL, can be internal to the cluster. | -| influxdbSink.influxdb-sink.connectProgressEnabled | bool | `false` | Enables the output for how many records have been processed. | -| influxdbSink.influxdb-sink.enabled | bool | `false` | Whether this connector instance is deployed. | -| influxdbSink.influxdb-sink.excludedTopicRegex | string | `""` | Regex to exclude topics from the list of selected topics from Kafka. | -| influxdbSink.influxdb-sink.name | string | `"influxdb-sink"` | Name of the connector instance to create. | -| influxdbSink.influxdb-sink.tasksMax | int | `1` | Number of KafkaConnect tasks. | -| influxdbSink.influxdb-sink.timestamp | string | `"private_efdStamp"` | Timestamp field to be used as the InfluxDB time, if not specified `sys_time()` the current timestamp. | -| influxdbSink.influxdb-sink.topicRegex | string | `"lsst.sal.*"` | Regex to select topics from Kafka. | +| image.tag | string | `"1.0.2"` | | +| influxdbSink.autoUpdate | bool | `true` | If autoUpdate is enabled, check for new kafka topics. | +| influxdbSink.checkInterval | string | `"15000"` | The interval, in milliseconds, to check for new topics and update the connector. | +| influxdbSink.connectInfluxDb | string | `"efd"` | InfluxDB database to write to. | +| influxdbSink.connectInfluxErrorPolicy | string | `"NOOP"` | Error policy, see connector documentation for details. 
| +| influxdbSink.connectInfluxMaxRetries | string | `"10"` | The maximum number of times a message is retried. | +| influxdbSink.connectInfluxRetryInterval | string | `"60000"` | The interval, in milliseconds, between retries. Only valid when the connectInfluxErrorPolicy is set to `RETRY`. | +| influxdbSink.connectInfluxUrl | string | `"http://sasquatch-influxdb.sasquatch:8086"` | InfluxDB URL. | +| influxdbSink.connectProgressEnabled | bool | `false` | Enables the output for how many records have been processed. | +| influxdbSink.connectors | object | `{"test":{"enabled":false,"topicsRegex":".*Test"}}` | Connector instances to deploy. | +| influxdbSink.connectors.test.enabled | bool | `false` | Whether this connector instance is deployed. | +| influxdbSink.connectors.test.topicsRegex | string | `".*Test"` | Regex to select topics from Kafka. | +| influxdbSink.excludedTopicsRegex | string | `""` | Regex to exclude topics from the list of selected topics from Kafka. | +| influxdbSink.tasksMax | int | `1` | Maximum number of tasks to run the connector. | +| influxdbSink.timestamp | string | `"private_efdStamp"` | Timestamp field to be used as the InfluxDB time, if not specified use `sys_time()`. | | jdbcSink.autoCreate | string | `"true"` | Whether to automatically create the destination table. | | jdbcSink.autoEvolve | string | `"false"` | Whether to automatically add columns in the table schema. | | jdbcSink.batchSize | string | `"3000"` | Specifies how many records to attempt to batch together for insertion into the destination table. | @@ -38,16 +39,6 @@ A sub chart to deploy the Kafka connectors used by Sasquatch. | jdbcSink.tableNameFormat | string | `"${topic}"` | A format string for the destination table name. | | jdbcSink.tasksMax | string | `"10"` | Number of Kafka Connect tasks. | | jdbcSink.topicRegex | string | `".*"` | Regex for selecting topics. 
| -| mirrorMaker2.enabled | bool | `false` | Whether the MirrorMaker 2 connectors (heartbeat, checkpoint and mirror-source) are deployed. | -| mirrorMaker2.name | string | `"replicator"` | Name od the connector to create. | -| mirrorMaker2.replicationPolicySeparator | string | `"."` | Separator used to format the remote topic name. Use an empty string if sourceClusterAlias is empty. | -| mirrorMaker2.sourceClusterAlias | string | `"src"` | Alias for the source cluster. The remote topic name is prefixed by this value. Use an empty string to preserve the name of the source topic in the destination cluster. | -| mirrorMaker2.sourceClusterBootstrapServers | string | `"localhost:31090"` | Source Kafka cluster. | -| mirrorMaker2.syncTopicAclsEnabled | bool | `false` | Whether to monitor source cluster ACLs for changes. | -| mirrorMaker2.targetClusterAlias | string | `"destn"` | Name of the destination cluster. | -| mirrorMaker2.targetClusterBootstrapServers | string | `"localhost:31090"` | Destination Kafka cluster. | -| mirrorMaker2.tasksMax | int | `1` | Number of Kafka Connect tasks. | -| mirrorMaker2.topicRegex | string | `".*"` | Regex for selecting topics. Comma-separated lists are also supported. | | s3Sink.behaviorOnNullValues | string | `"fail"` | How to handle records with a null value (for example, Kafka tombstone records). Valid options are ignore and fail. | | s3Sink.checkInterval | string | `"15000"` | The interval, in milliseconds, to check for new topics and update the connector. | | s3Sink.enabled | bool | `false` | Whether the Amazon S3 Sink connector is deployed. | @@ -72,6 +63,3 @@ A sub chart to deploy the Kafka connectors used by Sasquatch. | s3Sink.timezone | string | `"UTC"` | The timezone to use when partitioning with TimeBasedPartitioner. | | s3Sink.topicsDir | string | `"topics"` | Top level directory to store the data ingested from Kafka. | | s3Sink.topicsRegex | string | `".*"` | Regex to select topics from Kafka. 
| - ----------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.6.0](https://github.com/norwoodj/helm-docs/releases/v1.6.0) diff --git a/services/sasquatch/charts/kafka-connect-manager/templates/influxdb_sink.yaml b/services/sasquatch/charts/kafka-connect-manager/templates/influxdb_sink.yaml index 63f8634104..a8617fc8c7 100644 --- a/services/sasquatch/charts/kafka-connect-manager/templates/influxdb_sink.yaml +++ b/services/sasquatch/charts/kafka-connect-manager/templates/influxdb_sink.yaml @@ -1,21 +1,19 @@ -{{- range .Values.influxdbSink }} -{{- with . }} -{{- if .enabled }} +{{- range $key, $value := .Values.influxdbSink.connectors }} +{{- if $value.enabled }} +--- apiVersion: apps/v1 kind: Deployment metadata: - name: sasquatch-{{ .name }} + name: sasquatch-influxdb-sink-{{ $key }} spec: replicas: 1 selector: matchLabels: app: kafka-connect-manager - app.kubernetes.io/instance: {{ $.Release.Name }} template: metadata: labels: app: kafka-connect-manager - app.kubernetes.io/instance: {{ $.Release.Name }} spec: securityContext: runAsNonRoot: true @@ -35,16 +33,16 @@ spec: - kafkaconnect - create - influxdb-sink - {{- if .autoUpdate }} + {{- if $.Values.influxdbSink.autoUpdate }} - --auto-update {{- end }} env: - name: KAFKA_CONNECT_NAME - value: {{ .name | quote }} + value: influxdb-sink-{{ $key }} - name: KAFKA_CONNECT_INFLUXDB_URL - value: {{ .connectInfluxUrl | quote }} + value: {{ $.Values.influxdbSink.connectInfluxUrl | quote }} - name: KAFKA_CONNECT_DATABASE - value: {{ .connectInfluxDb | quote }} + value: {{ $.Values.influxdbSink.connectInfluxDb | quote }} - name: KAFKA_CONNECT_INFLUXDB_USERNAME valueFrom: secretKeyRef: @@ -56,27 +54,33 @@ spec: name: sasquatch key: influxdb-password - name: KAFKA_CONNECT_TASKS_MAX - value: {{ .tasksMax | quote }} + value: {{ $.Values.influxdbSink.tasksMax | quote }} - name: KAFKA_CONNECT_TOPIC_REGEX - value: {{ .topicRegex | quote }} + value: {{ $value.topicsRegex | quote }} - 
name: KAFKA_CONNECT_CHECK_INTERVAL - value: {{ .checkInterval | quote }} + value: {{ $.Values.influxdbSink.checkInterval | quote }} - name: KAFKA_CONNECT_EXCLUDED_TOPIC_REGEX - value: {{ .excludedTopicRegex | quote }} + value: {{ $.Values.influxdbSink.excludedTopicsRegex | quote }} - name: KAFKA_CONNECT_INFLUXDB_TIMESTAMP - value: {{ .timestamp | quote }} + value: {{ $.Values.influxdbSink.timestamp | quote }} - name: KAFKA_CONNECT_ERROR_POLICY - value: {{ .connectInfluxErrorPolicy | quote }} + value: {{ $.Values.influxdbSink.connectInfluxErrorPolicy | quote }} - name: KAFKA_CONNECT_MAX_RETRIES - value: {{ .connectInfluxMaxRetries | quote }} + value: {{ $.Values.influxdbSink.connectInfluxMaxRetries | quote }} - name: KAFKA_CONNECT_RETRY_INTERVAL - value: {{ .connectInfluxRetryInterval | quote }} + value: {{ $.Values.influxdbSink.connectInfluxRetryInterval | quote }} - name: KAFKA_CONNECT_PROGRESS_ENABLED - value: {{ .connectProgressEnabled | quote }} + value: {{ $.Values.influxdbSink.connectProgressEnabled | quote }} - name: KAFKA_BROKER_URL value: {{ $.Values.env.kafkaBrokerUrl | quote }} - name: KAFKA_CONNECT_URL value: {{ $.Values.env.kafkaConnectUrl | quote }} -{{- end }} + - name: KAFKA_USERNAME + value: {{ $.Values.env.kafkaUsername | quote }} + - name: KAFKA_PASSWORD + valueFrom: + secretKeyRef: + name: sasquatch + key: kafka-connect-manager-password {{- end }} {{- end }} diff --git a/services/sasquatch/charts/kafka-connect-manager/templates/mirrormaker2.yaml b/services/sasquatch/charts/kafka-connect-manager/templates/mirrormaker2.yaml deleted file mode 100644 index 285855379f..0000000000 --- a/services/sasquatch/charts/kafka-connect-manager/templates/mirrormaker2.yaml +++ /dev/null @@ -1,100 +0,0 @@ -{{ if .Values.mirrorMaker2.enabled }} -apiVersion: v1 -kind: ConfigMap -metadata: - name: sasquatch-{{ .Values.mirrorMaker2.name }} -data: - heartbeat-config.json: |+ - { - "connector.class": "org.apache.kafka.connect.mirror.MirrorHeartbeatConnector", - "name": 
"{{ .Values.mirrorMaker2.name }}-heartbeat", - "source.cluster.alias": {{ .Values.mirrorMaker2.sourceClusterAlias | quote }}, - "replication.policy.separator": {{ .Values.mirrorMaker2.replicationPolicySeparator | quote }}, - "source.cluster.bootstrap.servers": {{ .Values.mirrorMaker2.sourceClusterBootstrapServers | quote }}, - "target.cluster.alias": {{ .Values.mirrorMaker2.targetClusterAlias | quote }}, - "topics": {{ .Values.mirrorMaker2.topicRegex | quote }} - } - checkpoint-config.json: |+ - { - "connector.class": "org.apache.kafka.connect.mirror.MirrorCheckpointConnector", - "name": "{{ .Values.mirrorMaker2.name }}-checkpoint", - "source.cluster.alias": {{ .Values.mirrorMaker2.sourceClusterAlias | quote }}, - "replication.policy.separator": {{ .Values.mirrorMaker2.replicationPolicySeparator | quote }}, - "source.cluster.bootstrap.servers": {{ .Values.mirrorMaker2.sourceClusterBootstrapServers | quote }}, - "target.cluster.alias": {{ .Values.mirrorMaker2.targetClusterAlias | quote }}, - "target.cluster.bootstrap.servers": {{ .Values.mirrorMaker2.targetClusterBootstrapServers | quote }}, - "topics": {{ .Values.mirrorMaker2.topicRegex | quote }} - } - mirror-source-config.json: |+ - { - "connector.class": "org.apache.kafka.connect.mirror.MirrorSourceConnector", - "name": "{{ .Values.mirrorMaker2.name }}-mirror-source", - "source.cluster.alias": {{ .Values.mirrorMaker2.sourceClusterAlias | quote }}, - "replication.policy.separator": {{ .Values.mirrorMaker2.replicationPolicySeparator | quote }}, - "source.cluster.bootstrap.servers": {{ .Values.mirrorMaker2.sourceClusterBootstrapServers | quote }}, - "target.cluster.alias": {{ .Values.mirrorMaker2.targetClusterAlias | quote }}, - "target.cluster.bootstrap.servers": {{ .Values.mirrorMaker2.targetClusterBootstrapServers | quote }}, - "tasks.max": {{ .Values.mirrorMaker2.tasksMax | quote }}, - "topics": {{ .Values.mirrorMaker2.topicRegex | quote }}, - "sync.topic.acls.enabled": {{ 
.Values.mirrorMaker2.syncTopicAclsEnabled | quote }}, - "key.converter": "org.apache.kafka.connect.converters.ByteArrayConverter", - "value.converter": "org.apache.kafka.connect.converters.ByteArrayConverter" - } ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: sasquatch-{{ .Values.mirrorMaker2.name }} - labels: - app: kafka-connect-manager - app.kubernetes.io/instance: {{ .Release.Name }} -spec: - replicas: 1 - selector: - matchLabels: - app: kafka-connect-manager - app.kubernetes.io/instance: {{ .Release.Name }} - template: - metadata: - labels: - app: kafka-connect-manager - app.kubernetes.io/instance: {{ .Release.Name }} - spec: - securityContext: - runAsNonRoot: true - runAsUser: 1000 - runAsGroup: 1000 - containers: - - name: {{ include "kafka-connect-manager.name" . }} - securityContext: - allowPrivilegeEscalation: false - capabilities: - drop: - - "all" - readOnlyRootFilesystem: true - image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" - imagePullPolicy: {{ .Values.image.pullPolicy }} - command: - - kafkaconnect - - create - - mirrormaker2 - - --heartbeat - - /etc/mirrormaker2/heartbeat-config.json - - --checkpoint - - /etc/mirrormaker2/checkpoint-config.json - - --mirror-source - - /etc/mirrormaker2/mirror-source-config.json - - --show-status - env: - - name: KAFKA_BROKER_URL - value: {{ .Values.env.kafkaBrokerUrl | quote }} - - name: KAFKA_CONNECT_URL - value: {{ .Values.env.kafkaConnectUrl | quote }} - volumeMounts: - - name: mirrormaker2 - mountPath: /etc/mirrormaker2 - volumes: - - name: mirrormaker2 - configMap: - name: {{ .Values.mirrorMaker2.name }} -{{ end }} \ No newline at end of file diff --git a/services/sasquatch/charts/kafka-connect-manager/values.yaml b/services/sasquatch/charts/kafka-connect-manager/values.yaml index 5719a51034..d3f74b3f47 100644 --- a/services/sasquatch/charts/kafka-connect-manager/values.yaml +++ b/services/sasquatch/charts/kafka-connect-manager/values.yaml @@ -2,40 +2,39 @@ # See also 
https://kafka-connect-manager.lsst.io image: repository: lsstsqre/kafkaconnect - tag: 0.9.3 - pullPolicy: Always + tag: 1.0.2 + pullPolicy: IfNotPresent influxdbSink: - # Repeat this block to create multiple instances of this connector. - influxdb-sink: - # -- Name of the connector instance to create. - name: influxdb-sink - # -- Whether this connector instance is deployed. - enabled: false - # -- InfluxDB URL, can be internal to the cluster. - connectInfluxUrl: "http://sasquatch.influxdb:8086" - # -- InfluxDB database to write to. - connectInfluxDb: "efd" - # -- Number of KafkaConnect tasks. - tasksMax: 1 - # -- Regex to select topics from Kafka. - topicRegex: "lsst.sal.*" - # -- If autoUpdate is enabled, check for new kafka topics. - autoUpdate: true - # -- The interval, in milliseconds, to check for new topics and update the connector. - checkInterval: "15000" - # -- Regex to exclude topics from the list of selected topics from Kafka. - excludedTopicRegex: "" - # -- Timestamp field to be used as the InfluxDB time, if not specified `sys_time()` the current timestamp. - timestamp: private_efdStamp - # -- Error policy. - connectInfluxErrorPolicy: THROW - # -- The maximum number of times a message is retried. - connectInfluxMaxRetries: "10" - # -- The interval, in milliseconds, between retries. Only valid when the connectInfluxErrorPolicy is set to `RETRY`. - connectInfluxRetryInterval: "60000" - # -- Enables the output for how many records have been processed. - connectProgressEnabled: false + # -- InfluxDB URL. + connectInfluxUrl: "http://sasquatch-influxdb.sasquatch:8086" + # -- InfluxDB database to write to. + connectInfluxDb: "efd" + # -- Maximum number of tasks to run the connector. + tasksMax: 1 + # -- If autoUpdate is enabled, check for new kafka topics. + autoUpdate: true + # -- The interval, in milliseconds, to check for new topics and update the connector.
+ checkInterval: "15000" + # -- Timestamp field to be used as the InfluxDB time, if not specified use `sys_time()`. + timestamp: private_efdStamp + # -- Error policy, see connector documentation for details. + connectInfluxErrorPolicy: NOOP + # -- The maximum number of times a message is retried. + connectInfluxMaxRetries: "10" + # -- The interval, in milliseconds, between retries. Only valid when the connectInfluxErrorPolicy is set to `RETRY`. + connectInfluxRetryInterval: "60000" + # -- Enables the output for how many records have been processed. + connectProgressEnabled: false + # -- Regex to exclude topics from the list of selected topics from Kafka. + excludedTopicsRegex: "" + # -- Connector instances to deploy. + connectors: + test: + # -- Whether this connector instance is deployed. + enabled: false + # -- Regex to select topics from Kafka. + topicsRegex: ".*Test" # The s3Sink connector assumes Parquet format with Snappy compression # and a time based partitioner. @@ -92,31 +91,6 @@ s3Sink: # -- The object storage connection URL, for non-AWS s3 providers. storeUrl: "" -mirrorMaker2: - # -- Whether the MirrorMaker 2 connectors (heartbeat, checkpoint and mirror-source) are deployed. - enabled: false - # -- Name od the connector to create. - name: "replicator" - # -- Source Kafka cluster. - sourceClusterBootstrapServers: "localhost:31090" - # -- Alias for the source cluster. The remote topic name is prefixed by this value. - # Use an empty string to preserve the name of the source topic in the destination cluster. - sourceClusterAlias: "src" - # -- Separator used to format the remote topic name. - # Use an empty string if sourceClusterAlias is empty. - replicationPolicySeparator: "." - # -- Destination Kafka cluster. - targetClusterBootstrapServers: "localhost:31090" - # -- Name of the destination cluster. - targetClusterAlias: "destn" - # -- Regex for selecting topics. - # Comma-separated lists are also supported.
- topicRegex: ".*" - # -- Number of Kafka Connect tasks. - tasksMax: 1 - # -- Whether to monitor source cluster ACLs for changes. - syncTopicAclsEnabled: false - jdbcSink: # -- Whether the JDBC Sink connector is deployed. enabled: false @@ -150,3 +124,5 @@ env: kafkaBrokerUrl: "sasquatch-kafka-bootstrap.sasquatch:9092" # -- Kafka connnect URL. kafkaConnectUrl: "http://sasquatch-connect-api.sasquatch:8083" + # -- Username for SASL authentication. + kafkaUsername: "kafka-connect-manager" diff --git a/services/sasquatch/charts/rest-proxy/Chart.yaml b/services/sasquatch/charts/rest-proxy/Chart.yaml new file mode 100644 index 0000000000..54dcab4883 --- /dev/null +++ b/services/sasquatch/charts/rest-proxy/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: rest-proxy +version: 1.0.0 +description: A subchart to deploy Confluent REST proxy for Sasquatch. +sources: + - https://github.com/confluentinc/kafka-rest +appVersion: 6.2.8 diff --git a/services/sasquatch/charts/rest-proxy/README.md b/services/sasquatch/charts/rest-proxy/README.md new file mode 100644 index 0000000000..757f30220b --- /dev/null +++ b/services/sasquatch/charts/rest-proxy/README.md @@ -0,0 +1,36 @@ +# rest-proxy + +A subchart to deploy Confluent REST proxy for Sasquatch. + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity configuration. | +| configurationOverrides | object | `{"client.sasl.mechanism":"SCRAM-SHA-512","client.security.protocol":"SASL_PLAINTEXT"}` | Kafka REST configuration options | +| customEnv | string | `nil` | Kafka REST additional env variables | +| heapOptions | string | `"-Xms512M -Xmx512M"` | Kafka REST proxy JVM Heap Option | +| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy. | +| image.repository | string | `"confluentinc/cp-kafka-rest"` | Kafka REST proxy image repository. | +| image.tag | string | `"6.2.8"` | Kafka REST proxy image tag. 
| +| ingress.annotations | object | `{"nginx.ingress.kubernetes.io/rewrite-target":"/$2"}` | Ingress annotations. | +| ingress.enabled | bool | `false` | Enable Ingress. This should be true to create an ingress rule for the application. | +| ingress.hostname | string | `""` | Ingress hostname. | +| ingress.path | string | `"/sasquatch-rest-proxy(/|$)(.*)"` | Ingress path. | +| kafka.bootstrapServers | string | `"SASL_PLAINTEXT://sasquatch-kafka-bootstrap.sasquatch:9092"` | Kafka bootstrap servers, use the internal listener on port 9092 with SASL connection. | +| kafka.cluster.name | string | `"sasquatch"` | Name of the Strimzi Kafka cluster. | +| kafka.topics | string | `nil` | List of Kafka topics to create and expose through the REST proxy API | +| nodeSelector | object | `{}` | Node selector configuration. | +| podAnnotations | object | `{}` | Pod annotations. | +| replicaCount | int | `1` | Number of Kafka REST proxy pods to run in the deployment. | +| resources.limits.cpu | int | `2` | Kafka REST proxy cpu limits | +| resources.limits.memory | string | `"4Gi"` | Kafka REST proxy memory limits | +| resources.requests.cpu | int | `1` | Kafka REST proxy cpu requests | +| resources.requests.memory | string | `"200Mi"` | Kafka REST proxy memory requests | +| schemaregistry.url | string | `"http://sasquatch-schema-registry.sasquatch:8081"` | Schema registry URL | +| service.port | int | `8082` | Kafka REST proxy service port | +| tolerations | list | `[]` | Tolerations configuration. | diff --git a/services/sasquatch/charts/rest-proxy/templates/_helpers.tpl b/services/sasquatch/charts/rest-proxy/templates/_helpers.tpl new file mode 100644 index 0000000000..bc68e03922 --- /dev/null +++ b/services/sasquatch/charts/rest-proxy/templates/_helpers.tpl @@ -0,0 +1,52 @@ +{{/* vim: set filetype=mustache: */}} +{{/*
Expand the name of the chart.
+*/}} +{{- define "chart.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "chart.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "chart.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "rest-proxy.labels" -}} +helm.sh/chart: {{ include "chart.name" . }} +{{ include "rest-proxy.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "rest-proxy.selectorLabels" -}} +app.kubernetes.io/name: {{ include "chart.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} \ No newline at end of file diff --git a/services/sasquatch/charts/rest-proxy/templates/deployment.yaml b/services/sasquatch/charts/rest-proxy/templates/deployment.yaml new file mode 100644 index 0000000000..51b41c52a8 --- /dev/null +++ b/services/sasquatch/charts/rest-proxy/templates/deployment.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "chart.fullname" . }} + labels: + {{- include "rest-proxy.labels" . 
| nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "rest-proxy.selectorLabels" . | nindent 6 }} + template: + metadata: + labels: + {{- include "rest-proxy.selectorLabels" . | nindent 8 }} + annotations: + {{- with .Values.podAnnotations }} + {{ toYaml . | indent 8 }} + {{- end }} + spec: + automountServiceAccountToken: false + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + - name: {{ include "chart.name" . }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: "{{ .Values.image.pullPolicy }}" + ports: + - name: rest-proxy + containerPort: {{ .Values.service.port }} + protocol: TCP + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + env: + - name: KAFKA_REST_HOST_NAME + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: KAFKA_REST_BOOTSTRAP_SERVERS + value: "{{ .Values.kafka.bootstrapServers }}" + - name: KAFKA_REST_SCHEMA_REGISTRY_URL + value: "{{ .Values.schemaregistry.url }}" + - name: KAFKA_REST_HEAP_OPTS + value: "{{ .Values.heapOptions }}" + - name: KAFKA_REST_CLIENT_SASL_JAAS_CONFIG + valueFrom: + secretKeyRef: + name: sasquatch + key: rest-proxy-sasl-jass-config + {{- range $key, $value := .Values.configurationOverrides }} + - name: {{ printf "KAFKA_REST_%s" $key | replace "." 
"_" | upper | quote }} + value: {{ $value | quote }} + {{- end }} + {{- range $key, $value := .Values.customEnv }} + - name: {{ $key | quote }} + value: {{ $value | quote }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- if .Values.nodeSelector }} + nodeSelector: + {{ toYaml .Values.nodeSelector | indent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: + {{ toYaml .Values.tolerations | indent 8 }} + {{- end }} + {{- if .Values.affinity }} + affinity: + {{ toYaml .Values.affinity | indent 8 }} + {{- end }} \ No newline at end of file diff --git a/services/sasquatch/charts/rest-proxy/templates/ingress.yaml b/services/sasquatch/charts/rest-proxy/templates/ingress.yaml new file mode 100644 index 0000000000..a08ef2ee0e --- /dev/null +++ b/services/sasquatch/charts/rest-proxy/templates/ingress.yaml @@ -0,0 +1,27 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "chart.fullname" . -}} +{{- $ingressPath := .Values.ingress.path -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ $fullName }} + labels: + {{- include "rest-proxy.labels" . | nindent 4 }} + annotations: + {{- with .Values.ingress.annotations }} + {{ toYaml . | indent 4 }} + {{- end }} +spec: + ingressClassName: "nginx" + rules: + - host: {{ .Values.ingress.hostname | quote }} + http: + paths: + - path: {{ $ingressPath }} + pathType: Prefix + backend: + service: + name: {{ $fullName }} + port: + number: {{ .Values.service.port }} +{{- end }} diff --git a/services/sasquatch/charts/rest-proxy/templates/service.yaml b/services/sasquatch/charts/rest-proxy/templates/service.yaml new file mode 100644 index 0000000000..cc0d190cc7 --- /dev/null +++ b/services/sasquatch/charts/rest-proxy/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "chart.fullname" . }} + labels: + {{- include "rest-proxy.labels" . 
| nindent 4 }} +spec: + type: ClusterIP + ports: + - port: {{ .Values.service.port }} + targetPort: rest-proxy + protocol: TCP + name: rest-proxy + selector: + {{- include "rest-proxy.selectorLabels" . | nindent 4 }} \ No newline at end of file diff --git a/services/sasquatch/charts/rest-proxy/templates/topics.yaml b/services/sasquatch/charts/rest-proxy/templates/topics.yaml new file mode 100644 index 0000000000..e46d717fb8 --- /dev/null +++ b/services/sasquatch/charts/rest-proxy/templates/topics.yaml @@ -0,0 +1,14 @@ + +{{- $cluster := .Values.kafka.cluster.name }} +{{- range $topic := .Values.kafka.topics }} +--- +apiVersion: kafka.strimzi.io/v1beta1 +kind: KafkaTopic +metadata: + name: {{ $topic }} + labels: + strimzi.io/cluster: {{ $cluster }} +spec: + replicas: 3 + partitions: 1 +{{- end }} diff --git a/services/sasquatch/charts/rest-proxy/templates/user.yaml b/services/sasquatch/charts/rest-proxy/templates/user.yaml new file mode 100644 index 0000000000..a489c2a49c --- /dev/null +++ b/services/sasquatch/charts/rest-proxy/templates/user.yaml @@ -0,0 +1,31 @@ +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + name: rest-proxy + labels: + strimzi.io/cluster: {{ .Values.kafka.cluster.name }} +spec: + authentication: + type: scram-sha-512 + password: + valueFrom: + secretKeyRef: + name: sasquatch + key: rest-proxy-password + authorization: + type: simple + acls: + - resource: + type: group + name: "*" + patternType: literal + operation: All + {{- range $topic := .Values.kafka.topics }} + - resource: + type: topic + name: {{ $topic }} + patternType: literal + type: allow + host: "*" + operation: All + {{- end }} diff --git a/services/sasquatch/charts/rest-proxy/values.yaml b/services/sasquatch/charts/rest-proxy/values.yaml new file mode 100644 index 0000000000..38a8d8eac4 --- /dev/null +++ b/services/sasquatch/charts/rest-proxy/values.yaml @@ -0,0 +1,75 @@ +# Default values for kafka REST proxy + +# -- Number of Kafka REST proxy pods to run in the 
deployment. +replicaCount: 1 + +image: + # -- Kafka REST proxy image repository. + repository: confluentinc/cp-kafka-rest + # -- Image pull policy. + pullPolicy: IfNotPresent + # -- Kafka REST proxy image tag. + tag: 6.2.8 + +service: + # -- Kafka REST proxy service port + port: 8082 + +ingress: + # -- Enable Ingress. This should be true to create an ingress rule for the application. + enabled: false + # -- Ingress annotations. + annotations: + nginx.ingress.kubernetes.io/rewrite-target: /$2 + # -- Ingress hostname. + hostname: "" + # -- Ingress path. + path: /sasquatch-rest-proxy(/|$)(.*) + +# -- Kafka REST proxy JVM Heap Option +heapOptions: "-Xms512M -Xmx512M" + +# -- Kafka REST configuration options +configurationOverrides: + "client.security.protocol": SASL_PLAINTEXT + "client.sasl.mechanism": SCRAM-SHA-512 + +# -- Kafka REST additional env variables +customEnv: + +schemaregistry: + # -- Schema registry URL + url: "http://sasquatch-schema-registry.sasquatch:8081" + +kafka: + cluster: + # -- Name of the Strimzi Kafka cluster. + name: sasquatch + # -- Kafka bootstrap servers, use the internal listener on port 9092 with SASL connection. + bootstrapServers: "SASL_PLAINTEXT://sasquatch-kafka-bootstrap.sasquatch:9092" + # -- List of Kafka topics to create and expose through the REST proxy API + topics: + +resources: + requests: + # -- Kafka REST proxy memory requests + memory: 200Mi + # -- Kafka REST proxy cpu requests + cpu: 1 + limits: + # -- Kafka REST proxy memory limits + memory: 4Gi + # -- Kafka REST proxy cpu limits + cpu: 2 + +# -- Node selector configuration. +nodeSelector: {} + +# -- Tolerations configuration. +tolerations: [] + +# -- Affinity configuration. +affinity: {} + +# -- Pod annotations.
+podAnnotations: {} diff --git a/services/sasquatch/charts/strimzi-kafka/Chart.yaml b/services/sasquatch/charts/strimzi-kafka/Chart.yaml index 8659e1a0b1..d5c1192619 100644 --- a/services/sasquatch/charts/strimzi-kafka/Chart.yaml +++ b/services/sasquatch/charts/strimzi-kafka/Chart.yaml @@ -1,4 +1,5 @@ apiVersion: v2 name: strimzi-kafka version: 1.0.0 -description: A sub chart to deploy Strimzi Kafka components for Sasquatch. +description: A subchart to deploy Strimzi Kafka components for Sasquatch. +appVersion: 3.1.1 diff --git a/services/sasquatch/charts/strimzi-kafka/README.md b/services/sasquatch/charts/strimzi-kafka/README.md index 84c2c604dd..9e1c0b03a3 100644 --- a/services/sasquatch/charts/strimzi-kafka/README.md +++ b/services/sasquatch/charts/strimzi-kafka/README.md @@ -1,26 +1,33 @@ # strimzi-kafka -A sub chart to deploy Strimzi Kafka components for Sasquatch. +A subchart to deploy Strimzi Kafka components for Sasquatch. ## Values | Key | Type | Default | Description | |-----|------|---------|-------------| | cluster.name | string | `"sasquatch"` | Name used for the Kafka cluster, and used by Strimzi for many annotations. | -| connect.replicas | int | `1` | Number of Kafka Connect replicas to run. | -| kafka.config | object | `{"log.retention.bytes":"644245094400","log.retention.hours":168,"offsets.retention.minutes":10080}` | Configuration overrides for the Kafka server. | -| kafka.config."log.retention.bytes" | string | `"644245094400"` | Maximum retained number of bytes for a topic's data. | -| kafka.config."log.retention.hours" | int | `168` | Number of days for a topic's data to be retained. | -| kafka.config."offsets.retention.minutes" | int | `10080` | Number of minutes for a consumer group's offsets to be retained. | +| connect.image | string | `"lsstsqre/strimzi-0.32.0-kafka-3.3.1:1.0.2"` | Custom strimzi-kafka image with connector plugins used by sasquatch. | +| connect.replicas | int | `3` | Number of Kafka Connect replicas to run. 
| +| kafka.config | object | `{"log.retention.bytes":"429496729600","log.retention.hours":72,"offsets.retention.minutes":4320}` | Configuration overrides for the Kafka server. | +| kafka.config."log.retention.bytes" | string | `"429496729600"` | Maximum retained number of bytes for a topic's data. | +| kafka.config."log.retention.hours" | int | `72` | Number of days for a topic's data to be retained. | +| kafka.config."offsets.retention.minutes" | int | `4320` | Number of minutes for a consumer group's offsets to be retained. | +| kafka.externalListener.bootstrap.annotations | object | `{}` | Annotations that will be added to the Ingress, Route, or Service resource. | +| kafka.externalListener.bootstrap.host | string | `""` | Name used for TLS hostname verification. | +| kafka.externalListener.bootstrap.loadBalancerIP | string | `""` | The loadbalancer is requested with the IP address specified in this field. This feature depends on whether the underlying cloud provider supports specifying the loadBalancerIP when a load balancer is created. This field is ignored if the cloud provider does not support the feature. Once the IP address is provisioned this option make it possible to pin the IP address. We can request the same IP next time it is provisioned. This is important because it lets us configure a DNS record, associating a hostname with that pinned IP address. | +| kafka.externalListener.brokers | list | `[]` | Borkers configuration. host is used in the brokers' advertised.brokers configuration and for TLS hostname verification. The format is a list of maps. | +| kafka.externalListener.tls.certIssuerName | string | `"letsencrypt-dns"` | Name of a ClusterIssuer capable of provisioning a TLS certificate for the broker. | +| kafka.externalListener.tls.enabled | bool | `false` | Whether TLS encryption is enabled. | | kafka.replicas | int | `3` | Number of Kafka broker replicas to run. 
| -| kafka.storage.size | string | `"100Gi"` | Size of the backing storage disk for each of the Kafka brokers. | +| kafka.storage.size | string | `"500Gi"` | Size of the backing storage disk for each of the Kafka brokers. | | kafka.storage.storageClassName | string | `""` | Name of a StorageClass to use when requesting persistent volumes. | -| kafka.version | string | `"3.0.0"` | Version of Kafka to deploy. | +| kafka.version | string | `"3.3.1"` | Version of Kafka to deploy. | +| mirrormaker2.enabled | bool | `false` | Enable replication in the target (passive) cluster. | +| mirrormaker2.source.bootstrapServer | string | `""` | Source (active) cluster to replicate from. | +| mirrormaker2.source.topicsPattern | string | `"registry-schemas, lsst.sal.*"` | Topic replication from the source cluster defined as a comma-separated list or regular expression pattern. | | registry.schemaTopic | string | `"registry-schemas"` | Name of the topic used by the Schema Registry | | superusers | list | `["kafka-admin"]` | A list of usernames for users who should have global admin permissions. These users will be created, along with their credentials. | | zookeeper.replicas | int | `3` | Number of Zookeeper replicas to run. | | zookeeper.storage.size | string | `"100Gi"` | Size of the backing storage disk for each of the Zookeeper instances. | | zookeeper.storage.storageClassName | string | `""` | Name of a StorageClass to use when requesting persistent volumes. | - ----------------------------------------------- -Autogenerated from chart metadata using [helm-docs v1.6.0](https://github.com/norwoodj/helm-docs/releases/v1.6.0) diff --git a/services/sasquatch/charts/strimzi-kafka/README.md.gotmpl b/services/sasquatch/charts/strimzi-kafka/README.md.gotmpl deleted file mode 100644 index 0d310b45a2..0000000000 --- a/services/sasquatch/charts/strimzi-kafka/README.md.gotmpl +++ /dev/null @@ -1,7 +0,0 @@ -{{ template "chart.header" . }} - -{{ template "chart.description" . 
}} - -{{ template "chart.valuesSection" . }} - -{{ template "helm-docs.versionFooter" . }} diff --git a/services/sasquatch/charts/strimzi-kafka/templates/certificates.yaml b/services/sasquatch/charts/strimzi-kafka/templates/certificates.yaml new file mode 100644 index 0000000000..71f39d59a5 --- /dev/null +++ b/services/sasquatch/charts/strimzi-kafka/templates/certificates.yaml @@ -0,0 +1,19 @@ +{{- if and (.Values.kafka.externalListener.tls.enabled) (.Values.kafka.externalListener.bootstrap.host) }} +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + name: {{ .Values.cluster.name }}-external-tls + +spec: + secretName: {{ .Values.cluster.name }}-external-tls + + issuerRef: + name: {{ .Values.kafka.externalListener.tls.certIssuerName }} + kind: ClusterIssuer + + dnsNames: + - {{ .Values.kafka.externalListener.bootstrap.host }} + {{- range $broker := .Values.kafka.externalListener.brokers }} + - {{ $broker.host }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/services/sasquatch/charts/strimzi-kafka/templates/connect-user.yaml b/services/sasquatch/charts/strimzi-kafka/templates/connect-user.yaml deleted file mode 100644 index 5e250a175a..0000000000 --- a/services/sasquatch/charts/strimzi-kafka/templates/connect-user.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: kafka.strimzi.io/v1beta2 -kind: KafkaUser -metadata: - name: {{ .Values.cluster.name }}-connect - labels: - strimzi.io/cluster: {{ .Values.cluster.name }} -spec: - authentication: - type: tls - authorization: - type: simple - acls: - - resource: - type: group - name: {{ .Values.cluster.name }}-connect - operation: Read - - resource: - type: topic - name: "*" - patternType: literal - type: allow - host: "*" - operation: All diff --git a/services/sasquatch/charts/strimzi-kafka/templates/connect.yaml b/services/sasquatch/charts/strimzi-kafka/templates/connect.yaml index 40dd1c18df..e1c1fae049 100644 --- a/services/sasquatch/charts/strimzi-kafka/templates/connect.yaml +++ 
b/services/sasquatch/charts/strimzi-kafka/templates/connect.yaml @@ -6,7 +6,7 @@ metadata: # Use Connect REST API to configure connectors strimzi.io/use-connector-resources: "false" spec: - version: {{ .Values.kafka.version | quote }} + image: {{ .Values.connect.image | quote }} replicas: {{ .Values.connect.replicas }} bootstrapServers: {{ .Values.cluster.name }}-kafka-bootstrap:9093 tls: @@ -28,3 +28,53 @@ spec: config.storage.replication.factor: -1 offset.storage.replication.factor: -1 status.storage.replication.factor: -1 + key.converter: io.confluent.connect.avro.AvroConverter + key.converter.schemas.enable: true + key.converter.schema.registry.url: http://sasquatch-schema-registry.sasquatch:8081 + value.converter: io.confluent.connect.avro.AvroConverter + value.converter.schemas.enable: true + value.converter.schema.registry.url: http://sasquatch-schema-registry.sasquatch:8081 + resources: + requests: + cpu: "2" + memory: 4Gi + limits: + cpu: "8" + memory: 24Gi + jvmOptions: + "-Xmx": "8g" + "-Xms": "8g" +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + name: {{ .Values.cluster.name }}-connect + labels: + strimzi.io/cluster: {{ .Values.cluster.name }} +spec: + authentication: + type: tls + authorization: + type: simple + acls: + - resource: + type: group + name: {{ .Values.cluster.name }}-connect + operation: Read + - resource: + type: group + name: "*" + patternType: literal + operation: All + - resource: + type: topic + name: "*" + patternType: literal + type: allow + host: "*" + operation: All + quotas: + producerByteRate: 1073741824 + consumerByteRate: 1073741824 + requestPercentage: 90 + controllerMutationRate: 1000 diff --git a/services/sasquatch/charts/strimzi-kafka/templates/kafka.yaml b/services/sasquatch/charts/strimzi-kafka/templates/kafka.yaml index 34b220d49c..7f85bbb44d 100644 --- a/services/sasquatch/charts/strimzi-kafka/templates/kafka.yaml +++ b/services/sasquatch/charts/strimzi-kafka/templates/kafka.yaml @@ -5,19 +5,67 
@@ metadata: name: {{ .Values.cluster.name }} spec: kafka: + template: + persistentVolumeClaim: + metadata: + annotations: + argocd.argoproj.io/compare-options: IgnoreExtraneous + argocd.argoproj.io/sync-options: Prune=false version: {{ .Values.kafka.version | quote }} replicas: {{ .Values.kafka.replicas }} listeners: + # internal listener without tls encryption and with scram-sha-512 authentication + # used by clients inside the Kubernetes cluster - name: plain port: 9092 type: internal tls: false - - name: tls # Used by the schema registry; it has a fixed name it expects + authentication: + type: scram-sha-512 + # internal listener with tls encryption and mutual tls authentication + # used by the schema registry and kafka connect clients + - name: tls port: 9093 type: internal tls: true authentication: type: tls + # external listener of type loadbalancer with tls encryption and scram-sha-512 + # authentication used by clients outside the Kubernetes cluster + - name: external + port: 9094 + type: loadbalancer + tls: {{ .Values.kafka.externalListener.tls.enabled }} + authentication: + type: scram-sha-512 + configuration: + bootstrap: + {{- if .Values.kafka.externalListener.bootstrap.loadBalancerIP }} + loadBalancerIP: {{ .Values.kafka.externalListener.bootstrap.loadBalancerIP }} + {{- end }} + {{- if .Values.kafka.externalListener.bootstrap.annotations }} + annotations: {{ .Values.kafka.externalListener.bootstrap.annotations }} + {{- end }} + {{- if .Values.kafka.externalListener.brokers }} + brokers: + {{- range $idx, $broker := .Values.kafka.externalListener.brokers }} + - broker: {{ $idx }} + loadBalancerIP: {{ $broker.loadBalancerIP }} + advertisedHost: {{ $broker.host }} + advertisedPort: 9094 + annotations: + {{- range $key, $value := $broker.annotations }} + {{ $key }}: {{ $value }} + {{- end}} + {{- end }} + {{- end }} + {{- if and (.Values.kafka.externalListener.tls.enabled) (.Values.kafka.externalListener.bootstrap.host) }} + brokerCertChainAndKey: + 
secretName: {{ .Values.cluster.name }}-external-tls + certificate: tls.crt + key: tls.key + {{- end }} + authorization: type: simple {{- if .Values.superusers }} @@ -26,8 +74,9 @@ spec: - {{ . }} {{- end }} {{- end }} - config: + # Accept larger messages + message.max.bytes: 262144 offsets.topic.replication.factor: {{ .Values.kafka.replicas }} transaction.state.log.replication.factor: {{ .Values.kafka.replicas }} transaction.state.log.min.isr: {{ .Values.kafka.replicas }} @@ -51,6 +100,12 @@ spec: {{- end}} deleteClaim: false zookeeper: + template: + persistentVolumeClaim: + metadata: + annotations: + argocd.argoproj.io/compare-options: IgnoreExtraneous + argocd.argoproj.io/sync-options: Prune=false replicas: {{ .Values.zookeeper.replicas }} storage: # Note that storage is configured per replica. If there are 3 replicas, diff --git a/services/sasquatch/charts/strimzi-kafka/templates/mirrormaker2.yaml b/services/sasquatch/charts/strimzi-kafka/templates/mirrormaker2.yaml new file mode 100644 index 0000000000..61ca7fa661 --- /dev/null +++ b/services/sasquatch/charts/strimzi-kafka/templates/mirrormaker2.yaml @@ -0,0 +1,102 @@ +# Mostly based on the Strimzi Kafka MirrorMaker2 example +# configuration for handling high volumes of messages. +{{ if .Values.mirrormaker2.enabled }} +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaMirrorMaker2 +metadata: + name: replicator +spec: + version: {{ .Values.kafka.version | quote }} + replicas: 1 + # In the unidirectional (active/passive) replication scenario + # it is recommended to deploy MirrorMaker2 on the target (passive) cluster. + connectCluster: "target" + clusters: + - alias: "source" + bootstrapServers: {{ .Values.mirrormaker2.source.bootstrapServer }} + tls: {} + # The external kafka listeners in Sasquatch use scram-sha-512 authentication + # Use the replicator Kafka user to authenticate against the Kafka source cluster. 
+ # Note that the same secret with the replicator password must exist in both the source + # and the target clusters. + authentication: + type: scram-sha-512 + username: replicator + passwordSecret: + secretName: sasquatch + password: replicator-password + - alias: "target" + # For the Kafka target cluster, use the internal listener with tls encryption and mutual tls authentication. + bootstrapServers: {{ .Values.cluster.name }}-kafka-bootstrap:9093 + tls: + trustedCertificates: + - secretName: {{ .Values.cluster.name }}-cluster-ca-cert + certificate: ca.crt + authentication: + type: tls + certificateAndKey: + secretName: {{ .Values.cluster.name }}-connect + certificate: user.crt + key: user.key + config: + # This should be enough time for the sent messages to be acknowledged + # by the brokers and offset data committed. + offset.flush.timeout.ms: 10000 + mirrors: + - sourceCluster: "source" + targetCluster: "target" + sourceConnector: + tasksMax: 10 + config: + replication.factor: 3 + offset-syncs.topic.replication.factor: 3 + # Do not replicate topic ACLs configuration. + sync.topic.acls.enabled: "false" + # The frequency to check for new topics. + refresh.topics.interval.seconds: 60 + # Policy to define the remote topic naming convention. + # This setting will preserve topic names in the target cluster. + replication.policy.separator: "" + replication.policy.class: "org.apache.kafka.connect.mirror.IdentityReplicationPolicy" + # Handling high volumes of messages + # By increasing the batch size, produce requests are delayed and more messages are + # added to the batch and sent to brokers at the same time. + # This can improve throughput when you have just a few topic partitions that + # handle large numbers of messages. + producer.override.batch.size: 327680 + # Use linger.ms to add a wait time in milliseconds to delay produce requests when + # producer load decreases. 
+ # The delay means that more records can be added to batches if they are under the + # maximum batch size. + producer.override.linger.ms: 100 + # Accept larger messages. + # See also message.max.bytes broker configuration. + producer.max.request.size: 262144 + heartbeatConnector: + config: + heartbeats.topic.replication.factor: 3 + checkpointConnector: + config: + checkpoints.topic.replication.factor: 3 + # Frequency of checks for new consumer groups. + refresh.groups.interval.seconds: 300 + # Enables synchronization of consumer group offsets to the target cluster. + sync.group.offsets.enabled: true + # The frequency to sync group offsets. + sync.group.offsets.interval.seconds: 60 + # The frequency of checks for offset tracking. + emit.checkpoints.interval.seconds: 60 + # Policy to define the remote topic naming convention. + # This setting will preserve topic names in the target cluster. + replication.policy.class: "org.apache.kafka.connect.mirror.IdentityReplicationPolicy" + # Topic replication from the source cluster defined as a comma-separated list + # or regular expression pattern. 
+ topicsPattern: {{ .Values.mirrormaker2.source.topicsPattern }} + resources: + requests: + cpu: {{ .Values.mirrormaker2.resources.requests.cpu | quote }} + memory: {{ .Values.mirrormaker2.resources.requests.memory | quote }} + limits: + cpu: {{ .Values.mirrormaker2.resources.limits.cpu | quote }} + memory: {{ .Values.mirrormaker2.resources.limits.memory | quote }} +{{ end }} diff --git a/services/sasquatch/charts/strimzi-kafka/templates/schema-registry-topic.yaml b/services/sasquatch/charts/strimzi-kafka/templates/schema-registry-topic.yaml deleted file mode 100644 index ca01c3653c..0000000000 --- a/services/sasquatch/charts/strimzi-kafka/templates/schema-registry-topic.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: kafka.strimzi.io/v1beta2 -kind: KafkaTopic -metadata: - name: {{ .Values.registry.schemaTopic }} - labels: - strimzi.io/cluster: {{ .Values.cluster.name }} -spec: - partitions: 1 - replicas: 3 - config: - cleanup.policy: compact diff --git a/services/sasquatch/charts/strimzi-kafka/templates/schema-registry.yaml b/services/sasquatch/charts/strimzi-kafka/templates/schema-registry.yaml index 87a0eb387b..c2e8ee0b3f 100644 --- a/services/sasquatch/charts/strimzi-kafka/templates/schema-registry.yaml +++ b/services/sasquatch/charts/strimzi-kafka/templates/schema-registry.yaml @@ -4,3 +4,66 @@ metadata: name: {{ .Values.cluster.name }}-schema-registry spec: listener: tls + compatibilityLevel: none +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaTopic +metadata: + name: {{ .Values.registry.schemaTopic }} + labels: + strimzi.io/cluster: {{ .Values.cluster.name }} +spec: + partitions: 1 + replicas: 3 + config: + cleanup.policy: compact +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + name: {{ .Values.cluster.name }}-schema-registry + labels: + strimzi.io/cluster: {{ .Values.cluster.name }} +spec: + authentication: + type: tls + authorization: + # Official docs on authorizations required for the Schema Registry: + # 
https://docs.confluent.io/current/schema-registry/security/index.html#authorizing-access-to-the-schemas-topic + type: simple + acls: + # Allow Read, Write and DescribeConfigs operations on the + # schemas topic + - resource: + type: topic + name: {{ .Values.registry.schemaTopic }} + patternType: literal + operation: Read + type: allow + - resource: + type: topic + name: {{ .Values.registry.schemaTopic }} + patternType: literal + operation: Write + type: allow + - resource: + type: topic + name: {{ .Values.registry.schemaTopic }} + patternType: literal + operation: DescribeConfigs + type: allow + # Allow all operations on the schema-registry* group + - resource: + type: group + name: schema-registry + patternType: prefix + operation: All + type: allow + # Allow Describe on the __consumer_offsets topic + # (The official docs also mention DescribeConfigs?) + - resource: + type: topic + name: "__consumer_offsets" + patternType: literal + operation: Describe + type: allow diff --git a/services/sasquatch/charts/strimzi-kafka/templates/superuser.yaml b/services/sasquatch/charts/strimzi-kafka/templates/superusers.yaml similarity index 79% rename from services/sasquatch/charts/strimzi-kafka/templates/superuser.yaml rename to services/sasquatch/charts/strimzi-kafka/templates/superusers.yaml index 29c3a7cb10..4da1e5b269 100644 --- a/services/sasquatch/charts/strimzi-kafka/templates/superuser.yaml +++ b/services/sasquatch/charts/strimzi-kafka/templates/superusers.yaml @@ -12,6 +12,11 @@ spec: authorization: type: simple acls: + - resource: + type: group + name: "*" + patternType: literal + operation: All - resource: type: topic name: "*" diff --git a/services/sasquatch/charts/strimzi-kafka/templates/tests/test-sasl-authentication.yaml b/services/sasquatch/charts/strimzi-kafka/templates/tests/test-sasl-authentication.yaml new file mode 100644 index 0000000000..21be5414ea --- /dev/null +++ b/services/sasquatch/charts/strimzi-kafka/templates/tests/test-sasl-authentication.yaml @@ 
-0,0 +1,80 @@ +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + name: sasquatch-test + labels: + strimzi.io/cluster: sasquatch +spec: + authentication: + type: scram-sha-512 + password: + valueFrom: + secretKeyRef: + name: sasquatch + key: sasquatch-test-password + authorization: + type: simple + acls: + - resource: + type: topic + name: sasquatch-test + patternType: literal + type: allow + host: "*" + operation: All +--- +apiVersion: kafka.strimzi.io/v1beta1 +kind: KafkaTopic +metadata: + name: sasquatch-test + labels: + strimzi.io/cluster: sasquatch +spec: + replicas: 3 + partitions: 12 +--- +apiVersion: batch/v1 +kind: Job +metadata: + labels: + app: sasquatch + name: sasquatch-test-producer + annotations: + "helm.sh/hook": test +spec: + backoffLimit: 0 + completions: 1 + parallelism: 1 + template: + metadata: + labels: + app: sasquatch + job-name: sasquatch-test-producer + name: sasquatch-test-producer + namespace: sasquatch + spec: + containers: + - env: + - name: BOOTSTRAP_SERVERS + value: sasquatch-kafka-bootstrap.sasquatch:9092 + - name: DELAY_MS + value: "1000" + - name: TOPIC + value: sasquatch-test + - name: MESSAGE_COUNT + value: "100" + - name: MESSAGE + value: Hello-world + - name: PRODUCER_ACKS + value: all + - name: LOG_LEVEL + value: DEBUG + - name: ADDITIONAL_CONFIG + valueFrom: + secretKeyRef: + name: sasquatch + key: sasquatch-test-kafka-properties + image: quay.io/strimzi-test-clients/test-client-kafka-producer:latest-kafka-3.0.0 + imagePullPolicy: IfNotPresent + name: kafka-producer-client + restartPolicy: "Never" diff --git a/services/sasquatch/charts/strimzi-kafka/templates/users.yaml b/services/sasquatch/charts/strimzi-kafka/templates/users.yaml new file mode 100644 index 0000000000..3ceb1e8eb1 --- /dev/null +++ b/services/sasquatch/charts/strimzi-kafka/templates/users.yaml @@ -0,0 +1,185 @@ +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + name: ts-salkafka + labels: + strimzi.io/cluster: {{ 
.Values.cluster.name }} +spec: + authentication: + type: scram-sha-512 + password: + valueFrom: + secretKeyRef: + name: sasquatch + key: ts-salkafka-password + authorization: + type: simple + acls: + - resource: + type: topic + name: "lsst.sal" + patternType: prefix + type: allow + host: "*" + operation: All + quotas: + producerByteRate: 1073741824 + consumerByteRate: 1073741824 + requestPercentage: 90 + controllerMutationRate: 1000 +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + name: replicator + labels: + strimzi.io/cluster: {{ .Values.cluster.name }} +spec: + authentication: + type: scram-sha-512 + password: + valueFrom: + secretKeyRef: + name: sasquatch + key: replicator-password + authorization: + type: simple + acls: + - resource: + type: group + name: "*" + patternType: literal + operation: All + - resource: + type: topic + name: "*" + patternType: literal + type: allow + host: "*" + operation: All + quotas: + producerByteRate: 1073741824 + consumerByteRate: 1073741824 + requestPercentage: 90 + controllerMutationRate: 1000 +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + name: kafka-connect-manager + labels: + strimzi.io/cluster: {{ .Values.cluster.name }} +spec: + authentication: + type: scram-sha-512 + password: + valueFrom: + secretKeyRef: + name: sasquatch + key: kafka-connect-manager-password + authorization: + type: simple + acls: + - resource: + type: topic + name: "*" + patternType: literal + type: allow + host: "*" + operation: Read + quotas: + producerByteRate: 1073741824 + consumerByteRate: 1073741824 + requestPercentage: 90 + controllerMutationRate: 1000 +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + name: kafdrop + labels: + strimzi.io/cluster: {{ .Values.cluster.name }} +spec: + authentication: + type: scram-sha-512 + password: + valueFrom: + secretKeyRef: + name: sasquatch + key: kafdrop-password + authorization: + type: simple + acls: + - resource: + type: group + 
name: "*" + patternType: literal + operation: All + - resource: + type: topic + name: "*" + patternType: literal + type: allow + host: "*" + operation: All +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + name: telegraf + labels: + strimzi.io/cluster: {{ .Values.cluster.name }} +spec: + authentication: + type: scram-sha-512 + password: + valueFrom: + secretKeyRef: + name: sasquatch + key: telegraf-password + authorization: + type: simple + acls: + - resource: + type: group + name: "*" + patternType: literal + operation: All + - resource: + type: topic + name: "*" + patternType: literal + type: allow + host: "*" + operation: Read +--- +apiVersion: kafka.strimzi.io/v1beta2 +kind: KafkaUser +metadata: + name: prompt-processing + labels: + strimzi.io/cluster: {{ .Values.cluster.name }} +spec: + authentication: + type: scram-sha-512 + password: + valueFrom: + secretKeyRef: + name: sasquatch + key: prompt-processing-password + authorization: + type: simple + acls: + - resource: + type: group + name: "*" + patternType: literal + operation: All + - resource: + type: topic + name: "test.next-visit" + patternType: literal + type: allow + host: "*" + operation: All + diff --git a/services/sasquatch/charts/strimzi-kafka/values.yaml b/services/sasquatch/charts/strimzi-kafka/values.yaml index b805b6cab3..f51ca51c70 100644 --- a/services/sasquatch/charts/strimzi-kafka/values.yaml +++ b/services/sasquatch/charts/strimzi-kafka/values.yaml @@ -5,22 +5,56 @@ cluster: kafka: # -- Version of Kafka to deploy. - version: "3.0.0" + version: "3.3.1" # -- Number of Kafka broker replicas to run. replicas: 3 storage: # -- Size of the backing storage disk for each of the Kafka brokers. - size: 100Gi + size: 500Gi # -- Name of a StorageClass to use when requesting persistent volumes. storageClassName: "" # -- Configuration overrides for the Kafka server. config: # -- Number of minutes for a consumer group's offsets to be retained. 
- offsets.retention.minutes: 10080 + offsets.retention.minutes: 4320 # -- Number of days for a topic's data to be retained. - log.retention.hours: 168 + log.retention.hours: 72 # -- Maximum retained number of bytes for a topic's data. - log.retention.bytes: "644245094400" + log.retention.bytes: "429496729600" + + externalListener: + tls: + # -- Whether TLS encryption is enabled. + enabled: false + # -- Name of a ClusterIssuer capable of provisioning a TLS certificate for the broker. + certIssuerName: "letsencrypt-dns" + + bootstrap: + # -- The loadbalancer is requested with the IP address specified in this field. + # This feature depends on whether the underlying cloud provider supports specifying the loadBalancerIP when a load balancer is created. + # This field is ignored if the cloud provider does not support the feature. + # Once the IP address is provisioned this option makes it possible to pin the IP address. + # We can request the same IP next time it is provisioned. This is important because + # it lets us configure a DNS record, associating a hostname with that pinned IP address. + loadBalancerIP: "" + # -- Name used for TLS hostname verification. + host: "" + # -- Annotations that will be added to the Ingress, Route, or Service resource. + annotations: {} + + # -- Brokers configuration. host is used in the brokers' advertised.brokers configuration and for TLS hostname verification. + # The format is a list of maps. + brokers: [] + # For example: + # brokers: + # - loadBalancerIP: "192.168.1.1" + # host: broker-0.example + # annotations: + # metallb.universe.tf/address-pool: sdf-dmz + # - loadBalancerIP: "192.168.1.2" + # host: broker-1.example + # annotations: + # metallb.universe.tf/address-pool: sdf-dmz zookeeper: # -- Number of Zookeeper replicas to run. @@ -32,8 +66,10 @@ zookeeper: storageClassName: "" connect: + # -- Custom strimzi-kafka image with connector plugins used by sasquatch. 
+ image: lsstsqre/strimzi-0.32.0-kafka-3.3.1:1.0.2 # -- Number of Kafka Connect replicas to run. - replicas: 1 + replicas: 3 registry: # -- Name of the topic used by the Schema Registry @@ -43,3 +79,12 @@ registry: # These users will be created, along with their credentials. superusers: - kafka-admin + +mirrormaker2: + # -- Enable replication in the target (passive) cluster. + enabled: false + source: + # -- Source (active) cluster to replicate from. + bootstrapServer: "" + # -- Topic replication from the source cluster defined as a comma-separated list or regular expression pattern. + topicsPattern: "registry-schemas, lsst.sal.*" diff --git a/services/sasquatch/charts/telegraf-kafka-consumer/Chart.yaml b/services/sasquatch/charts/telegraf-kafka-consumer/Chart.yaml new file mode 100755 index 0000000000..92210beefa --- /dev/null +++ b/services/sasquatch/charts/telegraf-kafka-consumer/Chart.yaml @@ -0,0 +1,7 @@ +apiVersion: v2 +name: telegraf-kafka-consumer +version: 1.0.0 +description: > + Telegraf is an agent written in Go for collecting, processing, aggregating, and writing metrics. + This chart deploys multiple instances of the telegraf agent to connect Kafka and InfluxDB in Sasquatch. +appVersion: 1.23.3 diff --git a/services/sasquatch/charts/telegraf-kafka-consumer/README.md b/services/sasquatch/charts/telegraf-kafka-consumer/README.md new file mode 100644 index 0000000000..1c9a74089e --- /dev/null +++ b/services/sasquatch/charts/telegraf-kafka-consumer/README.md @@ -0,0 +1,31 @@ +# telegraf-kafka-consumer + +Telegraf is an agent written in Go for collecting, processing, aggregating, and writing metrics. This chart deploys multiple instances of the telegraf agent to connect Kafka and InfluxDB in Sasquatch. + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity for pod assignment. | +| args | list | `[]` | Arguments passed to the Telegraf agent containers. 
| +| envFromSecret | string | `""` | Name of the secret with values to be added to the environment. | +| env[0].name | string | `"TELEGRAF_PASSWORD"` | | +| env[0].valueFrom.secretKeyRef.key | string | `"telegraf-password"` | Telegraf KafkaUser password. | +| env[0].valueFrom.secretKeyRef.name | string | `"sasquatch"` | | +| env[1].name | string | `"INFLUXDB_TOKEN"` | | +| env[1].valueFrom.secretKeyRef.key | string | `"admin-token"` | InfluxDB admin token. | +| env[1].valueFrom.secretKeyRef.name | string | `"sasquatch"` | | +| image.pullPolicy | string | IfNotPresent | Image pull policy. | +| image.repo | string | `"lsstsqre/telegraf"` | Telegraf image repository. | +| image.tag | string | `"kafka-regexp"` | Telegraf image tag. | +| imagePullSecrets | list | `[]` | Secret names to use for Docker pulls. | +| influxdb2.bucket | string | `"telegraf-kafka-consumer"` | Name of the InfluxDB v2 bucket to write to. | +| kafkaConsumers.test.enabled | bool | `false` | Enable the Telegraf Kafka consumer. | +| kafkaConsumers.test.flush_interval | string | `"1s"` | Default data flushing interval to InfluxDB. | +| kafkaConsumers.test.interval | string | `"1s"` | Data collection interval for the Kafka consumer. | +| kafkaConsumers.test.topicRegexps | string | `"[ \".*Test\" ]\n"` | List of regular expressions to specify the Kafka topics consumed by this agent. | +| nodeSelector | object | `{}` | Node labels for pod assignment. | +| podAnnotations | object | `{}` | Annotations for telegraf-kafka-consumers pods. | +| podLabels | object | `{}` | Labels for telegraf-kafka-consumer pods. | +| resources | object | `{}` | Kubernetes resources requests and limits. | +| tolerations | list | `[]` | Tolerations for pod assignment. 
| diff --git a/services/sasquatch/charts/telegraf-kafka-consumer/templates/configmap.yaml b/services/sasquatch/charts/telegraf-kafka-consumer/templates/configmap.yaml new file mode 100644 index 0000000000..40072b3e68 --- /dev/null +++ b/services/sasquatch/charts/telegraf-kafka-consumer/templates/configmap.yaml @@ -0,0 +1,55 @@ +{{- range $key, $value := .Values.kafkaConsumers }} +{{- if $value.enabled }} +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: sasquatch-telegraf-kafka-consumer-{{ $key }} + labels: + app: sasquatch-telegraf-kafka-consumer +data: + telegraf.conf: |+ + [agent] + collection_jitter = "0s" + debug = true + flush_interval = {{ default "1s" $value.flush_interval | quote }} + flush_jitter = "0s" + interval = {{ default "1s" $value.interval | quote }} + logfile = "" + metric_batch_size = 1000 + metric_buffer_limit = 10000 + omit_hostname = true + precision = "" + quiet = false + round_interval = true + + [[outputs.influxdb_v2]] + bucket = {{ $.Values.influxdb2.bucket | quote }} + token = "$INFLUXDB_TOKEN" + organization = "default" + urls = [ + "http://sasquatch-influxdb2.sasquatch:80" + ] + + [[inputs.kafka_consumer]] + avro_schema_registry = "http://sasquatch-schema-registry.sasquatch:8081" + avro_timestamp = "private_efdStamp" + avro_timestamp_format = "unix_us" + brokers = [ + "sasquatch-kafka-brokers.sasquatch:9092" + ] + consumer_group = "telegraf-kafka-consumer-{{ $key }}" + data_format = "avro" + max_message_len = 1000000 + sasl_mechanism = "SCRAM-SHA-512" + sasl_password = "$TELEGRAF_PASSWORD" + sasl_username = "telegraf" + topic_refresh_interval = "60s" + topic_regexps = {{ $value.topicRegexps }} + offset = "newest" + consumer_fetch_default = "20MB" + + [[inputs.internal]] + collect_memstats = false +{{- end }} +{{- end }} diff --git a/services/sasquatch/charts/telegraf-kafka-consumer/templates/deployment.yaml b/services/sasquatch/charts/telegraf-kafka-consumer/templates/deployment.yaml new file mode 100644 index 
0000000000..040877961e --- /dev/null +++ b/services/sasquatch/charts/telegraf-kafka-consumer/templates/deployment.yaml @@ -0,0 +1,79 @@ +{{- range $key, $value := .Values.kafkaConsumers }} +{{- if $value.enabled }} +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sasquatch-telegraf-kafka-consumer-{{ $key }} + labels: + app: sasquatch-telegraf-kafka-consumer +spec: + replicas: {{ default 1 $value.replicaCount }} + selector: + matchLabels: + app: sasquatch-telegraf-kafka-consumer + template: + metadata: + labels: + app: sasquatch-telegraf-kafka-consumer + {{- if $.Values.podAnnotations }} + annotations: + {{- toYaml $.Values.podAnnotations | nindent 8 }} + {{- end }} + spec: + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + - name: telegraf + securityContext: + capabilities: + drop: + - all + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + image: "{{ $.Values.image.repo }}:{{ $.Values.image.tag }}" + imagePullPolicy: {{ default "IfNotPresent" $.Values.image.pullPolicy | quote }} + {{- if $.Values.resources }} + resources: + {{- toYaml $.Values.resources | nindent 10 }} + {{- end }} + {{- if $.Values.args }} + args: + {{- toYaml $.Values.args | nindent 8 }} + {{- end }} + {{- if $.Values.env }} + env: + {{- toYaml $.Values.env | nindent 8 }} + {{- end }} + {{- if $.Values.envFromSecret }} + envFrom: + - secretRef: + name: {{ $.Values.envFromSecret }} + {{- end }} + volumeMounts: + - name: config + mountPath: /etc/telegraf + {{- if $.Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml $.Values.imagePullSecrets | nindent 8 }} + {{- end }} + {{- if $.Values.nodeSelector }} + nodeSelector: + {{- toYaml $.Values.nodeSelector | nindent 8 }} + {{- end }} + {{- if $.Values.affinity }} + affinity: + {{- toYaml $.Values.affinity | nindent 8 }} + {{- end }} + {{- if $.Values.tolerations }} + tolerations: + {{- toYaml $.Values.tolerations | nindent 8 }} + {{- end }} + volumes: + - name: config + 
configMap: + name: sasquatch-telegraf-kafka-consumer-{{ $key }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/services/sasquatch/charts/telegraf-kafka-consumer/values.yaml b/services/sasquatch/charts/telegraf-kafka-consumer/values.yaml new file mode 100644 index 0000000000..ba88a40e07 --- /dev/null +++ b/services/sasquatch/charts/telegraf-kafka-consumer/values.yaml @@ -0,0 +1,68 @@ +## Default values.yaml for Telegraf Kafka Consumer +image: + # -- Telegraf image repository. + repo: "lsstsqre/telegraf" + # -- Telegraf image tag. + tag: "kafka-regexp" + # -- Image pull policy. + # @default -- IfNotPresent + pullPolicy: "Always" + +# -- Annotations for telegraf-kafka-consumers pods. +podAnnotations: {} + +# -- Labels for telegraf-kafka-consumer pods. +podLabels: {} + +# -- Secret names to use for Docker pulls. +imagePullSecrets: [] + +# -- Arguments passed to the Telegraf agent containers. +args: [] + +# Telegraf agent environment variables +env: + - name: TELEGRAF_PASSWORD + valueFrom: + secretKeyRef: + name: sasquatch + # -- Telegraf KafkaUser password. + key: telegraf-password + - name: INFLUXDB_TOKEN + valueFrom: + secretKeyRef: + name: sasquatch + # -- InfluxDB admin token. + key: admin-token + +# -- Name of the secret with values to be added to the environment. +envFromSecret: "" + +# List of Telegraf Kafka consumers to deploy. +kafkaConsumers: + test: + # -- Enable the Telegraf Kafka consumer. + enabled: false + # -- Data collection interval for the Kafka consumer. + interval: "1s" + # -- Default data flushing interval to InfluxDB. + flush_interval: "1s" + # -- List of regular expressions to specify the Kafka topics consumed by this agent. + topicRegexps: | + [ ".*Test" ] + +influxdb2: + # -- Name of the InfluxDB v2 bucket to write to. + bucket: "telegraf-kafka-consumer" + +# -- Kubernetes resources requests and limits. +resources: {} + +# -- Node labels for pod assignment. +nodeSelector: {} + +# -- Affinity for pod assignment. 
+affinity: {} + +# -- Tolerations for pod assignment. +tolerations: [] diff --git a/services/sasquatch/templates/bucketmapper.yaml b/services/sasquatch/templates/bucketmapper.yaml new file mode 100644 index 0000000000..de676abaf0 --- /dev/null +++ b/services/sasquatch/templates/bucketmapper.yaml @@ -0,0 +1,41 @@ +{{- if .Values.influxdb2.enabled }} +apiVersion: batch/v1 +kind: CronJob +metadata: + name: sasquatch-bucketmapper + namespace: sasquatch +spec: + schedule: "3-59/15 * * * *" + successfulJobsHistoryLimit: 1 + jobTemplate: + spec: + template: + spec: + restartPolicy: Never + automountServiceAccountToken: false + containers: + - name: bucketmapper + image: "{{ .Values.bucketmapper.image.repository }}:{{ .Values.bucketmapper.image.tag }}" + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 405 + runAsGroup: 100 + capabilities: + drop: + - all + readOnlyRootFilesystem: true + env: + - name: "INFLUXDB_TOKEN" + valueFrom: + secretKeyRef: + name: "sasquatch" + key: "admin-token" + - name: "INFLUXDB_ORG" + value: "default" + - name: "INFLUXDB_URL" + value: "http://sasquatch-influxdb2.sasquatch:80" + - name: "DEBUG" + value: "true" + command: [ "bucketmapper" ] +{{- end }} diff --git a/services/sasquatch/templates/vault-secret.yaml b/services/sasquatch/templates/vault-secrets.yaml similarity index 68% rename from services/sasquatch/templates/vault-secret.yaml rename to services/sasquatch/templates/vault-secrets.yaml index 8660d0610a..d44b29b2dc 100644 --- a/services/sasquatch/templates/vault-secret.yaml +++ b/services/sasquatch/templates/vault-secrets.yaml @@ -4,7 +4,7 @@ metadata: name: sasquatch namespace: sasquatch spec: - path: {{ .Values.vaultSecretsPath }}/sasquatch + path: "{{ .Values.global.vaultSecretsPath }}/sasquatch" type: Opaque --- apiVersion: ricoberger.de/v1alpha1 @@ -12,5 +12,5 @@ kind: VaultSecret metadata: name: pull-secret spec: - path: {{ .Values.vaultSecretsPath }}/pull-secret + path: "{{ 
.Values.global.vaultSecretsPath }}/pull-secret" type: kubernetes.io/dockerconfigjson diff --git a/services/sasquatch/values-idfdev.yaml b/services/sasquatch/values-idfdev.yaml index c678530b54..a104dc5298 100644 --- a/services/sasquatch/values-idfdev.yaml +++ b/services/sasquatch/values-idfdev.yaml @@ -1,29 +1,70 @@ -strimzi-kafka: {} +strimzi-kafka: + kafka: + externalListener: + tls: + enabled: true + bootstrap: + loadBalancerIP: "34.173.210.129" + host: sasquatch-dev-kafka-bootstrap.lsst.cloud + brokers: + - loadBalancerIP: "34.173.20.18" + host: sasquatch-dev-kafka-0.lsst.cloud + - loadBalancerIP: "34.69.251.153" + host: sasquatch-dev-kafka-1.lsst.cloud + - loadBalancerIP: "35.184.86.132" + host: sasquatch-dev-kafka-2.lsst.cloud influxdb: ingress: enabled: true hostname: data-dev.lsst.cloud -kafka-connect-manager: - influxdbSink: - influxdb-sink: +influxdb2: + enabled: true + ingress: + enabled: true + hostname: data-dev.lsst.cloud + +telegraf-kafka-consumer: + kafkaConsumers: + test: enabled: true + replicaCount: 1 + topicRegexps: | + [ ".*Test" ] + atmcs: + enabled: true + replicaCount: 1 + topicRegexps: | + [ ".*ATMCS" ] -chronograf: +kafdrop: ingress: enabled: true hostname: data-dev.lsst.cloud +rest-proxy: + enabled: true + ingress: + enabled: true + hostname: data-dev.lsst.cloud + kafka: + topics: + - lsst.dm.sky-flux-visit-statistic-metric + - test.next-visit + +chronograf: + ingress: + enabled: true + hostname: data-dev.lsst.cloud env: GENERIC_NAME: "OIDC" GENERIC_AUTH_URL: https://data-dev.lsst.cloud/auth/openid/login GENERIC_TOKEN_URL: https://data-dev.lsst.cloud/auth/openid/token USE_ID_TOKEN: 1 JWKS_URL: https://data-dev.lsst.cloud/.well-known/jwks.json - GENERIC_API_URL: https://data-dev.lsst.cloud/auth/userinfo + GENERIC_API_URL: https://data-dev.lsst.cloud/auth/openid/userinfo GENERIC_SCOPES: openid + GENERIC_API_KEY: sub PUBLIC_URL: https://data-dev.lsst.cloud/ - STATUS_FEED_URL: "https://lsst-sqre.github.io/sasquatch/feeds/idfdev.json" - 
-vaultSecretsPath: secret/k8s_operator/data-dev.lsst.cloud + STATUS_FEED_URL: https://raw.githubusercontent.com/lsst-sqre/rsp_broadcast/main/jsonfeeds/idfdev.json diff --git a/services/sasquatch/values-idfint.yaml b/services/sasquatch/values-idfint.yaml new file mode 100644 index 0000000000..d4443b8f96 --- /dev/null +++ b/services/sasquatch/values-idfint.yaml @@ -0,0 +1,62 @@ +strimzi-kafka: + kafka: + externalListener: + tls: + enabled: true + bootstrap: + loadBalancerIP: "35.188.187.82" + host: sasquatch-int-kafka-bootstrap.lsst.cloud + + brokers: + - loadBalancerIP: "34.171.69.125" + host: sasquatch-int-kafka-0.lsst.cloud + - loadBalancerIP: "34.72.50.204" + host: sasquatch-int-kafka-1.lsst.cloud + - loadBalancerIP: "34.173.225.150" + host: sasquatch-int-kafka-2.lsst.cloud + mirrormaker2: + enabled: true + source: + bootstrapServer: sasquatch-dev-kafka-bootstrap.lsst.cloud:9094 + topicsPattern: "registry-schemas, lsst.sal.*" + resources: + requests: + cpu: 2 + memory: 4Gi + limits: + cpu: 4 + memory: 8Gi + +influxdb: + ingress: + enabled: true + hostname: data-int.lsst.cloud + +kafka-connect-manager: + influxdbSink: + connectors: + test: + enabled: true + topicsRegex: ".*Test" + +kafdrop: + ingress: + enabled: true + hostname: data-int.lsst.cloud + +chronograf: + ingress: + enabled: true + hostname: data-int.lsst.cloud + + env: + GENERIC_NAME: "OIDC" + GENERIC_AUTH_URL: https://data-int.lsst.cloud/auth/openid/login + GENERIC_TOKEN_URL: https://data-int.lsst.cloud/auth/openid/token + USE_ID_TOKEN: 1 + JWKS_URL: https://data-int.lsst.cloud/.well-known/jwks.json + GENERIC_API_URL: https://data-int.lsst.cloud/auth/openid/userinfo + GENERIC_SCOPES: openid + GENERIC_API_KEY: sub + PUBLIC_URL: https://data-int.lsst.cloud/ + STATUS_FEED_URL: https://raw.githubusercontent.com/lsst-sqre/rsp_broadcast/main/jsonfeeds/idfint.json diff --git a/services/sasquatch/values-minikube.yaml b/services/sasquatch/values-minikube.yaml deleted file mode 100644 index 
bc0a81dc4c..0000000000 --- a/services/sasquatch/values-minikube.yaml +++ /dev/null @@ -1,29 +0,0 @@ -strimzi-kafka: {} - -influxdb: - ingress: - enabled: true - hostname: minikube.lsst.codes - -kafka-connect-manager: - influxdbSink: - influxdb-sink: - enabled: true - -chronograf: - ingress: - enabled: true - hostname: minikube.lsst.codes - - env: - GENERIC_NAME: "OIDC" - GENERIC_AUTH_URL: https://minikube.lsst.codes/auth/openid/login - GENERIC_TOKEN_URL: https://minikube.lsst.codes/auth/openid/token - USE_ID_TOKEN: 1 - JWKS_URL: https://minikube.lsst.codes/.well-known/jwks.json - GENERIC_API_URL: https://minikube.lsst.codes/auth/userinfo - GENERIC_SCOPES: openid - PUBLIC_URL: https://minikube.lsst.codes - STATUS_FEED_URL: "https://lsst-sqre.github.io/sasquatch/feeds/minikube.json" - -vaultSecretsPath: secret/k8s_operator/minikube.lsst.codes diff --git a/services/sasquatch/values-summit.yaml b/services/sasquatch/values-summit.yaml new file mode 100644 index 0000000000..ccae5afd05 --- /dev/null +++ b/services/sasquatch/values-summit.yaml @@ -0,0 +1,106 @@ +strimzi-kafka: + kafka: + storage: + storageClassName: rook-ceph-block + externalListener: + tls: + enabled: true + bootstrap: + loadBalancerIP: "139.229.160.152" + host: sasquatch-summit-kafka-bootstrap.lsst.codes + brokers: + - loadBalancerIP: "139.229.160.154" + host: sasquatch-summit-kafka-0.lsst.codes + - loadBalancerIP: "139.229.160.153" + host: sasquatch-summit-kafka-1.lsst.codes + - loadBalancerIP: "139.229.160.155" + host: sasquatch-summit-kafka-2.lsst.codes + zookeeper: + storage: + storageClassName: rook-ceph-block + +influxdb: + persistence: + storageClass: rook-ceph-block + size: 5Ti + ingress: + enabled: true + hostname: summit-lsp.lsst.codes + +kafka-connect-manager: + influxdbSink: + # Based on the kafka producers configuration for the Summit + # https://github.com/lsst-ts/argocd-csc/blob/main/apps/kafka-producers/values-summit.yaml + connectors: + auxtel: + enabled: true + topicsRegex: 
".*ATAOS|.*ATDome|.*ATDomeTrajectory|.*ATHexapod|.*ATPneumatics|.*ATPtg|.*ATMCS" + maintel: + enabled: true + topicsRegex: ".*MTAOS|.*MTDome|.*MTDomeTrajectory|.*MTPtg" + mtmount: + enabled: true + topicsRegex: ".*MTMount" + comcam: + enabled: true + topicsRegex: ".*CCArchiver|.*CCCamera|.*CCHeaderService|.*CCOODS" + eas: + enabled: true + topicsRegex: ".*DIMM|.*DSM|.*ESS|.*HVAC|.*WeatherForecast|.*WeatherStation" + latiss: + enabled: true + topicsRegex: ".*ATArchiver|.*ATCamera|.*ATHeaderService|.*ATOODS|.*ATSpectrograph" + m1m3: + enabled: true + topicsRegex: ".*MTM1M3" + m2: + enabled: true + topicsRegex: ".*MTHexapod|.*MTM2|.*MTRotator" + obssys: + enabled: true + topicsRegex: ".*Scheduler|.*Script|.*ScriptQueue|.*Watcher" + ocps: + enabled: true + topicsRegex: ".*OCPS" + test: + enabled: true + topicsRegex: ".*Test" + pmd: + enabled: true + topicsRegex: ".*PMD" + calsys: + enabled: true + topicsRegex: ".*ATMonochromator|.*ATWhiteLight|.*CBP|.*Electrometer|.*FiberSpectrograph|.*LinearStage|.*TunableLaser" + mtaircompressor: + enabled: true + topicsRegex: ".*MTAirCompressor" + genericcamera: + enabled: true + topicsRegex: ".*GCHeaderService|.*GenericCamera" + +kafdrop: + ingress: + enabled: true + hostname: summit-lsp.lsst.codes + +chronograf: + persistence: + storageClass: rook-ceph-block + ingress: + enabled: true + hostname: summit-lsp.lsst.codes + env: + GENERIC_NAME: "OIDC" + GENERIC_AUTH_URL: https://summit-lsp.lsst.codes/auth/openid/login + GENERIC_TOKEN_URL: https://summit-lsp.lsst.codes/auth/openid/token + USE_ID_TOKEN: 1 + JWKS_URL: https://summit-lsp.lsst.codes/.well-known/jwks.json + GENERIC_API_URL: https://summit-lsp.lsst.codes/auth/openid/userinfo + GENERIC_SCOPES: openid + GENERIC_API_KEY: sub + PUBLIC_URL: https://summit-lsp.lsst.codes + STATUS_FEED_URL: https://raw.githubusercontent.com/lsst-sqre/rsp_broadcast/main/jsonfeeds/summit.json + +kapacitor: + persistence: + storageClass: rook-ceph-block diff --git 
a/services/sasquatch/values-tucson-teststand.yaml b/services/sasquatch/values-tucson-teststand.yaml new file mode 100644 index 0000000000..a3c3bf6717 --- /dev/null +++ b/services/sasquatch/values-tucson-teststand.yaml @@ -0,0 +1,184 @@ +strimzi-kafka: + kafka: + storage: + storageClassName: rook-ceph-block + externalListener: + tls: + enabled: true + bootstrap: + loadBalancerIP: "140.252.146.59" + host: sasquatch-tts-kafka-bootstrap.lsst.codes + brokers: + - loadBalancerIP: "140.252.146.46" + host: sasquatch-tts-kafka-0.lsst.codes + - loadBalancerIP: "140.252.146.58" + host: sasquatch-tts-kafka-1.lsst.codes + - loadBalancerIP: "140.252.146.47" + host: sasquatch-tts-kafka-2.lsst.codes + zookeeper: + storage: + storageClassName: rook-ceph-block + +influxdb: + persistence: + storageClass: rook-ceph-block + ingress: + enabled: true + hostname: tucson-teststand.lsst.codes + +telegraf-kafka-consumer: + kafkaConsumers: + auxtel: + enabled: true + topicRegexps: | + [ ".*ATAOS", ".*ATDome", ".*ATDomeTrajectory", ".*ATHexapod", ".*ATPneumatics", ".*ATPtg", ".*ATMCS" ] + maintel: + enabled: true + topicRegexps: | + [ ".*MTAOS", ".*MTDome", ".*MTDomeTrajectory", ".*MTPtg" ] + mtmount: + enabled: true + topicRegexps: | + [ ".*MTMount" ] + comcam: + enabled: true + topicRegexps: | + [ ".*CCCamera", ".*CCHeaderService", ".*CCOODS" ] + eas: + enabled: true + topicRegexps: | + [ ".*DIMM", ".*DSM", ".*WeatherForecast", ".*WeatherStation" ] + latiss: + enabled: true + topicRegexps: | + [ ".*ATCamera", ".*ATHeaderService", ".*ATOODS", ".*ATSpectrograph" ] + m1m3: + enabled: true + flush_interval: "0.1s" + interval: "0.1s" + topicRegexps: | + [ ".*MTM1M3" ] + m2: + enabled: true + topicRegexps: | + [ ".*MTHexapod", ".*MTM2", ".*MTRotator" ] + obssys: + enabled: true + topicRegexps: | + [ ".*Scheduler", ".*Script", ".*ScriptQueue", ".*Watcher" ] + ocps: + enabled: true + topicRegexps: | + [ ".*OCPS" ] + pmd: + enabled: true + topicRegexps: | + [ ".*PMD" ] + calsys: + enabled: true + 
topicRegexps: | + [ ".*ATMonochromator", ".*ATWhiteLight", ".*CBP", ".*Electrometer", ".*FiberSpectrograph", ".*LinearStage", ".*TunableLaser" ] + mtaircompressor: + enabled: true + topicRegexps: | + [ ".*MTAirCompressor" ] + authorize: + enabled: true + topicRegexps: | + [ ".*Authorize" ] + lasertracker: + enabled: true + topicRegexps: | + [ ".*LaserTracker" ] + test: + enabled: true + topicRegexps: | + [ "lsst.sal.Test" ] + genericcamera: + enabled: true + topicRegexps: | + [ ".*GCHeaderService", ".*GenericCamera" ] + +kafka-connect-manager: + influxdbSink: + # Based on the kafka producers configuration for the TTS + # https://github.com/lsst-ts/argocd-csc/blob/main/apps/kafka-producers/values-tucson-teststand.yaml + connectors: + auxtel: + enabled: true + topicsRegex: ".*ATAOS|.*ATDome|.*ATDomeTrajectory|.*ATHexapod|.*ATPneumatics|.*ATPtg|.*ATMCS" + maintel: + enabled: true + topicsRegex: ".*MTAOS|.*MTDome|.*MTDomeTrajectory|.*MTPtg" + mtmount: + enabled: true + topicsRegex: ".*MTMount" + comcam: + enabled: true + topicsRegex: ".*CCCamera|.*CCHeaderService|.*CCOODS" + eas: + enabled: true + topicsRegex: ".*DIMM|.*DSM|.*WeatherForecast|.*WeatherStation" + latiss: + enabled: true + topicsRegex: ".*ATCamera|.*ATHeaderService|.*ATOODS|.*ATSpectrograph" + m1m3: + enabled: true + topicsRegex: ".*MTM1M3" + m2: + enabled: true + topicsRegex: ".*MTHexapod|.*MTM2|.*MTRotator" + obssys: + enabled: true + topicsRegex: ".*Scheduler|.*Script|.*ScriptQueue|.*Watcher" + ocps: + enabled: true + topicsRegex: ".*OCPS" + test: + enabled: true + topicsRegex: ".*Test" + pmd: + enabled: true + topicsRegex: ".*PMD" + calsys: + enabled: true + topicsRegex: ".*ATMonochromator|.*ATWhiteLight|.*CBP|.*Electrometer|.*FiberSpectrograph|.*LinearStage|.*TunableLaser" + mtaircompressor: + enabled: true + topicsRegex: ".*MTAirCompressor" + authorize: + enabled: true + topicsRegex: ".*Authorize" + lasertracker: + enabled: true + topicsRegex: ".*LaserTracker" + genericcamera: + enabled: true + 
topicsRegex: ".*GCHeaderService|.*GenericCamera" + +kafdrop: + ingress: + enabled: true + hostname: tucson-teststand.lsst.codes + +chronograf: + persistence: + storageClass: rook-ceph-block + ingress: + enabled: true + hostname: tucson-teststand.lsst.codes + env: + GENERIC_NAME: "OIDC" + GENERIC_AUTH_URL: https://tucson-teststand.lsst.codes/auth/openid/login + GENERIC_TOKEN_URL: https://tucson-teststand.lsst.codes/auth/openid/token + USE_ID_TOKEN: 1 + JWKS_URL: https://tucson-teststand.lsst.codes/.well-known/jwks.json + GENERIC_API_URL: https://tucson-teststand.lsst.codes/auth/openid/userinfo + GENERIC_SCOPES: openid + GENERIC_API_KEY: sub + PUBLIC_URL: https://tucson-teststand.lsst.codes + STATUS_FEED_URL: https://raw.githubusercontent.com/lsst-sqre/rsp_broadcast/main/jsonfeeds/tucson-teststand.json + +kapacitor: + persistence: + storageClass: rook-ceph-block diff --git a/services/sasquatch/values.yaml b/services/sasquatch/values.yaml index 9a56752f17..2fc6f39caf 100644 --- a/services/sasquatch/values.yaml +++ b/services/sasquatch/values.yaml @@ -1,17 +1,25 @@ # Default values for Sasquatch. + # -- Override strimzi-kafka configuration. strimzi-kafka: {} # -- strimzi-registry-operator configuration. strimzi-registry-operator: clusterName: sasquatch - watchNamespace: sasquatch + clusterNamespace: sasquatch operatorNamespace: sasquatch influxdb: # -- InfluxDB image tag. image: tag: "1.8.10" + persistence: + # -- Enable persistent volume claim. + # By default storageClass is undefined choosing the default provisioner (standard on GKE). + enabled: true + # -- Persistent volume size. + # @default 1Ti for teststand deployments + size: 1Ti # -- Default InfluxDB user, use influxb-user and influxdb-password keys from secret. 
setDefaultUser: enabled: true @@ -24,54 +32,118 @@ influxdb: tls: false hostname: "" annotations: - kubernetes.io/ingress.class: "nginx" nginx.ingress.kubernetes.io/rewrite-target: /$2 + className: "nginx" path: /influxdb(/|$)(.*) # -- Override InfluxDB configuration. # See https://docs.influxdata.com/influxdb/v1.8/administration/config config: data: - cache_max_memory_size: 0 - wal_fsync_delay: "100ms" - trace_logging_enabled: true + cache-max-memory-size: 0 + wal-fsync-delay: "100ms" + trace-logging-enabled: true http: enabled: true - auth_enabled: true - max_row_limit: 0 + flux-enabled: true + auth-enabled: true + max-row-limit: 0 coordinator: - write_timeout: "60s" - max_concurrent_queries: 10 - query_timeout: "900s" - log_queries_after: "15s" + write-timeout: "1h" + max-concurrent-queries: 0 + query-timeout: "0s" + log-queries-after: "15s" continuous_queries: enabled: false - # -- InfluxDB Custom initialization scripts. + logging: + level: "debug" initScripts: + # -- Enable InfluxDB custom initialization script. + enabled: false + # scripts: + # # -- InfluxDB custom initialization script. + # init.iql: |+ + resources: + requests: + memory: 1Gi + cpu: 1 + limits: + memory: 96Gi + cpu: 8 + +influxdb2: + enabled: false + adminUser: + # -- Admin default organization. + organization: "default" + # -- Admin default bucket. + bucket: "default" + # -- Get admin-password/admin-token keys from secret. + existingSecret: sasquatch + persistence: + # -- Enable persistent volume claim. + # By default storageClass is undefined choosing the default provisioner (standard on GKE). + enabled: true + # -- Persistent volume size. + # @default 1Ti for teststand deployments. 
+ size: 1Ti + ingress: + # -- InfluxDB2 ingress configuration + enabled: false + hostname: "" + annotations: + nginx.ingress.kubernetes.io/rewrite-target: /api/v2/$2 + className: "nginx" + path: /influxdb2(/|$)(.*) + env: + - name: INFLUXD_STORAGE_WAL_FSYNC_DELAY + value: "100ms" + - name: INFLUXD_HTTP_IDLE_TIMEOUT + value: "0" + - name: INFLUXD_FLUX_LOG_ENABLED + value: "true" + - name: INFLUXD_LOG_LEVEL + value: "debug" + initScripts: + # -- InfluxDB2 initialization scripts enabled: true scripts: - init.iql: |+ - CREATE DATABASE "telegraf" WITH DURATION 30d REPLICATION 1 NAME "rp_30d" + init.sh: |+ + #!/bin/bash + influx bucket create --name telegra-kafka-consumer --org default + resources: + requests: + memory: 1Gi + cpu: 1 + limits: + memory: 96Gi + cpu: 8 -# -- Override strimzi-kafka configuration. +# -- Override kafka-connect-manager configuration. kafka-connect-manager: {} +# -- Override telegraf-kafka-consumer configuration. +telegraf-kafka-consumer: {} + +# -- Override rest-proxy configuration. +rest-proxy: + enabled: false + chronograf: # -- Chronograf image tag. image: repository: "quay.io/influxdb/chronograf" - tag: 1.9.3 + tag: 1.9.4 # -- Chronograf data persistence configuration. persistence: enabled: true - size: 16Gi + size: 100Gi # -- Chronograf ingress configuration. # @default -- disabled ingress: enabled: false tls: false hostname: "" - annotations: - kubernetes.io/ingress.class: "nginx" + className: "nginx" path: /chronograf(/|$) # -- Chronograf environment variables. env: @@ -80,24 +152,55 @@ chronograf: CUSTOM_AUTO_REFRESH: "1s=1000" # -- Chronograf secrets, expected keys generic_client_id, generic_client_secret and token_secret. envFromSecret: "sasquatch" + resources: + requests: + memory: 1Gi + cpu: 1 + limits: + memory: 16Gi + cpu: 4 kapacitor: # -- Kapacitor image tag. image: repository: kapacitor - tag: 1.6.3 + tag: 1.6.5 # -- Chronograf data persistence configuration. 
persistence: enabled: true - size: 16Gi + size: 100Gi # -- InfluxDB connection URL. - influxURL: http://sasquatch.influxdb:8086 + influxURL: http://sasquatch-influxdb.sasquatch:8086 # -- InfluxDB credentials, use influxdb-user and influxdb-password keys from secret. existingSecret: sasquatch # -- Kapacitor environment variables. envVars: KAPACITOR_SLACK_ENABLED: true + resources: + requests: + memory: 1Gi + cpu: 1 + limits: + memory: 16Gi + cpu: 4 + +bucketmapper: + # -- image for monitoring-related cronjobs + image: + # -- repository for rubin-influx-tools + repository: ghcr.io/lsst-sqre/rubin-influx-tools + # -- tag for rubin-influx-tools + tag: 0.1.23 + +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" -# -- Path to the Vault secrets (`secret/k8s_operator//sasquatch`) -# @default -- None, must be set -vaultSecretsPath: "" + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/semaphore/Chart.yaml b/services/semaphore/Chart.yaml index 4608561d2c..c021a09311 100644 --- a/services/semaphore/Chart.yaml +++ b/services/semaphore/Chart.yaml @@ -1,10 +1,16 @@ apiVersion: v2 name: semaphore version: 1.0.0 -dependencies: - - name: semaphore - version: 0.2.2 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +appVersion: "0.3.0" +type: application +description: Semaphore is the user notification and messaging service for the Rubin Science Platform. 
+sources: + - https://github.com/lsst-sqre/semaphore +maintainers: + - name: jonathansick + url: https://github.com/jonathansick +annotations: + phalanx.lsst.io/docs: | + - id: "SQR-060" + title: "Design of the Semaphore user broadcast message system for the Rubin Science Platform" + url: "https://sqr-060.lsst.io/" diff --git a/services/semaphore/README.md b/services/semaphore/README.md new file mode 100644 index 0000000000..ea3233aef9 --- /dev/null +++ b/services/semaphore/README.md @@ -0,0 +1,43 @@ +# semaphore + +Semaphore is the user notification and messaging service for the Rubin Science Platform. + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | | +| autoscaling.enabled | bool | `false` | | +| autoscaling.maxReplicas | int | `100` | | +| autoscaling.minReplicas | int | `1` | | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | | +| config.enable_github_app | string | `"False"` | Toggle to enable the GitHub App functionality | +| config.github_app_id | string | `""` | GitHub application ID | +| config.log_level | string | `"INFO"` | | +| config.logger_name | string | `"semaphore"` | Logger name | +| config.name | string | `"semaphore"` | Name of the service, and path where the external API is hosted. | +| config.phalanx_env | string | `""` | Name of the Phalanx environment where the application is installed TODO can this be set by a global? 
| +| config.profile | string | `"production"` | | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD Application | Base URL for the environment | +| global.host | string | Set by Argo CD Application | Host name for ingress | +| global.vaultSecretsPathPrefix | string | Set by Argo CD Application | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy | +| image.repository | string | `"ghcr.io/lsst-sqre/semaphore"` | Semaphore image repository | +| image.tag | string | The appVersion of the chart | Tag of the image | +| ingress.annotations | object | `{}` | Additional annotations to add to the ingress | +| ingress.enabled | bool | `true` | Enable ingress | +| ingress.path | string | `"/semaphore"` | URL path prefix where the Semaphore API is hosted | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | | +| podAnnotations | object | `{}` | Annotations for pods | +| replicaCount | int | `1` | Number of Semaphore pods to run | +| resources | object | `{}` | | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account | +| serviceAccount.create | bool | `false` | Specifies whether a service account should be created. | +| serviceAccount.name | string | `""` | | +| tolerations | list | `[]` | | diff --git a/services/semaphore/templates/_helpers.tpl b/services/semaphore/templates/_helpers.tpl new file mode 100644 index 0000000000..564691e8b7 --- /dev/null +++ b/services/semaphore/templates/_helpers.tpl @@ -0,0 +1,62 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "semaphore.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). 
+If release name contains chart name it will be used as a full name. +*/}} +{{- define "semaphore.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "semaphore.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "semaphore.labels" -}} +helm.sh/chart: {{ include "semaphore.chart" . }} +{{ include "semaphore.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "semaphore.selectorLabels" -}} +app.kubernetes.io/name: {{ include "semaphore.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "semaphore.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "semaphore.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/services/semaphore/templates/configmap.yaml b/services/semaphore/templates/configmap.yaml new file mode 100644 index 0000000000..a150586752 --- /dev/null +++ b/services/semaphore/templates/configmap.yaml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "semaphore.fullname" . }} + labels: + {{- include "semaphore.labels" . 
| nindent 4 }} +data: + SAFIR_NAME: {{ .Values.config.name | quote }} + SAFIR_PROFILE: {{ .Values.config.profile | quote }} + SAFIR_LOG_LEVEL: {{ .Values.config.log_level | quote }} + SAFIR_LOGGER: {{ .Values.config.logger_name | quote }} + SEMAPHORE_GITHUB_APP_ID: {{ .Values.config.github_app_id | quote }} + SEMAPHORE_ENABLE_GITHUB_APP: {{ .Values.config.enable_github_app | quote }} + SEMAPHORE_PHALANX_ENV: {{ .Values.config.phalanx_env | quote }} diff --git a/services/semaphore/templates/deployment.yaml b/services/semaphore/templates/deployment.yaml new file mode 100644 index 0000000000..6980bf34e8 --- /dev/null +++ b/services/semaphore/templates/deployment.yaml @@ -0,0 +1,78 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "semaphore.fullname" . }} + labels: + {{- include "semaphore.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "semaphore.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "semaphore.selectorLabels" . | nindent 8 }} + spec: + serviceAccountName: {{ include "semaphore.serviceAccountName" . 
}} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + - name: {{ .Chart.Name }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: 8080 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "semaphore.fullname" . }} + env: + - name: SEMAPHORE_GITHUB_WEBHOOK_SECRET + valueFrom: + secretKeyRef: + name: {{ include "semaphore.fullname" . }} + key: SEMAPHORE_GITHUB_WEBHOOK_SECRET + - name: SEMAPHORE_GITHUB_APP_PRIVATE_KEY + valueFrom: + secretKeyRef: + name: {{ include "semaphore.fullname" . }} + key: SEMAPHORE_GITHUB_APP_PRIVATE_KEY + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/semaphore/templates/hpa.yaml b/services/semaphore/templates/hpa.yaml new file mode 100644 index 0000000000..d6f2c1fd7b --- /dev/null +++ b/services/semaphore/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "semaphore.fullname" . }} + labels: + {{- include "semaphore.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "semaphore.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/services/semaphore/templates/ingress.yaml b/services/semaphore/templates/ingress.yaml new file mode 100644 index 0000000000..dc174e7aa3 --- /dev/null +++ b/services/semaphore/templates/ingress.yaml @@ -0,0 +1,31 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "semaphore.fullname" . }} + labels: + {{- include "semaphore.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + anonymous: true +template: + metadata: + name: {{ template "semaphore.fullname" . }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: {{ .Values.ingress.path | quote }} + pathType: "Prefix" + backend: + service: + name: {{ template "semaphore.fullname" . }} + port: + number: 80 +{{- end }} diff --git a/services/semaphore/templates/service.yaml b/services/semaphore/templates/service.yaml new file mode 100644 index 0000000000..fd84bb90b4 --- /dev/null +++ b/services/semaphore/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "semaphore.fullname" . }} + labels: + {{- include "semaphore.labels" . 
| nindent 4 }} +spec: + type: "ClusterIP" + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + {{- include "semaphore.selectorLabels" . | nindent 4 }} diff --git a/services/semaphore/templates/serviceaccount.yaml b/services/semaphore/templates/serviceaccount.yaml new file mode 100644 index 0000000000..a2aeac7b66 --- /dev/null +++ b/services/semaphore/templates/serviceaccount.yaml @@ -0,0 +1,12 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "semaphore.serviceAccountName" . }} + labels: + {{- include "semaphore.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/services/semaphore/templates/vaultsecret.yaml b/services/semaphore/templates/vaultsecret.yaml new file mode 100644 index 0000000000..5c71f7c51a --- /dev/null +++ b/services/semaphore/templates/vaultsecret.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ include "semaphore.fullname" . }} + labels: + {{- include "semaphore.labels" . 
| nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPathPrefix }}/noteburst" + type: Opaque diff --git a/services/semaphore/values-base.yaml b/services/semaphore/values-base.yaml index e5eab01758..56d9a0e30d 100644 --- a/services/semaphore/values-base.yaml +++ b/services/semaphore/values-base.yaml @@ -1,18 +1,5 @@ -semaphore: - config: - phalanx_env: "base" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "base-lsp.lsst.codes" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/base-lsp.lsst.codes/semaphore" +config: + phalanx_env: "base" pull-secret: enabled: true diff --git a/services/semaphore/values-idfdev.yaml b/services/semaphore/values-idfdev.yaml index f89ae44569..b2dde8b667 100644 --- a/services/semaphore/values-idfdev.yaml +++ b/services/semaphore/values-idfdev.yaml @@ -1,23 +1,10 @@ -semaphore: - image: - pullPolicy: Always - config: - github_app_id: "127943" - enable_github_app: "True" - phalanx_env: "idfdev" - log_level: "DEBUG" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "data-dev.lsst.cloud" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/data-dev.lsst.cloud/semaphore" +image: + pullPolicy: Always +config: + github_app_id: "127943" + enable_github_app: "True" + phalanx_env: "idfdev" + log_level: "DEBUG" pull-secret: enabled: true diff --git a/services/semaphore/values-idfint.yaml b/services/semaphore/values-idfint.yaml index 0f75e7d7de..7213ca7711 100644 --- a/services/semaphore/values-idfint.yaml +++ b/services/semaphore/values-idfint.yaml @@ -1,20 +1,7 @@ -semaphore: - config: - phalanx_env: "idfint" - github_app_id: "131457" - enable_github_app: "True" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: 
"data-int.lsst.cloud" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/data-int.lsst.cloud/semaphore" +config: + phalanx_env: "idfint" + github_app_id: "131457" + enable_github_app: "True" pull-secret: enabled: true diff --git a/services/semaphore/values-idfprod.yaml b/services/semaphore/values-idfprod.yaml index 4c114cfdbf..99f41bc0bc 100644 --- a/services/semaphore/values-idfprod.yaml +++ b/services/semaphore/values-idfprod.yaml @@ -1,20 +1,7 @@ -semaphore: - config: - phalanx_env: "idfprod" - github_app_id: "131502" - enable_github_app: "True" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "data.lsst.cloud" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/data.lsst.cloud/semaphore" +config: + phalanx_env: "idfprod" + github_app_id: "131502" + enable_github_app: "True" pull-secret: enabled: true diff --git a/services/semaphore/values-int.yaml b/services/semaphore/values-int.yaml deleted file mode 100644 index b6f3db3959..0000000000 --- a/services/semaphore/values-int.yaml +++ /dev/null @@ -1,19 +0,0 @@ -semaphore: - config: - phalanx_env: "int" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "lsst-lsp-int.ncsa.illinois.edu" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/semaphore" - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret diff --git a/services/semaphore/values-minikube.yaml b/services/semaphore/values-minikube.yaml index 7dfbb96285..8e214230f2 100644 --- a/services/semaphore/values-minikube.yaml +++ b/services/semaphore/values-minikube.yaml @@ -1,18 +1,5 @@ -semaphore: - config: - phalanx_env: "minikube" - ingress: 
- enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "minikube.lsst.codes" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/minikube.lsst.codes/semaphore" +config: + phalanx_env: "minikube" pull-secret: enabled: true diff --git a/services/semaphore/values-red-five.yaml b/services/semaphore/values-red-five.yaml deleted file mode 100644 index 28164317b7..0000000000 --- a/services/semaphore/values-red-five.yaml +++ /dev/null @@ -1,19 +0,0 @@ -semaphore: - config: - phalanx_env: "red-five" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "red-five.lsst.codes" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/red-five.lsst.codes/semaphore" - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git a/services/semaphore/values-roe.yaml b/services/semaphore/values-roe.yaml index 4c502b3629..16f883b9ed 100644 --- a/services/semaphore/values-roe.yaml +++ b/services/semaphore/values-roe.yaml @@ -1,23 +1,10 @@ -semaphore: - image: - pullPolicy: Always - config: - github_app_id: "1452049" - enable_github_app: "True" - phalanx_env: "roe" - log_level: "DEBUG" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "rsp.lsst.ac.uk" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/roe/semaphore" +image: + pullPolicy: Always +config: + github_app_id: "1452049" + enable_github_app: "True" + phalanx_env: "roe" + log_level: "DEBUG" pull-secret: enabled: true diff --git a/services/semaphore/values-stable.yaml b/services/semaphore/values-stable.yaml deleted file mode 100644 index 945a969c10..0000000000 --- a/services/semaphore/values-stable.yaml +++ /dev/null @@ 
-1,19 +0,0 @@ -semaphore: - config: - phalanx_env: "stable" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "lsst-lsp-stable.ncsa.illinois.edu" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/semaphore" - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret diff --git a/services/semaphore/values-summit.yaml b/services/semaphore/values-summit.yaml index e8ae0694a0..8d3df5674b 100644 --- a/services/semaphore/values-summit.yaml +++ b/services/semaphore/values-summit.yaml @@ -1,18 +1,5 @@ -semaphore: - config: - phalanx_env: "summit" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "summit-lsp.lsst.codes" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/summit-lsp.lsst.codes/semaphore" +config: + phalanx_env: "summit" pull-secret: enabled: true diff --git a/services/semaphore/values-tucson-teststand.yaml b/services/semaphore/values-tucson-teststand.yaml index 9f023ef0f5..36baad3083 100644 --- a/services/semaphore/values-tucson-teststand.yaml +++ b/services/semaphore/values-tucson-teststand.yaml @@ -1,18 +1,5 @@ -semaphore: - config: - phalanx_env: "tucson-teststand" - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: nginx - hosts: - - host: "tucson-teststand.lsst.codes" - paths: - - path: "/semaphore" - pathType: Prefix - imagePullSecrets: - - name: "pull-secret" - vaultSecretsPath: "secret/k8s_operator/tucson-teststand.lsst.codes/semaphore" +config: + phalanx_env: "tucson-teststand" pull-secret: enabled: true diff --git a/services/semaphore/values.yaml b/services/semaphore/values.yaml new file mode 100644 index 0000000000..614ed49253 --- /dev/null +++ b/services/semaphore/values.yaml @@ -0,0 
+1,105 @@ +# Default values for semaphore. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Number of Semaphore pods to run +replicaCount: 1 + +image: + # -- Semaphore image repository + repository: ghcr.io/lsst-sqre/semaphore + + # -- Image pull policy + pullPolicy: IfNotPresent + + # -- Tag of the image + # @default -- The appVersion of the chart + tag: "" + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +serviceAccount: + # -- Specifies whether a service account should be created. + create: false + + # -- Annotations to add to the service account + annotations: {} + + # The name of the service account to use. + # @default -- Generated using the fullname template + name: "" + +# -- Annotations for pods +podAnnotations: {} + +ingress: + # -- Enable ingress + enabled: true + + # -- URL path prefix where the Semaphore API is hosted + path: "/semaphore" + + # -- Additional annotations to add to the ingress + annotations: {} + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# Semaphore app configurations. +config: + # -- Name of the service, and path where the external API is hosted. 
+ name: "semaphore" + # -- Name of the Phalanx environment where the application is installed + # TODO can this be set by a global? + phalanx_env: "" + # Run profile: "production" or "development" + profile: "production" + # Logging level: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL" + log_level: "INFO" + # -- Logger name + logger_name: "semaphore" + # -- GitHub application ID + github_app_id: "" + # -- Toggle to enable the GitHub App functionality + enable_github_app: "False" + +# Global parameters will be set by parameters injected by Argo CD and should +# not be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD Application + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD Application + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD Application + vaultSecretsPathPrefix: "" diff --git a/services/sherlock/Chart.yaml b/services/sherlock/Chart.yaml index 651bc48eaf..4ca1fede5c 100644 --- a/services/sherlock/Chart.yaml +++ b/services/sherlock/Chart.yaml @@ -1,7 +1,8 @@ apiVersion: v2 name: sherlock +type: application version: 1.0.0 -dependencies: - - name: sherlock - version: 0.1.5 - repository: https://lsst-sqre.github.io/charts/ +description: Application ingress status and metrics +sources: + - https://github.com/lsst-sqre/sherlock +appVersion: 0.1.8 diff --git a/services/sherlock/README.md b/services/sherlock/README.md new file mode 100644 index 0000000000..459c2c462d --- /dev/null +++ b/services/sherlock/README.md @@ -0,0 +1,34 @@ +# sherlock + +Application ingress status and metrics + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the sherlock deployment pod | +| autoscaling.enabled | bool | `false` | Enable autoscaling of sherlock deployment | +| autoscaling.maxReplicas | int | `100` | Maximum number of sherlock 
deployment pods | +| autoscaling.minReplicas | int | `1` | Minimum number of sherlock deployment pods | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization of sherlock deployment pods | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"Always"` | Pull policy for the sherlock image | +| image.repository | string | `"lsstsqre/sherlock"` | Image to use in the sherlock deployment | +| image.tag | string | `""` | Overrides the image tag whose default is the chart appVersion. | +| imagePullSecrets | list | `[]` | Secret names to use for all Docker pulls | +| ingress.annotations | object | `{}` | Additional annotations for the ingress rule | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selection rules for the sherlock deployment pod | +| podAnnotations | object | `{}` | Annotations for the sherlock deployment pod | +| publishUrl | string | `""` | URL to push status to via HTTP PUTs. | +| replicaCount | int | `1` | Number of web deployment pods to start | +| resources | object | `{}` | Resource limits and requests for the sherlock deployment pod | +| serviceAccount.name | string | `""` | | +| tolerations | list | `[]` | Tolerations for the sherlock deployment pod | diff --git a/services/sherlock/templates/_helpers.tpl b/services/sherlock/templates/_helpers.tpl new file mode 100644 index 0000000000..542ea5a3e9 --- /dev/null +++ b/services/sherlock/templates/_helpers.tpl @@ -0,0 +1,58 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "sherlock.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "sherlock.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "sherlock.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "sherlock.labels" -}} +helm.sh/chart: {{ include "sherlock.chart" . }} +{{ include "sherlock.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "sherlock.selectorLabels" -}} +app.kubernetes.io/name: {{ include "sherlock.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "sherlock.serviceAccountName" -}} +{{ default (include "sherlock.fullname" .) 
.Values.serviceAccount.name }} +{{- end -}} diff --git a/services/sherlock/templates/deployment.yaml b/services/sherlock/templates/deployment.yaml new file mode 100644 index 0000000000..8d490720e0 --- /dev/null +++ b/services/sherlock/templates/deployment.yaml @@ -0,0 +1,76 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "sherlock.fullname" . }} + labels: + {{- include "sherlock.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "sherlock.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "sherlock.selectorLabels" . | nindent 8 }} + spec: + serviceAccountName: {{ template "sherlock.serviceAccountName" . }} + automountServiceAccountToken: true + imagePullSecrets: + - name: "pull-secret" + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + - name: {{ .Chart.Name }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - all + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + env: + - name: "PUBLISH_URL" + value: {{ .Values.publishUrl }} + {{- if .Values.publishUrl }} + - name: "PUBLISH_KEY" + valueFrom: + secretKeyRef: + name: {{ include "sherlock.fullname" . }}-secret + key: "publish_key" + {{- end }} + ports: + - name: http + containerPort: 8080 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/sherlock/templates/hpa.yaml b/services/sherlock/templates/hpa.yaml new file mode 100644 index 0000000000..e9452350c4 --- /dev/null +++ b/services/sherlock/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "sherlock.fullname" . }} + labels: + {{- include "sherlock.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "sherlock.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/services/sherlock/templates/ingress.yaml b/services/sherlock/templates/ingress.yaml new file mode 100644 index 0000000000..68dadd0529 --- /dev/null +++ b/services/sherlock/templates/ingress.yaml @@ -0,0 +1,33 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "sherlock.fullname" . }} + labels: + {{- include "sherlock.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "exec:admin" + loginRedirect: true +template: + metadata: + name: {{ template "sherlock.fullname" . }} + annotations: + nginx.ingress.kubernetes.io/cors-allow-methods: "GET" + nginx.ingress.kubernetes.io/enable-cors: "true" + {{- with .Values.ingress.annotations }} + {{- toYaml . 
| nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/sherlock" + pathType: "Prefix" + backend: + service: + name: {{ template "sherlock.fullname" . }} + port: + number: 8080 diff --git a/services/sherlock/templates/networkpolicy.yaml b/services/sherlock/templates/networkpolicy.yaml new file mode 100644 index 0000000000..3c165006dd --- /dev/null +++ b/services/sherlock/templates/networkpolicy.yaml @@ -0,0 +1,21 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "sherlock.fullname" . }} +spec: + podSelector: + matchLabels: + {{- include "sherlock.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/sherlock/templates/service.yaml b/services/sherlock/templates/service.yaml new file mode 100644 index 0000000000..4516057f2f --- /dev/null +++ b/services/sherlock/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "sherlock.fullname" . }} + labels: + {{- include "sherlock.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: 8080 + targetPort: http + protocol: TCP + name: http + selector: + {{- include "sherlock.selectorLabels" . | nindent 4 }} diff --git a/services/sherlock/templates/serviceaccount.yaml b/services/sherlock/templates/serviceaccount.yaml new file mode 100644 index 0000000000..a6c30dfba1 --- /dev/null +++ b/services/sherlock/templates/serviceaccount.yaml @@ -0,0 +1,38 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ template "sherlock.serviceAccountName" . }} + labels: + {{- include "sherlock.labels" . 
| nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +imagePullSecrets: + - name: "pull-secret" +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "sherlock.serviceAccountName" . }} + labels: + {{- include "sherlock.labels" . | nindent 4 }} +rules: + - apiGroups: [""] + resources: ["pods", "pods/log"] + verbs: ["get", "list"] +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: {{ template "sherlock.serviceAccountName" . }} + labels: + {{- include "sherlock.labels" . | nindent 4 }} +subjects: + - kind: ServiceAccount + name: {{ template "sherlock.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} +roleRef: + kind: ClusterRole + name: {{ template "sherlock.serviceAccountName" . }} + apiGroup: rbac.authorization.k8s.io diff --git a/services/sherlock/templates/vault-secrets.yaml b/services/sherlock/templates/vault-secrets.yaml new file mode 100644 index 0000000000..2099d52cc1 --- /dev/null +++ b/services/sherlock/templates/vault-secrets.yaml @@ -0,0 +1,21 @@ +{{- if .Values.publishUrl }} +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ include "sherlock.fullname" . }}-secret + labels: + {{- include "sherlock.labels" . | nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/sherlock" + type: Opaque +{{- end }} +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret + labels: + {{- include "sherlock.labels" . 
| nindent 4 }} +spec: + path: "{{- .Values.global.vaultSecretsPath }}/pull-secret" + type: kubernetes.io/dockerconfigjson diff --git a/services/sherlock/values-base.yaml b/services/sherlock/values-base.yaml index 8b60acde58..de35ed5941 100644 --- a/services/sherlock/values-base.yaml +++ b/services/sherlock/values-base.yaml @@ -1,3 +1,7 @@ -sherlock: - ingress: - host: "base-lsp.lsst.codes" +resources: + requests: + cpu: 2.0 + memory: "2G" + limits: + cpu: 4.0 + memory: "4G" diff --git a/services/sherlock/values-idfdev.yaml b/services/sherlock/values-idfdev.yaml index df9043146a..09d06b446e 100644 --- a/services/sherlock/values-idfdev.yaml +++ b/services/sherlock/values-idfdev.yaml @@ -1,3 +1,9 @@ -sherlock: - ingress: - host: "data-dev.lsst.cloud" +resources: + requests: + cpu: 2.0 + memory: "2G" + limits: + cpu: 4.0 + memory: "4G" + +publishUrl: "https://status.lsst.codes/api/data-dev" diff --git a/services/sherlock/values-idfint.yaml b/services/sherlock/values-idfint.yaml index 35da44d596..f26f30166c 100644 --- a/services/sherlock/values-idfint.yaml +++ b/services/sherlock/values-idfint.yaml @@ -1,3 +1,8 @@ -sherlock: - ingress: - host: "data-int.lsst.cloud" +resources: + requests: + cpu: 2.0 + memory: "2G" + limits: + cpu: 4.0 + memory: "4G" +publishUrl: "https://status.lsst.codes/api/data-int" diff --git a/services/sherlock/values-idfprod.yaml b/services/sherlock/values-idfprod.yaml index d0ae11fa1b..6dc7b40cad 100644 --- a/services/sherlock/values-idfprod.yaml +++ b/services/sherlock/values-idfprod.yaml @@ -1,3 +1,8 @@ -sherlock: - ingress: - host: "data.lsst.cloud" +resources: + requests: + cpu: 2.0 + memory: "2G" + limits: + cpu: 4.0 + memory: "4G" +publishUrl: "https://status.lsst.codes/api/data" diff --git a/services/sherlock/values-int.yaml b/services/sherlock/values-int.yaml deleted file mode 100644 index 74c2defc68..0000000000 --- a/services/sherlock/values-int.yaml +++ /dev/null @@ -1,3 +0,0 @@ -sherlock: - ingress: - host: 
"lsst-lsp-int.ncsa.illinois.edu" diff --git a/services/sherlock/values-minikube.yaml b/services/sherlock/values-minikube.yaml index 2af4d6dc4e..e69de29bb2 100644 --- a/services/sherlock/values-minikube.yaml +++ b/services/sherlock/values-minikube.yaml @@ -1,3 +0,0 @@ -sherlock: - ingress: - host: "minikube.lsst.codes" diff --git a/services/sherlock/values-roe.yaml b/services/sherlock/values-roe.yaml index 52f1920c23..de35ed5941 100644 --- a/services/sherlock/values-roe.yaml +++ b/services/sherlock/values-roe.yaml @@ -1,3 +1,7 @@ -sherlock: - ingress: - host: "rsp.lsst.ac.uk" +resources: + requests: + cpu: 2.0 + memory: "2G" + limits: + cpu: 4.0 + memory: "4G" diff --git a/services/sherlock/values-stable.yaml b/services/sherlock/values-stable.yaml deleted file mode 100644 index 391e74d55b..0000000000 --- a/services/sherlock/values-stable.yaml +++ /dev/null @@ -1,3 +0,0 @@ -sherlock: - ingress: - host: "lsst-lsp-stable.ncsa.illinois.edu" diff --git a/services/sherlock/values-summit.yaml b/services/sherlock/values-summit.yaml index 2afdfd57f8..de35ed5941 100644 --- a/services/sherlock/values-summit.yaml +++ b/services/sherlock/values-summit.yaml @@ -1,3 +1,7 @@ -sherlock: - ingress: - host: "summit-lsp.lsst.codes" +resources: + requests: + cpu: 2.0 + memory: "2G" + limits: + cpu: 4.0 + memory: "4G" diff --git a/services/sherlock/values-tucson-teststand.yaml b/services/sherlock/values-tucson-teststand.yaml index fcf6d9a9f2..de35ed5941 100644 --- a/services/sherlock/values-tucson-teststand.yaml +++ b/services/sherlock/values-tucson-teststand.yaml @@ -1,3 +1,7 @@ -sherlock: - ingress: - host: "tucson-teststand.lsst.codes" +resources: + requests: + cpu: 2.0 + memory: "2G" + limits: + cpu: 4.0 + memory: "4G" diff --git a/services/sherlock/values.yaml b/services/sherlock/values.yaml new file mode 100644 index 0000000000..c4756cfadc --- /dev/null +++ b/services/sherlock/values.yaml @@ -0,0 +1,79 @@ +# Default values for sherlock. +# This is a YAML-formatted file. 
+# Declare variables to be passed into your templates. + +# -- Number of web deployment pods to start +replicaCount: 1 + +image: + # -- Image to use in the sherlock deployment + repository: lsstsqre/sherlock + + # -- Pull policy for the sherlock image + pullPolicy: Always + + # -- Overrides the image tag whose default is the chart appVersion. + tag: "" + +# -- Secret names to use for all Docker pulls +imagePullSecrets: [] + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# -- Annotations for the sherlock deployment pod +podAnnotations: {} + +ingress: + # -- Additional annotations for the ingress rule + annotations: {} + +# -- Resource limits and requests for the sherlock deployment pod +resources: {} + +autoscaling: + # -- Enable autoscaling of sherlock deployment + enabled: false + + # -- Minimum number of sherlock deployment pods + minReplicas: 1 + + # -- Maximum number of sherlock deployment pods + maxReplicas: 100 + + # -- Target CPU utilization of sherlock deployment pods + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# -- Node selection rules for the sherlock deployment pod +nodeSelector: {} + +# -- Tolerations for the sherlock deployment pod +tolerations: [] + +# -- Affinity rules for the sherlock deployment pod +affinity: {} + +serviceAccount: + name: "" + +# -- URL to push status to via HTTP PUTs. +publishUrl: "" + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. 
+global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/sqlproxy-cross-project/.helmignore b/services/sqlproxy-cross-project/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/services/sqlproxy-cross-project/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/services/sqlproxy-cross-project/Chart.yaml b/services/sqlproxy-cross-project/Chart.yaml new file mode 100644 index 0000000000..4f48ff8d1f --- /dev/null +++ b/services/sqlproxy-cross-project/Chart.yaml @@ -0,0 +1,15 @@ +apiVersion: v2 +name: sqlproxy +type: application +version: 1.0.0 +description: GCP SQL Proxy as a service +home: https://cloud.google.com/sql/docs/postgres/sql-proxy +sources: + - https://github.com/GoogleCloudPlatform/cloud-sql-proxy + +# Normally, we would put the tag of the Docker container here and have the +# deployment default to that value. However, in this case, since the image is +# maintained externally and we want Renovate to send us PRs for version +# updates, the version is tracked only in values.yaml and this version is not +# used. 
+# appVersion: "0.1.0" diff --git a/services/sqlproxy-cross-project/README.md b/services/sqlproxy-cross-project/README.md new file mode 100644 index 0000000000..7764bd010b --- /dev/null +++ b/services/sqlproxy-cross-project/README.md @@ -0,0 +1,28 @@ +# sqlproxy + +GCP SQL Proxy as a service + +**Homepage:** + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the Cloud SQL Proxy pod | +| config.instanceConnectionName | string | None, must be set | Instance connection name for a CloudSQL PostgreSQL instance | +| config.ipAddressType | string | `"PRIVATE"` | IP address type of the instance to connect to (either `PUBLIC` or `PRIVATE`) | +| config.serviceAccount | string | None, must be set if Cloud SQL Auth Proxy is enabled | The Google service account that has an IAM binding to the Cloud SQL Proxy Kubernetes service account and has the `cloudsql.client` role | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the Cloud SQL Proxy image | +| image.repository | string | `"gcr.io/cloudsql-docker/gce-proxy"` | Cloud SQL Proxy image to use | +| image.tag | string | `"1.33.2"` | Tag of Cloud SQL Proxy image to use | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the Cloud SQL Proxy pod | +| podAnnotations | object | `{}` | Annotations for the Cloud SQL Proxy pod | +| replicaCount | int | `1` | Number of pods to start | +| resources | object | `{}` | Resource limits and requests for the Cloud SQL Proxy pod | +| tolerations | list | `[]` | Tolerations for the Cloud SQL Proxy pod | diff --git a/services/sqlproxy-cross-project/templates/_helpers.tpl b/services/sqlproxy-cross-project/templates/_helpers.tpl new file mode 100644 index 0000000000..42c6871d00 --- 
/dev/null +++ b/services/sqlproxy-cross-project/templates/_helpers.tpl @@ -0,0 +1,52 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "sqlproxy.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "sqlproxy.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "sqlproxy.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "sqlproxy.labels" -}} +helm.sh/chart: {{ include "sqlproxy.chart" . }} +{{ include "sqlproxy.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "sqlproxy.selectorLabels" -}} +app.kubernetes.io/name: {{ include "sqlproxy.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/sqlproxy-cross-project/templates/deployment.yaml b/services/sqlproxy-cross-project/templates/deployment.yaml new file mode 100644 index 0000000000..5a0cd44a21 --- /dev/null +++ b/services/sqlproxy-cross-project/templates/deployment.yaml @@ -0,0 +1,62 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "sqlproxy.fullname" . }} + labels: + {{- include "sqlproxy.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "sqlproxy.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "cloud-sql-proxy" + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "sqlproxy.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: "cloud-sql-proxy" + spec: + serviceAccountName: {{ include "sqlproxy.fullname" . }} + containers: + - name: cloud-sql-proxy + command: + - "/cloud_sql_proxy" + - "-log_debug_stdout" + - "-structured_logs" + - "-ip_address_types={{ required "config.ipAddressType must be specified" .Values.config.ipAddressType}}" + - "-instances={{ required "config.instanceConnectionName must be specified" .Values.config.instanceConnectionName }}=tcp:0.0.0.0:5432" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + ports: + - containerPort: 5432 + protocol: "TCP" + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + securityContext: + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/sqlproxy-cross-project/templates/service.yaml b/services/sqlproxy-cross-project/templates/service.yaml new file mode 100644 index 0000000000..c684cc889c --- /dev/null +++ b/services/sqlproxy-cross-project/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "sqlproxy.fullname" . }} + labels: + {{- include "sqlproxy.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - protocol: "TCP" + port: 5432 + targetPort: 5432 + selector: + {{- include "sqlproxy.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: "cloud-sql-proxy" diff --git a/services/sqlproxy-cross-project/templates/serviceaccount.yaml b/services/sqlproxy-cross-project/templates/serviceaccount.yaml new file mode 100644 index 0000000000..cb8d257f36 --- /dev/null +++ b/services/sqlproxy-cross-project/templates/serviceaccount.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "sqlproxy.fullname" . }} + labels: + {{- include "sqlproxy.labels" . 
| nindent 4 }} + annotations: + iam.gke.io/gcp-service-account: {{ required ".Values.config.serviceAccount must be set to a valid Google service account" .Values.config.serviceAccount | quote }} diff --git a/services/sqlproxy-cross-project/values-idfdev.yaml b/services/sqlproxy-cross-project/values-idfdev.yaml new file mode 100644 index 0000000000..ffcc6f60d1 --- /dev/null +++ b/services/sqlproxy-cross-project/values-idfdev.yaml @@ -0,0 +1,11 @@ +fullnameOverride: sqlproxy-butler-int + +config: + ipAddressType: "PUBLIC" + instanceConnectionName: "science-platform-int-dc5d:us-central1:butler-registry-int-72f9812d" + serviceAccount: "sqlproxy-butler-int@science-platform-dev-7696.iam.gserviceaccount.com" + +resources: + requests: + cpu: "1" + memory: "2Gi" diff --git a/services/sqlproxy-cross-project/values.yaml b/services/sqlproxy-cross-project/values.yaml new file mode 100644 index 0000000000..af3a655f68 --- /dev/null +++ b/services/sqlproxy-cross-project/values.yaml @@ -0,0 +1,49 @@ +# Default values for sqlproxy + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# -- Number of pods to start +replicaCount: 1 + +image: + # -- Cloud SQL Proxy image to use + repository: "gcr.io/cloudsql-docker/gce-proxy" + + # -- Tag of Cloud SQL Proxy image to use + tag: "1.33.2" + + # -- Pull policy for the Cloud SQL Proxy image + pullPolicy: "IfNotPresent" + +config: + # -- Instance connection name for a CloudSQL PostgreSQL instance + # @default -- None, must be set + instanceConnectionName: "" + + # -- IP address type of the instance to connect to (either `PUBLIC` or + # `PRIVATE`) + ipAddressType: "PRIVATE" + + # -- The Google service account that has an IAM binding to the Cloud SQL + # Proxy Kubernetes service account and has the `cloudsql.client` role + # @default -- None, must be set if Cloud SQL Auth Proxy is enabled + serviceAccount: "" + +# -- Resource limits and 
requests for the Cloud SQL Proxy pod +resources: {} + +# -- Annotations for the Cloud SQL Proxy pod +podAnnotations: {} + +# -- Node selector rules for the Cloud SQL Proxy pod +nodeSelector: {} + +# -- Tolerations for the Cloud SQL Proxy pod +tolerations: [] + +# -- Affinity rules for the Cloud SQL Proxy pod +affinity: {} diff --git a/services/squareone/Chart.yaml b/services/squareone/Chart.yaml index c5c9508e19..ef3fbb1608 100644 --- a/services/squareone/Chart.yaml +++ b/services/squareone/Chart.yaml @@ -1,10 +1,13 @@ apiVersion: v2 name: squareone version: 1.0.0 -dependencies: - - name: squareone - version: 0.4.1 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +description: Squareone is the homepage UI for the Rubin Science Platform. +home: https://squareone.lsst.io/ +sources: + - https://github.com/lsst-sqre/squareone +maintainers: + - name: jonathansick + url: https://github.com/jonathansick + +# The default version tag of the squareone docker image +appVersion: "0.8.1" diff --git a/services/squareone/README.md b/services/squareone/README.md new file mode 100644 index 0000000000..907219d8ac --- /dev/null +++ b/services/squareone/README.md @@ -0,0 +1,39 @@ +# squareone + +Squareone is the homepage UI for the Rubin Science Platform. + +**Homepage:** + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | | +| autoscaling.enabled | bool | `false` | | +| autoscaling.maxReplicas | int | `100` | | +| autoscaling.minReplicas | int | `1` | | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | | +| config.semaphoreUrl | string | `nil` | URL to the Semaphore (user notifications) API service. 
@default null disables the Semaphore integration | +| config.siteDescription | string | `"Access Rubin Observatory Legacy Survey of Space and Time data.\n"` | Site description, used in meta tags | +| config.siteName | string | `"Rubin Science Platform"` | Name of the site, used in the title and meta tags. | +| config.timesSquareUrl | string | `nil` | URL to the Times Square (parameterized notebooks) API service. @default null disables the Times Square integration | +| fullnameOverride | string | `""` | Overrides the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD Application | Base URL for the environment | +| global.host | string | Set by Argo CD Application | Host name for ingress | +| global.vaultSecretsPathPrefix | string | Set by Argo CD Application | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Image pull policy (tip: use Always for development) | +| image.repository | string | `"ghcr.io/lsst-sqre/squareone"` | Squareone Docker image repository | +| image.tag | string | Chart's appVersion | Overrides the image tag. | +| ingress.annotations | object | `{}` | Additional annotations to add to the ingress | +| ingress.enabled | bool | `true` | Enable ingress | +| ingress.tls | bool | `true` | Enable Let's Encrypt TLS management in this chart. This should be false if TLS is managed elsewhere, such as in an ingress-nginx app. | +| nameOverride | string | `""` | Overrides the base name for resources | +| nodeSelector | object | `{}` | | +| podAnnotations | object | `{}` | Annotations for squareone pods | +| replicaCount | int | `1` | Number of squareone pods to run in the deployment. 
| +| resources | object | `{}` | | +| tolerations | list | `[]` | | diff --git a/services/squareone/templates/_helpers.tpl b/services/squareone/templates/_helpers.tpl new file mode 100644 index 0000000000..96acff7412 --- /dev/null +++ b/services/squareone/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "squareone.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "squareone.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "squareone.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "squareone.labels" -}} +helm.sh/chart: {{ include "squareone.chart" . }} +{{ include "squareone.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "squareone.selectorLabels" -}} +app.kubernetes.io/name: {{ include "squareone.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/squareone/templates/configmap.yaml b/services/squareone/templates/configmap.yaml new file mode 100644 index 0000000000..66cc40b565 --- /dev/null +++ b/services/squareone/templates/configmap.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "squareone.fullname" . }} + labels: + {{- include "squareone.labels" . | nindent 4 }} +data: + squareone.config.yaml: | + siteName: {{ .Values.config.siteName | quote }} + baseUrl: {{ .Values.global.baseUrl | quote }} + siteDescription: {{ .Values.config.siteDescription | quote }} + {{- if .Values.config.semaphoreUrl }} + semaphoreUrl: {{ .Values.config.semaphoreUrl | quote }} + {{- end}} + {{- if .Values.config.timesSquareUrl }} + timesSquareUrl: {{ .Values.config.timesSquareUrl | quote }} + {{- end}} diff --git a/services/squareone/templates/deployment.yaml b/services/squareone/templates/deployment.yaml new file mode 100644 index 0000000000..eb8f98ff8e --- /dev/null +++ b/services/squareone/templates/deployment.yaml @@ -0,0 +1,78 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "squareone.fullname" . }} + labels: + {{- include "squareone.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "squareone.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "squareone.selectorLabels" . 
| nindent 8 }} + spec: + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - ALL + readOnlyRootFilesystem: true + ports: + - name: http + containerPort: 3000 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + env: + - name: "SQUAREONE_CONFIG_PATH" + value: "/etc/squareone/squareone.config.yaml" + volumeMounts: + - name: "config" + mountPath: "/etc/squareone" + - name: "next-image-cache" + mountPath: "/app/.next/cache/images" + automountServiceAccountToken: false + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + volumes: + - name: "config" + configMap: + name: {{ include "squareone.fullname" . }} + - name: "next-image-cache" + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/squareone/templates/hpa.yaml b/services/squareone/templates/hpa.yaml new file mode 100644 index 0000000000..e0f0c7f7dd --- /dev/null +++ b/services/squareone/templates/hpa.yaml @@ -0,0 +1,30 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "squareone.fullname" . }} + labels: + {{- include "squareone.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "squareone.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/services/squareone/templates/ingress.yaml b/services/squareone/templates/ingress.yaml new file mode 100644 index 0000000000..5ee4d13d6a --- /dev/null +++ b/services/squareone/templates/ingress.yaml @@ -0,0 +1,41 @@ +{{- if .Values.ingress.enabled -}} +{{- $fullName := include "squareone.fullname" . -}} +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ $fullName }} + labels: + {{- include "squareone.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + anonymous: true +template: + metadata: + name: {{ $fullName }} + annotations: + {{- if .Values.ingress.tls }} + cert-manager.io/cluster-issuer: "letsencrypt-dns" + {{- end }} + {{- with .Values.ingress.annotations }} + {{- toYaml . 
| nindent 6 }} + {{- end }} + spec: + {{- if .Values.ingress.tls }} + tls: + - hosts: + - {{ required "global.host must be set" .Values.global.host | quote }} + secretName: "squareone-tls" + {{- end }} + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/" + pathType: "Prefix" + backend: + service: + name: {{ $fullName }} + port: + number: 80 +{{- end }} diff --git a/services/squareone/templates/service.yaml b/services/squareone/templates/service.yaml new file mode 100644 index 0000000000..1acfffc8e8 --- /dev/null +++ b/services/squareone/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "squareone.fullname" . }} + labels: + {{- include "squareone.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + selector: + {{- include "squareone.selectorLabels" . | nindent 4 }} diff --git a/services/squareone/templates/tests/test-connection.yaml b/services/squareone/templates/tests/test-connection.yaml new file mode 100644 index 0000000000..78149a04f1 --- /dev/null +++ b/services/squareone/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "squareone.fullname" . }}-test-connection" + labels: + {{- include "squareone.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "squareone.fullname" . }}:80'] + restartPolicy: Never diff --git a/services/squareone/templates/vault-secrets.yaml b/services/squareone/templates/vault-secrets.yaml new file mode 100644 index 0000000000..8755456d29 --- /dev/null +++ b/services/squareone/templates/vault-secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret + labels: + {{- include "squareone.labels" . 
| nindent 4 }} +spec: + path: "{{- .Values.global.vaultSecretsPathPrefix }}/pull-secret" + type: kubernetes.io/dockerconfigjson diff --git a/services/squareone/values-base.yaml b/services/squareone/values-base.yaml index 54debdc61a..47a1d30ae5 100644 --- a/services/squareone/values-base.yaml +++ b/services/squareone/values-base.yaml @@ -1,17 +1,2 @@ -squareone: - ingress: - host: "base-lsp.lsst.codes" - annotations: - cert-manager.io/cluster-issuer: cert-issuer-letsencrypt-dns - tls: - - secretName: squareone-tls - hosts: - - "base-lsp.lsst.codes" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science Platform @ Base" - -pull-secret: - enabled: true - path: secret/k8s_operator/base-lsp.lsst.codes/pull-secret +config: + siteName: "Rubin Science Platform @ Base" diff --git a/services/squareone/values-ccin2p3.yaml b/services/squareone/values-ccin2p3.yaml new file mode 100644 index 0000000000..f8cc7248ca --- /dev/null +++ b/services/squareone/values-ccin2p3.yaml @@ -0,0 +1,5 @@ +config: + siteName: "Rubin Science Platform @ CC-IN2P3" + +ingress: + tls: false diff --git a/services/squareone/values-idfdev.yaml b/services/squareone/values-idfdev.yaml index 10460ecb2c..d96ac543ea 100644 --- a/services/squareone/values-idfdev.yaml +++ b/services/squareone/values-idfdev.yaml @@ -1,18 +1,8 @@ -squareone: - ingress: - host: "data-dev.lsst.cloud" - annotations: - cert-manager.io/cluster-issuer: cert-issuer-letsencrypt-dns - tls: - - secretName: squareone-tls - hosts: - - "data-dev.lsst.cloud" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science Platform @ data-dev" - semaphoreUrl: "https://data-dev.lsst.cloud/semaphore" +image: + pullPolicy: Always + # tag: tickets-DM-34723 -pull-secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/pull-secret +config: + siteName: "Rubin Science Platform @ data-dev" + semaphoreUrl: "https://data-dev.lsst.cloud/semaphore" + timesSquareUrl: 
"https://data-dev.lsst.cloud/times-square/api" diff --git a/services/squareone/values-idfint.yaml b/services/squareone/values-idfint.yaml index 2de36d6ff2..24636c3000 100644 --- a/services/squareone/values-idfint.yaml +++ b/services/squareone/values-idfint.yaml @@ -1,18 +1,3 @@ -squareone: - ingress: - host: "data-int.lsst.cloud" - annotations: - cert-manager.io/cluster-issuer: cert-issuer-letsencrypt-dns - tls: - - secretName: squareone-tls - hosts: - - "data-int.lsst.cloud" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science Platform @ data-int" - semaphoreUrl: "https://data-int.lsst.cloud/semaphore" - -pull-secret: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/pull-secret +config: + siteName: "Rubin Science Platform @ data-int" + semaphoreUrl: "https://data-int.lsst.cloud/semaphore" diff --git a/services/squareone/values-idfprod.yaml b/services/squareone/values-idfprod.yaml index a0d6cb08db..953ac09dda 100644 --- a/services/squareone/values-idfprod.yaml +++ b/services/squareone/values-idfprod.yaml @@ -1,19 +1,5 @@ -squareone: - replicaCount: 3 - ingress: - host: "data.lsst.cloud" - annotations: - cert-manager.io/cluster-issuer: cert-issuer-letsencrypt-dns - tls: - - secretName: squareone-tls - hosts: - - "data.lsst.cloud" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science Platform" - semaphoreUrl: "https://data.lsst.cloud/semaphore" +replicaCount: 3 -pull-secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/pull-secret +config: + siteName: "Rubin Science Platform" + semaphoreUrl: "https://data.lsst.cloud/semaphore" diff --git a/services/squareone/values-int.yaml b/services/squareone/values-int.yaml deleted file mode 100644 index b22e3d1d65..0000000000 --- a/services/squareone/values-int.yaml +++ /dev/null @@ -1,11 +0,0 @@ -squareone: - ingress: - host: "lsst-lsp-int.ncsa.illinois.edu" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science 
Platform @ lsp-int" - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret diff --git a/services/squareone/values-minikube.yaml b/services/squareone/values-minikube.yaml index 379b8a6d32..e8c2c09204 100644 --- a/services/squareone/values-minikube.yaml +++ b/services/squareone/values-minikube.yaml @@ -1,11 +1,5 @@ -squareone: - ingress: - host: "minikube.lsst.codes" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science Platform @ minikube" +config: + siteName: "Rubin Science Platform @ minikube" -pull-secret: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/pull-secret +ingress: + tls: false diff --git a/services/squareone/values-red-five.yaml b/services/squareone/values-red-five.yaml deleted file mode 100644 index 534ff624ce..0000000000 --- a/services/squareone/values-red-five.yaml +++ /dev/null @@ -1,17 +0,0 @@ -squareone: - ingress: - host: "red-five.lsst.codes" - annotations: - cert-manager.io/cluster-issuer: cert-issuer-letsencrypt-dns - tls: - - secretName: squareone-tls - hosts: - - "red-five.lsst.codes" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science Platform @ red-five" - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git a/services/squareone/values-roe.yaml b/services/squareone/values-roe.yaml index 1c4745709b..43079580c6 100644 --- a/services/squareone/values-roe.yaml +++ b/services/squareone/values-roe.yaml @@ -1,12 +1,6 @@ -squareone: - ingress: - host: "rsp.lsst.ac.uk" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science Platform" - semaphoreUrl: "https://rsp.lsst.ac.uk/semaphore" +config: + siteName: "Rubin Science Platform" + semaphoreUrl: "https://rsp.lsst.ac.uk/semaphore" -pull-secret: - enabled: true - path: secret/k8s_operator/roe/pull-secret +ingress: + tls: false diff --git a/services/squareone/values-stable.yaml 
b/services/squareone/values-stable.yaml deleted file mode 100644 index fe4d58d15e..0000000000 --- a/services/squareone/values-stable.yaml +++ /dev/null @@ -1,11 +0,0 @@ -squareone: - ingress: - host: "lsst-lsp-stable.ncsa.illinois.edu" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science Platform @ lsp-stable" - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret diff --git a/services/squareone/values-summit.yaml b/services/squareone/values-summit.yaml index d3363a24c8..3bef8dbc2f 100644 --- a/services/squareone/values-summit.yaml +++ b/services/squareone/values-summit.yaml @@ -1,17 +1,2 @@ -squareone: - ingress: - host: "summit-lsp.lsst.codes" - annotations: - cert-manager.io/cluster-issuer: cert-issuer-letsencrypt-dns - tls: - - secretName: squareone-tls - hosts: - - "summit-lsp.lsst.codes" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science Platform @ Summit" - -pull-secret: - enabled: true - path: secret/k8s_operator/summit-lsp.lsst.codes/pull-secret +config: + siteName: "Rubin Science Platform @ Summit" diff --git a/services/squareone/values-tucson-teststand.yaml b/services/squareone/values-tucson-teststand.yaml index cf13fd2a85..f2836300f9 100644 --- a/services/squareone/values-tucson-teststand.yaml +++ b/services/squareone/values-tucson-teststand.yaml @@ -1,17 +1,2 @@ -squareone: - ingress: - host: "tucson-teststand.lsst.codes" - annotations: - cert-manager.io/cluster-issuer: cert-issuer-letsencrypt-dns - tls: - - secretName: squareone-tls - hosts: - - "tucson-teststand.lsst.codes" - imagePullSecrets: - - name: "pull-secret" - config: - siteName: "Rubin Science Platform @ Tucson" - -pull-secret: - enabled: true - path: secret/k8s_operator/tucson-teststand.lsst.codes/pull-secret +config: + siteName: "Rubin Science Platform @ Tucson" diff --git a/services/squareone/values.yaml b/services/squareone/values.yaml new file mode 100644 index 
0000000000..c82dac869a --- /dev/null +++ b/services/squareone/values.yaml @@ -0,0 +1,92 @@ +# Default values for squareone. + +# -- Number of squareone pods to run in the deployment. +replicaCount: 1 + +image: + # -- Squareone Docker image repository + repository: ghcr.io/lsst-sqre/squareone + + # -- Image pull policy (tip: use Always for development) + pullPolicy: IfNotPresent + + # -- Overrides the image tag. + # @default -- Chart's appVersion + tag: "" + +# -- Overrides the base name for resources +nameOverride: "" + +# -- Overrides the full name for resources (includes the release name) +fullnameOverride: "" + +# -- Annotations for squareone pods +podAnnotations: {} + +ingress: + # -- Enable ingress + enabled: true + + # -- Additional annotations to add to the ingress + annotations: {} + + # -- Enable Let's Encrypt TLS management in this chart. This should be false + # if TLS is managed elsewhere, such as in an ingress-nginx app. + tls: true + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. + # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +# Squareone app configuration +config: + # -- Name of the site, used in the title and meta tags. + siteName: "Rubin Science Platform" + + # -- Site description, used in meta tags + siteDescription: | + Access Rubin Observatory Legacy Survey of Space and Time data. + + # -- URL to the Semaphore (user notifications) API service. 
+ # @default null disables the Semaphore integration + semaphoreUrl: null + + # -- URL to the Times Square (parameterized notebooks) API service. + # @default null disables the Times Square integration + timesSquareUrl: null + +# Global parameters are set by parameters injected by the Argo CD Application +# and should not be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD Application + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD Application + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD Application + vaultSecretsPathPrefix: "" diff --git a/services/squash-api/Chart.yaml b/services/squash-api/Chart.yaml deleted file mode 100644 index e03abc7614..0000000000 --- a/services/squash-api/Chart.yaml +++ /dev/null @@ -1,7 +0,0 @@ -apiVersion: v2 -name: squash-api -version: 0.1.0 -dependencies: - - name: squash-api - version: 0.1.6 - repository: https://lsst-sqre.github.io/charts/ diff --git a/services/squash-api/templates/vault-secrets.yaml b/services/squash-api/templates/vault-secrets.yaml deleted file mode 100644 index 8fdc1c91d8..0000000000 --- a/services/squash-api/templates/vault-secrets.yaml +++ /dev/null @@ -1,26 +0,0 @@ -apiVersion: ricoberger.de/v1alpha1 -kind: VaultSecret -metadata: - name: cloudsql-instance-credentials - namespace: squash-api -spec: - path: {{ .Values.vaultSecretsBasePath }}/cloudsql-instance-credentials - type: Opaque ---- -apiVersion: ricoberger.de/v1alpha1 -kind: VaultSecret -metadata: - name: squash-api - namespace: squash-api -spec: - path: {{ .Values.vaultSecretsBasePath }}/squash-api - type: Opaque ---- -apiVersion: ricoberger.de/v1alpha1 -kind: VaultSecret -metadata: - name: influxdb-auth - namespace: squash-api -spec: - path: {{ .Values.vaultSecretsBasePath }}/influxdb-auth - type: Opaque diff --git a/services/squash-api/values-squash-sandbox.yaml b/services/squash-api/values-squash-sandbox.yaml 
deleted file mode 100644 index 67e1c0cc7b..0000000000 --- a/services/squash-api/values-squash-sandbox.yaml +++ /dev/null @@ -1,41 +0,0 @@ -squash-api: - - # SQuaSH Cloud SQL instance connection name - instanceConnectionName: "squash-db-sandbox-3" - - # Credentials for the SQuaSH Cloud SQL service account - cloudSQLInstanceSecret: "cloudsql-instance-credentials" - - # SQuaSH API secret name - squashAPISecret: "squash-api" - - # If "True", job datetime is obtained from the job metadata instead of using - # current time. Use this option to restore existing jobs to SQuaSH. - squashETLMode: "" - - # S3 Bucket to upload verification jobs - s3BucketName: "squash-sandbox" - - # InfluxDB URL - influxUrl: "http://influxdb.influxdb:8086" - influxDb: "squash-sandbox" - - # InfluxDB credentials - influxSecret: "influxdb-auth" - - ingress: - enabled: true - annotations: - kubernetes.io/ingress.class: "nginx" - nginx.ingress.kubernetes.io/rewrite-target: / - cert-manager.io/cluster-issuer: cert-issuer-letsencrypt-dns - hosts: - - host: squash-sandbox.lsst.codes - paths: ["/"] - tls: - - secretName: "tls-certs" - hosts: - - squash-sandbox.lsst.codes - -## Base path for squash-api secrets in Vault -vaultSecretsBasePath: secret/k8s_operator/squash-sandbox.lsst.codes diff --git a/services/strimzi-registry-operator/Chart.yaml b/services/strimzi-registry-operator/Chart.yaml index dd0580fbc8..f236549ad6 100644 --- a/services/strimzi-registry-operator/Chart.yaml +++ b/services/strimzi-registry-operator/Chart.yaml @@ -1,7 +1,9 @@ apiVersion: v2 name: strimzi-registry-operator version: 1.1.0 +sources: + - https://github.com/lsst-sqre/strimzi-registry-operator dependencies: - name: strimzi-registry-operator - version: 1.2.0 + version: 2.1.0 repository: https://lsst-sqre.github.io/charts/ diff --git a/services/strimzi/Chart.yaml b/services/strimzi/Chart.yaml index 51e3bb71df..43139f0845 100644 --- a/services/strimzi/Chart.yaml +++ b/services/strimzi/Chart.yaml @@ -1,10 +1,11 @@ apiVersion: 
v2 name: strimzi -description: Strimzi Kafka Operator, https://strimzi.io type: application version: 0.1.0 +description: Strimzi Kafka Operator +home: https://strimzi.io appVersion: "0.26.0" dependencies: - name: strimzi-kafka-operator - version: "0.27.1" + version: "0.32.0" repository: https://strimzi.io/charts/ diff --git a/services/strimzi/values-idfdev.yaml b/services/strimzi/values-idfdev.yaml index e4cd2e47e1..6eb0bd0082 100644 --- a/services/strimzi/values-idfdev.yaml +++ b/services/strimzi/values-idfdev.yaml @@ -1,4 +1,9 @@ strimzi-kafka-operator: + resources: + limits: + memory: "1Gi" + requests: + memory: "512Mi" watchNamespaces: - "sasquatch" logLevel: "DEBUG" diff --git a/services/strimzi/values-idfint.yaml b/services/strimzi/values-idfint.yaml index a12314beaa..f98848daa7 100644 --- a/services/strimzi/values-idfint.yaml +++ b/services/strimzi/values-idfint.yaml @@ -1 +1,10 @@ -# This file intentionally blank - no customization needed +strimzi-kafka-operator: + resources: + limits: + memory: "1Gi" + requests: + memory: "512Mi" + watchNamespaces: + - "sasquatch" + - "alert-stream-broker" + logLevel: "DEBUG" diff --git a/services/strimzi/values-summit.yaml b/services/strimzi/values-summit.yaml new file mode 100644 index 0000000000..1abe0d7c86 --- /dev/null +++ b/services/strimzi/values-summit.yaml @@ -0,0 +1,9 @@ +strimzi-kafka-operator: + resources: + limits: + memory: "1Gi" + requests: + memory: "512Mi" + watchNamespaces: + - "sasquatch" + logLevel: "INFO" diff --git a/services/strimzi/values-tucson-teststand.yaml b/services/strimzi/values-tucson-teststand.yaml new file mode 100644 index 0000000000..6eb0bd0082 --- /dev/null +++ b/services/strimzi/values-tucson-teststand.yaml @@ -0,0 +1,9 @@ +strimzi-kafka-operator: + resources: + limits: + memory: "1Gi" + requests: + memory: "512Mi" + watchNamespaces: + - "sasquatch" + logLevel: "DEBUG" diff --git a/services/strimzi/values.yaml b/services/strimzi/values.yaml deleted file mode 100644 index 
0d90ffd616..0000000000 --- a/services/strimzi/values.yaml +++ /dev/null @@ -1,4 +0,0 @@ -strimzi-kafka-operator: - watchNamespaces: - - "alert-stream-broker" - logLevel: "INFO" diff --git a/services/tap-schema/Chart.yaml b/services/tap-schema/Chart.yaml index c5281a40fd..06017de370 100644 --- a/services/tap-schema/Chart.yaml +++ b/services/tap-schema/Chart.yaml @@ -1,10 +1,7 @@ apiVersion: v2 name: tap-schema version: 1.0.0 -dependencies: -- name: tap-schema - version: ">=0.1.0" - repository: https://lsst-sqre.github.io/charts/ -- name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +description: The TAP_SCHEMA database +sources: + - https://github.com/lsst/sdm_schemas +appVersion: 1.2.3 diff --git a/services/tap-schema/README.md b/services/tap-schema/README.md new file mode 100644 index 0000000000..4c53f0c3ef --- /dev/null +++ b/services/tap-schema/README.md @@ -0,0 +1,23 @@ +# tap-schema + +The TAP_SCHEMA database + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the MySQL pod | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the tap-schema image | +| image.repository | string | `"lsstsqre/tap-schema-mock"` | tap-schema image to use | +| image.tag | string | The appVersion of the chart | Tag of tap-schema image to use | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the MySQL pod | +| podAnnotations | object | `{}` | Annotations for the MySQL pod | +| resources | object | `{}` | Resource limits and requests for the MySQL pod | +| tolerations | list | `[]` | Tolerations for the MySQL pod | diff --git 
a/services/tap-schema/templates/_helpers.tpl b/services/tap-schema/templates/_helpers.tpl new file mode 100644 index 0000000000..02b4b76755 --- /dev/null +++ b/services/tap-schema/templates/_helpers.tpl @@ -0,0 +1,51 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "tap-schema.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "tap-schema.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "tap-schema.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "tap-schema.labels" -}} +helm.sh/chart: {{ include "tap-schema.chart" . }} +{{ include "tap-schema.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "tap-schema.selectorLabels" -}} +app.kubernetes.io/name: {{ include "tap-schema.name" . 
}} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/tap-schema/templates/tap-schema-db-deployment.yaml b/services/tap-schema/templates/tap-schema-db-deployment.yaml new file mode 100644 index 0000000000..6782db63b5 --- /dev/null +++ b/services/tap-schema/templates/tap-schema-db-deployment.yaml @@ -0,0 +1,55 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "tap-schema.fullname" . }}-db + labels: + {{- include "tap-schema.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "tap-schema.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "tap-schema.selectorLabels" . | nindent 8 }} + spec: + automountServiceAccountToken: false + containers: + - name: {{ .Chart.Name }} + env: + - name: MYSQL_DATABASE + value: "TAP_SCHEMA" + - name: MYSQL_USER + value: "TAP_SCHEMA" + - name: MYSQL_PASSWORD + value: "TAP_SCHEMA" + - name: MYSQL_ROOT_HOST + value: "%" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + ports: + - containerPort: 3306 + protocol: "TCP" + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + imagePullSecrets: + - name: "pull-secret" + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/services/tap-schema/templates/tap-schema-db-service.yaml b/services/tap-schema/templates/tap-schema-db-service.yaml new file mode 100644 index 0000000000..523ef266f2 --- /dev/null +++ b/services/tap-schema/templates/tap-schema-db-service.yaml @@ -0,0 +1,14 @@ +kind: Service +apiVersion: v1 +metadata: + name: {{ template "tap-schema.fullname" . }}-db + labels: + {{- include "tap-schema.labels" . | nindent 4 }} +spec: + type: "ClusterIP" + ports: + - protocol: "TCP" + port: 3306 + targetPort: 3306 + selector: + {{- include "tap-schema.selectorLabels" . | nindent 4 }} diff --git a/services/tap-schema/templates/vault-secrets.yaml b/services/tap-schema/templates/vault-secrets.yaml new file mode 100644 index 0000000000..10a383ca63 --- /dev/null +++ b/services/tap-schema/templates/vault-secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: "pull-secret" + labels: + {{- include "tap-schema.labels" . | nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/pull-secret" + type: "kubernetes.io/dockerconfigjson" diff --git a/services/tap-schema/values-ccin2p3.yaml b/services/tap-schema/values-ccin2p3.yaml new file mode 100644 index 0000000000..6a4e60db59 --- /dev/null +++ b/services/tap-schema/values-ccin2p3.yaml @@ -0,0 +1,2 @@ +image: + repository: "lsstsqre/tap-schema-idfprod" diff --git a/services/tap-schema/values-idfdev.yaml b/services/tap-schema/values-idfdev.yaml index 8dd89a7a3e..a831a1b57f 100644 --- a/services/tap-schema/values-idfdev.yaml +++ b/services/tap-schema/values-idfdev.yaml @@ -1,7 +1,2 @@ -tap-schema: - pull_secret: 'pull-secret' - image: lsstsqre/tap-schema-mock - -pull-secret: - enabled: true - path: secret/k8s_operator/data-dev.lsst.cloud/pull-secret +image: + repository: "lsstsqre/tap-schema-idfdev" diff --git a/services/tap-schema/values-idfint.yaml b/services/tap-schema/values-idfint.yaml index 0affa75646..60572be44b 100644 --- 
a/services/tap-schema/values-idfint.yaml +++ b/services/tap-schema/values-idfint.yaml @@ -1,7 +1,2 @@ -tap-schema: - pull_secret: 'pull-secret' - image: lsstsqre/tap-schema-idfint - -pull-secret: - enabled: true - path: secret/k8s_operator/data-int.lsst.cloud/pull-secret +image: + repository: "lsstsqre/tap-schema-idfint" diff --git a/services/tap-schema/values-idfprod.yaml b/services/tap-schema/values-idfprod.yaml index 082b482651..7dd2ec1bc8 100644 --- a/services/tap-schema/values-idfprod.yaml +++ b/services/tap-schema/values-idfprod.yaml @@ -1,7 +1,2 @@ -tap-schema: - pull_secret: 'pull-secret' - image: lsstsqre/tap-schema-idfprod - -pull-secret: - enabled: true - path: secret/k8s_operator/data.lsst.cloud/pull-secret +image: + repository: "lsstsqre/tap-schema-idfprod" diff --git a/services/tap-schema/values-int.yaml b/services/tap-schema/values-int.yaml deleted file mode 100644 index eb81bf56e8..0000000000 --- a/services/tap-schema/values-int.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tap-schema: - pull_secret: 'pull-secret' - image: lsstsqre/tap-schema-int - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret diff --git a/services/tap-schema/values-minikube.yaml b/services/tap-schema/values-minikube.yaml index 35acec1115..e69de29bb2 100644 --- a/services/tap-schema/values-minikube.yaml +++ b/services/tap-schema/values-minikube.yaml @@ -1,7 +0,0 @@ -tap-schema: - pull_secret: 'pull-secret' - image: lsstsqre/tap-schema-mock - -pull-secret: - enabled: true - path: secret/k8s_operator/minikube.lsst.codes/pull-secret diff --git a/services/tap-schema/values-red-five.yaml b/services/tap-schema/values-red-five.yaml deleted file mode 100644 index 57ec1f8e93..0000000000 --- a/services/tap-schema/values-red-five.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tap-schema: - pull_secret: 'pull-secret' - image: lsstsqre/tap-schema-mock - -pull-secret: - enabled: true - path: secret/k8s_operator/red-five.lsst.codes/pull-secret diff --git 
a/services/tap-schema/values-roe.yaml b/services/tap-schema/values-roe.yaml index 1e2816fe6a..37acef8e22 100644 --- a/services/tap-schema/values-roe.yaml +++ b/services/tap-schema/values-roe.yaml @@ -1,7 +1,2 @@ -tap-schema: - pull_secret: 'pull-secret' - image: lsstsqre/tap-schema-mock - -pull-secret: - enabled: true - path: secret/k8s_operator/roe/pull-secret +image: + repository: "stvoutsin/tap-schema-roe" diff --git a/services/tap-schema/values-stable.yaml b/services/tap-schema/values-stable.yaml deleted file mode 100644 index b5345a1133..0000000000 --- a/services/tap-schema/values-stable.yaml +++ /dev/null @@ -1,7 +0,0 @@ -tap-schema: - pull_secret: 'pull-secret' - image: lsstsqre/tap-schema-stable - -pull-secret: - enabled: true - path: secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret diff --git a/services/tap-schema/values.yaml b/services/tap-schema/values.yaml new file mode 100644 index 0000000000..37398ae568 --- /dev/null +++ b/services/tap-schema/values.yaml @@ -0,0 +1,40 @@ +# Default values for tap-schema. + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +image: + # -- tap-schema image to use + repository: "lsstsqre/tap-schema-mock" + + # -- Pull policy for the tap-schema image + pullPolicy: "IfNotPresent" + + # -- Tag of tap-schema image to use + # @default -- The appVersion of the chart + tag: "" + +# -- Resource limits and requests for the MySQL pod +resources: {} + +# -- Annotations for the MySQL pod +podAnnotations: {} + +# -- Node selector rules for the MySQL pod +nodeSelector: {} + +# -- Tolerations for the MySQL pod +tolerations: [] + +# -- Affinity rules for the MySQL pod +affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. 
+global: + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/tap/Chart.yaml b/services/tap/Chart.yaml index 6533eb0136..16049f9cb0 100644 --- a/services/tap/Chart.yaml +++ b/services/tap/Chart.yaml @@ -1,10 +1,8 @@ apiVersion: v2 -name: tap +name: cadc-tap version: 1.0.0 -dependencies: - - name: cadc-tap - version: 1.0.5 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ +description: IVOA TAP service +sources: + - https://github.com/lsst-sqre/lsst-tap-service + - https://github.com/opencadc/tap +appVersion: 1.4.2 diff --git a/services/tap/README.md b/services/tap/README.md new file mode 100644 index 0000000000..f709fc5a44 --- /dev/null +++ b/services/tap/README.md @@ -0,0 +1,55 @@ +# cadc-tap + +IVOA TAP service + +## Source Code + +* +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the Gafaelfawr frontend pod | +| config.datalinkPayloadUrl | string | `"https://github.com/lsst/sdm_schemas/releases/download/1.2.2/datalink-snippets.zip"` | Datalink payload URL | +| config.gafaelfawrHost | string | Value of `ingress.host` | Gafaelfawr hostname to get user information from a token | +| config.gcsBucket | string | None, must be set | Name of GCS bucket in which to store results | +| config.gcsBucketType | string | GCS | GCS bucket type (GCS or S3) | +| config.gcsBucketUrl | string | None, must be set | Base URL for results stored in GCS bucket | +| config.jvmMaxHeapSize | string | `"4G"` | Java heap size, which will set the maximum size of the heap. Otherwise Java would determine it based on how much memory is available and black maths. 
| +| config.tapSchemaAddress | string | `"tap-schema-db.tap-schema.svc.cluster.local:3306"` | Address to a MySQL database containing TAP schema data | +| fullnameOverride | string | `"cadc-tap"` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the tap image | +| image.repository | string | `"ghcr.io/lsst-sqre/lsst-tap-service"` | tap image to use | +| image.tag | string | The appVersion of the chart | Tag of tap image to use | +| ingress.anonymousAnnotations | object | `{}` | Additional annotations to use for endpoints that allow anonymous access, such as `/capabilities` and `/availability` | +| ingress.authenticatedAnnotations | object | `{}` | Additional annotations to use for endpoints that are authenticated, such as `/sync`, `/async`, and `/tables` | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the Gafaelfawr frontend pod | +| podAnnotations | object | `{}` | Annotations for the Gafaelfawr frontend pod | +| qserv.host | string | `"mock-qserv:3306"` (the mock QServ) | QServ hostname:port to connect to | +| qserv.mock.affinity | object | `{}` | Affinity rules for the mock QServ pod | +| qserv.mock.enabled | bool | `true` | Spin up a container to pretend to be QServ. 
| +| qserv.mock.image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the mock QServ image | +| qserv.mock.image.repository | string | `"ghcr.io/lsst-sqre/lsst-tap-mock-qserv"` | Mock QServ image to use | +| qserv.mock.image.tag | string | The appVersion of the chart | Tag of mock QServ image to use | +| qserv.mock.nodeSelector | object | `{}` | Node selection rules for the mock QServ pod | +| qserv.mock.podAnnotations | object | `{}` | Annotations for the mock QServ pod | +| qserv.mock.resources | object | `{}` | Resource limits and requests for the mock QServ pod | +| qserv.mock.tolerations | list | `[]` | Tolerations for the mock QServ pod | +| replicaCount | int | `1` | Number of pods to start | +| resources | object | `{}` | Resource limits and requests for the Gafaelfawr frontend pod | +| tolerations | list | `[]` | Tolerations for the Gafaelfawr frontend pod | +| uws.affinity | object | `{}` | Affinity rules for the UWS database pod | +| uws.image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the UWS database image | +| uws.image.repository | string | `"ghcr.io/lsst-sqre/lsst-tap-uws-db"` | UWS database image to use | +| uws.image.tag | string | The appVersion of the chart | Tag of UWS database image to use | +| uws.nodeSelector | object | `{}` | Node selection rules for the UWS database pod | +| uws.podAnnotations | object | `{}` | Annotations for the UWS database pod | +| uws.resources | object | `{}` | Resource limits and requests for the UWS database pod | +| uws.tolerations | list | `[]` | Tolerations for the UWS database pod | +| vaultSecretsPath | string | None, must be set | Path to the Vault secret (`secret/k8s_operator//tap`, for example) | diff --git a/services/tap/templates/_helpers.tpl b/services/tap/templates/_helpers.tpl new file mode 100644 index 0000000000..cfb4a4a120 --- /dev/null +++ b/services/tap/templates/_helpers.tpl @@ -0,0 +1,52 @@ +{{/*
+Expand the name of the chart. 
+*/}} +{{- define "cadc-tap.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "cadc-tap.fullname" -}} +{{- if .Values.fullnameOverride -}} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- $name := default .Chart.Name .Values.nameOverride -}} +{{- if contains $name .Release.Name -}} +{{- .Release.Name | trunc 63 | trimSuffix "-" -}} +{{- else -}} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "cadc-tap.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{/* +Common labels +*/}} +{{- define "cadc-tap.labels" -}} +app.kubernetes.io/name: {{ include "cadc-tap.name" . }} +helm.sh/chart: {{ include "cadc-tap.chart" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end -}} + +{{/* +Selector labels +*/}} +{{- define "cadc-tap.selectorLabels" -}} +app.kubernetes.io/name: {{ include "cadc-tap.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/tap/templates/mock-qserv-deployment.yaml b/services/tap/templates/mock-qserv-deployment.yaml new file mode 100644 index 0000000000..ed70b2d2ce --- /dev/null +++ b/services/tap/templates/mock-qserv-deployment.yaml @@ -0,0 +1,47 @@ +{{ if .Values.qserv.mock.enabled -}} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "cadc-tap.fullname" . 
}}-mock-qserv + labels: + {{- include "cadc-tap.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "cadc-tap.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "mock-qserv" + template: + metadata: + {{- with .Values.qserv.mock.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "cadc-tap.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: "mock-qserv" + spec: + automountServiceAccountToken: false + containers: + - name: "mock-qserv" + image: "{{ .Values.qserv.mock.image.repository }}:{{ .Values.qserv.mock.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.qserv.mock.image.pullPolicy | quote }} + ports: + - containerPort: 3306 + {{- with .Values.qserv.mock.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.qserv.mock.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.qserv.mock.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.qserv.mock.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/services/tap/templates/mock-qserv-networkpolicy.yaml b/services/tap/templates/mock-qserv-networkpolicy.yaml new file mode 100644 index 0000000000..361c8a6ecf --- /dev/null +++ b/services/tap/templates/mock-qserv-networkpolicy.yaml @@ -0,0 +1,23 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "cadc-tap.fullname" . }}-mock-qserv +spec: + podSelector: + matchLabels: + {{- include "cadc-tap.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "mock-qserv" + policyTypes: + - Ingress + # Deny all outbound access; MySQL doesn't need to talk to anything. + - Egress + ingress: + # Allow inbound access to mock Qserv from the server. + - from: + - podSelector: + matchLabels: + {{- include "cadc-tap.selectorLabels" . 
| nindent 14 }} + app.kubernetes.io/component: "server" + ports: + - protocol: "TCP" + port: 3306 diff --git a/services/tap/templates/mock-qserv-service.yaml b/services/tap/templates/mock-qserv-service.yaml new file mode 100644 index 0000000000..208080d6cc --- /dev/null +++ b/services/tap/templates/mock-qserv-service.yaml @@ -0,0 +1,17 @@ +{{ if .Values.qserv.mock.enabled -}} +kind: Service +apiVersion: v1 +metadata: + name: "mock-qserv" + labels: + {{- include "cadc-tap.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - protocol: "TCP" + port: 3306 + targetPort: 3306 + selector: + {{- include "cadc-tap.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: "mock-qserv" +{{- end }} diff --git a/services/tap/templates/tap-deployment.yaml b/services/tap/templates/tap-deployment.yaml new file mode 100644 index 0000000000..7e9a6c0448 --- /dev/null +++ b/services/tap/templates/tap-deployment.yaml @@ -0,0 +1,108 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "cadc-tap.fullname" . }} + labels: + {{- include "cadc-tap.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "cadc-tap.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "server" + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "cadc-tap.selectorLabels" . 
| nindent 8 }} + app.kubernetes.io/component: "server" + spec: + automountServiceAccountToken: false + containers: + - name: "tap-server" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + env: + - name: CATALINA_OPTS + value: >- + -Dqservuser.jdbc.username=qsmaster + -Dqservuser.jdbc.password= + -Dqservuser.jdbc.driverClassName=com.mysql.cj.jdbc.Driver + -Dqservuser.jdbc.url=jdbc:mysql://{{ .Values.qserv.host }}/ + -Dqservuser.maxActive=100 + -Dtapuser.jdbc.username=TAP_SCHEMA + -Dtapuser.jdbc.password=TAP_SCHEMA + -Dtapuser.jdbc.driverClassName=com.mysql.cj.jdbc.Driver + -Dtapuser.jdbc.url=jdbc:mysql://{{ .Values.config.tapSchemaAddress }}/ + -Dtapuser.maxActive=100 + -Dca.nrc.cadc.reg.client.RegistryClient.local=true + -Duws.jdbc.username=postgres + -Duws.jdbc.driverClassName=org.postgresql.Driver + -Duws.jdbc.url=jdbc:postgresql://{{ template "cadc-tap.fullname" . }}-uws-db/ + -Dca.nrc.cadc.auth.Authenticator=org.opencadc.tap.impl.AuthenticatorImpl + -Dgafaelfawr_url={{ .Values.global.baseUrl }}/auth/api/v1/user-info + -Dgcs_bucket={{ .Values.config.gcsBucket }} + -Dgcs_bucket_url={{ .Values.config.gcsBucketUrl }} + -Dgcs_bucket_type={{ .Values.config.gcsBucketType }} + -Dbase_url={{ .Values.global.baseUrl }} + -Dca.nrc.cadc.util.PropertiesReader.dir=/etc/creds/ + -Xmx{{ .Values.config.jvmMaxHeapSize }} + - name: GOOGLE_APPLICATION_CREDENTIALS + value: "/etc/creds/google_creds.json" + {{- if eq .Values.config.gcsBucketType "S3" }} + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ template "cadc-tap.fullname" . }}-secret + key: "AWS_SECRET_ACCESS_KEY" + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ template "cadc-tap.fullname" . 
}}-secret + key: "AWS_ACCESS_KEY_ID" + {{- end }} + - name: DATALINK_PAYLOAD_URL + value: "{{ .Values.config.datalinkPayloadUrl }}" + ports: + - containerPort: 8080 + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: "google-creds" + mountPath: "/etc/creds" + readOnly: true + - name: "tmp" + mountPath: "/tmp" + livenessProbe: + failureThreshold: 3 + httpGet: + path: /tap/availability + port: 8080 + scheme: HTTP + initialDelaySeconds: 10 + periodSeconds: 10 + successThreshold: 1 + timeoutSeconds: 1 + volumes: + - name: "google-creds" + secret: + secretName: {{ template "cadc-tap.fullname" . }}-secret + - name: "tmp" + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/tap/templates/tap-ingress-anonymous.yaml b/services/tap/templates/tap-ingress-anonymous.yaml new file mode 100644 index 0000000000..068fcb1f4b --- /dev/null +++ b/services/tap/templates/tap-ingress-anonymous.yaml @@ -0,0 +1,37 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "cadc-tap.fullname" . }}-anonymous + labels: + {{- include "cadc-tap.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + anonymous: true +template: + metadata: + name: {{ template "cadc-tap.fullname" . 
}}-anonymous + annotations: + nginx.ingress.kubernetes.io/proxy-connect-timeout: "900" + nginx.ingress.kubernetes.io/proxy-send-timeout: "900" + nginx.ingress.kubernetes.io/proxy-read-timeout: "900" + nginx.ingress.kubernetes.io/rewrite-target: "/tap/$1" + nginx.ingress.kubernetes.io/proxy-redirect-from: "http://$host/tap/" + nginx.ingress.kubernetes.io/proxy-redirect-to: "https://$host/api/tap/" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/use-regex: "true" + {{- with .Values.ingress.anonymousAnnotations }} + {{- toYaml . | indent 4}} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/api/tap/(availability|capabilities|swagger-ui.*)" + pathType: "ImplementationSpecific" + backend: + service: + name: {{ template "cadc-tap.fullname" . }} + port: + number: 80 diff --git a/services/tap/templates/tap-ingress-authenticated.yaml b/services/tap/templates/tap-ingress-authenticated.yaml new file mode 100644 index 0000000000..d2ea1ab62d --- /dev/null +++ b/services/tap/templates/tap-ingress-authenticated.yaml @@ -0,0 +1,44 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "cadc-tap.fullname" . }}-authenticated + labels: + {{- include "cadc-tap.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "read:tap" + authType: "basic" + delegate: + internal: + scopes: [] + service: "tap" + useAuthorization: true +template: + metadata: + name: {{ template "cadc-tap.fullname" . 
}}-authenticated + annotations: + nginx.ingress.kubernetes.io/proxy-connect-timeout: "1800" + nginx.ingress.kubernetes.io/proxy-send-timeout: "1800" + nginx.ingress.kubernetes.io/proxy-read-timeout: "1800" + nginx.ingress.kubernetes.io/rewrite-target: "/tap/$2" + nginx.ingress.kubernetes.io/proxy-redirect-from: "http://$host/tap/" + nginx.ingress.kubernetes.io/proxy-redirect-to: "https://$host/api/tap/" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/use-regex: "true" + {{- with .Values.ingress.authenticatedAnnotations }} + {{- toYaml . | indent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/api/tap(/|$)(.*)" + pathType: "ImplementationSpecific" + backend: + service: + name: {{ template "cadc-tap.fullname" . }} + port: + number: 80 diff --git a/services/tap/templates/tap-networkpolicy.yaml b/services/tap/templates/tap-networkpolicy.yaml new file mode 100644 index 0000000000..9612ba8cfc --- /dev/null +++ b/services/tap/templates/tap-networkpolicy.yaml @@ -0,0 +1,22 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "cadc-tap.fullname" . }} +spec: + podSelector: + matchLabels: + {{- include "cadc-tap.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "server" + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/tap/templates/tap-service.yaml b/services/tap/templates/tap-service.yaml new file mode 100644 index 0000000000..8f387364b1 --- /dev/null +++ b/services/tap/templates/tap-service.yaml @@ -0,0 +1,15 @@ +kind: Service +apiVersion: v1 +metadata: + name: {{ template "cadc-tap.fullname" . }} + labels: + {{- include "cadc-tap.labels" . 
| nindent 4 }} +spec: + type: ClusterIP + ports: + - protocol: "TCP" + port: 80 + targetPort: 8080 + selector: + {{- include "cadc-tap.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: "server" diff --git a/services/tap/templates/uws-db-deployment.yaml b/services/tap/templates/uws-db-deployment.yaml new file mode 100644 index 0000000000..c41a9233db --- /dev/null +++ b/services/tap/templates/uws-db-deployment.yaml @@ -0,0 +1,51 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "cadc-tap.fullname" . }}-uws-db + labels: + {{- include "cadc-tap.labels" . | nindent 4 }} +spec: + replicas: 1 + selector: + matchLabels: + {{- include "cadc-tap.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "uws-db" + template: + metadata: + {{- with .Values.uws.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "cadc-tap.labels" . | nindent 8 }} + app.kubernetes.io/component: "uws-db" + spec: + automountServiceAccountToken: false + containers: + - name: "postgresql" + image: "{{ .Values.uws.image.repository }}:{{ .Values.uws.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.uws.image.pullPolicy | quote }} + ports: + - containerPort: 5432 + {{- with .Values.uws.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: "data" + mountPath: "/var/lib/postgresql/data" + volumes: + - name: "data" + emptyDir: {} + {{- with .Values.uws.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.uws.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.uws.tolerations }} + tolerations: + {{- toYaml . 
| nindent 8 }} + {{- end }} diff --git a/services/tap/templates/uws-db-networkpolicy.yaml b/services/tap/templates/uws-db-networkpolicy.yaml new file mode 100644 index 0000000000..b67fbdd5e1 --- /dev/null +++ b/services/tap/templates/uws-db-networkpolicy.yaml @@ -0,0 +1,23 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "cadc-tap.fullname" . }}-uws-db +spec: + podSelector: + matchLabels: + {{- include "cadc-tap.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "uws-db" + policyTypes: + - Ingress + # Deny all outbound access; PostgreSQL doesn't need to talk to anything. + - Egress + ingress: + # Allow inbound access to UWS database from the server. + - from: + - podSelector: + matchLabels: + {{- include "cadc-tap.selectorLabels" . | nindent 14 }} + app.kubernetes.io/component: "server" + ports: + - protocol: "TCP" + port: 5432 diff --git a/services/tap/templates/uws-db-service.yaml b/services/tap/templates/uws-db-service.yaml new file mode 100644 index 0000000000..2352a5d334 --- /dev/null +++ b/services/tap/templates/uws-db-service.yaml @@ -0,0 +1,14 @@ +kind: Service +apiVersion: v1 +metadata: + name: {{ template "cadc-tap.fullname" . }}-uws-db + labels: + {{- include "cadc-tap.labels" . | nindent 4 }} +spec: + ports: + - protocol: "TCP" + port: 5432 + targetPort: 5432 + selector: + {{- include "cadc-tap.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: "uws-db" diff --git a/services/tap/templates/vault-secrets.yaml b/services/tap/templates/vault-secrets.yaml new file mode 100644 index 0000000000..54334fa119 --- /dev/null +++ b/services/tap/templates/vault-secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ template "cadc-tap.fullname" . }}-secret + labels: + {{- include "cadc-tap.labels" . 
| nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/tap" + type: Opaque diff --git a/services/tap/values-ccin2p3.yaml b/services/tap/values-ccin2p3.yaml new file mode 100644 index 0000000000..c573f85a6c --- /dev/null +++ b/services/tap/values-ccin2p3.yaml @@ -0,0 +1,10 @@ +config: + gcsBucket: "async-results.lsst.codes" + gcsBucketUrl: "https://cccephs3.in2p3.fr:8080" + gcsBucketType: "S3" + jvmMaxHeapSize: "31G" + +qserv: + host: "ccqserv201.in2p3.fr:30040" + mock: + enabled: false diff --git a/services/tap/values-idfdev.yaml b/services/tap/values-idfdev.yaml index 6693c7e32e..b0a7af3d2f 100644 --- a/services/tap/values-idfdev.yaml +++ b/services/tap/values-idfdev.yaml @@ -1,16 +1,17 @@ -cadc-tap: - fullnameOverride: "cadc-tap" +resources: + requests: + cpu: 2.0 + memory: "2G" + limits: + cpu: 8.0 + memory: "32G" - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data-dev.lsst.cloud" - vaultSecretsPath: "secret/k8s_operator/data-dev.lsst.cloud/tap" +config: + gcsBucket: "async-results.lsst.codes" + gcsBucketUrl: "http://async-results.lsst.codes" + jvmMaxHeapSize: "31G" - config: - gcsBucket: "async-results.lsst.codes" - gcsBucketUrl: "http://async-results.lsst.codes" - -pull-secret: - enabled: true - path: "secret/k8s_operator/data-dev.lsst.cloud/pull-secret" +qserv: + host: "10.136.1.211:4040" + mock: + enabled: false diff --git a/services/tap/values-idfint.yaml b/services/tap/values-idfint.yaml index 570d4ba999..11bab7d2a0 100644 --- a/services/tap/values-idfint.yaml +++ b/services/tap/values-idfint.yaml @@ -1,21 +1,19 @@ -cadc-tap: - fullnameOverride: "cadc-tap" +resources: + requests: + cpu: 2.0 + memory: "2G" + limits: + cpu: 8.0 + memory: "32G" - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data-int.lsst.cloud" - vaultSecretsPath: "secret/k8s_operator/data-int.lsst.cloud/tap" +replicaCount: 2 - config: - gcsBucket: "async-results.lsst.codes" - gcsBucketUrl: "http://async-results.lsst.codes" +config: + gcsBucket: 
"async-results.lsst.codes" + gcsBucketUrl: "http://async-results.lsst.codes" + jvmMaxHeapSize: "31G" - qserv: - host: "10.136.1.211:4040" - mock: - enabled: false - -pull-secret: - enabled: true - path: "secret/k8s_operator/data-int.lsst.cloud/pull-secret" +qserv: + host: "10.136.1.211:4040" + mock: + enabled: false diff --git a/services/tap/values-idfprod.yaml b/services/tap/values-idfprod.yaml index 94e6df8ec3..a96be3b075 100644 --- a/services/tap/values-idfprod.yaml +++ b/services/tap/values-idfprod.yaml @@ -1,39 +1,28 @@ -cadc-tap: - fullnameOverride: "cadc-tap" +resources: + requests: + cpu: 2.0 + memory: "2G" + limits: + cpu: 8.0 + memory: "32G" - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data.lsst.cloud" - vaultSecretsPath: "secret/k8s_operator/data.lsst.cloud/tap" +replicaCount: 2 +config: + gcsBucket: "async-results.lsst.codes" + gcsBucketUrl: "http://async-results.lsst.codes" + jvmMaxHeapSize: "31G" + +qserv: + host: "10.140.1.211:4040" + mock: + enabled: false + +uws: resources: requests: - cpu: 2.0 - memory: "2G" + cpu: 0.25 + memory: "1G" limits: - cpu: 8.0 - memory: "32G" - - config: - gcsBucket: "async-results.lsst.codes" - gcsBucketUrl: "http://async-results.lsst.codes" - jvmMaxHeapSize: "31G" - - qserv: - host: "10.140.1.211:4040" - mock: - enabled: false - - uws: - resources: - requests: - cpu: 0.25 - memory: "1G" - limits: - cpu: 2.0 - memory: "4G" - -pull-secret: - enabled: true - path: "secret/k8s_operator/data.lsst.cloud/pull-secret" + cpu: 2.0 + memory: "4G" diff --git a/services/tap/values-int.yaml b/services/tap/values-int.yaml deleted file mode 100644 index 1641d37d18..0000000000 --- a/services/tap/values-int.yaml +++ /dev/null @@ -1,39 +0,0 @@ -cadc-tap: - fullnameOverride: "cadc-tap" - - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "lsst-lsp-int.ncsa.illinois.edu" - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/tap" - - resources: - requests: - cpu: 2.0 - memory: "2G" - limits: 
- cpu: 8.0 - memory: "16G" - - config: - gcsBucket: "async-results.lsst.codes" - gcsBucketUrl: "http://async-results.lsst.codes" - jvmMaxHeapSize: "15G" - - qserv: - host: "lsst-qserv-master03:4040" - mock: - enabled: false - - uws: - resources: - requests: - cpu: 0.25 - memory: "1G" - limits: - cpu: 2.0 - memory: "4G" - -pull-secret: - enabled: true - path: "secret/k8s_operator/lsst-lsp-int.ncsa.illinois.edu/pull-secret" diff --git a/services/tap/values-minikube.yaml b/services/tap/values-minikube.yaml index 2117a31a05..6e3f1aca1e 100644 --- a/services/tap/values-minikube.yaml +++ b/services/tap/values-minikube.yaml @@ -1,16 +1,3 @@ -cadc-tap: - fullnameOverride: "cadc-tap" - - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "minikube.lsst.codes" - vaultSecretsPath: "secret/k8s_operator/minikube.lsst.codes/tap" - - config: - gcsBucket: "async-results.lsst.codes" - gcsBucketUrl: "http://async-results.lsst.codes" - -pull-secret: - enabled: true - path: "secret/k8s_operator/minikube.lsst.codes/pull-secret" +config: + gcsBucket: "async-results.lsst.codes" + gcsBucketUrl: "http://async-results.lsst.codes" diff --git a/services/tap/values-red-five.yaml b/services/tap/values-red-five.yaml deleted file mode 100644 index b3e6d9ca9e..0000000000 --- a/services/tap/values-red-five.yaml +++ /dev/null @@ -1,16 +0,0 @@ -cadc-tap: - fullnameOverride: "cadc-tap" - - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "red-five.lsst.codes" - vaultSecretsPath: "secret/k8s_operator/red-five.lsst.codes/tap" - - config: - gcsBucket: "async-results.lsst.codes" - gcsBucketUrl: "http://async-results.lsst.codes" - -pull-secret: - enabled: true - path: "secret/k8s_operator/red-five.lsst.codes/pull-secret" diff --git a/services/tap/values-roe.yaml b/services/tap/values-roe.yaml index 6775305892..cf433e64ea 100644 --- a/services/tap/values-roe.yaml +++ b/services/tap/values-roe.yaml @@ -1,12 +1,4 @@ -cadc-tap: - fullnameOverride: "cadc-tap" - - imagePullSecrets: - - 
name: "pull-secret" - ingress: - host: "rsp.lsst.ac.uk" - vaultSecretsPath: "secret/k8s_operator/roe/tap" - -pull-secret: - enabled: true - path: "secret/k8s_operator/roe/pull-secret" +qserv: + host: "192.41.122.228:30040" + mock: + enabled: false diff --git a/services/tap/values-stable.yaml b/services/tap/values-stable.yaml deleted file mode 100644 index 3c638d699e..0000000000 --- a/services/tap/values-stable.yaml +++ /dev/null @@ -1,39 +0,0 @@ -cadc-tap: - fullnameOverride: "cadc-tap" - - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "lsst-lsp-stable.ncsa.illinois.edu" - vaultSecretsPath: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/tap" - - resources: - requests: - cpu: 2.0 - memory: "2G" - limits: - cpu: 8.0 - memory: "32G" - - config: - gcsBucket: "async-results.lsst.codes" - gcsBucketUrl: "http://async-results.lsst.codes" - jvmMaxHeapSize: "31G" - - qserv: - host: "lsst-qserv-master03:4040" - mock: - enabled: false - - uws: - resources: - requests: - cpu: 0.25 - memory: "1G" - limits: - cpu: 2.0 - memory: "4G" - -pull-secret: - enabled: true - path: "secret/k8s_operator/lsst-lsp-stable.ncsa.illinois.edu/pull-secret" diff --git a/services/tap/values.yaml b/services/tap/values.yaml new file mode 100644 index 0000000000..1aec4d4528 --- /dev/null +++ b/services/tap/values.yaml @@ -0,0 +1,157 @@ +# Default values for cadc-tap. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "cadc-tap" + +# -- Number of pods to start +replicaCount: 1 + +image: + # -- tap image to use + repository: "ghcr.io/lsst-sqre/lsst-tap-service" + + # -- Pull policy for the tap image + pullPolicy: "IfNotPresent" + + # -- Tag of tap image to use + # @default -- The appVersion of the chart + tag: "" + +# Settings for the ingress rules. 
+ingress: + # -- Additional annotations to use for endpoints that allow anonymous + # access, such as `/capabilities` and `/availability` + anonymousAnnotations: {} + + # -- Additional annotations to use for endpoints that are authenticated, + # such as `/sync`, `/async`, and `/tables` + authenticatedAnnotations: {} + +# -- Resource limits and requests for the cadc-tap frontend pod +resources: {} + +# -- Annotations for the cadc-tap frontend pod +podAnnotations: {} + +# -- Node selector rules for the cadc-tap frontend pod +nodeSelector: {} + +# -- Tolerations for the cadc-tap frontend pod +tolerations: [] + +# -- Affinity rules for the cadc-tap frontend pod +affinity: {} + +# -- Path to the Vault secret (`secret/k8s_operator//tap`, for example) +# @default -- None, must be set +vaultSecretsPath: "" + +config: + # -- Address to a MySQL database containing TAP schema data + tapSchemaAddress: "tap-schema-db.tap-schema.svc.cluster.local:3306" + + # -- Datalink payload URL + datalinkPayloadUrl: "https://github.com/lsst/sdm_schemas/releases/download/1.2.2/datalink-snippets.zip" + + # -- Gafaelfawr hostname to get user information from a token + # @default -- Value of `ingress.host` + gafaelfawrHost: "" + + # -- Name of GCS bucket in which to store results + # @default -- None, must be set + gcsBucket: "" + + # -- Base URL for results stored in GCS bucket + # @default -- None, must be set + gcsBucketUrl: "" + + # -- GCS bucket type (GCS or S3) + # @default -- GCS + gcsBucketType: "GCS" + + # -- Java heap size, which will set the maximum size of the heap. Otherwise + # Java would determine it based on how much memory is available and black + # maths. + jvmMaxHeapSize: 4G + +qserv: + # -- QServ hostname:port to connect to + # @default -- `"mock-qserv:3306"` (the mock QServ) + host: "mock-qserv:3306" + + mock: + # -- Spin up a container to pretend to be QServ. 
+ enabled: true + + image: + # -- Mock QServ image to use + repository: "ghcr.io/lsst-sqre/lsst-tap-mock-qserv" + + # -- Pull policy for the mock QServ image + pullPolicy: "IfNotPresent" + + # -- Tag of mock QServ image to use + # @default -- The appVersion of the chart + tag: "" + + # -- Resource limits and requests for the mock QServ pod + resources: {} + + # -- Annotations for the mock QServ pod + podAnnotations: {} + + # -- Node selection rules for the mock QServ pod + nodeSelector: {} + + # -- Tolerations for the mock QServ pod + tolerations: [] + + # -- Affinity rules for the mock QServ pod + affinity: {} + +uws: + image: + # -- UWS database image to use + repository: "ghcr.io/lsst-sqre/lsst-tap-uws-db" + + # -- Pull policy for the UWS database image + pullPolicy: "IfNotPresent" + + # -- Tag of UWS database image to use + # @default -- The appVersion of the chart + tag: "" + + # -- Resource limits and requests for the UWS database pod + resources: {} + + # -- Annotations for the UWS database pod + podAnnotations: {} + + # -- Node selection rules for the UWS database pod + nodeSelector: {} + + # -- Tolerations for the UWS database pod + tolerations: [] + + # -- Affinity rules for the UWS database pod + affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. 
+global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/telegraf-ds/Chart.yaml b/services/telegraf-ds/Chart.yaml new file mode 100644 index 0000000000..9205380673 --- /dev/null +++ b/services/telegraf-ds/Chart.yaml @@ -0,0 +1,17 @@ +apiVersion: v2 +name: telegraf-ds +version: 1.0.0 +description: Kubernetes node telemetry collection service +home: https://www.influxdata.com/time-series-platform/telegraf/ +sources: + - https://github.com/influxdata/telegraf + - https://github.com/influxdata/helm-charts +dependencies: + - name: telegraf-ds + version: 1.1.6 + repository: https://helm.influxdata.com/ +annotations: + phalanx.lsst.io/docs: | + - id: "SQR-061" + title: "Monitoring architecture for the RSP" + url: "https://sqr-061.lsst.io/" diff --git a/services/telegraf-ds/README.md b/services/telegraf-ds/README.md new file mode 100644 index 0000000000..012cd8ecad --- /dev/null +++ b/services/telegraf-ds/README.md @@ -0,0 +1,30 @@ +# telegraf-ds + +Kubernetes node telemetry collection service + +**Homepage:** + +## Source Code + +* +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| global.enabled_services | string | Set by Argo CD | services enabled in this RSP instance | +| global.host | string | Set by Argo CD | Host name for instance identification | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| telegraf-ds.args[0] | string | `"--config"` | | +| telegraf-ds.args[1] | string | `"/etc/telegraf-generated/telegraf-generated.conf"` | | +| telegraf-ds.env[0] | object | `{"name":"INFLUX_TOKEN","valueFrom":{"secretKeyRef":{"key":"influx-token","name":"telegraf"}}}` | Token to communicate with Influx | +| telegraf-ds.mountPoints[0].mountPath | string | 
`"/etc/telegraf-generated"` | | +| telegraf-ds.mountPoints[0].name | string | `"telegraf-generated-config"` | | +| telegraf-ds.override_config.toml | string | `"[agent]\n logfile=\"\"\n"` | | +| telegraf-ds.rbac.create | bool | `true` | | +| telegraf-ds.resources.limits.cpu | string | `"900m"` | | +| telegraf-ds.resources.limits.memory | string | `"512Mi"` | | +| telegraf-ds.serviceAccount.name | string | `"telegraf-ds"` | | +| telegraf-ds.volumes[0].configMap.name | string | `"telegraf-generated-config"` | | +| telegraf-ds.volumes[0].name | string | `"telegraf-generated-config"` | | diff --git a/services/telegraf-ds/templates/configmap.yaml b/services/telegraf-ds/templates/configmap.yaml new file mode 100644 index 0000000000..b8c84afc53 --- /dev/null +++ b/services/telegraf-ds/templates/configmap.yaml @@ -0,0 +1,42 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: telegraf-generated-config +data: + telegraf-generated.conf: |+ + [global_tags] + cluster = {{- .Values.global.host | quote }} + [agent] + hostname = "telegraf-$HOSTIP" + interval = "120s" + flush_interval = "120s" + collection_jitter = "10s" + metric_batch_size = 10000 + metric_buffer_limit = 100000 + + [[inputs.kubernetes]] + url = "https://$HOSTIP:10250" + bearer_token = "/var/run/secrets/kubernetes.io/serviceaccount/token" + insecure_skip_verify = true + namepass = ["kubernetes_pod_container"] + fieldpass = ["cpu_usage_nanocores", "memory_usage_bytes"] + + [[inputs.kube_inventory]] + url = "https://kubernetes.default.svc" + bearer_token = "/run/secrets/kubernetes.io/serviceaccount/token" + # Only worry about pods + resource_exclude = [ "daemonsets", "deployments", "endpoints", "ingress", "nodes", "persistentvolumes", "persistentvolumeclaims", "services", "statefulsets" ] + resource_include = [ "pods" ] + insecure_skip_verify = true + namespace = "" + {{ range $app := splitList "@" .Values.global.enabled_services }} + {{- $bucket := replace "-" "_" $app }} + {{- $namespace := replace "_" "-" $app 
}} + [[outputs.influxdb_v2]] + urls = ["https://monitoring.lsst.codes"] + token = "$INFLUX_TOKEN" + organization = "square" + bucket = {{ $bucket | quote }} + [outputs.influxdb_v2.tagpass] + namespace = [{{ $namespace | quote }}] + {{ end }} diff --git a/services/telegraf-ds/templates/vault-secret.yaml b/services/telegraf-ds/templates/vault-secret.yaml new file mode 100644 index 0000000000..643487069f --- /dev/null +++ b/services/telegraf-ds/templates/vault-secret.yaml @@ -0,0 +1,17 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: telegraf + namespace: telegraf-ds +spec: + # Use regular telegraf path--it's the same secret + path: {{ .Values.global.vaultSecretsPath }}/telegraf + type: Opaque +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret +spec: + path: {{ .Values.global.vaultSecretsPath }}/pull-secret + type: kubernetes.io/dockerconfigjson diff --git a/services/telegraf-ds/values-base.yaml b/services/telegraf-ds/values-base.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf-ds/values-idfdev.yaml b/services/telegraf-ds/values-idfdev.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf-ds/values-idfint.yaml b/services/telegraf-ds/values-idfint.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf-ds/values-idfprod.yaml b/services/telegraf-ds/values-idfprod.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf-ds/values-summit.yaml b/services/telegraf-ds/values-summit.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf-ds/values-tucson-teststand.yaml b/services/telegraf-ds/values-tucson-teststand.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf-ds/values.yaml b/services/telegraf-ds/values.yaml new file mode 100644 index 0000000000..8ed1e6ea76 --- /dev/null +++ b/services/telegraf-ds/values.yaml @@ 
-0,0 +1,48 @@ +telegraf-ds: + args: + - "--config" + - "/etc/telegraf-generated/telegraf-generated.conf" + env: + # -- Token to communicate with Influx + - name: INFLUX_TOKEN + valueFrom: + secretKeyRef: + name: telegraf + key: influx-token + rbac: + create: true + resources: + limits: + memory: 512Mi + cpu: 900m + + serviceAccount: + name: telegraf-ds + + # Set to effectively empty and just use generated config instead. + override_config: + toml: |+ + [agent] + logfile="" + volumes: + - name: telegraf-generated-config + configMap: + name: "telegraf-generated-config" + mountPoints: + - name: telegraf-generated-config + mountPath: /etc/telegraf-generated + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- services enabled in this RSP instance + # @default -- Set by Argo CD + enabled_services: "" + + # -- Host name for instance identification + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/telegraf/Chart.yaml b/services/telegraf/Chart.yaml new file mode 100644 index 0000000000..7ae4166856 --- /dev/null +++ b/services/telegraf/Chart.yaml @@ -0,0 +1,17 @@ +apiVersion: v2 +name: telegraf +version: 1.0.1 +description: Application telemetry collection service +home: https://www.influxdata.com/time-series-platform/telegraf/ +sources: + - https://github.com/influxdata/telegraf + - https://github.com/influxdata/helm-charts +dependencies: + - name: telegraf + version: 1.8.24 + repository: https://helm.influxdata.com/ +annotations: + phalanx.lsst.io/docs: | + - id: "SQR-061" + title: "Monitoring architecture for the RSP" + url: "https://sqr-061.lsst.io/" diff --git a/services/telegraf/README.md b/services/telegraf/README.md new file mode 100644 index 0000000000..b3fc357504 --- /dev/null +++ b/services/telegraf/README.md @@ -0,0 +1,37 @@ +# telegraf + +Application telemetry 
collection service + +**Homepage:** + +## Source Code + +* +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| global.enabled_services | string | Set by Argo CD | services enabled in this RSP instance | +| global.host | string | Set by Argo CD | Host name for instance identification | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| prometheus_config | object | `{"argocd":{"application_controller":"http://argocd-application-controller-metrics.argocd.svc:8082/metrics","notifications_controller":"http://argocd-notifications-controller-metrics.argocd.svc:9001/metrics","redis":"http://argocd-redis-metrics.argocd.svc:9121/metrics","repo_server":"http://argocd-repo-server-metrics.argocd.svc:8084/metrics","server":"http://argocd-server-metrics.argocd.svc:8083/metrics"},"ingress-nginx":{"controller":"http://ingress-nginx-controller-metrics.ingress-nginx:10254/metrics"},"nublado2":{"hub":"http://hub.nublado2:8081/metrics"}}` | Use prometheus_config to specify all the services in the RSP that expose prometheus endpoints. A better option, eventually, will be to use telegraf-operator and capture these as pod annotations. 
| +| telegraf.args[0] | string | `"--config"` | | +| telegraf.args[1] | string | `"/etc/telegraf-generated/telegraf-generated.conf"` | | +| telegraf.config.inputs | list | `[]` | | +| telegraf.config.outputs | list | `[]` | | +| telegraf.config.processors | list | `[]` | | +| telegraf.env[0].name | string | `"INFLUX_TOKEN"` | | +| telegraf.env[0].valueFrom.secretKeyRef.key | string | `"influx-token"` | | +| telegraf.env[0].valueFrom.secretKeyRef.name | string | `"telegraf"` | | +| telegraf.mountPoints[0].mountPath | string | `"/etc/telegraf-generated"` | | +| telegraf.mountPoints[0].name | string | `"telegraf-generated-config"` | | +| telegraf.podLabels."hub.jupyter.org/network-access-hub" | string | `"true"` | | +| telegraf.rbac.clusterWide | bool | `true` | | +| telegraf.resources.limits.cpu | string | `"900m"` | | +| telegraf.resources.limits.memory | string | `"512Mi"` | | +| telegraf.service.enabled | bool | `false` | | +| telegraf.tplVersion | int | `2` | | +| telegraf.volumes[0].configMap.name | string | `"telegraf-generated-config"` | | +| telegraf.volumes[0].name | string | `"telegraf-generated-config"` | | diff --git a/services/telegraf/templates/configmap.yaml b/services/telegraf/templates/configmap.yaml new file mode 100644 index 0000000000..6492489b8a --- /dev/null +++ b/services/telegraf/templates/configmap.yaml @@ -0,0 +1,68 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: telegraf-generated-config +data: + telegraf-generated.conf: | + {{- $enabled_apps := splitList "@" .Values.global.enabled_services }} + [global_tags] + cluster = {{- .Values.global.host | quote }} + [agent] + hostname = "$HOSTNAME" + omit_hostname = true + interval = "120s" + flush_interval = "120s" + collection_jitter = "10s" + logfile = "" + metric_batch_size = 10000 + metric_buffer_limit = 100000 + + [[processors.enum]] + [[processors.enum.mapping]] + dest = "status_code" + field = "status" + [processors.enum.mapping.value_mappings] + healthy = 1 + problem = 2 + critical = 
3 + + [[inputs.internal]] + collect_memstats = false + + + {{- range $raw_app_name, $defn := .Values.prometheus_config }} + {{- $app_name := replace "-" "_" $raw_app_name }} + {{- if has $app_name $enabled_apps }} + {{- range $component, $endpoint := $defn }} + + [[inputs.prometheus]] + metric_version = 2 + name_override = "prometheus_{{ $component }}" + urls = [ + {{ $endpoint | quote }} + ] + [inputs.prometheus.tags] + prometheus_app = {{ $app_name | quote }} + {{- end }} + {{- end }} + {{- end }} + + {{- range $raw_app_name, $defn := .Values.prometheus_config }} + {{- $app_name := replace "-" "_" $raw_app_name }} + {{- if has $app_name $enabled_apps }} + {{- range $component, $endpoint := $defn }} + + [[outputs.influxdb_v2]] + bucket = {{ $app_name | quote }} + organization = "square" + token = "$INFLUX_TOKEN" + urls = [ + "https://monitoring.lsst.codes" + ] + [outputs.influxdb_v2.tagpass] + prometheus_app = [ + {{ $app_name | quote }} + ] + {{- end }} + {{- end }} + {{- end }} diff --git a/services/telegraf/templates/vault-secret.yaml b/services/telegraf/templates/vault-secret.yaml new file mode 100644 index 0000000000..8370543c85 --- /dev/null +++ b/services/telegraf/templates/vault-secret.yaml @@ -0,0 +1,16 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: telegraf + namespace: telegraf +spec: + path: {{ .Values.global.vaultSecretsPath }}/telegraf + type: Opaque +--- +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: pull-secret +spec: + path: {{ .Values.global.vaultSecretsPath }}/pull-secret + type: kubernetes.io/dockerconfigjson diff --git a/services/telegraf/values-base.yaml b/services/telegraf/values-base.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf/values-idfdev.yaml b/services/telegraf/values-idfdev.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf/values-idfint.yaml b/services/telegraf/values-idfint.yaml new file mode 100644 
index 0000000000..e69de29bb2 diff --git a/services/telegraf/values-idfprod.yaml b/services/telegraf/values-idfprod.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf/values-summit.yaml b/services/telegraf/values-summit.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf/values-tucson-teststand.yaml b/services/telegraf/values-tucson-teststand.yaml new file mode 100644 index 0000000000..e69de29bb2 diff --git a/services/telegraf/values.yaml b/services/telegraf/values.yaml new file mode 100644 index 0000000000..f323c1eea4 --- /dev/null +++ b/services/telegraf/values.yaml @@ -0,0 +1,64 @@ +telegraf: + # Remove processors, inputs and outputs: use generated config instead. + config: + processors: [] + inputs: [] + outputs: [] + resources: + limits: + memory: 512Mi + cpu: 900m + args: + - "--config" + - "/etc/telegraf-generated/telegraf-generated.conf" + # We need the additional rules for prometheus scraping. + rbac: + clusterWide: true + env: + - name: INFLUX_TOKEN + valueFrom: + secretKeyRef: + key: influx-token + name: telegraf + podLabels: + hub.jupyter.org/network-access-hub: 'true' + service: + enabled: false + tplVersion: 2 + volumes: + - name: telegraf-generated-config + configMap: + name: telegraf-generated-config + mountPoints: + - name: telegraf-generated-config + mountPath: /etc/telegraf-generated + +# -- Use prometheus_config to specify all the services in the RSP that +# expose prometheus endpoints. A better option, eventually, will be to +# use telegraf-operator and capture these as pod annotations. 
+prometheus_config: + argocd: + application_controller: "http://argocd-application-controller-metrics.argocd.svc:8082/metrics" + notifications_controller: "http://argocd-notifications-controller-metrics.argocd.svc:9001/metrics" + redis: "http://argocd-redis-metrics.argocd.svc:9121/metrics" + repo_server: "http://argocd-repo-server-metrics.argocd.svc:8084/metrics" + server: "http://argocd-server-metrics.argocd.svc:8083/metrics" + nublado2: + hub: "http://hub.nublado2:8081/metrics" + ingress-nginx: + controller: "http://ingress-nginx-controller-metrics.ingress-nginx:10254/metrics" + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- services enabled in this RSP instance + # @default -- Set by Argo CD + enabled_services: "" + + # -- Host name for instance identification + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/services/times-square/Chart.yaml b/services/times-square/Chart.yaml new file mode 100644 index 0000000000..03ffaa3db7 --- /dev/null +++ b/services/times-square/Chart.yaml @@ -0,0 +1,25 @@ +apiVersion: v2 +name: times-square +version: 1.0.0 +description: | + An API service for managing and rendering parameterized Jupyter notebooks. 
+sources: + - https://github.com/lsst-sqre/times-square +type: application + +# The default version tag of the times-square docker image +appVersion: "0.6.0" + +dependencies: + - name: redis + version: 0.1.4 + repository: https://lsst-sqre.github.io/charts/ + +annotations: + phalanx.lsst.io/docs: | + - id: "SQR-062" + title: "The Times Square service for publishing parameterized Jupyter Notebooks in the Rubin Science platform" + url: "https://sqr-062.lsst.io/" + - id: "SQR-065" + title: "Design of Noteburst, a programatic JupyterLab notebook execution service for the Rubin Science Platform" + url: "https://sqr-065.lsst.io/" diff --git a/services/times-square/README.md b/services/times-square/README.md new file mode 100644 index 0000000000..dfa03521ec --- /dev/null +++ b/services/times-square/README.md @@ -0,0 +1,62 @@ +# times-square + +An API service for managing and rendering parameterized Jupyter notebooks. + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the times-square deployment pod | +| autoscaling.enabled | bool | `false` | Enable autoscaling of times-square deployment | +| autoscaling.maxReplicas | int | `100` | Maximum number of times-square deployment pods | +| autoscaling.minReplicas | int | `1` | Minimum number of times-square deployment pods | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization of times-square deployment pods | +| cloudsql.enabled | bool | `false` | Enable the Cloud SQL Auth Proxy sidecar, used with CloudSQL databases on Google Cloud | +| cloudsql.image.pullPolicy | string | `"IfNotPresent"` | Pull policy for Cloud SQL Auth Proxy images | +| cloudsql.image.repository | string | `"gcr.io/cloudsql-docker/gce-proxy"` | Cloud SQL Auth Proxy image to use | +| cloudsql.image.tag | string | `"1.33.2"` | Cloud SQL Auth Proxy tag to use | +| cloudsql.instanceConnectionName | string | `""` | 
Instance connection name for a CloudSQL PostgreSQL instance | +| cloudsql.serviceAccount | string | `""` | The Google service account that has an IAM binding to the `times-square` Kubernetes service accounts and has the `cloudsql.client` role | +| config.databaseUrl | string | None, must be set | URL for the PostgreSQL database | +| config.enableGitHubApp | string | `"False"` | Toggle to enable the GitHub App functionality | +| config.githubAppId | string | `""` | GitHub application ID | +| config.logLevel | string | `"INFO"` | Logging level: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL" | +| config.name | string | `"times-square"` | Name of the service. | +| config.profile | string | `"production"` | Run profile: "production" or "development" | +| config.redisCacheUrl | string | Points to embedded Redis | URL for Redis html / noteburst job cache database | +| config.redisQueueUrl | string | Points to embedded Redis | URL for Redis arq queue database | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by times-square Argo CD Application | Base URL for the environment | +| global.host | string | Set by times-square Argo CD Application | Host name for ingress | +| global.vaultSecretsPathPrefix | string | Set by times-square Argo CD Application | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the times-square image | +| image.repository | string | `"ghcr.io/lsst-sqre/times-square"` | Image to use in the times-square deployment | +| image.tag | string | `""` | Overrides the image tag whose default is the chart appVersion. 
| +| imagePullSecrets | list | `[]` | Secret names to use for all Docker pulls | +| ingress.annotations | object | `{}` | Additional annotations for the ingress rule | +| ingress.path | string | `"/times-square/api"` | Root URL path prefix for times-square API | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selection rules for the times-square deployment pod | +| podAnnotations | object | `{}` | Annotations for the times-square deployment pod | +| redis.affinity | object | `{}` | Affinity rules for the Redis pod | +| redis.nodeSelector | object | `{}` | Node selection rules for the Redis pod | +| redis.persistence.enabled | bool | `true` | Whether to persist Redis storage and thus tokens. Setting this to false will use `emptyDir` and reset all tokens on every restart. Only use this for a test deployment. | +| redis.persistence.size | string | `"8Gi"` | Amount of persistent storage to request | +| redis.persistence.storageClass | string | `""` | Class of storage to request | +| redis.persistence.volumeClaimName | string | `""` | Use an existing PVC, not dynamic provisioning. If this is set, the size, storageClass, and accessMode settings are ignored. | +| redis.podAnnotations | object | `{}` | Pod annotations for the Redis pod | +| redis.resources | object | See `values.yaml` | Resource limits and requests for the Redis pod | +| redis.tolerations | list | `[]` | Tolerations for the Redis pod | +| replicaCount.api | int | `1` | Number of API deployment pods to start | +| replicaCount.worker | int | `1` | Number of worker deployment pods to start | +| resources | object | `{}` | Resource limits and requests for the times-square deployment pod | +| service.port | int | `8080` | Port of the service to create and map to the ingress | +| service.type | string | `"ClusterIP"` | Type of service to create | +| serviceAccount.annotations | object | `{}` | Annotations to add to the service account. 
If CloudSQL is in use, the annotation specifying the Google service account will also be added. | +| serviceAccount.create | bool | `false` | Force creation of a service account. Normally, no service account is used or mounted. If CloudSQL is enabled, a service account is always created regardless of this value. | +| serviceAccount.name | string | Name based on the fullname template | Name of the service account to use | +| tolerations | list | `[]` | Tolerations for the times-square deployment pod | diff --git a/services/times-square/templates/_helpers.tpl b/services/times-square/templates/_helpers.tpl new file mode 100644 index 0000000000..4fe2a60721 --- /dev/null +++ b/services/times-square/templates/_helpers.tpl @@ -0,0 +1,63 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "times-square.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "times-square.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "times-square.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "times-square.labels" -}} +helm.sh/chart: {{ include "times-square.chart" . }} +{{ include "times-square.selectorLabels" . 
}} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "times-square.selectorLabels" -}} +app.kubernetes.io/name: {{ include "times-square.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + + +{{/* +Create the name of the service account to use +*/}} +{{- define "times-square.serviceAccountName" -}} +{{- if or .Values.serviceAccount.create .Values.cloudsql.enabled }} +{{- default (include "times-square.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/services/times-square/templates/configmap.yaml b/services/times-square/templates/configmap.yaml new file mode 100644 index 0000000000..4ee03962d6 --- /dev/null +++ b/services/times-square/templates/configmap.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "times-square.fullname" . }} + labels: + {{- include "times-square.labels" . 
| nindent 4 }} +data: + SAFIR_NAME: {{ .Values.config.name | quote }} + SAFIR_PROFILE: {{ .Values.config.profile | quote }} + SAFIR_LOG_LEVEL: {{ .Values.config.logLevel | quote }} + TS_ENVIRONMENT_URL: {{ .Values.global.baseUrl | quote }} + TS_PATH_PREFIX: {{ .Values.ingress.path }} + TS_DATABASE_URL: {{ required "config.databaseUrl must be set" .Values.config.databaseUrl | quote }} + TS_REDIS_URL: {{ required "config.redisCacheUrl must be set" .Values.config.redisCacheUrl | quote }} + TS_REDIS_QUEUE_URL: {{ required "config.redisQueueUrl must be set" .Values.config.redisQueueUrl | quote }} + TS_ENABLE_GITHUB_APP: {{ .Values.config.enableGitHubApp | quote }} + TS_GITHUB_APP_ID: {{ .Values.config.githubAppId | quote }} diff --git a/services/times-square/templates/deployment.yaml b/services/times-square/templates/deployment.yaml new file mode 100644 index 0000000000..4d344f49f2 --- /dev/null +++ b/services/times-square/templates/deployment.yaml @@ -0,0 +1,115 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "times-square.fullname" . }} + labels: + {{- include "times-square.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount.api }} + {{- end }} + selector: + matchLabels: + {{- include "times-square.selectorLabels" . | nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "times-square.selectorLabels" . | nindent 8 }} + times-square-redis-client: "true" + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if or .Values.serviceAccount.create .Values.cloudsql.enabled }} + serviceAccountName: {{ include "times-square.serviceAccountName" . 
}} + {{- else }} + automountServiceAccountToken: false + {{- end }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + {{- if .Values.cloudsql.enabled }} + - name: "cloud-sql-proxy" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}" + imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }} + command: + - "/cloud_sql_proxy" + - "-ip_address_types=PRIVATE" + - "-instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432" + {{- end }} + - name: {{ .Chart.Name }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - all + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: 8080 + protocol: TCP + livenessProbe: + httpGet: + path: / + port: http + readinessProbe: + httpGet: + path: / + port: http + resources: + {{- toYaml .Values.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "times-square.fullname" . }} + env: + - name: "TS_GAFAELFAWR_TOKEN" + valueFrom: + secretKeyRef: + name: {{ template "times-square.fullname" . }}-gafaelfawr-token + key: "token" + - name: "TS_DATABASE_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ template "times-square.fullname" . }}-secret + key: "TS_DATABASE_PASSWORD" + - name: "TS_GITHUB_WEBHOOK_SECRET" + valueFrom: + secretKeyRef: + name: {{ template "times-square.fullname" . }}-secret + key: "TS_GITHUB_WEBHOOK_SECRET"
 + - name: "TS_GITHUB_APP_PRIVATE_KEY" + valueFrom: + secretKeyRef: + name: {{ template "times-square.fullname" . }}-secret + key: "TS_GITHUB_APP_PRIVATE_KEY" + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/times-square/templates/gafaelfawrtoken.yaml b/services/times-square/templates/gafaelfawrtoken.yaml new file mode 100644 index 0000000000..f173ea4fa2 --- /dev/null +++ b/services/times-square/templates/gafaelfawrtoken.yaml @@ -0,0 +1,11 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrServiceToken +metadata: + name: {{ include "times-square.fullname" . }}-gafaelfawr-token + labels: + {{- include "times-square.labels" . | nindent 4 }} +spec: + service: "bot-times-square" + scopes: + - "admin:token" + - "exec:admin" diff --git a/services/times-square/templates/hpa.yaml b/services/times-square/templates/hpa.yaml new file mode 100644 index 0000000000..6989b5af1e --- /dev/null +++ b/services/times-square/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "times-square.fullname" . }} + labels: + {{- include "times-square.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "times-square.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/services/times-square/templates/ingress-webhooks.yaml b/services/times-square/templates/ingress-webhooks.yaml new file mode 100644 index 0000000000..af29673def --- /dev/null +++ b/services/times-square/templates/ingress-webhooks.yaml @@ -0,0 +1,29 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ include "times-square.fullname" . }}-github-webhook + labels: + {{- include "times-square.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + anonymous: true +template: + metadata: + name: {{ include "times-square.fullname" . }}-github-webhook + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "{{ .Values.ingress.path }}/github" + pathType: "Prefix" + backend: + service: + name: {{ include "times-square.fullname" . }} + port: + number: {{ .Values.service.port }} diff --git a/services/times-square/templates/ingress.yaml b/services/times-square/templates/ingress.yaml new file mode 100644 index 0000000000..8fd58c6eab --- /dev/null +++ b/services/times-square/templates/ingress.yaml @@ -0,0 +1,31 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "times-square.fullname" . }} + labels: + {{- include "times-square.labels" . 
| nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "exec:admin" + loginRedirect: true +template: + metadata: + name: {{ template "times-square.fullname" . }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: {{ .Values.ingress.path | quote }} + pathType: "Prefix" + backend: + service: + name: {{ template "times-square.fullname" . }} + port: + number: {{ .Values.service.port }} diff --git a/services/times-square/templates/networkpolicy.yaml b/services/times-square/templates/networkpolicy.yaml new file mode 100644 index 0000000000..dbb7e17403 --- /dev/null +++ b/services/times-square/templates/networkpolicy.yaml @@ -0,0 +1,21 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ include "times-square.fullname" . }} +spec: + podSelector: + matchLabels: + {{- include "times-square.selectorLabels" . | nindent 6 }} + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/times-square/templates/service.yaml b/services/times-square/templates/service.yaml new file mode 100644 index 0000000000..477632df17 --- /dev/null +++ b/services/times-square/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "times-square.fullname" . }} + labels: + {{- include "times-square.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "times-square.selectorLabels" . 
| nindent 4 }} diff --git a/services/times-square/templates/serviceaccount.yaml b/services/times-square/templates/serviceaccount.yaml new file mode 100644 index 0000000000..bbc698585b --- /dev/null +++ b/services/times-square/templates/serviceaccount.yaml @@ -0,0 +1,15 @@ +{{- if or .Values.serviceAccount.create .Values.cloudsql.enabled -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "times-square.serviceAccountName" . }} + labels: + {{- include "times-square.labels" . | nindent 4 }} + annotations: + {{- if .Values.cloudsql.enabled }} + iam.gke.io/gcp-service-account: {{ required "cloudsql.serviceAccount must be set to a valid Google service account" .Values.cloudsql.serviceAccount | quote }} + {{- end }} + {{- with .Values.serviceAccount.annotations }} + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} diff --git a/services/times-square/templates/vault-secret.yaml b/services/times-square/templates/vault-secret.yaml new file mode 100644 index 0000000000..a7960d8b21 --- /dev/null +++ b/services/times-square/templates/vault-secret.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ template "times-square.fullname" . }}-secret + labels: + {{- include "times-square.labels" . | nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPathPrefix }}/times-square" + type: Opaque diff --git a/services/times-square/templates/worker-deployment.yaml b/services/times-square/templates/worker-deployment.yaml new file mode 100644 index 0000000000..4d40031f97 --- /dev/null +++ b/services/times-square/templates/worker-deployment.yaml @@ -0,0 +1,113 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "times-square.fullname" . }}-worker + labels: + {{- include "times-square.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount.worker }} + {{- end }} + selector: + matchLabels: + {{- include "times-square.selectorLabels" . 
| nindent 6 }} + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "times-square.selectorLabels" . | nindent 8 }} + times-square-redis-client: "true" + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if or .Values.serviceAccount.create .Values.cloudsql.enabled }} + serviceAccountName: {{ include "times-square.serviceAccountName" . }} + {{- else }} + automountServiceAccountToken: false + {{- end }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + {{- if .Values.cloudsql.enabled }} + - name: "cloud-sql-proxy" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}" + imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }} + command: + - "/cloud_sql_proxy" + - "-ip_address_types=PRIVATE" + - "-instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432" + {{- end }} + - name: {{ .Chart.Name }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - all + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + command: ["arq"] + args: ["timessquare.worker.main.WorkerSettings"] + livenessProbe: + exec: + command: + - "arq" + - "--check" + - "timessquare.worker.main.WorkerSettings" + initialDelaySeconds: 360 + periodSeconds: 15 + resources: + {{- toYaml .Values.resources | nindent 12 }} + envFrom: + - configMapRef: + name: {{ include "times-square.fullname" . 
}} + env: + - name: "TS_GAFAELFAWR_TOKEN" + valueFrom: + secretKeyRef: + name: {{ template "times-square.fullname" . }}-gafaelfawr-token + key: "token" + - name: "TS_DATABASE_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ template "times-square.fullname" . }}-secret + key: "TS_DATABASE_PASSWORD" + - name: "TS_GITHUB_WEBHOOK_SECRET" + valueFrom: + secretKeyRef: + name: {{ template "times-square.fullname" . }}-secret + key: "TS_GITHUB_WEBHOOK_SECRET" + - name: "TS_GITHUB_APP_PRIVATE_KEY" + valueFrom: + secretKeyRef: + name: {{ template "times-square.fullname" . }}-secret + key: "TS_GITHUB_APP_PRIVATE_KEY" + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/times-square/values-idfdev.yaml b/services/times-square/values-idfdev.yaml new file mode 100644 index 0000000000..ccd4620735 --- /dev/null +++ b/services/times-square/values-idfdev.yaml @@ -0,0 +1,14 @@ +image: + pullPolicy: Always +config: + logLevel: "DEBUG" + databaseUrl: "postgresql://times-square@localhost/times-square" + githubAppId: "196798" + enableGitHubApp: "True" +cloudsql: + enabled: true + instanceConnectionName: "science-platform-dev-7696:us-central1:science-platform-dev-e9e11de2" + serviceAccount: "times-square@science-platform-dev-7696.iam.gserviceaccount.com" +redis: + persistence: + storageClass: "premium-rwo" diff --git a/services/times-square/values.yaml b/services/times-square/values.yaml new file mode 100644 index 0000000000..7abb126d69 --- /dev/null +++ b/services/times-square/values.yaml @@ -0,0 +1,184 @@ +# Global parameters will be set by parameters injected via the Argo CD +# Application resource and should not be set in the individual environment +# values files. 
+ +replicaCount: + # -- Number of API deployment pods to start + api: 1 + + # -- Number of worker deployment pods to start + worker: 1 + +image: + # -- Image to use in the times-square deployment + repository: ghcr.io/lsst-sqre/times-square + + # -- Pull policy for the times-square image + pullPolicy: IfNotPresent + + # -- Overrides the image tag whose default is the chart appVersion. + tag: "" + +# -- Secret names to use for all Docker pulls +imagePullSecrets: [] + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +# -- Annotations for the times-square deployment pod +podAnnotations: {} + +serviceAccount: + # -- Force creation of a service account. Normally, no service account is + # used or mounted. If CloudSQL is enabled, a service account is always + # created regardless of this value. + create: false + + # -- Annotations to add to the service account. If CloudSQL is in use, the + # annotation specifying the Google service account will also be added. 
+ annotations: {} + + # -- Name of the service account to use + # @default -- Name based on the fullname template + name: "" + +service: + # -- Type of service to create + type: ClusterIP + + # -- Port of the service to create and map to the ingress + port: 8080 + +ingress: + # -- Additional annotations for the ingress rule + annotations: {} + + # -- Root URL path prefix for times-square API + path: "/times-square/api" + +# -- Resource limits and requests for the times-square deployment pod +resources: {} + +autoscaling: + # -- Enable autoscaling of times-square deployment + enabled: false + + # -- Minimum number of times-square deployment pods + minReplicas: 1 + + # -- Maximum number of times-square deployment pods + maxReplicas: 100 + + # -- Target CPU utilization of times-square deployment pods + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# -- Node selection rules for the times-square deployment pod +nodeSelector: {} + +# -- Tolerations for the times-square deployment pod +tolerations: [] + +# -- Affinity rules for the times-square deployment pod +affinity: {} + +config: + # -- Name of the service. 
+ name: "times-square" + + # -- Run profile: "production" or "development" + profile: "production" + + # -- Logging level: "DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL" + logLevel: "INFO" + + # -- URL for the PostgreSQL database + # @default -- None, must be set + databaseUrl: "" + + # -- URL for Redis html / noteburst job cache database + # @default -- Points to embedded Redis + redisCacheUrl: "redis://times-square-redis:6379/0" + + # -- URL for Redis arq queue database + # @default -- Points to embedded Redis + redisQueueUrl: "redis://times-square-redis:6379/1" + + # -- GitHub application ID + githubAppId: "" + + # -- Toggle to enable the GitHub App functionality + enableGitHubApp: "False" + +cloudsql: + # -- Enable the Cloud SQL Auth Proxy sidecar, used with CloudSQL databases + # on Google Cloud + enabled: false + + image: + # -- Cloud SQL Auth Proxy image to use + repository: "gcr.io/cloudsql-docker/gce-proxy" + + # -- Cloud SQL Auth Proxy tag to use + tag: "1.33.2" + + # -- Pull policy for Cloud SQL Auth Proxy images + pullPolicy: "IfNotPresent" + + # -- Instance connection name for a CloudSQL PostgreSQL instance + instanceConnectionName: "" + + # -- The Google service account that has an IAM binding to the `times-square` + # Kubernetes service accounts and has the `cloudsql.client` role + serviceAccount: "" + +redis: + persistence: + # -- Whether to persist Redis storage and thus tokens. Setting this to + # false will use `emptyDir` and reset all tokens on every restart. Only + # use this for a test deployment. + enabled: true + + # -- Amount of persistent storage to request + size: "8Gi" + + # -- Class of storage to request + storageClass: "" + + # -- Use an existing PVC, not dynamic provisioning. If this is set, the + # size, storageClass, and accessMode settings are ignored. 
+ volumeClaimName: "" + + # -- Resource limits and requests for the Redis pod + # @default -- See `values.yaml` + resources: + limits: + cpu: "1" + + # -- Pod annotations for the Redis pod + podAnnotations: {} + + # -- Node selection rules for the Redis pod + nodeSelector: {} + + # -- Tolerations for the Redis pod + tolerations: [] + + # -- Affinity rules for the Redis pod + affinity: {} + +global: + # -- Base URL for the environment + # @default -- Set by times-square Argo CD Application + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by times-square Argo CD Application + host: "" + + # -- Base path for Vault secrets + # @default -- Set by times-square Argo CD Application + vaultSecretsPathPrefix: "" diff --git a/services/vault-secrets-operator/Chart.yaml b/services/vault-secrets-operator/Chart.yaml index 5a5d34b03c..b3b58dbfae 100644 --- a/services/vault-secrets-operator/Chart.yaml +++ b/services/vault-secrets-operator/Chart.yaml @@ -1,7 +1,14 @@ apiVersion: v2 name: vault-secrets-operator version: 1.0.0 +sources: + - https://github.com/ricoberger/vault-secrets-operator dependencies: -- name: vault-secrets-operator - version: 1.16.5 - repository: https://ricoberger.github.io/helm-charts/ + - name: vault-secrets-operator + version: 1.19.8 + repository: https://ricoberger.github.io/helm-charts/ +annotations: + phalanx.lsst.io/docs: | + - id: "DMTN-112" + title: "LSST DM Vault" + url: "https://dmtn-112.lsst.io/" diff --git a/services/vault-secrets-operator/README.md b/services/vault-secrets-operator/README.md new file mode 100644 index 0000000000..f1c78b7e0e --- /dev/null +++ b/services/vault-secrets-operator/README.md @@ -0,0 +1,14 @@ +# vault-secrets-operator + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| vault-secrets-operator.environmentVars[0] | object | `{"name":"VAULT_TOKEN","valueFrom":{"secretKeyRef":{"key":"VAULT_TOKEN","name":"vault-secrets-operator"}}}` | 
environment variable where the Vault read token is kept | +| vault-secrets-operator.environmentVars[1] | object | `{"name":"VAULT_TOKEN_LEASE_DURATION","valueFrom":{"secretKeyRef":{"key":"VAULT_TOKEN_LEASE_DURATION","name":"vault-secrets-operator"}}}` | environment variable storing the lease duration, in seconds | +| vault-secrets-operator.vault.address | string | `"https://vault.lsst.codes"` | URL of the underlying Vault implementation | +| vault-secrets-operator.vault.reconciliationTime | int | `60` | Sync secrets from vault on this cadence | diff --git a/services/vault-secrets-operator/values-base.yaml b/services/vault-secrets-operator/values-base.yaml index 51a1243b2d..e69de29bb2 100644 --- a/services/vault-secrets-operator/values-base.yaml +++ b/services/vault-secrets-operator/values-base.yaml @@ -1,15 +0,0 @@ -vault-secrets-operator: - environmentVars: - - name: VAULT_TOKEN - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN - - name: VAULT_TOKEN_LEASE_DURATION - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN_LEASE_DURATION - vault: - address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-int.yaml b/services/vault-secrets-operator/values-ccin2p3.yaml similarity index 93% rename from services/vault-secrets-operator/values-int.yaml rename to services/vault-secrets-operator/values-ccin2p3.yaml index 51a1243b2d..d18a033099 100644 --- a/services/vault-secrets-operator/values-int.yaml +++ b/services/vault-secrets-operator/values-ccin2p3.yaml @@ -12,4 +12,3 @@ vault-secrets-operator: key: VAULT_TOKEN_LEASE_DURATION vault: address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-idfdev.yaml b/services/vault-secrets-operator/values-idfdev.yaml index 51a1243b2d..e69de29bb2 100644 --- a/services/vault-secrets-operator/values-idfdev.yaml +++ b/services/vault-secrets-operator/values-idfdev.yaml @@ -1,15 
+0,0 @@ -vault-secrets-operator: - environmentVars: - - name: VAULT_TOKEN - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN - - name: VAULT_TOKEN_LEASE_DURATION - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN_LEASE_DURATION - vault: - address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-idfint.yaml b/services/vault-secrets-operator/values-idfint.yaml index 51a1243b2d..e69de29bb2 100644 --- a/services/vault-secrets-operator/values-idfint.yaml +++ b/services/vault-secrets-operator/values-idfint.yaml @@ -1,15 +0,0 @@ -vault-secrets-operator: - environmentVars: - - name: VAULT_TOKEN - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN - - name: VAULT_TOKEN_LEASE_DURATION - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN_LEASE_DURATION - vault: - address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-idfprod.yaml b/services/vault-secrets-operator/values-idfprod.yaml index 51a1243b2d..e69de29bb2 100644 --- a/services/vault-secrets-operator/values-idfprod.yaml +++ b/services/vault-secrets-operator/values-idfprod.yaml @@ -1,15 +0,0 @@ -vault-secrets-operator: - environmentVars: - - name: VAULT_TOKEN - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN - - name: VAULT_TOKEN_LEASE_DURATION - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN_LEASE_DURATION - vault: - address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-minikube.yaml b/services/vault-secrets-operator/values-minikube.yaml index 51a1243b2d..e69de29bb2 100644 --- a/services/vault-secrets-operator/values-minikube.yaml +++ b/services/vault-secrets-operator/values-minikube.yaml @@ -1,15 +0,0 @@ -vault-secrets-operator: - environmentVars: - - name: VAULT_TOKEN - valueFrom: - 
secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN - - name: VAULT_TOKEN_LEASE_DURATION - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN_LEASE_DURATION - vault: - address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-roe.yaml b/services/vault-secrets-operator/values-roe.yaml index 51a1243b2d..e69de29bb2 100644 --- a/services/vault-secrets-operator/values-roe.yaml +++ b/services/vault-secrets-operator/values-roe.yaml @@ -1,15 +0,0 @@ -vault-secrets-operator: - environmentVars: - - name: VAULT_TOKEN - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN - - name: VAULT_TOKEN_LEASE_DURATION - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN_LEASE_DURATION - vault: - address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-squash-sandbox.yaml b/services/vault-secrets-operator/values-squash-sandbox.yaml deleted file mode 100644 index 51a1243b2d..0000000000 --- a/services/vault-secrets-operator/values-squash-sandbox.yaml +++ /dev/null @@ -1,15 +0,0 @@ -vault-secrets-operator: - environmentVars: - - name: VAULT_TOKEN - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN - - name: VAULT_TOKEN_LEASE_DURATION - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN_LEASE_DURATION - vault: - address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-stable.yaml b/services/vault-secrets-operator/values-stable.yaml deleted file mode 100644 index 51a1243b2d..0000000000 --- a/services/vault-secrets-operator/values-stable.yaml +++ /dev/null @@ -1,15 +0,0 @@ -vault-secrets-operator: - environmentVars: - - name: VAULT_TOKEN - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN - - name: VAULT_TOKEN_LEASE_DURATION - valueFrom: - secretKeyRef: - name: 
vault-secrets-operator - key: VAULT_TOKEN_LEASE_DURATION - vault: - address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-summit.yaml b/services/vault-secrets-operator/values-summit.yaml index 51a1243b2d..e69de29bb2 100644 --- a/services/vault-secrets-operator/values-summit.yaml +++ b/services/vault-secrets-operator/values-summit.yaml @@ -1,15 +0,0 @@ -vault-secrets-operator: - environmentVars: - - name: VAULT_TOKEN - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN - - name: VAULT_TOKEN_LEASE_DURATION - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN_LEASE_DURATION - vault: - address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-tucson-teststand.yaml b/services/vault-secrets-operator/values-tucson-teststand.yaml index 51a1243b2d..e69de29bb2 100644 --- a/services/vault-secrets-operator/values-tucson-teststand.yaml +++ b/services/vault-secrets-operator/values-tucson-teststand.yaml @@ -1,15 +0,0 @@ -vault-secrets-operator: - environmentVars: - - name: VAULT_TOKEN - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN - - name: VAULT_TOKEN_LEASE_DURATION - valueFrom: - secretKeyRef: - name: vault-secrets-operator - key: VAULT_TOKEN_LEASE_DURATION - vault: - address: "https://vault.lsst.codes" - reconciliationTime: 60 diff --git a/services/vault-secrets-operator/values-red-five.yaml b/services/vault-secrets-operator/values.yaml similarity index 53% rename from services/vault-secrets-operator/values-red-five.yaml rename to services/vault-secrets-operator/values.yaml index 51a1243b2d..713e889bc1 100644 --- a/services/vault-secrets-operator/values-red-five.yaml +++ b/services/vault-secrets-operator/values.yaml @@ -1,15 +1,21 @@ +## Variables for Vault Secrets Operator +## https://github.com/ricoberger/vault-secrets-operator/blob/master/charts/README.md vault-secrets-operator: 
environmentVars: + # -- environment variable where the Vault read token is kept - name: VAULT_TOKEN valueFrom: secretKeyRef: name: vault-secrets-operator key: VAULT_TOKEN + # -- environment variable storing the lease duration, in seconds - name: VAULT_TOKEN_LEASE_DURATION valueFrom: secretKeyRef: name: vault-secrets-operator key: VAULT_TOKEN_LEASE_DURATION vault: + # -- URL of the underlying Vault implementation address: "https://vault.lsst.codes" + # -- Sync secrets from vault on this cadence reconciliationTime: 60 diff --git a/services/vo-cutouts/Chart.yaml b/services/vo-cutouts/Chart.yaml index c08386ebb1..17c9632a60 100644 --- a/services/vo-cutouts/Chart.yaml +++ b/services/vo-cutouts/Chart.yaml @@ -1,10 +1,18 @@ apiVersion: v2 name: vo-cutouts version: 1.0.0 +description: "Image cutout service complying with IVOA SODA" +sources: + - "https://github.com/lsst-sqre/vo-cutouts" +appVersion: 1.0.0 + dependencies: - - name: vo-cutouts - version: 0.2.2 - repository: https://lsst-sqre.github.io/charts/ - - name: pull-secret - version: 0.1.2 + - name: redis + version: 1.0.0 repository: https://lsst-sqre.github.io/charts/ + +annotations: + phalanx.lsst.io/docs: | + - id: "DMTN-208" + title: "RSP image cutout service implementation strategy" + url: "https://dmtn-208.lsst.io/" diff --git a/services/vo-cutouts/README.md b/services/vo-cutouts/README.md new file mode 100644 index 0000000000..7fd791fa78 --- /dev/null +++ b/services/vo-cutouts/README.md @@ -0,0 +1,67 @@ +# vo-cutouts + +Image cutout service complying with IVOA SODA + +## Source Code + +* + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the vo-cutouts frontend pod | +| cloudsql.enabled | bool | `false` | Enable the Cloud SQL Auth Proxy sidecar, used with CloudSQL databases on Google Cloud | +| cloudsql.image.pullPolicy | string | `"IfNotPresent"` | Pull policy for Cloud SQL Auth Proxy images | +| 
cloudsql.image.repository | string | `"gcr.io/cloudsql-docker/gce-proxy"` | Cloud SQL Auth Proxy image to use | +| cloudsql.image.tag | string | `"1.33.2"` | Cloud SQL Auth Proxy tag to use | +| cloudsql.instanceConnectionName | string | `""` | Instance connection name for a CloudSQL PostgreSQL instance | +| cloudsql.serviceAccount | string | None, must be set | The Google service account that has an IAM binding to the `vo-cutouts` Kubernetes service accounts and has the `cloudsql.client` role, access to the GCS bucket, and ability to sign URLs as itself | +| config.databaseUrl | string | None, must be set | URL for the PostgreSQL database | +| config.gcsBucketUrl | string | None, must be set | URL for the GCS bucket into which to store cutouts (must start with `s3`) | +| config.lifetime | string | 2592000 (30 days) | Lifetime of job results in seconds (quote so that Helm doesn't turn it into a floating point number) | +| config.loglevel | string | `"INFO"` | Choose from the text form of Python logging levels | +| config.syncTimeout | int | 60 (1 minute) | Timeout for results from a sync cutout in seconds | +| config.timeout | int | 600 (10 minutes) | Timeout for a single cutout job in seconds | +| cutoutWorker.affinity | object | `{}` | Affinity rules for the cutout worker pod | +| cutoutWorker.image.pullPolicy | string | `"IfNotPresent"` | Pull policy for cutout workers | +| cutoutWorker.image.repository | string | `"ghcr.io/lsst-sqre/vo-cutouts-worker"` | Stack image to use for cutouts | +| cutoutWorker.image.tag | string | The appVersion of the chart | Tag of vo-cutouts worker image to use | +| cutoutWorker.nodeSelector | object | `{}` | Node selection rules for the cutout worker pod | +| cutoutWorker.podAnnotations | object | `{}` | Annotations for the cutout worker pod | +| cutoutWorker.replicaCount | int | `2` | Number of cutout worker pods to start | +| cutoutWorker.resources | object | `{}` | Resource limits and requests for the cutout worker pod | +| 
cutoutWorker.tolerations | list | `[]` | Tolerations for the cutout worker pod | +| databaseWorker.affinity | object | `{}` | Affinity rules for the database worker pod | +| databaseWorker.nodeSelector | object | `{}` | Node selection rules for the database worker pod | +| databaseWorker.podAnnotations | object | `{}` | Annotations for the database worker pod | +| databaseWorker.replicaCount | int | `1` | Number of database worker pods to start | +| databaseWorker.resources | object | `{}` | Resource limits and requests for the database worker pod | +| databaseWorker.tolerations | list | `[]` | Tolerations for the database worker pod | +| fullnameOverride | string | `""` | Override the full name for resources (includes the release name) | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.butlerRepositoryIndex | string | Set by Argo CD | URI to the Butler configuration of available repositories | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the vo-cutouts image | +| image.repository | string | `"ghcr.io/lsst-sqre/vo-cutouts"` | vo-cutouts image to use | +| image.tag | string | The appVersion of the chart | Tag of vo-cutouts image to use | +| ingress.annotations | object | `{}` | Additional annotations to add to the ingress | +| nameOverride | string | `""` | Override the base name for resources | +| nodeSelector | object | `{}` | Node selector rules for the vo-cutouts frontend pod | +| podAnnotations | object | `{}` | Annotations for the vo-cutouts frontend pod | +| redis.affinity | object | `{}` | Affinity rules for the Redis pod | +| redis.config.secretKey | string | `"redis-password"` | Key inside secret from which to get the Redis password (do not change) | +| redis.config.secretName | string | `"vo-cutouts-secret"` | Name of secret containing Redis 
password (may require changing if fullnameOverride is set) | +| redis.nodeSelector | object | `{}` | Node selection rules for the Redis pod | +| redis.persistence.accessMode | string | `"ReadWriteOnce"` | Access mode of storage to request | +| redis.persistence.enabled | bool | `true` | Whether to persist Redis storage and thus tokens. Setting this to false will use `emptyDir` and reset all tokens on every restart. Only use this for a test deployment. | +| redis.persistence.size | string | `"100Mi"` | Amount of persistent storage to request | +| redis.persistence.storageClass | string | `""` | Class of storage to request | +| redis.persistence.volumeClaimName | string | `""` | Use an existing PVC, not dynamic provisioning. If this is set, the size, storageClass, and accessMode settings are ignored. | +| redis.podAnnotations | object | `{}` | Pod annotations for the Redis pod | +| redis.resources | object | See `values.yaml` | Resource limits and requests for the Redis pod | +| redis.tolerations | list | `[]` | Tolerations for the Redis pod | +| replicaCount | int | `1` | Number of web frontend pods to start | +| resources | object | `{}` | Resource limits and requests for the vo-cutouts frontend pod | +| tolerations | list | `[]` | Tolerations for the vo-cutouts frontend pod | diff --git a/services/vo-cutouts/templates/_helpers.tpl b/services/vo-cutouts/templates/_helpers.tpl new file mode 100644 index 0000000000..0fec75cb0a --- /dev/null +++ b/services/vo-cutouts/templates/_helpers.tpl @@ -0,0 +1,52 @@ +{{/* vim: set filetype=mustache: */}} +{{/* +Expand the name of the chart. +*/}} +{{- define "vo-cutouts.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "vo-cutouts.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "vo-cutouts.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "vo-cutouts.labels" -}} +helm.sh/chart: {{ include "vo-cutouts.chart" . }} +{{ include "vo-cutouts.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "vo-cutouts.selectorLabels" -}} +app.kubernetes.io/name: {{ include "vo-cutouts.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/services/vo-cutouts/templates/configmap.yaml b/services/vo-cutouts/templates/configmap.yaml new file mode 100644 index 0000000000..aae4be91be --- /dev/null +++ b/services/vo-cutouts/templates/configmap.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ template "vo-cutouts.fullname" . }}-config + labels: + {{- include "vo-cutouts.labels" . 
| nindent 4 }} +data: + CUTOUT_DATABASE_URL: {{ required "config.databaseUrl must be set" .Values.config.databaseUrl | quote }} + CUTOUT_SERVICE_ACCOUNT: {{ required "cloudsql.serviceAccount must be set" .Values.cloudsql.serviceAccount | quote }} + CUTOUT_STORAGE_URL: {{ required "config.gcsBucketUrl must be set" .Values.config.gcsBucketUrl | quote }} + CUTOUT_TIMEOUT: {{ .Values.config.timeout | quote }} + CUTOUT_LIFETIME: {{ .Values.config.lifetime | quote }} + CUTOUT_REDIS_HOST: "{{ template "vo-cutouts.fullname" . }}-redis.{{ .Release.Namespace }}" + CUTOUT_SYNC_TIMEOUT: {{ .Values.config.syncTimeout | quote }} + SAFIR_LOG_LEVEL: {{ .Values.config.loglevel | quote }} + SAFIR_PROFILE: "production" diff --git a/services/vo-cutouts/templates/db-worker-deployment.yaml b/services/vo-cutouts/templates/db-worker-deployment.yaml new file mode 100644 index 0000000000..142017495e --- /dev/null +++ b/services/vo-cutouts/templates/db-worker-deployment.yaml @@ -0,0 +1,106 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "vo-cutouts.fullname" . }}-db-worker + labels: + {{- include "vo-cutouts.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.databaseWorker.replicaCount }} + selector: + matchLabels: + {{- include "vo-cutouts.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "db-worker" + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.databaseWorker.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "vo-cutouts.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: "db-worker" + vo-cutouts-redis-client: "true" + spec: + {{- if .Values.cloudsql.enabled }} + serviceAccountName: {{ include "vo-cutouts.fullname" . 
}} + {{- else }} + automountServiceAccountToken: false + {{- end }} + containers: + {{- if .Values.cloudsql.enabled }} + - name: "cloud-sql-proxy" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}" + imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }} + command: + - "/cloud_sql_proxy" + - "-ip_address_types=PRIVATE" + - "-instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432" + {{- end }} + - name: "db-worker" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + command: + - "dramatiq" + - "vocutouts.actors" + - "-Q" + - "uws" + - "-p" + - "1" + env: + - name: "CUTOUT_DATABASE_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ template "vo-cutouts.fullname" . }}-secret + key: "database-password" + - name: "CUTOUT_REDIS_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ template "vo-cutouts.fullname" . }}-secret + key: "redis-password" + envFrom: + - configMapRef: + name: {{ template "vo-cutouts.fullname" . }}-config + {{- with .Values.databaseWorker.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: "tmp" + mountPath: "/tmp" + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + volumes: + # Dramatiq enables its Prometheus middleware by default, which + # requires writable /tmp. + - name: "tmp" + emptyDir: {} + {{- with .Values.databaseWorker.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.databaseWorker.affinity }} + affinity: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.databaseWorker.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/vo-cutouts/templates/db-worker-networkpolicy.yaml b/services/vo-cutouts/templates/db-worker-networkpolicy.yaml new file mode 100644 index 0000000000..7e6f6b961e --- /dev/null +++ b/services/vo-cutouts/templates/db-worker-networkpolicy.yaml @@ -0,0 +1,15 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "vo-cutouts.fullname" . }}-db-worker + labels: + {{- include "vo-cutouts.labels" . | nindent 4 }} +spec: + podSelector: + # This policy controls inbound and outbound access to the database workers. + matchLabels: + {{- include "vo-cutouts.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "db-worker" + policyTypes: + # Block all inbound access. + - Ingress diff --git a/services/vo-cutouts/templates/deployment.yaml b/services/vo-cutouts/templates/deployment.yaml new file mode 100644 index 0000000000..5414510289 --- /dev/null +++ b/services/vo-cutouts/templates/deployment.yaml @@ -0,0 +1,107 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "vo-cutouts.fullname" . }} + labels: + {{- include "vo-cutouts.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + selector: + matchLabels: + {{- include "vo-cutouts.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "frontend" + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "vo-cutouts.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: "frontend" + vo-cutouts-redis-client: "true" + spec: + {{- if .Values.cloudsql.enabled }} + serviceAccountName: {{ include "vo-cutouts.fullname" . 
}} + {{- else }} + automountServiceAccountToken: false + {{- end }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + containers: + {{- if .Values.cloudsql.enabled }} + - name: "cloud-sql-proxy" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + runAsNonRoot: true + runAsUser: 65532 + runAsGroup: 65532 + image: "{{ .Values.cloudsql.image.repository }}:{{ .Values.cloudsql.image.tag }}" + imagePullPolicy: {{ .Values.cloudsql.image.pullPolicy | quote }} + command: + - "/cloud_sql_proxy" + - "-ip_address_types=PRIVATE" + - "-instances={{ required "cloudsql.instanceConnectionName must be specified" .Values.cloudsql.instanceConnectionName }}=tcp:5432" + {{- end }} + - name: "vo-cutouts" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + ports: + - containerPort: 8080 + name: "http" + protocol: "TCP" + env: + - name: "CUTOUT_DATABASE_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ template "vo-cutouts.fullname" . }}-secret + key: "database-password" + - name: "CUTOUT_REDIS_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ template "vo-cutouts.fullname" . }}-secret + key: "redis-password" + envFrom: + - configMapRef: + name: {{ template "vo-cutouts.fullname" . }}-config + readinessProbe: + httpGet: + path: "/api/cutout/availability" + port: "http" + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: "tmp" + mountPath: "/tmp" + volumes: + # Dramatiq enables its Prometheus middleware by default, which + # requires writable /tmp. + - name: "tmp" + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . 
| nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/vo-cutouts/templates/ingress.yaml b/services/vo-cutouts/templates/ingress.yaml new file mode 100644 index 0000000000..d93540b39a --- /dev/null +++ b/services/vo-cutouts/templates/ingress.yaml @@ -0,0 +1,30 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: {{ template "vo-cutouts.fullname" . }} + labels: + {{- include "vo-cutouts.labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "read:image" +template: + metadata: + name: {{ template "vo-cutouts.fullname" . }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/api/cutout" + pathType: "Prefix" + backend: + service: + name: {{ template "vo-cutouts.fullname" . }} + port: + number: 8080 diff --git a/services/vo-cutouts/templates/networkpolicy.yaml b/services/vo-cutouts/templates/networkpolicy.yaml new file mode 100644 index 0000000000..61ff694572 --- /dev/null +++ b/services/vo-cutouts/templates/networkpolicy.yaml @@ -0,0 +1,25 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "vo-cutouts.fullname" . }} + labels: + {{- include "vo-cutouts.labels" . | nindent 4 }} +spec: + podSelector: + # This policy controls inbound access to the frontend component. + matchLabels: + {{- include "vo-cutouts.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "frontend" + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. 
+ - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/services/vo-cutouts/templates/service.yaml b/services/vo-cutouts/templates/service.yaml new file mode 100644 index 0000000000..ca11dd650e --- /dev/null +++ b/services/vo-cutouts/templates/service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ template "vo-cutouts.fullname" . }} + labels: + {{- include "vo-cutouts.labels" . | nindent 4 }} +spec: + type: ClusterIP + ports: + - protocol: "TCP" + port: 8080 + targetPort: "http" + selector: + {{- include "vo-cutouts.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: "frontend" + sessionAffinity: None diff --git a/services/vo-cutouts/templates/serviceaccount.yaml b/services/vo-cutouts/templates/serviceaccount.yaml new file mode 100644 index 0000000000..e77c1f4a6c --- /dev/null +++ b/services/vo-cutouts/templates/serviceaccount.yaml @@ -0,0 +1,10 @@ +{{- if .Values.cloudsql.enabled }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "vo-cutouts.fullname" . }} + labels: + {{- include "vo-cutouts.labels" . | nindent 4 }} + annotations: + iam.gke.io/gcp-service-account: {{ required "cloudsql.serviceAccount must be set to a valid Google service account" .Values.cloudsql.serviceAccount | quote }} +{{- end }} diff --git a/services/vo-cutouts/templates/vault-secrets.yaml b/services/vo-cutouts/templates/vault-secrets.yaml new file mode 100644 index 0000000000..04696ceb30 --- /dev/null +++ b/services/vo-cutouts/templates/vault-secrets.yaml @@ -0,0 +1,9 @@ +apiVersion: ricoberger.de/v1alpha1 +kind: VaultSecret +metadata: + name: {{ template "vo-cutouts.fullname" . }}-secret + labels: + {{- include "vo-cutouts.labels" . 
| nindent 4 }} +spec: + path: "{{ .Values.global.vaultSecretsPath }}/vo-cutouts" + type: Opaque diff --git a/services/vo-cutouts/templates/worker-deployment.yaml b/services/vo-cutouts/templates/worker-deployment.yaml new file mode 100644 index 0000000000..0a5dfa7f80 --- /dev/null +++ b/services/vo-cutouts/templates/worker-deployment.yaml @@ -0,0 +1,124 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ template "vo-cutouts.fullname" . }}-worker + labels: + {{- include "vo-cutouts.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.databaseWorker.replicaCount }} + selector: + matchLabels: + {{- include "vo-cutouts.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "worker" + template: + metadata: + annotations: + checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }} + {{- with .Values.databaseWorker.podAnnotations }} + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "vo-cutouts.selectorLabels" . | nindent 8 }} + app.kubernetes.io/component: "worker" + vo-cutouts-redis-client: "true" + spec: + automountServiceAccountToken: false + + # Butler uses a pgpass file to authenticate to its database, and + # PostgreSQL unfortunately requires its pgpass file be owned by the + # current user and mode 0600, but Kubernetes has no way of controlling + # the ownership of a mounted secret. We therefore use a privileged init + # container to copy the secrets into a shared emptyDir and change their + # ownership and permissions. 
+ initContainers: + - name: "fix-secret-permissions" + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy | quote }} + command: + - "/bin/bash" + - "-c" + - | + cp -RL /etc/vo-cutouts/secrets-raw/* /etc/vo-cutouts/secrets + chmod 0400 /etc/vo-cutouts/secrets/* + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + volumeMounts: + - name: "secrets" + mountPath: "/etc/vo-cutouts/secrets" + - name: "secrets-raw" + mountPath: "/etc/vo-cutouts/secrets-raw" + readOnly: true + containers: + - name: "worker" + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + image: "{{ .Values.cutoutWorker.image.repository }}:{{ .Values.cutoutWorker.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.cutoutWorker.image.pullPolicy | quote }} + env: + # The following are used by Butler to retrieve its configuration + # and authenticate to its database. + - name: "AWS_SHARED_CREDENTIALS_FILE" + value: "/etc/vo-cutouts/secrets/aws-credentials" + - name: "DAF_BUTLER_REPOSITORY_INDEX" + value: {{ .Values.global.butlerRepositoryIndex | quote }} + - name: "PGPASSFILE" + value: "/etc/vo-cutouts/secrets/postgres-credentials" + - name: "S3_ENDPOINT_URL" + value: "https://storage.googleapis.com" + + # Authentication to the Redis queue for Dramatiq. + - name: "CUTOUT_REDIS_PASSWORD" + valueFrom: + secretKeyRef: + name: {{ template "vo-cutouts.fullname" . }}-secret + key: "redis-password" + + # URL of the bucket into which to store the cutouts. + - name: "CUTOUT_STORAGE_URL" + value: {{ required "config.gcsBucketUrl must be set" .Values.config.gcsBucketUrl | quote }} + + # Temporary directory into which to stage cutouts before uploading. + - name: "CUTOUT_TMPDIR" + value: "/tmp/cutouts" + envFrom: + - configMapRef: + name: {{ template "vo-cutouts.fullname" . 
}}-config + {{- with .Values.cutoutWorker.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - name: "secrets" + mountPath: "/etc/vo-cutouts/secrets" + - name: "tmp" + mountPath: "/tmp" + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + volumes: + - name: "secrets" + emptyDir: {} + - name: "secrets-raw" + secret: + secretName: {{ template "vo-cutouts.fullname" . }}-secret + - name: "tmp" + emptyDir: {} + {{- with .Values.cutoutWorker.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.cutoutWorker.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.cutoutWorker.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/services/vo-cutouts/templates/worker-networkpolicy.yaml b/services/vo-cutouts/templates/worker-networkpolicy.yaml new file mode 100644 index 0000000000..4263747825 --- /dev/null +++ b/services/vo-cutouts/templates/worker-networkpolicy.yaml @@ -0,0 +1,15 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: {{ template "vo-cutouts.fullname" . }}-worker + labels: + {{- include "vo-cutouts.labels" . | nindent 4 }} +spec: + podSelector: + # This policy controls inbound and outbound access to the database workers. + matchLabels: + {{- include "vo-cutouts.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "worker" + policyTypes: + # Block all inbound access. 
+ - Ingress diff --git a/services/vo-cutouts/values-idfdev.yaml b/services/vo-cutouts/values-idfdev.yaml index 17ae2e4f9a..46442dec37 100644 --- a/services/vo-cutouts/values-idfdev.yaml +++ b/services/vo-cutouts/values-idfdev.yaml @@ -1,28 +1,8 @@ -vo-cutouts: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data-dev.lsst.cloud" - vaultSecretsPath: "secret/k8s_operator/data-dev.lsst.cloud/vo-cutouts" +config: + databaseUrl: "postgresql://vo-cutouts@localhost/vo-cutouts" + gcsBucketUrl: "s3://rubin-cutouts-dev-us-central1-output/" - image: - pullPolicy: "Always" - tag: "tickets-DM-33513" - - config: - # There is currently no working Butler in data-dev, so this configuration - # won't work. Leaving it here anyway since it has the correct - # configuration otherwise should we later get a Butler for that - # environment. - butlerRepository: "TBD" - databaseUrl: "postgresql://vo-cutouts@localhost/vo-cutouts" - gcsBucketUrl: "s3://rubin-cutouts-dev-us-central1-output/" - - cloudsql: - enabled: true - instanceConnectionName: "science-platform-dev-7696:us-central1:science-platform-dev-e9e11de2" - serviceAccount: "vo-cutouts@science-platform-dev-7696.iam.gserviceaccount.com" - -pull-secret: +cloudsql: enabled: true - path: "secret/k8s_operator/data-dev.lsst.cloud/pull-secret" + instanceConnectionName: "science-platform-dev-7696:us-central1:science-platform-dev-e9e11de2" + serviceAccount: "vo-cutouts@science-platform-dev-7696.iam.gserviceaccount.com" diff --git a/services/vo-cutouts/values-idfint.yaml b/services/vo-cutouts/values-idfint.yaml index 77c26adb14..08b0e0a979 100644 --- a/services/vo-cutouts/values-idfint.yaml +++ b/services/vo-cutouts/values-idfint.yaml @@ -1,24 +1,8 @@ -vo-cutouts: - imagePullSecrets: - - name: "pull-secret" - ingress: - host: "data-int.lsst.cloud" - vaultSecretsPath: "secret/k8s_operator/data-int.lsst.cloud/vo-cutouts" +config: + databaseUrl: "postgresql://vo-cutouts@localhost/vo-cutouts" + gcsBucketUrl: 
"s3://rubin-cutouts-int-us-central1-output/" - image: - pullPolicy: "Always" - tag: "tickets-DM-33513" - - config: - butlerRepository: "s3://butler-us-central1-panda-dev/dc2/butler-external.yaml" - databaseUrl: "postgresql://vo-cutouts@localhost/vo-cutouts" - gcsBucketUrl: "s3://rubin-cutouts-int-us-central1-output/" - - cloudsql: - enabled: true - instanceConnectionName: "science-platform-int-dc5d:us-central1:science-platform-int-8f439af2" - serviceAccount: "vo-cutouts@science-platform-int-dc5d.iam.gserviceaccount.com" - -pull-secret: +cloudsql: enabled: true - path: "secret/k8s_operator/data-int.lsst.cloud/pull-secret" + instanceConnectionName: "science-platform-int-dc5d:us-central1:science-platform-int-8f439af2" + serviceAccount: "vo-cutouts@science-platform-int-dc5d.iam.gserviceaccount.com" diff --git a/services/vo-cutouts/values-idfprod.yaml b/services/vo-cutouts/values-idfprod.yaml new file mode 100644 index 0000000000..736d983835 --- /dev/null +++ b/services/vo-cutouts/values-idfprod.yaml @@ -0,0 +1,8 @@ +config: + databaseUrl: "postgresql://vo-cutouts@localhost/vo-cutouts" + gcsBucketUrl: "s3://rubin-cutouts-stable-us-central1-output/" + +cloudsql: + enabled: true + instanceConnectionName: "science-platform-stable-6994:us-central1:science-platform-stable-0c29612b" + serviceAccount: "vo-cutouts@science-platform-stable-6994.iam.gserviceaccount.com" diff --git a/services/vo-cutouts/values.yaml b/services/vo-cutouts/values.yaml new file mode 100644 index 0000000000..5261de2cd7 --- /dev/null +++ b/services/vo-cutouts/values.yaml @@ -0,0 +1,205 @@ +# Default values for vo-cutouts. 
+ +# -- Number of web frontend pods to start +replicaCount: 1 + +# -- Override the base name for resources +nameOverride: "" + +# -- Override the full name for resources (includes the release name) +fullnameOverride: "" + +image: + # -- vo-cutouts image to use + repository: "ghcr.io/lsst-sqre/vo-cutouts" + + # -- Pull policy for the vo-cutouts image + pullPolicy: "IfNotPresent" + + # -- Tag of vo-cutouts image to use + # @default -- The appVersion of the chart + tag: "" + +ingress: + # -- Additional annotations to add to the ingress + annotations: {} + +# -- Resource limits and requests for the vo-cutouts frontend pod +resources: {} + +# -- Annotations for the vo-cutouts frontend pod +podAnnotations: {} + +# -- Node selector rules for the vo-cutouts frontend pod +nodeSelector: {} + +# -- Tolerations for the vo-cutouts frontend pod +tolerations: [] + +# -- Affinity rules for the vo-cutouts frontend pod +affinity: {} + +config: + # -- Choose from the text form of Python logging levels + loglevel: "INFO" + + # -- URL for the PostgreSQL database + # @default -- None, must be set + databaseUrl: "" + + # -- URL for the GCS bucket into which to store cutouts (must start with + # `s3`) + # @default -- None, must be set + gcsBucketUrl: "" + + # -- Timeout for a single cutout job in seconds + # @default -- 600 (10 minutes) + timeout: 600 + + # -- Lifetime of job results in seconds (quote so that Helm doesn't turn it + # into a floating point number) + # @default -- 2592000 (30 days) + lifetime: "2592000" + + # -- Timeout for results from a sync cutout in seconds + # @default -- 60 (1 minute) + syncTimeout: 60 + +cloudsql: + # -- Enable the Cloud SQL Auth Proxy sidecar, used with CloudSQL databases + # on Google Cloud + enabled: false + + image: + # -- Cloud SQL Auth Proxy image to use + repository: "gcr.io/cloudsql-docker/gce-proxy" + + # -- Cloud SQL Auth Proxy tag to use + tag: "1.33.2" + + # -- Pull policy for Cloud SQL Auth Proxy images + pullPolicy: "IfNotPresent" + + # 
-- Instance connection name for a CloudSQL PostgreSQL instance + instanceConnectionName: "" + + # -- The Google service account that has an IAM binding to the `vo-cutouts` + # Kubernetes service accounts and has the `cloudsql.client` role, access + # to the GCS bucket, and ability to sign URLs as itself + # @default -- None, must be set + serviceAccount: "" + +cutoutWorker: + # -- Number of cutout worker pods to start + replicaCount: 2 + + image: + # -- Stack image to use for cutouts + repository: "ghcr.io/lsst-sqre/vo-cutouts-worker" + + # -- Tag of vo-cutouts worker image to use + # @default -- The appVersion of the chart + tag: "" + + # -- Pull policy for cutout workers + pullPolicy: "IfNotPresent" + + # -- Resource limits and requests for the cutout worker pod + resources: {} + + # -- Annotations for the cutout worker pod + podAnnotations: {} + + # -- Node selection rules for the cutout worker pod + nodeSelector: {} + + # -- Tolerations for the cutout worker pod + tolerations: [] + + # -- Affinity rules for the cutout worker pod + affinity: {} + +databaseWorker: + # -- Number of database worker pods to start + replicaCount: 1 + + # -- Resource limits and requests for the database worker pod + resources: {} + + # -- Annotations for the database worker pod + podAnnotations: {} + + # -- Node selection rules for the database worker pod + nodeSelector: {} + + # -- Tolerations for the database worker pod + tolerations: [] + + # -- Affinity rules for the database worker pod + affinity: {} + +redis: + config: + # -- Name of secret containing Redis password (may require changing if + # fullnameOverride is set) + secretName: "vo-cutouts-secret" + + # -- Key inside secret from which to get the Redis password (do not + # change) + secretKey: "redis-password" + + persistence: + # -- Whether to persist Redis storage and thus tokens. Setting this to + # false will use `emptyDir` and reset all tokens on every restart. Only + # use this for a test deployment. 
+ enabled: true + + # -- Amount of persistent storage to request + size: "100Mi" + + # -- Class of storage to request + storageClass: "" + + # -- Access mode of storage to request + accessMode: "ReadWriteOnce" + + # -- Use an existing PVC, not dynamic provisioning. If this is set, the + # size, storageClass, and accessMode settings are ignored. + volumeClaimName: "" + + # -- Resource limits and requests for the Redis pod + # @default -- See `values.yaml` + resources: + limits: + cpu: "1" + + # -- Pod annotations for the Redis pod + podAnnotations: {} + + # -- Node selection rules for the Redis pod + nodeSelector: {} + + # -- Tolerations for the Redis pod + tolerations: [] + + # -- Affinity rules for the Redis pod + affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- URI to the Butler configuration of available repositories + # @default -- Set by Argo CD + butlerRepositoryIndex: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/src/phalanx/__init__.py b/src/phalanx/__init__.py new file mode 100644 index 0000000000..a48ec8244e --- /dev/null +++ b/src/phalanx/__init__.py @@ -0,0 +1,18 @@ +"""The phalanx package provides support tooling for Phalanx, SQuaRE's +application deployment platform. +""" + +__all__ = ["__version__"] + +from importlib.metadata import PackageNotFoundError, version + +__version__: str +"""The version string, although ``phalanx`` isn't technically released +like a typical Python package. 
+""" + +try: + __version__ = version(__name__) +except PackageNotFoundError: + # package is not installed + __version__ = "0.0.0" diff --git a/src/phalanx/docs/__init__.py b/src/phalanx/docs/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/phalanx/docs/crossref.py b/src/phalanx/docs/crossref.py new file mode 100644 index 0000000000..3220a5c8b5 --- /dev/null +++ b/src/phalanx/docs/crossref.py @@ -0,0 +1,74 @@ +"""Cross-referencing roles and directives for Phalanx topics.""" + +from __future__ import annotations + +from sphinx.application import Sphinx + +__all__ = ["setup"] + + +def setup(app: Sphinx) -> None: + """Set up the Phalan cross-referencing extensions.""" + # Cross reference an environment's homepage + app.add_crossref_type( + "px-env", + "px-env", + indextemplate="single: %s", + ref_nodeclass=None, + objname="", + override=False, + ) + # Cross reference an app's homepage + app.add_crossref_type( + "px-app", + "px-app", + indextemplate="single: %s", + ref_nodeclass=None, + objname="", + override=False, + ) + # Cross reference an app's architectural notes page + app.add_crossref_type( + "px-app-notes", + "px-app-notes", + indextemplate="single: %s", + ref_nodeclass=None, + objname="", + override=False, + ) + # Cross reference an app's bootstrapping page + app.add_crossref_type( + "px-app-bootstrap", + "px-app-bootstrap", + indextemplate="single: %s", + ref_nodeclass=None, + objname="", + override=False, + ) + # Cross reference an app's upgrade page + app.add_crossref_type( + "px-app-upgrade", + "px-app-upgrade", + indextemplate="single: %s", + ref_nodeclass=None, + objname="", + override=False, + ) + # Cross reference an app's troubleshooting page + app.add_crossref_type( + "px-app-troubleshooting", + "px-app-troubleshooting", + indextemplate="single: %s", + ref_nodeclass=None, + objname="", + override=False, + ) + # Cross reference an app's Helm values page + app.add_crossref_type( + "px-app-values", + "px-app-values", + 
indextemplate="single: %s", + ref_nodeclass=None, + objname="", + override=False, + ) diff --git a/src/phalanx/docs/models.py b/src/phalanx/docs/models.py new file mode 100644 index 0000000000..c39d1e1805 --- /dev/null +++ b/src/phalanx/docs/models.py @@ -0,0 +1,402 @@ +"""Models of the Phalanx environment and application configurations.""" + +from __future__ import annotations + +import re +from dataclasses import dataclass, field +from functools import cached_property +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import yaml + +ENVIRONMENTS_DIR = "science-platform" +"""Directory of the environments Helm chart in Phalanx.""" + +APPS_DIR = "services" +"""Root directory of the application Helm charts in Phalanx.""" + + +@dataclass(kw_only=True) +class DocLink: + """A model describing a document link, based on an individual array item + in the ``phalanx.lsst.io/docs`` chart annotation. + """ + + url: str + """URL to the document.""" + + title: str + """Document title.""" + + id: Optional[str] + """Document identifier.""" + + def __str__(self) -> str: + """A reStructuredText-formatted link.""" + if self.id is not None: + label = f"{self.id}: {self.title}" + else: + label = self.title + + return f"`{label} <{self.url}>`__" + + +@dataclass(kw_only=True) +class Application: + """A model for a Phalanx-configured application.""" + + name: str + """Name of the application. + + This name is used to label directories, etc. 
+ """ + + values: Dict[str, Dict] + """The parsed Helm values for each environment.""" + + chart: Dict[str, Any] + """The parsed Helm Chart.yaml file.""" + + active_environments: List[str] = field(default_factory=list) + """Environments where this application is active.""" + + namespace: str + """Kubernetes namespace""" + + readme: str + """Contents of the README.md from the applications Phalanx directory.""" + + @property + def homepage_url(self) -> Optional[str]: + """The Helm home field, typically used for the app's docs.""" + if "home" in self.chart: + return self.chart["home"] + else: + return None + + @property + def source_urls(self) -> Optional[List[str]]: + """Application source URLs, typically from the Helm sources field.""" + if "sources" in self.chart: + return self.chart["sources"] + else: + return None + + @property + def values_table_md(self) -> str: + """The markdown-formatted Helm values documenation generated by + helm-docs in the README. + """ + lines = self.readme.splitlines() + for i, line in enumerate(lines): + if line.startswith("## Values"): + return "\n".join(lines[i + 1 :]) + return "" + + @cached_property + def doc_links(self) -> List[str]: + """reStructuredText-formatted list of links.""" + key = "phalanx.lsst.io/docs" + if "annotations" in self.chart and key in self.chart["annotations"]: + docs_data = yaml.safe_load(self.chart["annotations"][key]) + docs = [DocLink(**d) for d in docs_data] + return docs + else: + return [] + + @classmethod + def load( + cls, *, app_dir: Path, root_dir: Path, env_values: Dict[str, Dict] + ) -> Application: + """Load an application from the Phalanx repository. + + Parameters + ---------- + app_dir : `pathlib.Path` + The application's directory (where its Helm chart is located + in Phalanx). + env_values : `dict` + The Helm values for each environment, keyed by the environment + name. This data determines where the application is active. 
+ """ + app_name = app_dir.name + + # Open the chart's README + readme_path = app_dir.joinpath("README.md") + if readme_path.is_file(): + readme = readme_path.read_text() + else: + readme = "" + + # Open the chart's Chart.yaml + chart_path = app_dir.joinpath("Chart.yaml") + if chart_path.is_file(): + chart = yaml.safe_load(chart_path.read_text()) + else: + chart = {} + + # Load the app's values files for each environment + values: Dict[str, Dict] = {} + for values_path in app_dir.glob("values-*.yaml"): + env_name = values_path.stem.removeprefix("values-") + values[env_name] = yaml.safe_load(values_path.read_text()) + + # Determine what environments use this app based on the environment's + # values file. + active_environments: List[str] = [] + for env_name, env_configs in env_values.items(): + if app_name == "argocd": + active_environments.append(env_name) + continue + + try: + reformatted_name = app_name.replace("-", "_") + if env_configs[reformatted_name]["enabled"] is True: + active_environments.append(env_name) + except KeyError: + pass + active_environments.sort() + + # Open the Application Helm definition to get namespace info + namespace = "Unknown" + app_template_path = root_dir.joinpath( + ENVIRONMENTS_DIR, "templates", f"{app_name}-application.yaml" + ) + if app_template_path.is_file(): + app_template = app_template_path.read_text() + # Extract the namespace from the Helm template + pattern = ( + r"destination:\n" + r"[ ]+namespace:[ ]*[\"]?(?P[a-zA-Z][\w-]+)[\"]?" 
+ ) + m = re.search( + pattern, app_template, flags=re.MULTILINE | re.DOTALL + ) + if m: + namespace = m.group("namespace") + else: + print(f"Did not match template for namespace for {app_name}") + else: + print(f"Could not open app template for {app_name}") + + return cls( + name=app_name, + chart=chart, + values=values, + active_environments=active_environments, + namespace=namespace, + readme=readme, + ) + + +@dataclass(kw_only=True) +class Environment: + """A model for an environment.""" + + name: str + """Name of the Phalanx environment. + + This name is used to label directories, values files, etc. + """ + + domain: str + """The root domain where the environment is hosted.""" + + vault_path_prefix: str + """The Vault key prefix for this environment.""" + + apps: List[Application] + """The applications that are enabled for this service.""" + + @property + def argocd_url(self) -> Optional[str]: + """Path to the Argo CD UI.""" + argocd = self.get_app("argocd") + if argocd is None: + return "N/A" + + try: + return argocd.values[self.name]["argo-cd"]["server"]["config"][ + "url" + ] + except KeyError: + # Environments like minikube don't expose an argo cd URL + return "N/A" + + @property + def argocd_rbac_csv(self) -> Optional[List[str]]: + """The Argo CD RBAC table, as a list of CSV lines.""" + argocd = self.get_app("argocd") + if argocd is None: + return None + + try: + rbac_csv = argocd.values[self.name]["argo-cd"]["server"][ + "rbacConfig" + ]["policy.csv"] + lines = [ + ",".join([f"``{item.strip()}``" for item in line.split(",")]) + for line in rbac_csv.splitlines() + ] + print(lines) + return lines + except KeyError: + # Some environments may not configure an RBAC + return None + + @property + def identity_provider(self) -> str: + """A description of the identity provider for Gafaelfawr.""" + gafaelfawr = self.get_app("gafaelfawr") + if gafaelfawr is None: + return "Unknown" + + config_values = gafaelfawr.values[self.name]["config"] + if "cilogon" in 
config_values: + return "CILogon" + + if "github" in config_values: + return "GitHub" + + if "oidc" in config_values: + return "OIDC" + + return "Unknown" + + @property + def gafaelfawr_roles(self) -> List[Tuple[str, List[str]]]: + """Gafaelfawr role mapping (reStructuredText). + + Group strings may be formatted as reStructuredText links to GitHub + teams. + """ + roles: List[Tuple[str, List[str]]] = [] + + gafaelfawr = self.get_app("gafaelfawr") + if gafaelfawr is None: + return roles + + try: + group_mapping = gafaelfawr.values[self.name]["config"][ + "groupMapping" + ] + except KeyError: + return roles + + role_names = sorted(group_mapping.keys()) + for role_name in role_names: + groups: List[str] = [] + for group in group_mapping[role_name]: + if isinstance(group, str): + # e.g. a comanage group + groups.append(f"``{group}``") + elif isinstance(group, dict) and "github" in group: + org = group["github"]["organization"] + team = group["github"]["team"] + url = f"https://github.com/orgs/{org}/teams/{team}" + groups.append(f":fab:`github` `{org}/{team} <{url}>`__") + else: + print(f"Group type unknown: {group}") + continue + roles.append((role_name, groups)) + + return roles + + def get_app(self, name) -> Optional[Application]: + """Get the named application.""" + for app in self.apps: + if app.name == name: + return app + return None + + @classmethod + def load( + cls, *, values: Dict[str, Any], applications: List[Application] + ) -> Environment: + """Load an environment by inspecting the Phalanx repository.""" + # Extract name from dir/values-envname.yaml + name = values["environment"] + + # Get Application instances active in this environment + apps: List[Application] = [] + for app in applications: + if app.name == "argocd": + # argocd is a special case because it's not toggled per env + apps.append(app) + continue + + if app.name in values: + if values[app.name]["enabled"] is True: + apps.append(app) + elif (app_name_underscore := app.name.replace("-", "_")) 
in values: + # Many keys in an env's values.yaml use underscores instead of + # dashes, so they don't match the actual application name + if values[app_name_underscore]["enabled"] is True: + apps.append(app) + + apps.sort(key=lambda a: a.name) + + return Environment( + name=name, + domain=values["fqdn"], + vault_path_prefix=values["vault_path_prefix"], + apps=apps, + ) + + +@dataclass(kw_only=True) +class Phalanx: + """Root container for Phalanx data.""" + + environments: List[Environment] = field(default_factory=list) + """Phalanx environments.""" + + apps: List[Application] = field(default_factory=list) + """Phalanx applications.""" + + @classmethod + def load_phalanx(cls, root_dir: Path) -> Phalanx: + """Load the Phalanx git repository. + + Parameters + ---------- + root_dir : `pathlib.Path` + The path for the root directory of a Phalanx repository clone. + + Returns + ------- + phalanx : `Phalanx` + A model of the Phalanx platform, including environment and + application configuration. 
+ """ + apps: List[Application] = [] + envs: List[Environment] = [] + + # Pre-load the values files for each environment + env_values: Dict[str, Dict[str, Any]] = {} + for env_values_path in root_dir.joinpath(ENVIRONMENTS_DIR).glob( + "values-*.yaml" + ): + if not env_values_path.is_file(): + continue + values = yaml.safe_load(env_values_path.read_text()) + name = values["environment"] + env_values[name] = values + + # Gather applications + for app_dir in root_dir.joinpath(APPS_DIR).iterdir(): + if not app_dir.is_dir(): + continue + app = Application.load( + app_dir=app_dir, env_values=env_values, root_dir=root_dir + ) + apps.append(app) + apps.sort(key=lambda a: a.name) + + # Gather environments + for env_name, values in env_values.items(): + env = Environment.load(values=values, applications=apps) + envs.append(env) + + return cls(environments=envs, apps=apps) diff --git a/src/phalanx/testing/__init__.py b/src/phalanx/testing/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/expand-services b/src/phalanx/testing/expandcharts.py old mode 100755 new mode 100644 similarity index 95% rename from tests/expand-services rename to src/phalanx/testing/expandcharts.py index eb0f78f981..79751cc095 --- a/tests/expand-services +++ b/src/phalanx/testing/expandcharts.py @@ -1,5 +1,3 @@ -#!/usr/bin/env python3 - """Expand Helm charts for testing. 
Discover the list of supported environments, find all charts that have changed @@ -31,7 +29,7 @@ def get_changed_charts() -> List[str]: if (path / "Chart.yaml").exists(): diff = repo.head.commit.diff("origin/master", paths=[str(path)]) for change_type in DiffIndex.change_type: - if any(diff.iter_change_type(change_type)): + if any(diff.iter_change_type(change_type)): # type: ignore print("Found changed chart", path.name) charts.append(path.name) break @@ -85,7 +83,3 @@ def main() -> None: environments = get_environments() for chart in charts: expand_chart(chart, environments) - - -if __name__ == "__main__": - main() diff --git a/starters/README.md b/starters/README.md new file mode 100644 index 0000000000..ce539a5a36 --- /dev/null +++ b/starters/README.md @@ -0,0 +1,11 @@ +# Helm starters for Phalanx + +Each subdirectory of this directory is a Helm starter for a class of Phalanx service. +Use the starters with the `-p` option to `helm create`. +For example, from the `services` directory: + +```sh +helm create new-service -p $(pwd)/../starters/rsp-web-service +``` + +The path to the starter directory must be absolute, not relative, or Helm will try to use it has a path relative to `$HOME/.local/share/helm`. diff --git a/starters/web-service/.helmignore b/starters/web-service/.helmignore new file mode 100644 index 0000000000..0e8a0eb36f --- /dev/null +++ b/starters/web-service/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. 
+.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/starters/web-service/Chart.yaml b/starters/web-service/Chart.yaml new file mode 100644 index 0000000000..f693083c5b --- /dev/null +++ b/starters/web-service/Chart.yaml @@ -0,0 +1,10 @@ +apiVersion: v2 +name: +version: 1.0.0 +description: | + Helm starter chart for a new RSP service. +home: "https://github.com/lsst-sqre/" +type: application + +# The default version tag of the Docker image. +appVersion: "1.0.0" diff --git a/starters/web-service/README.md b/starters/web-service/README.md new file mode 100644 index 0000000000..6e0e4b91e0 --- /dev/null +++ b/starters/web-service/README.md @@ -0,0 +1,27 @@ +# + +Helm starter chart for a new RSP service. + +**Homepage:** > + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| affinity | object | `{}` | Affinity rules for the deployment pod | +| autoscaling.enabled | bool | `false` | Enable autoscaling of deployment | +| autoscaling.maxReplicas | int | `100` | Maximum number of deployment pods | +| autoscaling.minReplicas | int | `1` | Minimum number of deployment pods | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization of deployment pods | +| global.baseUrl | string | Set by Argo CD | Base URL for the environment | +| global.host | string | Set by Argo CD | Host name for ingress | +| global.vaultSecretsPath | string | Set by Argo CD | Base path for Vault secrets | +| image.pullPolicy | string | `"IfNotPresent"` | Pull policy for the image | +| image.repository | string | `"ghcr.io/lsst-sqre/"` | Image to use in the deployment | +| image.tag | string | `""` | Overrides the image tag whose default is the chart appVersion. 
| +| ingress.annotations | object | `{}` | Additional annotations for the ingress rule | +| nodeSelector | object | `{}` | Node selection rules for the deployment pod | +| podAnnotations | object | `{}` | Annotations for the deployment pod | +| replicaCount | int | `1` | Number of web deployment pods to start | +| resources | object | `{}` | Resource limits and requests for the deployment pod | +| tolerations | list | `[]` | Tolerations for the deployment pod | diff --git a/starters/web-service/templates/_helpers.tpl b/starters/web-service/templates/_helpers.tpl new file mode 100644 index 0000000000..d4d9a92e86 --- /dev/null +++ b/starters/web-service/templates/_helpers.tpl @@ -0,0 +1,26 @@ +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define ".chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define ".labels" -}} +helm.sh/chart: {{ include ".chart" . }} +{{ include ".selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define ".selectorLabels" -}} +app.kubernetes.io/name: "" +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} diff --git a/starters/web-service/templates/deployment.yaml b/starters/web-service/templates/deployment.yaml new file mode 100644 index 0000000000..878b838602 --- /dev/null +++ b/starters/web-service/templates/deployment.yaml @@ -0,0 +1,59 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: "" + labels: + {{- include ".labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include ".selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . 
| nindent 8 }} + {{- end }} + labels: + {{- include ".selectorLabels" . | nindent 8 }} + spec: + automountServiceAccountToken: false + containers: + - name: {{ .Chart.Name }} + securityContext: + allowPrivilegeEscalation: false + capabilities: + drop: + - "all" + readOnlyRootFilesystem: true + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: "http" + containerPort: 8080 + protocol: "TCP" + readinessProbe: + httpGet: + path: "/" + port: "http" + resources: + {{- toYaml .Values.resources | nindent 12 }} + securityContext: + runAsNonRoot: true + runAsUser: 1000 + runAsGroup: 1000 + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/starters/web-service/templates/hpa.yaml b/starters/web-service/templates/hpa.yaml new file mode 100644 index 0000000000..c2b225e39e --- /dev/null +++ b/starters/web-service/templates/hpa.yaml @@ -0,0 +1,28 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2beta1 +kind: HorizontalPodAutoscaler +metadata: + name: "" + labels: + {{- include ".labels" . 
| nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: "" + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: "cpu" + targetAverageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: "memory" + targetAverageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/starters/web-service/templates/ingress.yaml b/starters/web-service/templates/ingress.yaml new file mode 100644 index 0000000000..eacb451a7e --- /dev/null +++ b/starters/web-service/templates/ingress.yaml @@ -0,0 +1,32 @@ +apiVersion: gafaelfawr.lsst.io/v1alpha1 +kind: GafaelfawrIngress +metadata: + name: "" + labels: + {{- include ".labels" . | nindent 4 }} +config: + baseUrl: {{ .Values.global.baseUrl | quote }} + scopes: + all: + - "read:image" + loginRedirect: true +template: + metadata: + name: "" + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 6 }} + {{- end }} + spec: + ingressClassName: "nginx" + rules: + - host: {{ required "global.host must be set" .Values.global.host | quote }} + http: + paths: + - path: "/" + pathType: "Prefix" + backend: + service: + name: "" + port: + number: 8080 diff --git a/starters/web-service/templates/networkpolicy.yaml b/starters/web-service/templates/networkpolicy.yaml new file mode 100644 index 0000000000..180cc36f0d --- /dev/null +++ b/starters/web-service/templates/networkpolicy.yaml @@ -0,0 +1,21 @@ +apiVersion: networking.k8s.io/v1 +kind: NetworkPolicy +metadata: + name: "" +spec: + podSelector: + matchLabels: + {{- include ".selectorLabels" . 
| nindent 6 }} + policyTypes: + - Ingress + ingress: + # Allow inbound access from pods (in any namespace) labeled + # gafaelfawr.lsst.io/ingress: true. + - from: + - namespaceSelector: {} + podSelector: + matchLabels: + gafaelfawr.lsst.io/ingress: "true" + ports: + - protocol: "TCP" + port: 8080 diff --git a/starters/web-service/templates/service.yaml b/starters/web-service/templates/service.yaml new file mode 100644 index 0000000000..2bcfb29260 --- /dev/null +++ b/starters/web-service/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: "" + labels: + {{- include ".labels" . | nindent 4 }} +spec: + type: "ClusterIP" + ports: + - port: 8080 + targetPort: "http" + protocol: "TCP" + name: "http" + selector: + {{- include ".selectorLabels" . | nindent 4 }} diff --git a/starters/web-service/values.yaml b/starters/web-service/values.yaml new file mode 100644 index 0000000000..61c5171ce3 --- /dev/null +++ b/starters/web-service/values.yaml @@ -0,0 +1,64 @@ +# Default values for . +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# -- Number of web deployment pods to start +replicaCount: 1 + +image: + # -- Image to use in the deployment + repository: "ghcr.io/lsst-sqre/" + + # -- Pull policy for the image + pullPolicy: "IfNotPresent" + + # -- Overrides the image tag whose default is the chart appVersion. 
+ tag: "" + +ingress: + # -- Additional annotations for the ingress rule + annotations: {} + +autoscaling: + # -- Enable autoscaling of deployment + enabled: false + + # -- Minimum number of deployment pods + minReplicas: 1 + + # -- Maximum number of deployment pods + maxReplicas: 100 + + # -- Target CPU utilization of deployment pods + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# -- Annotations for the deployment pod +podAnnotations: {} + +# -- Resource limits and requests for the deployment pod +resources: {} + +# -- Node selection rules for the deployment pod +nodeSelector: {} + +# -- Tolerations for the deployment pod +tolerations: [] + +# -- Affinity rules for the deployment pod +affinity: {} + +# The following will be set by parameters injected by Argo CD and should not +# be set in the individual environment values files. +global: + # -- Base URL for the environment + # @default -- Set by Argo CD + baseUrl: "" + + # -- Host name for ingress + # @default -- Set by Argo CD + host: "" + + # -- Base path for Vault secrets + # @default -- Set by Argo CD + vaultSecretsPath: "" diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/docs/models_test.py b/tests/docs/models_test.py new file mode 100644 index 0000000000..3fc650b24a --- /dev/null +++ b/tests/docs/models_test.py @@ -0,0 +1,17 @@ +"""Tests for the phalanx.docs.models module.""" + +from __future__ import annotations + +from pathlib import Path + +from phalanx.docs.models import Phalanx + + +def test_phalanx_load() -> None: + """Smoke test for loading Phalanx repository metadata.""" + root_dir = Path(__file__).parent.parent.parent + metadata = Phalanx.load_phalanx(root_dir) + assert isinstance(metadata, Phalanx) + + assert len(metadata.environments) > 0 + assert len(metadata.apps) > 0 diff --git a/tests/packaging_test.py b/tests/packaging_test.py new file mode 100644 index 0000000000..5010938f07 --- 
/dev/null +++ b/tests/packaging_test.py @@ -0,0 +1,11 @@ +"""Test that the Python packaging metadata.""" + +from __future__ import annotations + +from phalanx import __version__ + + +def test_vesrion() -> None: + """Test that the package has a version (and is installed).""" + assert len(__version__) > 0 + assert __version__ != "0.0.0" # would be if not installed diff --git a/tests/requirements.txt b/tests/requirements.txt deleted file mode 100644 index 64b1adaeeb..0000000000 --- a/tests/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -GitPython diff --git a/tools/addpullsecret/add-pull-secrets.bash b/tools/addpullsecret/add-pull-secrets.bash deleted file mode 100755 index 6eadf1393f..0000000000 --- a/tools/addpullsecret/add-pull-secrets.bash +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/env bash - -# Requires Bash 4 because we use hashes; brew install bash if you're on -# OS X (it's a GPL3 thing) - -function usage() { - echo 1>&2 "Usage: $0 phalanx-directory" - exit 1 -} - -bv=$(echo ${BASH_VERSION} | cut -d '.' -f 1) -if [ ${bv} -lt 4 ]; then - echo 1>&2 "$0 requires at least bash version 4" - exit 2 -fi - -# Customize as necessary -topdir=$1 - -if [ -z "${topdir}" ]; then - usage -fi -if [ -n "$2" ]; then - usage -fi - -# This builds a list of services that have associated namespaces. -# For instance, cert-manager and cert-issuer share a namespace; we only -# inject a vault secret for pull once per namespace -svcs="argocd cert-manager exposurelog gafaelfawr influxdb kapacitor" -svcs="${svcs} landing-page mobu ingress-nginx narrativelog nublado obstap" -svcs="${svcs} portal postgres tap" - -# This is a list of environments. -envs="base bleed gold-leader idfdev idfint idfprod int kueyen minikube" -envs="${envs} red-five rogue-two stable summit tucson-teststand" - -# These are the services that we're going to add the pull-secret string to: -# Skip cachemachine and nublado2 for now. 
-# -# Cachemachine, it's called "cachemachine-secret" rather than "pull-secret", -# and counterintuitively, nublado2 doesn't need it--the pods it spawns do, -# and it handles that in the nublado2 resource template yaml. -add_pull="tap obstap exposurelog portal gafaelfawr influxdb kapacitor" -add_pull="${add_pull} landing-page mobu narrativelog nublado postgres" - -# This is what I have run it with so far. -#envs="nublado" - -IFS='' read -r -d '' addreq <<'EOF' -- name: pull-secret - version: 0.1.2 - repository: https://lsst-sqre.github.io/charts/ -EOF - -declare -A pull_secret -for e in ${envs}; do - np="${topdir}/services/nublado/values-${e}.yaml" - if ! [ -e ${np} ]; then - echo 1>&2 "No nublado to query for secret path in env ${e}!" - continue - fi - tops=$(grep "secret/k8s_operator" ${np} | head -1 | cut -d / -f 3 ) - if [ -z "${tops}" ] ;then - echo 1>&2 "Could not determine vault secret path for ${e}." - continue - fi - pull_secret[${e}]="secret/k8s_operator/${tops}/pull-secret" -done -for s in ${svcs}; do - svcdir="${topdir}/services/${s}" - for e in ${envs}; do - psp="${pull_secret[${e}]}" - if [ -z "${psp}" ]; then - echo 1>&2 "No vault secret path for ${e}." - continue - fi - IFS='' read -r -d '' addsec </dev/null - rc=$? - if [ ${rc} -eq 0 ] ; then - echo 1>&2 "${efile} already has pull-secret." - else - echo -n "${addsec}" >> ${efile} - fi - done - # Add pull-secret to requirements file. - # ingress-nginx has its dependencies right in Chart.yaml - rfile="${svcdir}/requirements.yaml" - if [ "${s}" == "ingress-nginx" ]; then - rfile="${svcdir}/Chart.yaml" - fi - grep -q "pull-secret" ${rfile} 2>/dev/null - rc=$? - if [ ${rc} -eq 0 ] ; then - echo 1>&2 "${rfile} already has pull-secret." 
- else - echo -n "${addreq}" >> ${rfile} - fi -done - -for ap in ${add_pull}; do - for e in ${envs}; do - chartname=${ap} - case ${ap} in - tap) - chartname="cadc-tap" - ;; - obstap) - chartname="cadc-tap-postgres" - ;; - portal) - chartname="firefly" - ;; - *) - ;; - esac - svcdir="${topdir}/services/${ap}" - efile="${svcdir}/values-${e}.yaml" - if [ ! -e ${efile} ]; then # Don't add it if it doesn't exist. - continue - fi - # We also need to check for pull_secret being defined in the - # top-level app: this is the glue to actually enable it. - grep -q '^ pull_secret:' ${efile} - rc=$? - if [ ${rc} -eq 0 ] ; then - echo 1>&2 "${efile} already has pull_secret." - else - # Do we have the first line of the values file equalling the - # key? If not, make it so. - head -n 1 ${efile} | grep -q "^${chartname}:" - rc=$? - # Sorry about the newlines; running on macOS and real BSD sed - if [ ${rc} -ne 0 ]; then - sed -i .init "0 a \\ -${chartname}: -" ${efile} - fi - sed -i .bak "1 a \\ - pull_secret: 'pull-secret' -" ${efile} - fi - done -done -exit 0 diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000000..6650653286 --- /dev/null +++ b/tox.ini @@ -0,0 +1,45 @@ +[tox] +envlist = py,coverage-report,typing,lint,docs,docs-linkcheck +isolated_build = True + +[testenv] +description = Run pytest against {envname}. +extras = + dev + +[testenv:py] +description = Run pytest +commands = + coverage run -m pytest {posargs} + +[testenv:coverage-report] +description = Compile coverage from each test run. +skip_install = true +deps = coverage[toml]>=5.0.2 +depends = + py +commands = + coverage combine + coverage report + +[testenv:typing] +description = Run mypy. +commands = + mypy src/phalanx tests + +[testenv:lint] +description = Lint codebase by running pre-commit (Black, isort, Flake8). +skip_install = true +deps = + pre-commit +commands = pre-commit run --all-files + +[testenv:docs] +description = Build documentation (HTML) with Sphinx. 
+commands = + sphinx-build --keep-going -n -W -T -b html -d {envtmpdir}/doctrees docs docs/_build/html + +[testenv:docs-linkcheck] +description = Check links in the documentation. +commands = + sphinx-build --keep-going -n -W -T -b linkcheck -d {envtmpdir}/doctrees docs docs/_build/linkcheck