Skip to content

Commit

Permalink
Test GCP/AWS Deployment with Pytest (#1871)
Browse files Browse the repository at this point in the history
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
aktech and pre-commit-ci[bot] authored Aug 3, 2023
1 parent 2dc82f2 commit fe23d04
Show file tree
Hide file tree
Showing 59 changed files with 747 additions and 316 deletions.
18 changes: 7 additions & 11 deletions .github/workflows/kubernetes_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ on:
paths:
- ".github/workflows/kubernetes_test.yaml"
- "tests/**"
- "tests_deployment/**"
- "tests_e2e/**"
- "scripts/**"
- "src/**"
- "pyproject.toml"
Expand All @@ -20,8 +18,6 @@ on:
paths:
- ".github/workflows/kubernetes_test.yaml"
- "tests/**"
- "tests_deployment/**"
- "tests_e2e/**"
- "scripts/**"
- "src/**"
- "pyproject.toml"
Expand Down Expand Up @@ -150,14 +146,14 @@ jobs:
env:
CYPRESS_BASE_URL: https://github-actions.nebari.dev/
with:
working-directory: tests_e2e
working-directory: tests/tests_e2e

- name: Playwright Tests
env:
KEYCLOAK_USERNAME: ${{ env.CYPRESS_EXAMPLE_USER_NAME }}
KEYCLOAK_PASSWORD: ${{ env.CYPRESS_EXAMPLE_USER_PASSWORD }}
NEBARI_FULL_URL: https://github-actions.nebari.dev/
working-directory: tests_e2e/playwright
working-directory: tests/tests_e2e/playwright
run: |
# create environment file
envsubst < .env.tpl > .env
Expand All @@ -170,15 +166,15 @@ jobs:
with:
name: e2e-cypress
path: |
./tests_e2e/cypress/screenshots/
./tests_e2e/cypress/videos/
./tests_e2e/playwright/videos/
./tests/tests_e2e/cypress/screenshots/
./tests/tests_e2e/cypress/videos/
./tests/tests_e2e/playwright/videos/
- name: Deployment Pytests
run: |
export KEYCLOAK_USERNAME=${CYPRESS_EXAMPLE_USER_NAME}
export KEYCLOAK_PASSWORD=${CYPRESS_EXAMPLE_USER_PASSWORD}
pytest tests_deployment/ -v -s
pytest tests/tests_deployment/ -v -s
- name: JupyterHub Notebook Tests
# run jhub-client after pytest since jhubctl can cleanup
Expand All @@ -192,7 +188,7 @@ jobs:
--validate --no-verify-ssl \
--kernel python3 \
--stop-server \
--notebook tests_deployment/assets/notebook/simple.ipynb \
--notebook tests/tests_deployment/assets/notebook/simple.ipynb \
### CLEANUP AFTER TESTS
- name: Cleanup nebari deployment
Expand Down
4 changes: 0 additions & 4 deletions .github/workflows/test-provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ on:
- ".github/failed-workflow-issue-templates/test-provider.md"
- ".github/actions/publish-from-template"
- "tests/**"
- "tests_deployment/**"
- "tests_e2e/**"
- "scripts/**"
- "src/**"
- "pyproject.toml"
Expand All @@ -23,8 +21,6 @@ on:
paths:
- ".github/workflows/test-provider.yaml"
- "tests/**"
- "tests_deployment/**"
- "tests_e2e/**"
- "scripts/**"
- "src/**"
- "pyproject.toml"
Expand Down
6 changes: 1 addition & 5 deletions .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ on:
paths:
- ".github/workflows/test.yaml"
- "tests/**"
- "tests_deployment/**"
- "tests_e2e/cypress/**"
- "scripts/**"
- "src/**"
- "pyproject.toml"
Expand All @@ -19,8 +17,6 @@ on:
paths:
- ".github/workflows/test.yaml"
- "tests/**"
- "tests_deployment/**"
- "tests_e2e/cypress/**"
- "scripts/**"
- "src/**"
- "pyproject.toml"
Expand Down Expand Up @@ -56,4 +52,4 @@ jobs:
- name: Test Nebari
run: |
pytest --version
pytest --ignore=tests_deployment --ignore=tests_e2e/playwright
pytest --ignore=tests/tests_deployment --ignore=tests/tests_e2e/playwright --ignore=tests/tests_integration
57 changes: 48 additions & 9 deletions .github/workflows/test_integration.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: "Deploy on Digital Ocean"
name: "Integration Tests"

on:
schedule:
Expand All @@ -12,6 +12,13 @@ jobs:
permissions:
id-token: write
contents: read
strategy:
matrix:
provider:
- aws
- do
- gcp
fail-fast: false
steps:
- name: "Checkout Infrastructure"
uses: actions/checkout@v3
Expand All @@ -21,6 +28,11 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: 3.11
- name: Install Nebari
run: |
pip install .[dev]
conda install --quiet --yes conda-build
playwright install
- name: Retrieve secret from Vault
uses: hashicorp/vault-action@v2.5.0
Expand All @@ -34,19 +46,46 @@ jobs:
kv/data/repository/nebari-dev/nebari/google_cloud_platform/nebari-dev-ci/github-nebari-dev-repo-ci project_id | PROJECT_ID;
kv/data/repository/nebari-dev/nebari/google_cloud_platform/nebari-dev-ci/github-nebari-dev-repo-ci workload_identity_provider | GCP_WORKFLOW_PROVIDER;
kv/data/repository/nebari-dev/nebari/google_cloud_platform/nebari-dev-ci/github-nebari-dev-repo-ci service_account_name | GCP_SERVICE_ACCOUNT;
kv/data/repository/nebari-dev/nebari/azure/nebari-dev-ci/github-nebari-dev-repo-ci tenant_id | ARM_TENANT_ID;
kv/data/repository/nebari-dev/nebari/azure/nebari-dev-ci/github-nebari-dev-repo-ci subscription_id | ARM_SUBSCRIPTION_ID;
kv/data/repository/nebari-dev/nebari/shared_secrets DIGITALOCEAN_TOKEN | DIGITALOCEAN_TOKEN;
kv/data/repository/nebari-dev/nebari/cloudflare/internal-devops@quansight.com/nebari-dev-ci token | CLOUDFLARE_TOKEN;
- name: Install Nebari
- name: 'Authenticate to GCP'
if: ${{ matrix.provider == 'gcp' }}
uses: 'google-github-actions/auth@v1'
with:
token_format: access_token
create_credentials_file: 'true'
workload_identity_provider: ${{ env.GCP_WORKFLOW_PROVIDER }}
service_account: ${{ env.GCP_SERVICE_ACCOUNT }}

- name: Set required environment variables
if: ${{ matrix.provider == 'gcp' }}
run: |
pip install .[dev]
conda install --quiet --yes conda-build
echo "GOOGLE_CREDENTIALS=${{ env.GOOGLE_APPLICATION_CREDENTIALS }}" >> $GITHUB_ENV
- name: Authenticate to AWS
if: ${{ matrix.provider == 'aws' }}
uses: aws-actions/configure-aws-credentials@v1
with:
role-to-assume: ${{ env.AWS_ROLE_ARN }}
role-session-name: github-action
aws-region: us-west-2

- name: Set Environment AWS
if: ${{ matrix.provider == 'aws' }}
run: |
echo "AWS_REGION=us-west-2" >> $GITHUB_ENV
# Export the Digital Ocean specific settings to every later step of the job.
# Secrets are handed to the shell through `env:` instead of being expanded
# into the script text, so their content is never parsed as shell syntax.
- name: Set Environment DO
  if: ${{ matrix.provider == 'do' }}
  env:
    SPACES_ACCESS_KEY_ID: ${{ secrets.SPACES_ACCESS_KEY_ID }}
    SPACES_SECRET_ACCESS_KEY: ${{ secrets.SPACES_SECRET_ACCESS_KEY }}
  run: |
    # One grouped, quoted redirect appends all three assignments to the
    # job-wide environment file.
    {
      echo "SPACES_ACCESS_KEY_ID=${SPACES_ACCESS_KEY_ID}"
      echo "SPACES_SECRET_ACCESS_KEY=${SPACES_SECRET_ACCESS_KEY}"
      echo "NEBARI_K8S_VERSION=1.25.12-do.0"
    } >> "$GITHUB_ENV"
- name: Integration Tests
run: |
pytest --version
pytest tests_integration/ -vvv -s
env:
NEBARI_K8S_VERSION: 1.25.12-do.0
SPACES_ACCESS_KEY_ID: ${{ secrets.SPACES_ACCESS_KEY_ID }}
SPACES_SECRET_ACCESS_KEY: ${{ secrets.SPACES_SECRET_ACCESS_KEY }}
pytest tests/tests_integration/ -vvv -s -m ${{ matrix.provider }}
10 changes: 10 additions & 0 deletions pytest.ini
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,16 @@ addopts =
-Werror
markers =
conda: conda required to run this test (deselect with '-m \"not conda\"')
aws: deploy on aws
do: deploy on do
gcp: deploy on gcp
azure: deploy on azure
gpu: test gpu working properly
testpaths =
tests
xfail_strict = True

log_format = %(asctime)s %(levelname)9s %(lineno)4s %(module)s: %(message)s
log_date_format = %Y-%m-%d %H:%M:%S
log_cli = True
log_cli_level = INFO
3 changes: 2 additions & 1 deletion src/_nebari/keycloak.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ def create_user(
rich.print(
f"Creating user=[green]{username}[/green] without password (none supplied)"
)
keycloak_admin.create_user(payload)
user = keycloak_admin.create_user(payload)
rich.print(f"Created user=[green]{username}[/green]")
return user


def list_users(keycloak_admin: keycloak.KeycloakAdmin):
Expand Down
File renamed without changes.
117 changes: 117 additions & 0 deletions tests/common/config_mod_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import dataclasses
import typing

# Key under which add_preemptible_node_group() stores its node group inside
# the selected cloud provider's "node_groups" mapping.
PREEMPTIBLE_NODE_GROUP_NAME = "preemptible-node-group"


@dataclasses.dataclass
class GPUConfig:
    """Per-cloud settings consumed by add_gpu_config().

    The fields supply the node group definition (see ``node``) as well as
    the image and node selector placed in the GPU JupyterLab profile.
    """

    # Top-level config key of the cloud provider section.
    cloud: str
    # Emitted as the node group's "instance" value.
    gpu_name: str
    # Label key / label value pair used as the profile's node selector.
    node_selector: str
    node_selector_val: str
    # Additional node group keys, merged last into the result of node().
    extra_config: dict
    min_nodes: typing.Optional[int] = 0
    max_nodes: typing.Optional[int] = 2
    # Key of the node group inside the provider's "node_groups" mapping.
    node_group_name: typing.Optional[str] = "gpu-node"
    docker_image: typing.Optional[str] = "quay.io/nebari/nebari-jupyterlab-gpu:2023.7.1"

    def node(self):
        """Return a new dict describing the GPU node group.

        Entries of ``extra_config`` are applied after the base keys, so a
        key that appears in both takes the ``extra_config`` value.
        """
        node_spec = dict(
            instance=self.gpu_name,
            min_nodes=self.min_nodes,
            max_nodes=self.max_nodes,
        )
        node_spec.update(self.extra_config)
        return node_spec


# GPU settings registered under "aws" in GPU_CONFIG. The node selector value
# is the same instance type that is requested for the node group itself.
AWS_GPU_CONF = GPUConfig(
    cloud="amazon_web_services",
    gpu_name="g4dn.xlarge",
    node_selector="beta.kubernetes.io/instance-type",
    node_selector_val="g4dn.xlarge",
    extra_config={
        "single_subnet": False,
        "gpu": True,
    },
)


# GPU settings registered under "gcp" in GPU_CONFIG.
GCP_GPU_CONF = GPUConfig(
    cloud="google_cloud_platform",
    gpu_name="n1-standard-16",
    node_selector="cloud.google.com/gke-nodepool",
    # The GKE node pool label carries a node pool name, not an instance type;
    # the previous value "g4dn.xlarge" is an AWS instance type and could never
    # match a GKE node. Use the name the GPU node group is registered under
    # (GPUConfig's default node_group_name).
    # NOTE(review): assumes the node pool is named after the node group key —
    # confirm against the GCP infrastructure templates.
    node_selector_val="gpu-node",
    extra_config={"guest_accelerators": [{"name": "nvidia-tesla-t4", "count": 1}]},
)


# Short cloud identifier -> GPU settings, looked up by add_gpu_config().
# A cloud without an entry here makes add_gpu_config() raise ValueError.
GPU_CONFIG = {
    "aws": AWS_GPU_CONF,
    "gcp": GCP_GPU_CONF,
}


def _create_gpu_environment():
return {
"name": "gpu",
"channels": ["pytorch", "nvidia", "conda-forge"],
"dependencies": [
"python=3.10.8",
"ipykernel=6.21.0",
"ipywidgets==7.7.1",
"torchvision",
"torchaudio",
"cudatoolkit",
"pytorch-cuda=11.7",
"pytorch::pytorch",
],
}


def add_gpu_config(config, cloud="aws"):
    """Add a GPU node group, JupyterLab profile and environment to ``config``.

    ``config`` is modified in place and also returned. It must already
    contain the provider section named by the selected GPU settings (with a
    "node_groups" mapping), a ``config["profiles"]["jupyterlab"]`` list and
    a ``config["environments"]`` mapping.

    Raises:
        ValueError: if ``cloud`` has no entry in ``GPU_CONFIG``.
    """
    gpu_config = GPU_CONFIG.get(cloud)
    if not gpu_config:
        raise ValueError(f"GPU not supported/tested on {cloud}")

    node_spec = gpu_config.node()

    # Spawner settings that place the user pod on the GPU node group.
    spawner_override = {
        "image": gpu_config.docker_image,
        "cpu_limit": 4,
        "cpu_guarantee": 3,
        "mem_limit": "16G",
        "mem_guarantee": "10G",
        "extra_resource_limits": {"nvidia.com/gpu": 1},
        "node_selector": {
            gpu_config.node_selector: gpu_config.node_selector_val,
        },
    }
    gpu_profile = {
        "display_name": "GPU Instance",
        "description": "4 CPU / 16GB RAM / 1 NVIDIA T4 GPU (16 GB GPU RAM)",
        "groups": ["gpu-access"],
        "kubespawner_override": spawner_override,
    }

    # Mutations happen in this order: node group, profile, environment.
    provider_node_groups = config[gpu_config.cloud]["node_groups"]
    provider_node_groups[gpu_config.node_group_name] = node_spec
    config["profiles"]["jupyterlab"].append(gpu_profile)
    config["environments"]["environment-gpu.yaml"] = _create_gpu_environment()
    return config


def add_preemptible_node_group(config, cloud="aws"):
    """Register a preemptible node group in ``config`` for ``cloud``.

    The group is stored under ``PREEMPTIBLE_NODE_GROUP_NAME`` in the
    provider's "node_groups" mapping. ``config`` is modified in place and
    also returned.

    Raises:
        ValueError: if ``cloud`` is neither "aws" nor "gcp".
    """
    # Reject unknown clouds before touching config.
    if cloud not in ("aws", "gcp"):
        raise ValueError("Invalid cloud for preemptible config")

    if cloud == "aws":
        cloud_name, instance_name = "amazon_web_services", "m5.xlarge"
    else:
        cloud_name, instance_name = "google_cloud_platform", "n1-standard-8"

    node_group = dict(
        instance=instance_name,
        min_nodes=1,
        max_nodes=5,
        single_subnet=False,
        preemptible=True,
    )
    config[cloud_name]["node_groups"][PREEMPTIBLE_NODE_GROUP_NAME] = node_group
    return config
13 changes: 9 additions & 4 deletions tests_e2e/playwright/navigator.py → tests/common/navigator.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def start_server(self) -> None:
"""
# wait for the page to load
logout_button = self.page.get_by_text("Logout", exact=True)
logout_button.wait_for(state="attached")
logout_button.wait_for(state="attached", timeout=90000)

# if server is not yet running
start_locator = self.page.get_by_role("button", name="Start My Server")
Expand All @@ -214,6 +214,7 @@ def start_server(self) -> None:
self.page.wait_for_url(
urllib.parse.urljoin(self.nebari_url, f"user/{self.username}/*"),
wait_until="networkidle",
timeout=180000,
)

# the jupyter page loads independent of network activity so here
Expand All @@ -235,8 +236,8 @@ def _check_for_kernel_popup(self):
True if the kernel popup is open.
"""
self.page.wait_for_load_state("networkidle")
time.sleep(3)
visible = self.page.get_by_text("Select Kernel", exact=True).is_visible()

return visible

def reset_workspace(self):
Expand All @@ -247,7 +248,7 @@ def reset_workspace(self):
* reset file browser is reset to root
* Finally, ensure that the Launcher screen is showing
"""
logger.debug(">>> Reset JupyterLab workspace")
logger.info(">>> Reset JupyterLab workspace")

# server is already running and there is no popup
popup = self._check_for_kernel_popup()
Expand Down Expand Up @@ -307,7 +308,11 @@ def _set_environment_via_popup(self, kernel=None):
if kernel is None:
# close dialog (deal with the two formats of this dialog)
try:
self.page.get_by_text("Cancel", exact=True).click()
cancel_button = self.page.get_by_text("Cancel", exact=True)
if cancel_button.is_visible():
cancel_button.click()
else:
self.page.mouse.click(0, 0)
except Exception:
self.page.locator("div").filter(has_text="No KernelSelect").get_by_role(
"button", name="No Kernel"
Expand Down
Loading

0 comments on commit fe23d04

Please sign in to comment.