diff --git a/demos/demos-v2.yaml b/demos/demos-v2.yaml
index 4e04efb..e41560d 100644
--- a/demos/demos-v2.yaml
+++ b/demos/demos-v2.yaml
@@ -147,6 +147,25 @@ demos:
       cpu: 6800m
       memory: 15822Mi
       pvc: 28Gi
+  trino-subsea-data:
+    description: Demo loading ca. 600m^2 of ocean floor in a surface plot to visualize the irregularities of the ocean floor.
+    # documentation: -- Currently not documented
+    stackableStack: trino-superset-s3
+    labels:
+      - trino
+      - superset
+      - minio
+      - s3
+      - parquet
+    manifests:
+      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-subsea-data/load-test-data.yaml
+      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-subsea-data/create-table-in-trino.yaml
+      - plainYaml: https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-subsea-data/setup-superset.yaml
+    supportedNamespaces: []
+    resourceRequests:
+      cpu: 6800m
+      memory: 15822Mi
+      pvc: 28Gi
   data-lakehouse-iceberg-trino-spark:
     description: Data lakehouse using Iceberg lakehouse on S3, Trino as query engine, Spark for streaming ingest and Superset for data visualization. Multiple datasources like taxi data, water levels in Germany, earthquakes, e-charging stations and more are loaded.
     documentation: https://docs.stackable.tech/stackablectl/stable/demos/data-lakehouse-iceberg-trino-spark.html
diff --git a/demos/trino-subsea-data/create-table-in-trino.yaml b/demos/trino-subsea-data/create-table-in-trino.yaml
new file mode 100644
index 0000000..1c5dec4
--- /dev/null
+++ b/demos/trino-subsea-data/create-table-in-trino.yaml
@@ -0,0 +1,95 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: create-subsea-multibeam-table-in-trino
+spec:
+  template:
+    spec:
+      containers:
+        - name: create-subsea-multibeam-table-in-trino
+          image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable24.7.0
+          command: ["bash", "-c", "python -u /tmp/script/script.py"]
+          volumeMounts:
+            - name: script
+              mountPath: /tmp/script
+            - name: trino-users
+              mountPath: /trino-users
+      volumes:
+        - name: script
+          configMap:
+            name: create-subsea-multibeam-table-in-trino-script
+        - name: trino-users
+          secret:
+            secretName: trino-users
+      restartPolicy: OnFailure
+  backoffLimit: 50
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: create-subsea-multibeam-table-in-trino-script
+data:
+  script.py: |
+    import sys
+    import trino
+
+    # Ignore all Python warnings unless warning options were passed explicitly.
+    if not sys.warnoptions:
+        import warnings
+        warnings.simplefilter("ignore")
+
+    def read_secret(path):
+        """Return the content of the mounted secret file at `path`."""
+        with open(path) as secret_file:
+            return secret_file.read()
+
+    def get_connection():
+        """Connect to the Trino coordinator over HTTPS as user `admin`."""
+        connection = trino.dbapi.connect(
+            host="trino-coordinator",
+            port=8443,
+            user="admin",
+            http_scheme='https',
+            auth=trino.auth.BasicAuthentication("admin", read_secret("/trino-users/admin")),
+        )
+        # Turns off `verify` on the underlying HTTP session (presumably TLS certificate checks).
+        connection._http_session.verify = False
+        return connection
+
+    def run_query(connection, query):
+        """Print `query`, execute it on `connection` and return all fetched rows."""
+        print(f"[DEBUG] Executing query {query}")
+        cursor = connection.cursor()
+        cursor.execute(query)
+        return cursor.fetchall()
+
+    connection = get_connection()
+
+    run_query(connection, "CREATE SCHEMA IF NOT EXISTS hive.demo WITH (location = 's3a://demo/')")
+    run_query(connection, """
+        CREATE TABLE IF NOT EXISTS hive.demo.subsea (
+            footprint_x DOUBLE,
+            footprint_y DOUBLE,
+            water_depth DOUBLE,
+            data_point_density DOUBLE,
+            geometry VARBINARY
+        ) WITH (
+            external_location = 's3a://demo/subsea/',
+            format = 'parquet'
+        )
+    """)
+
+    loaded_rows = run_query(connection, "SELECT COUNT(*) FROM hive.demo.subsea")[0][0]
+    print(f"Loaded {loaded_rows} rows")
+    # Exit non-zero with a message (a bare `assert` is stripped under `python -O`).
+    if loaded_rows <= 0:
+        sys.exit("Table hive.demo.subsea contains no rows")
+
+    print("Analyzing table subsea")
+    analyze_rows = run_query(connection, """ANALYZE hive.demo.subsea""")[0][0]
+    if analyze_rows != loaded_rows:
+        sys.exit(f"ANALYZE processed {analyze_rows} rows but the table has {loaded_rows}")
+    stats = run_query(connection, """show stats for hive.demo.subsea""")
+    print("Produced the following stats:")
+    print(*stats, sep="\n")
diff --git a/demos/trino-subsea-data/load-test-data.yaml b/demos/trino-subsea-data/load-test-data.yaml
new file mode 100644
index 0000000..8971b06
--- /dev/null
+++ b/demos/trino-subsea-data/load-test-data.yaml
@@ -0,0 +1,22 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: load-subsea-multibeam-data
+spec:
+  template:
+    spec:
+      containers:
+        - name: load-subsea-multibeam-data
+          image: "bitnami/minio:2024-debian-12"
+          # `curl --fail` exits non-zero on HTTP errors, so an error page is never uploaded as the parquet file.
+          command: ["bash", "-c", "cd /tmp && curl --fail -O https://repo.stackable.tech/repository/misc/marispace/multibeam_data_point_density_example.parquet && mc --insecure alias set minio http://minio:9000/ $(cat /minio-s3-credentials/accessKey) $(cat /minio-s3-credentials/secretKey) && mc cp multibeam_data_point_density_example.parquet minio/demo/subsea"]
+          volumeMounts:
+            - name: minio-s3-credentials
+              mountPath: /minio-s3-credentials
+      volumes:
+        - name: minio-s3-credentials
+          secret:
+            secretName: minio-s3-credentials
+      restartPolicy: OnFailure
+  backoffLimit: 50
diff --git a/demos/trino-subsea-data/setup-superset.yaml b/demos/trino-subsea-data/setup-superset.yaml
new file mode 100644
index 0000000..51fa8b5
--- /dev/null
+++ b/demos/trino-subsea-data/setup-superset.yaml
@@ -0,0 +1,111 @@
+---
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: setup-superset
+spec:
+  template:
+    spec:
+      containers:
+        - name: setup-superset
+          image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable24.7.0
+          # `curl --fail` exits non-zero on HTTP errors, so an error page is never imported as the asset bundle.
+          command: ["bash", "-c", "curl --fail -o superset-assets.zip https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-subsea-data/superset-assets.zip && python -u /tmp/script/script.py"]
+          volumeMounts:
+            - name: script
+              mountPath: /tmp/script
+            - name: trino-users
+              mountPath: /trino-users
+            - name: superset-credentials
+              mountPath: /superset-credentials
+      volumes:
+        - name: script
+          configMap:
+            name: setup-superset-script
+        - name: superset-credentials
+          secret:
+            secretName: superset-credentials
+        - name: trino-users
+          secret:
+            secretName: trino-users
+      restartPolicy: OnFailure
+  backoffLimit: 50
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: setup-superset-script
+data:
+  script.py: |
+    import json
+    import logging
+    import requests
+
+    def read_secret(path):
+        """Return the content of the mounted secret file at `path`."""
+        with open(path) as secret_file:
+            return secret_file.read()
+
+    base_url = "http://superset-external:8088" # For local testing / developing replace it, afterwards change back to http://superset-external:8088
+    superset_username = read_secret("/superset-credentials/adminUser.username")
+    superset_password = read_secret("/superset-credentials/adminUser.password")
+    trino_username = "admin"
+    trino_password = read_secret("/trino-users/admin")
+
+    logging.basicConfig(level=logging.INFO)
+    logging.info("Starting setup of Superset")
+
+    logging.info("Getting access token from /api/v1/security/login")
+    session = requests.session()
+    login_response = session.post(f"{base_url}/api/v1/security/login", json={"username": superset_username, "password": superset_password, "provider": "db", "refresh": True})
+    # Surface the HTTP error itself instead of a KeyError when the login is rejected.
+    login_response.raise_for_status()
+    access_token = login_response.json()['access_token']
+    # print(f"access_token: {access_token}")
+
+    logging.info("Getting csrf token from /api/v1/security/csrf_token")
+    csrf_response = session.get(f"{base_url}/api/v1/security/csrf_token", headers={"Authorization": f"Bearer {access_token}"})
+    csrf_response.raise_for_status()
+    csrf_token = csrf_response.json()["result"]
+    # print(f"csrf_token: {csrf_token}")
+
+    headers = {
+        "accept": "application/json",
+        "Authorization": f"Bearer {access_token}",
+        "X-CSRFToken": csrf_token,
+    }
+
+    # To retrieve all of the assets (datasources, datasets, charts and dashboards) run the following commands
+    # logging.info("Exporting all assets")
+    # result = session.get(f"{base_url}/api/v1/assets/export", headers=headers)
+    # assert result.status_code == 200
+    # with open("superset-assets.zip", "wb") as f:
+    #     f.write(result.content)
+
+
+    #########################
+    # IMPORTANT
+    #########################
+    # The exported zip file had to be modified, otherwise we get:
+    #
+    # {"errors": [{"message": "Error importing assets", "error_type": "GENERIC_COMMAND_ERROR", "level": "warning", "extra": {"databases/Trino.yaml": {"extra": {"disable_data_preview": ["Unknown field."]}}, "issue_codes": [{"code": 1010, "message": "Issue 1010 - Superset encountered an error while running a command."}]}}]}
+    #
+    # The file databases/Trino.yaml was modified and the attribute "extra.disable_data_preview" was removed
+    #########################
+    logging.info("Importing all assets")
+    # json.dumps keeps the payload valid JSON even if the password contains quotes or backslashes.
+    data = {
+        "passwords": json.dumps({"databases/Trino.yaml": trino_password}),
+    }
+    # The context manager closes the bundle file once the upload request has returned.
+    with open("superset-assets.zip", "rb") as bundle:
+        files = {
+            "bundle": ("superset-assets.zip", bundle),
+        }
+        result = session.post(f"{base_url}/api/v1/assets/import", headers=headers, files=files, data=data)
+    print(result)
+    print(result.text)
+    if result.status_code != 200:
+        raise SystemExit(f"Importing assets failed with HTTP status {result.status_code}")
+
+    logging.info("Finished setup of Superset")
diff --git a/demos/trino-subsea-data/superset-assets.zip b/demos/trino-subsea-data/superset-assets.zip
new file mode 100644
index 0000000..60a0255
Binary files /dev/null and b/demos/trino-subsea-data/superset-assets.zip differ
diff --git a/stacks/trino-superset-s3/superset.yaml b/stacks/trino-superset-s3/superset.yaml
index 24a8cfd..b8363fa 100644
--- a/stacks/trino-superset-s3/superset.yaml
+++ b/stacks/trino-superset-s3/superset.yaml
@@ -14,6 +14,11 @@ spec:
     roleGroups:
       default:
         replicas: 1
+        configOverrides:
+          superset_config.py:
+            # Needed by trino-subsea-data demo
+            ROW_LIMIT: "200000"
+            SQL_MAX_ROW: "200000"
 ---
 apiVersion: v1
 kind: Secret