-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Visualize ocean floor geo data in Superset (with Trino, from S3) (#88)
* WIP * some fixes * Added geometry column * WIP * WIP * fix link * rename some k8s objects and change reference to reference github raw files * Update stacks/trino-superset-s3/superset.yaml Co-authored-by: Sebastian Bernauer <sebastian.bernauer@stackable.de> * changed location and name of dataset * Update description * Don't document for now * change branch refs * Update demos/trino-subsea-data/setup-superset.yaml Co-authored-by: Sebastian Bernauer <sebastian.bernauer@stackable.de> --------- Co-authored-by: Sebastian Bernauer <sebastian.bernauer@stackable.de>
- Loading branch information
Showing
6 changed files
with
223 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
---
# One-shot Job that runs the Python script mounted from the
# `create-subsea-multibeam-table-in-trino-script` ConfigMap.
apiVersion: batch/v1
kind: Job
metadata:
  name: create-subsea-multibeam-table-in-trino
spec:
  template:
    spec:
      containers:
        - name: create-subsea-multibeam-table-in-trino
          image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable24.7.0
          # `-u` keeps Python's output unbuffered so it shows up in the pod log immediately.
          command: ["bash", "-c", "python -u /tmp/script/script.py"]
          volumeMounts:
            - name: script
              mountPath: /tmp/script
            # The script reads the admin password from /trino-users/admin.
            - name: trino-users
              mountPath: /trino-users
      volumes:
        - name: script
          configMap:
            name: create-subsea-multibeam-table-in-trino-script
        - name: trino-users
          secret:
            secretName: trino-users
      restartPolicy: OnFailure
  backoffLimit: 50
--- | ||
apiVersion: v1 | ||
kind: ConfigMap | ||
metadata: | ||
name: create-subsea-multibeam-table-in-trino-script | ||
data: | ||
script.py: | | ||
import sys | ||
import trino | ||
if not sys.warnoptions: | ||
import warnings | ||
warnings.simplefilter("ignore") | ||
def get_connection():
    """Open a DB-API connection to the Trino coordinator as user ``admin``.

    The password is read from ``/trino-users/admin``. TLS certificate
    verification is switched off on the connection's HTTP session.

    Returns:
        The connection object created by ``trino.dbapi.connect``.
    """
    # Read the password through a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open("/trino-users/admin") as password_file:
        admin_password = password_file.read()

    connection = trino.dbapi.connect(
        host="trino-coordinator",
        port=8443,
        user="admin",
        http_scheme='https',
        auth=trino.auth.BasicAuthentication("admin", admin_password),
    )
    # NOTE(review): this reaches into a private attribute of the client to
    # disable certificate verification — presumably because the coordinator
    # uses a certificate the container does not trust; confirm.
    connection._http_session.verify = False
    return connection
def run_query(connection, query):
    """Execute *query* on *connection* and return all result rows.

    The statement is echoed to stdout before execution so it appears in the
    job log.

    Args:
        connection: Object providing ``cursor()``; the cursor must offer
            ``execute(query)`` and ``fetchall()``.
        query: SQL text to execute.

    Returns:
        Whatever ``cursor.fetchall()`` returns for the executed statement.
    """
    print(f"[DEBUG] Executing query {query}")
    cursor = connection.cursor()
    cursor.execute(query)
    return cursor.fetchall()
# Script body: create the schema and the external table, verify that data is
# visible, then collect and print table statistics.
connection = get_connection()

run_query(connection, "CREATE SCHEMA IF NOT EXISTS hive.demo WITH (location = 's3a://demo/')")
run_query(connection, """
    CREATE TABLE IF NOT EXISTS hive.demo.subsea (
        footprint_x DOUBLE,
        footprint_y DOUBLE,
        water_depth DOUBLE,
        data_point_density DOUBLE,
        geometry VARBINARY
    ) WITH (
        external_location = 's3a://demo/subsea/',
        format = 'parquet'
    )
""")

loaded_rows = run_query(connection, "SELECT COUNT(*) FROM hive.demo.subsea")[0][0]
print(f"Loaded {loaded_rows} rows")
# Fail with an explicit error rather than a bare `assert`: assertions are
# removed under `python -O` and carry no message. A non-zero exit still makes
# the Job retry.
if loaded_rows <= 0:
    raise RuntimeError("Table hive.demo.subsea contains no rows")

print("Analyzing table subsea")
# The first column of the first ANALYZE result row is compared with the row count.
analyze_rows = run_query(connection, """ANALYZE hive.demo.subsea""")[0][0]
if analyze_rows != loaded_rows:
    raise RuntimeError(
        f"ANALYZE reported {analyze_rows} rows but the table has {loaded_rows} rows"
    )

stats = run_query(connection, """show stats for hive.demo.subsea""")
print("Produced the following stats:")
print(*stats, sep="\n")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
---
# One-shot Job that downloads the example parquet file and copies it into the
# `demo/subsea` location of the MinIO instance reachable at http://minio:9000/.
apiVersion: batch/v1
kind: Job
metadata:
  name: load-subsea-multibeam-data
spec:
  template:
    spec:
      containers:
        - name: load-subsea-multibeam-data
          image: "bitnami/minio:2024-debian-12"
          # `--fail` makes curl exit non-zero on an HTTP error instead of saving the
          # error page under the parquet file name and uploading it as data.
          command: ["bash", "-c", "cd /tmp && curl --fail -O https://repo.stackable.tech/repository/misc/marispace/multibeam_data_point_density_example.parquet && mc --insecure alias set minio http://minio:9000/ $(cat /minio-s3-credentials/accessKey) $(cat /minio-s3-credentials/secretKey) && mc cp multibeam_data_point_density_example.parquet minio/demo/subsea"]
          volumeMounts:
            - name: minio-s3-credentials
              mountPath: /minio-s3-credentials
      volumes:
        - name: minio-s3-credentials
          secret:
            secretName: minio-s3-credentials
      restartPolicy: OnFailure
  backoffLimit: 50
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
---
# One-shot Job that downloads the Superset asset bundle and then runs the
# Python script mounted from the `setup-superset-script` ConfigMap.
apiVersion: batch/v1
kind: Job
metadata:
  name: setup-superset
spec:
  template:
    spec:
      containers:
        - name: setup-superset
          image: docker.stackable.tech/stackable/testing-tools:0.2.0-stackable24.7.0
          # `--fail` makes curl exit non-zero on an HTTP error instead of writing the
          # error body into superset-assets.zip and handing it to the import script.
          command: ["bash", "-c", "curl --fail -o superset-assets.zip https://raw.githubusercontent.com/stackabletech/demos/main/demos/trino-subsea-data/superset-assets.zip && python -u /tmp/script/script.py"]
          volumeMounts:
            - name: script
              mountPath: /tmp/script
            - name: trino-users
              mountPath: /trino-users
            - name: superset-credentials
              mountPath: /superset-credentials
      volumes:
        - name: script
          configMap:
            name: setup-superset-script
        - name: superset-credentials
          secret:
            secretName: superset-credentials
        - name: trino-users
          secret:
            secretName: trino-users
      restartPolicy: OnFailure
  backoffLimit: 50
--- | ||
apiVersion: v1 | ||
kind: ConfigMap | ||
metadata: | ||
name: setup-superset-script | ||
data: | ||
script.py: | | ||
import json
import logging

import requests


def _read_file(path):
    """Return the text content of *path*, closing the file handle afterwards."""
    with open(path) as handle:
        return handle.read()


base_url = "http://superset-external:8088"  # For local testing / developing replace it, afterwards change back to http://superset-external:8088
superset_username = _read_file("/superset-credentials/adminUser.username")
superset_password = _read_file("/superset-credentials/adminUser.password")
trino_username = "admin"
trino_password = _read_file("/trino-users/admin")

logging.basicConfig(level=logging.INFO)
logging.info("Starting setup of Superset")

logging.info("Getting access token from /api/v1/security/login")
session = requests.session()
login_payload = {
    "username": superset_username,
    "password": superset_password,
    "provider": "db",
    "refresh": True,
}
access_token = session.post(f"{base_url}/api/v1/security/login", json=login_payload).json()['access_token']

logging.info("Getting csrf token from /api/v1/security/csrf_token")
csrf_token = session.get(
    f"{base_url}/api/v1/security/csrf_token",
    headers={"Authorization": f"Bearer {access_token}"},
).json()["result"]

headers = {
    "accept": "application/json",
    "Authorization": f"Bearer {access_token}",
    "X-CSRFToken": csrf_token,
}

# To retrieve all of the assets (datasources, datasets, charts and dashboards) run the following commands
# logging.info("Exporting all assets")
# result = session.get(f"{base_url}/api/v1/assets/export", headers=headers)
# assert result.status_code == 200
# with open("superset-assets.zip", "wb") as f:
#     f.write(result.content)

#########################
# IMPORTANT
#########################
# The exported zip file had to be modified, otherwise we get:
# <Response [422]>
# {"errors": [{"message": "Error importing assets", "error_type": "GENERIC_COMMAND_ERROR", "level": "warning", "extra": {"databases/Trino.yaml": {"extra": {"disable_data_preview": ["Unknown field."]}}, "issue_codes": [{"code": 1010, "message": "Issue 1010 - Superset encountered an error while running a command."}]}}]}
#
# The file databases/Trino.yaml was modified and the attribute "extra.disable_data_preview" was removed
#########################
logging.info("Importing all assets")
data = {
    # Serialize with json.dumps so that quotes or backslashes in the password
    # are escaped correctly; plain string concatenation would yield invalid JSON.
    "passwords": json.dumps({"databases/Trino.yaml": trino_password}),
}
# Keep the bundle open only for the duration of the upload.
with open("superset-assets.zip", "rb") as bundle:
    files = {
        "bundle": ("superset-assets.zip", bundle),
    }
    result = session.post(f"{base_url}/api/v1/assets/import", headers=headers, files=files, data=data)
print(result)
print(result.text)
# Explicit check instead of `assert`, which is removed under `python -O`.
# A non-zero exit still makes the Job retry.
if result.status_code != 200:
    raise RuntimeError(f"Importing assets failed with HTTP status {result.status_code}")

logging.info("Finished setup of Superset")
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters