feat: Update demos for Release 24.11 (#116)
* chore(opa): bump to 0.67.0

Depends on: stackabletech/docker-images#797

* chore(opa): bump to 0.67.1

* chore: Bump Vector to 0.40.0

* chore(airflow): bump version to 2.9.3

* chore(trino): bump version to 455

* chore(spark): bump version to 3.5.2

* Update create-spark-ingestion-job.yaml

* chore: bump hdfs version to 3.4.0

* chore: bump nifi for 24.11

* chore: Upgrade the Vector aggregator

* chore(druid): Bump to 30.0.0

* chore(pre-commit): Ignore braces lint for templates yaml files

* chore: lint

* Apply suggestions from code review

Use images that exist.

We can't use the HDFS image from 24.7 because it's missing a tool the demo needs (the distcp MapReduce components); for consistency, we use the same image everywhere.

* Apply suggestions from code review

* chore: fix lint issue

* Apply suggestions from code review

* chore: fix lint issue

* chore: disable the brace lint

---------

Co-authored-by: xeniape <xenia.fischer@stackable.tech>
Co-authored-by: Razvan-Daniel Mihai <84674+razvan@users.noreply.github.com>
Co-authored-by: Siegfried Weber <mail@siegfriedweber.net>
4 people authored Nov 11, 2024
1 parent f8ef88d commit a7d34d2
Showing 31 changed files with 67 additions and 60 deletions.
5 changes: 1 addition & 4 deletions .yamllint.yaml
@@ -7,7 +7,4 @@ rules:
check-keys: false
comments:
min-spaces-from-content: 1 # Needed due to https://github.com/adrienverge/yamllint/issues/443
-braces:
-  ignore: |
-    stacks/signal-processing/*
-    stacks/_templates/*
+braces: disable # because the yaml files are templates which can have {{ ... }}
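The braces rule has to go because the stack files are Jinja templates; a minimal, hypothetical example of a line that yamllint would otherwise flag:

# hypothetical-template.yaml: valid after Jinja rendering, but yamllint's
# braces rule chokes on the spacing inside the {{ ... }} placeholder
metadata:
  name: {{ stack_name }}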
@@ -43,13 +43,13 @@ data:
stackable.tech/vendor: Stackable
spec:
sparkImage:
-productVersion: 3.5.1
+productVersion: 3.5.2
mode: cluster
mainApplicationFile: local:///stackable/spark/jobs/spark-ingest-into-lakehouse.py
deps:
packages:
-- org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.5.0
-- org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.1
+- org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1
+- org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.2
s3connection:
reference: minio
sparkConf:
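Worth noting for anyone editing these coordinates: the Iceberg runtime artifact name encodes both the Spark minor version and the Scala build. A sketch of the constraint, using the versions from the hunk above:

sparkImage:
  productVersion: 3.5.2  # Spark 3.5.x
deps:
  packages:
    # "3.5" must match the Spark minor version and "2.12" the Scala build;
    # only the trailing Iceberg version (1.6.1) moves independently
    - org.apache.iceberg:iceberg-spark-runtime-3.5_2.12:1.6.1
    - org.apache.spark:spark-sql-kafka-0-10_2.12:3.5.2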
2 changes: 1 addition & 1 deletion demos/end-to-end-security/create-spark-report.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ data:
name: spark-report
spec:
sparkImage:
productVersion: 3.5.1
productVersion: 3.5.2
mode: cluster
mainApplicationFile: local:///stackable/spark/jobs/spark-report.py
deps:
Expand Down
3 changes: 2 additions & 1 deletion demos/hbase-hdfs-load-cycling-data/distcp-cycling-data.yaml
@@ -11,14 +11,15 @@ spec:
# We use 24.3.0 here which contains the distcp MapReduce components
# This is not included in the 24.7 images and will fail.
# See: https://github.com/stackabletech/docker-images/issues/793
-image: docker.stackable.tech/stackable/hadoop:3.3.4-stackable24.3.0
+image: docker.stackable.tech/stackable/hadoop:3.4.0-stackable0.0.0-dev
env:
- name: HADOOP_USER_NAME
value: stackable
- name: HADOOP_CONF_DIR
value: "/stackable/conf/hdfs"
- name: HADOOP_CLASSPATH
value: "/stackable/hadoop/share/hadoop/tools/lib/*.jar"
+# yamllint disable-line rule:line-length
command: ["bash", "-c", "bin/hdfs dfs -mkdir -p /data/raw && bin/hadoop distcp -D fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider s3a://public-backup-nyc-tlc/cycling-tripdata/demo-cycling-tripdata.csv.gz hdfs://hdfs/data/raw"]
volumeMounts:
- name: config-volume-hdfs
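The distcp one-liner above is hard to read and needs the line-length suppression; a hedged sketch of an equivalent form using a YAML block scalar (same tool, same paths):

command:
  - bash
  - -c
  - |
    # create the target directory in HDFS
    bin/hdfs dfs -mkdir -p /data/raw
    # anonymous read of the public S3 bucket; no credentials needed
    bin/hadoop distcp \
      -D fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider \
      s3a://public-backup-nyc-tlc/cycling-tripdata/demo-cycling-tripdata.csv.gz \
      hdfs://hdfs/data/raw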
@@ -8,13 +8,15 @@ spec:
spec:
containers:
- name: load-ny-taxi-data
-image: docker.stackable.tech/stackable/hadoop:3.3.4-stackable24.7.0
+image: docker.stackable.tech/stackable/hadoop:3.4.0-stackable0.0.0-dev
+# yamllint disable rule:line-length
command: ["bash", "-c", "/stackable/hadoop/bin/hdfs dfs -mkdir -p /ny-taxi-data/raw \
&& cd /tmp \
&& for month in 2020-09; do \
curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/fhvhv_tripdata_$month.parquet \
&& /stackable/hadoop/bin/hdfs dfs -put fhvhv_tripdata_$month.parquet /ny-taxi-data/raw/; \
done"]
+# yamllint enable rule:line-length
volumeMounts:
- name: hdfs-discovery-configmap
mountPath: /hdfs
12 changes: 6 additions & 6 deletions demos/signal-processing/DownloadAndWriteToDB.xml
@@ -61,7 +61,7 @@
<bundle>
<artifact>nifi-dbcp-service-nar</artifact>
<group>org.apache.nifi</group>
-<version>1.21.0</version>
+<version>1.27.0</version>
</bundle>
<comments></comments>
<descriptors>
@@ -258,7 +258,7 @@
<bundle>
<artifact>nifi-record-serialization-services-nar</artifact>
<group>org.apache.nifi</group>
-<version>1.21.0</version>
+<version>1.27.0</version>
</bundle>
<comments></comments>
<descriptors>
@@ -561,7 +561,7 @@
</position>
<height>88.0</height>
<label>This flow downloads a dataset, writing it to a temporary table in TimescaleDB.
-This data is then written to the target table with the time offsets preserved, 
+This data is then written to the target table with the time offsets preserved,
but re-based to the current time. This means that the data can be displayed
in Grafana as if it were being streamed, whereas in fact the dashboard moves
through "future" data that has already been persisted.</label>
@@ -584,7 +584,7 @@ through "future" data that has already been persisted.</label>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
-<version>1.21.0</version>
+<version>1.27.0</version>
</bundle>
<config>
<backoffMechanism>PENALIZE_FLOWFILE</backoffMechanism>
@@ -1069,7 +1069,7 @@ through "future" data that has already been persisted.</label>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
-<version>1.21.0</version>
+<version>1.27.0</version>
</bundle>
<config>
<backoffMechanism>PENALIZE_FLOWFILE</backoffMechanism>
@@ -1223,7 +1223,7 @@ from conditions_temp;</value>
<bundle>
<artifact>nifi-standard-nar</artifact>
<group>org.apache.nifi</group>
-<version>1.21.0</version>
+<version>1.27.0</version>
</bundle>
<config>
<backoffMechanism>PENALIZE_FLOWFILE</backoffMechanism>
@@ -37,7 +37,7 @@ data:
name: spark-ad
spec:
sparkImage:
-productVersion: 3.5.1
+productVersion: 3.5.2
mode: cluster
mainApplicationFile: local:///spark-scripts/spark-ad.py
deps:
@@ -9,6 +9,7 @@ spec:
containers:
- name: load-ny-taxi-data
image: "bitnami/minio:2022-debian-10"
+# yamllint disable-line rule:line-length
command: ["bash", "-c", "cd /tmp && for month in 2020-09 2020-10 2020-11 2020-12; do curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/fhvhv_tripdata_$month.parquet && mc --insecure alias set minio http://minio:9000/ $(cat /minio-s3-credentials/accessKey) $(cat /minio-s3-credentials/secretKey) && mc cp fhvhv_tripdata_$month.parquet minio/demo/ny-taxi-data/raw/ && mc mb --ignore-existing minio/prediction; done"]
volumeMounts:
- name: minio-s3-credentials
1 change: 1 addition & 0 deletions demos/trino-taxi-data/load-test-data.yaml
@@ -9,6 +9,7 @@ spec:
containers:
- name: load-ny-taxi-data
image: "bitnami/minio:2024-debian-12"
+# yamllint disable-line rule:line-length
command: ["bash", "-c", "cd /tmp && for month in 2020-01 2020-02 2020-03 2020-04 2020-05 2020-06 2020-07 2020-08 2020-09 2020-10 2020-11 2020-12 2021-01 2021-02 2021-03 2021-04 2021-05 2021-06 2021-07 2021-08 2021-09 2021-10 2021-11 2021-12 2022-01 2022-02 2022-03 2022-04; do curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/yellow_tripdata_$month.parquet && mc --insecure alias set minio http://minio:9000/ $(cat /minio-s3-credentials/accessKey) $(cat /minio-s3-credentials/secretKey) && mc cp yellow_tripdata_$month.parquet minio/demo/ny-taxi-data/raw/; done"]
volumeMounts:
- name: minio-s3-credentials
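The same block-scalar trick applies here, plus two tweaks in this hedged sketch: `mc alias set` only needs to run once, and bash brace expansion can replace the 28-month list:

command:
  - bash
  - -c
  - |
    # register the MinIO endpoint once, outside the loop
    mc --insecure alias set minio http://minio:9000/ \
      "$(cat /minio-s3-credentials/accessKey)" "$(cat /minio-s3-credentials/secretKey)"
    cd /tmp
    # brace expansion (bash 4+) covers the same months listed above
    for month in 2020-{01..12} 2021-{01..12} 2022-{01..04}; do
      curl -O https://repo.stackable.tech/repository/misc/ny-taxi-data/yellow_tripdata_$month.parquet
      mc cp yellow_tripdata_$month.parquet minio/demo/ny-taxi-data/raw/
    done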
2 changes: 1 addition & 1 deletion docs/modules/demos/pages/trino-iceberg.adoc
@@ -78,7 +78,7 @@ As an alternative, you can use https://trino.io/download.html[trino-cli] by running:

[source,console]
----
-$ java -jar ~/Downloads/trino-cli-451-executable.jar --user admin --insecure --password --server https://172.18.0.2:30856
+$ java -jar ~/Downloads/trino-cli-455-executable.jar --user admin --insecure --password --server https://172.18.0.2:30856
----

Make sure to replace the server endpoint with the endpoint listed in the `stackablectl stacklet list` output.
6 changes: 3 additions & 3 deletions stacks/_templates/vector-aggregator.yaml
@@ -4,11 +4,11 @@ name: vector
repo:
name: vector
url: https://helm.vector.dev
-version: 0.37.0
+version: 0.36.1 # app version 0.41.1
options:
commonLabels:
stackable.tech/vendor: Stackable
-podLabels: # Doesn't seem to work?
+podLabels:
stackable.tech/vendor: Stackable
role: Aggregator
customConfig:
@@ -26,7 +26,7 @@ options:
- https://opensearch-cluster-master.default.svc.cluster.local:9200
mode: bulk
# The auto-detection of the API version does not work in Vector
-# 0.39.0 for OpenSearch, so the version must be set explicitly
+# 0.41.1 for OpenSearch, so the version must be set explicitly
# (see https://github.com/vectordotdev/vector/issues/17690).
api_version: v8
tls:
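Pulling the affected fragments together, the sink this hunk patches looks roughly like the sketch below (structure assumed from the fragments shown; the explicit `api_version` is the workaround):

customConfig:
  sinks:
    opensearch:
      type: elasticsearch  # Vector's OpenSearch-compatible sink
      endpoints:
        - https://opensearch-cluster-master.default.svc.cluster.local:9200
      mode: bulk
      # auto-detection fails against OpenSearch, so pin the API version
      api_version: v8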
12 changes: 6 additions & 6 deletions stacks/airflow/airflow.yaml
@@ -1,12 +1,12 @@
-{% raw %}
---
+# {% raw %}
apiVersion: airflow.stackable.tech/v1alpha1
kind: AirflowCluster
metadata:
name: airflow
spec:
image:
-productVersion: 2.9.2
+productVersion: 2.9.3
clusterConfig:
listenerClass: external-unstable
loadExamples: false
@@ -272,7 +272,7 @@ data:
spec:
version: "1.0"
sparkImage:
-productVersion: 3.5.1
+productVersion: 3.5.2
mode: cluster
mainApplicationFile: local:///stackable/spark/examples/src/main/python/pi.py
job:
@@ -285,22 +285,22 @@ data:
limit: 512Mi
driver:
config:
-resources: 
+resources:
cpu:
min: 1000m
max: 1200m
memory:
limit: 1024Mi
executor:
config:
-resources: 
+resources:
cpu:
min: 500m
max: 1000m
memory:
limit: 1024Mi
replicas: 3
-{% endraw %}
+# {% endraw %}
---
apiVersion: v1
kind: Secret
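The `{% raw %}` change deserves a note: moving the Jinja markers into YAML comments keeps the file parseable as plain YAML (so yamllint and editors stop tripping over it), while Jinja still consumes the tags at render time. A minimal, hypothetical template:

---
# {% raw %}   <- a YAML comment, but Jinja still honors the tag inside it
apiVersion: airflow.stackable.tech/v1alpha1
kind: AirflowCluster
# {% endraw %}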
2 changes: 1 addition & 1 deletion stacks/data-lakehouse-iceberg-trino-spark/nifi.yaml
@@ -5,7 +5,7 @@ metadata:
name: nifi
spec:
image:
-productVersion: 1.25.0
+productVersion: 1.27.0
clusterConfig:
authentication:
- authenticationClass: nifi-admin-credentials
4 changes: 2 additions & 2 deletions stacks/data-lakehouse-iceberg-trino-spark/trino.yaml
@@ -5,7 +5,7 @@ metadata:
name: trino
spec:
image:
productVersion: "451"
productVersion: "455"
clusterConfig:
listenerClass: external-unstable
catalogLabelSelector:
@@ -114,7 +114,7 @@ metadata:
name: opa
spec:
image:
-productVersion: 0.66.0
+productVersion: 0.67.1
servers:
roleGroups:
default: {}
2 changes: 1 addition & 1 deletion stacks/dual-hive-hdfs-s3/hdfs.yaml
@@ -25,7 +25,7 @@ metadata:
name: hdfs
spec:
image:
-productVersion: 3.3.4
+productVersion: 3.4.0
clusterConfig:
listenerClass: external-unstable
dfsReplication: 1
2 changes: 1 addition & 1 deletion stacks/dual-hive-hdfs-s3/trino.yaml
@@ -61,7 +61,7 @@ metadata:
name: trino
spec:
image:
productVersion: "451"
productVersion: "455"
clusterConfig:
authorization:
opa:
8 changes: 5 additions & 3 deletions stacks/end-to-end-security/hdfs.yaml
@@ -1,10 +1,11 @@
+---
apiVersion: hdfs.stackable.tech/v1alpha1
kind: HdfsCluster
metadata:
name: hdfs
spec:
image:
-productVersion: 3.3.4
+productVersion: 3.4.0
clusterConfig:
zookeeperConfigMapName: hdfs-znode
authentication:
@@ -29,8 +30,9 @@ spec:
level: DEBUG
configOverrides: &configOverrides
core-site.xml:
-# The idea is that the user "hive" can't do anything in hdfs, *but* it can impersonate other users
-# (such as trino), that have the needed permissions
+# The idea is that the user "hive" can't do anything in hdfs,
+# *but* it can impersonate other users (such as trino),
+# that have the needed permissions
hadoop.proxyuser.hive.users: "*"
hadoop.proxyuser.hive.hosts: "*"
roleGroups:
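For readers unfamiliar with the mechanism that comment describes: Hadoop's proxy-user settings let `hive` authenticate once and then act on behalf of another user, whose own permissions HDFS then enforces. A hedged sketch of a tighter variant, assuming only `trino` should ever be impersonated:

configOverrides:
  core-site.xml:
    # allow hive to impersonate only trino, from any host
    hadoop.proxyuser.hive.users: "trino"
    hadoop.proxyuser.hive.hosts: "*"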
2 changes: 1 addition & 1 deletion stacks/end-to-end-security/opa.yaml
@@ -5,7 +5,7 @@ metadata:
name: opa
spec:
image:
-productVersion: 0.66.0
+productVersion: 0.67.1
clusterConfig:
userInfo:
backend:
12 changes: 6 additions & 6 deletions stacks/end-to-end-security/trino-regorules.yaml
@@ -1,5 +1,5 @@
-{% raw %}
---
+# {% raw %}
apiVersion: v1
kind: ConfigMap
metadata:
@@ -1460,7 +1460,7 @@ data:
# "groups": ["group1", ...],
# "user": "username",
# },
# "softwareStack": {"trinoVersion": "442"},
# "softwareStack": {"trinoVersion": "455"},
# }
# }
#
@@ -1610,7 +1610,7 @@ data:
# "groups": ["group1", ...],
# "user": "username",
# },
# "softwareStack": {"trinoVersion": "442"},
# "softwareStack": {"trinoVersion": "455"},
# }
# }
#
@@ -1671,7 +1671,7 @@ data:
# "groups": ["group1", ...],
# "user": "username",
# },
# "softwareStack": {"trinoVersion": "442"},
# "softwareStack": {"trinoVersion": "455"},
# }
# }
#
@@ -1736,7 +1736,7 @@ data:
# "groups": ["group1", ...],
# "user": "username",
# },
# "softwareStack": {"trinoVersion": "442"},
# "softwareStack": {"trinoVersion": "455"},
# }
# }
#
@@ -1797,4 +1797,4 @@ data:
regex.match(pattern_with_anchors, value)
}
-{% endraw %}
+# {% endraw %}
2 changes: 1 addition & 1 deletion stacks/end-to-end-security/trino.yaml
@@ -5,7 +5,7 @@ metadata:
name: trino
spec:
image:
productVersion: "451"
productVersion: "455"
clusterConfig:
listenerClass: external-unstable
tls:
3 changes: 2 additions & 1 deletion stacks/hdfs-hbase/hdfs.yaml
@@ -1,10 +1,11 @@
+---
apiVersion: hdfs.stackable.tech/v1alpha1
kind: HdfsCluster
metadata:
name: hdfs
spec:
image:
-productVersion: 3.3.4
+productVersion: 3.4.0
clusterConfig:
dfsReplication: 1
zookeeperConfigMapName: hdfs-znode
2 changes: 1 addition & 1 deletion stacks/jupyterhub-pyspark-hdfs/hdfs.yaml
@@ -13,7 +13,7 @@ metadata:
name: hdfs
spec:
image:
-productVersion: 3.3.4
+productVersion: 3.4.0
clusterConfig:
dfsReplication: 1
zookeeperConfigMapName: hdfs-znode
