# Run Spark Job for Medium Stats #377

# Submits the "medium-stats" Spark application to a Kubernetes cluster on
# every push to master and once a day on a cron schedule.
name: Run Spark Job for Medium Stats

on:
  push:
    branches:
      - master
  schedule:
    # Daily at 01:00. GitHub evaluates workflow cron expressions in UTC.
    - cron: '0 1 * * *'

jobs:
  sparkStats:
    # The step below expects `spark-submit` on PATH and a kubeconfig at
    # /root/.kube/config on the self-hosted runner.
    runs-on: self-hosted
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Spark Monthly and Yearly Stats
        run: |
          export KUBECONFIG="/root/.kube/config"

          # The pod template paths passed below (./driver.yaml and
          # ./executor.yaml) are resolved relative to this directory.
          cd scripts/medium

          # Run date as YYYY-MM-DD; handed to the driver script with a
          # trailing slash as its first argument.
          # NOTE(review): `date` uses the runner's local time zone while the
          # cron trigger is UTC - confirm both agree on the calendar day.
          todaysDate=$(date +"%Y-%m-%d")

          # Cluster-mode submission to the Kubernetes API server: driver and
          # 3 executors run in the data-analytics namespace under the `spark`
          # service account, each requesting 2 cores and 1g of memory.
          # The application file is fetched from the URL on the last line.
          spark-submit \
            --master k8s://https://10.0.0.119:6443 \
            --deploy-mode cluster \
            --name medium-stats \
            --conf spark.executor.instances=3 \
            --conf spark.kubernetes.authenticate.driver.serviceAccountName=spark \
            --conf spark.kubernetes.namespace=data-analytics \
            --conf spark.kubernetes.driver.request.cores=2 \
            --conf spark.driver.memory=1g \
            --conf spark.kubernetes.executor.request.cores=2 \
            --conf spark.executor.memory=1g \
            --packages org.apache.hadoop:hadoop-aws:3.3.4 \
            --conf spark.kubernetes.pyspark.pythonVersion=3 \
            --conf spark.kubernetes.container.image=greypavan/medium-manifests:medium-stats \
            --conf spark.kubernetes.driver.podTemplateFile=./driver.yaml \
            --conf spark.kubernetes.executor.podTemplateFile=./executor.yaml \
            --conf spark.kubernetes.container.image.pullPolicy=Always \
            https://raw.githubusercontent.com/pavan-kumar-99/medium-manifests/master/scripts/medium/medium-stats-spark-driver.py "${todaysDate}/" create