-
Notifications
You must be signed in to change notification settings - Fork 1
/
discount.sh
executable file
·35 lines (26 loc) · 1.45 KB
/
discount.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash
# Command-line run script for the Discount k-mer counter.
# This script (as well as the similar scripts in this directory) can be
# symlinked to somewhere in $PATH for convenient use.

# Spark master URL.
# Run everything in one process (don't forget to adjust Spark's driver memory).
MASTER='local[*]'
# Full cluster running independently:
#MASTER=spark://localhost:7077

# Set this variable to the location of your Spark distribution.
SPARK=/path/to/spark-x.x.x-hadoop

# DISCOUNT_HOME is the absolute directory that holds the real script file.
# The script may be started directly or through one or more symlinks, so the
# link chain is followed first. A plain `readlink` prints nothing for a
# non-symlink, which would otherwise leave DISCOUNT_HOME pointing at the
# current directory.
discount_script="${BASH_SOURCE[0]}"
while [[ -L "$discount_script" ]]; do
  # CDPATH is cleared so that `cd` cannot print anything into the substitution.
  discount_link_dir="$(CDPATH= cd -P -- "$(dirname -- "$discount_script")" && pwd)"
  discount_script="$(readlink -- "$discount_script")"
  # A relative link target is relative to the directory containing the link.
  if [[ "$discount_script" != /* ]]; then
    discount_script="$discount_link_dir/$discount_script"
  fi
done
DISCOUNT_HOME="$(CDPATH= cd -P -- "$(dirname -- "$discount_script")" && pwd)"

# For standalone mode (one process), it is helpful to provide as much memory
# as possible.
MEMORY=spark.driver.memory=16g

# Scratch space location. This has a big effect on performance; it should
# ideally be a fast SSD or similar.
LOCAL_DIR="spark.local.dir=/tmp"

# Max size of input splits in bytes. A smaller number reduces memory usage but
# increases the number of partitions for the first stage. If this variable is
# unset, Spark's default of 128 MB will be used.
#SPLIT="spark.hadoop.mapreduce.input.fileinputformat.split.maxsize=$((64 * 1024 * 1024))"
# Launch Discount through spark-submit, replacing this shell process (exec).
# On Windows: Change bin/spark-submit to bin/spark-submit.cmd.
#
# The options are collected in an array and every expansion is quoted, so
# values containing spaces or glob characters (such as the brackets in
# local[*]) are passed to spark-submit unchanged.
submit_opts=(
  --driver-java-options "-Dlog4j.configuration=file:$DISCOUNT_HOME/log4j.properties"
  --conf spark.driver.maxResultSize=2g
  --conf "$MEMORY"
  --conf "$LOCAL_DIR"
  --master "$MASTER"
)

# Optional input split size: only passed on when SPLIT has been set.
if [[ -n "${SPLIT:-}" ]]; then
  submit_opts+=(--conf "$SPLIT")
fi

# "$@" forwards each user argument as its own word; an unquoted $* would
# re-split arguments on whitespace and expand glob characters in them.
exec "$SPARK/bin/spark-submit" \
  "${submit_opts[@]}" \
  --class com.jnpersson.discount.spark.Discount \
  "$DISCOUNT_HOME/target/scala-2.12/Discount-assembly-3.1.0.jar" "$@"