forked from holdenk/learning-spark-examples
-
Notifications
You must be signed in to change notification settings - Fork 0
/
setup-project
executable file
·68 lines (67 loc) · 2.48 KB
/
setup-project
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
#!/usr/bin/env bash
# Bootstrap section: turn on tracing and strict error handling, install the
# download helpers, register the DataStax apt repository, then start
# `apt-get update` and the Spark / Kafka / Flume tarball downloads as
# background jobs. Each job's PID is stored in a *_PID variable so later
# sections can `wait` on it.
set -x          # trace every command as it runs
set -e          # abort on the first failing command
set -o pipefail # a pipeline fails if any of its stages fails
sudo apt-get install -y axel time
echo "Downloading misc tools"
# The directory has to exist before the repo list is written into it;
# otherwise `tee` fails and, with pipefail + set -e, the script aborts.
sudo mkdir -p /etc/apt/sources.list.d/
# Remove any previous list first: `tee -a` appends, so reruns would
# otherwise accumulate duplicate repository entries.
sudo rm -f /etc/apt/sources.list.d/cassandra.sources.list
echo "deb http://debian.datastax.com/community stable main" | sudo tee -a /etc/apt/sources.list.d/cassandra.sources.list
curl -L http://debian.datastax.com/debian/repo_key | sudo apt-key add -
# Background jobs: stdout of each goes to a log file in the current directory.
sudo apt-get update > aptlog &
APT_GET_UPDATE_PID=$!
axel http://d3kbcqa49mib13.cloudfront.net/spark-1.3.1-bin-hadoop1.tgz > sparkdl &
SPARK_DL_PID=$!
axel http://mirrors.ibiblio.org/apache/kafka/0.8.1.1/kafka_2.9.2-0.8.1.1.tgz > kafkadl &
KAFKA_DL_PID=$!
axel http://mirror.cogentco.com/pub/apache/flume/1.5.0.1/apache-flume-1.5.0.1-bin.tar.gz > flumedl &
FLUME_DL_PID=$!
# Spark + protobuf section: install urllib3, unpack the Spark tarball once
# its background download has finished, then install protobuf-compiler once
# the background `apt-get update` has finished.

# No-op when the directory already exists (the apt repo list was written
# into it earlier in the script); kept as a safety net.
sudo mkdir -p /etc/apt/sources.list.d/
echo "install urllib3"
# Runs before the wait below so it overlaps with the Spark download.
sudo pip install urllib3
# Wait exactly once for the Spark tarball. Under `set -e` a failed
# download stops the script here instead of at the tar step.
wait "$SPARK_DL_PID"
tar -xf spark-1.3.1-bin-hadoop1.tgz
# The install below needs the package index refreshed by the background update.
wait "$APT_GET_UPDATE_PID"
echo "Installing protobuf"
# -y: do not stop at the confirmation prompt during an unattended run.
sudo apt-get install -y protobuf-compiler
# Under `set -e` this line is only reached on success, so it prints 0;
# kept as a marker in the log output.
echo $?
# Set up cassandra
# Starts the cassandra service and creates /user/hive owned by the
# invoking user.
echo "Waiting for apt-get update to finish"
# This PID was already waited on before the protobuf install, so a
# non-zero status here is only logged rather than treated as fatal.
wait "$APT_GET_UPDATE_PID" || echo "apt-get update finished early"
echo "Setting up dsc (cassandra)"
sleep 1
#sudo apt-get -y --force-yes remove cassandra cassandra-tools
#sudo rm -rf /etc/security/limits.d/cassandra.conf || echo "No cassandra security conf"
#yes | sudo apt-get -y --force-yes install dsc21 > dscinstall.log
#yes | sudo apt-get -y --force-yes install cassandra-tools > ctoolsinstall.log
# NOTE(review): the package install lines above are commented out, so this
# start presumably relies on cassandra already being installed — confirm.
echo "Starting cassandra"
sudo /etc/init.d/cassandra start
echo $?
echo "set up hive directories"
# Assign and export separately: `export VAR=$(cmd)` would hide a failure
# of the command substitution from `set -e`.
IAM=$(whoami)
export IAM
# Two separate statements instead of `mkdir && chown`: inside an && list
# only the last command triggers `set -e`, so a failing mkdir would have
# been ignored silently.
sudo mkdir -p /user/hive
sudo chown -R "$IAM" /user/hive
echo "done with setup"
# Set up kafka
# Unpacks the Kafka tarball, starts zookeeper and a Kafka broker as
# background jobs, and creates the "pandas" and "logs" topics.
echo "Setting up kafka"
# A non-zero wait status is only logged; a missing or truncated tarball
# still stops the script at the tar step below because of `set -e`.
wait "$KAFKA_DL_PID" || echo "Kafka DL finished early"
tar -xzf kafka_2.9.2-0.8.1.1.tgz
cd kafka_2.9.2-0.8.1.1
echo "Starting zookeeper"
./bin/zookeeper-server-start.sh config/zookeeper.properties &
echo "Starting kafka"
# Fixed pauses, presumably to let zookeeper and then the broker come up;
# there is no readiness check.
sleep 5
./bin/kafka-server-start.sh config/server.properties &
sleep 5
# Create the pandas and logs topics, one partition and one replica each.
# `--partitions` is the documented option name.
for topic in pandas logs; do
  ./bin/kafka-topics.sh --zookeeper localhost:2181 --topic "$topic" --partitions 1 --replication-factor 1 --create
done
cd ..
# set up flume
# Unpacks the Flume tarball and launches the "panda" agent as a background
# job using ../files/flumeconf.cfg, then detaches it from this shell's job
# table with disown.
flume_dir="apache-flume-1.5.0.1-bin"
wait "$FLUME_DL_PID" || echo "Flume DL finished early"
echo "Setting up flume"
tar -xf "${flume_dir}.tar.gz"
cd "${flume_dir}"
./bin/flume-ng agent -n panda --conf-file ../files/flumeconf.cfg &
disown "$!"
cd ..
# Prints the exit status of the `cd ..` just above.
echo "$?"