Installation
=============
--Hadoop:
--Install Java (OpenJDK 7 is assumed throughout)
--Set up passwordless SSH to localhost
sudo apt-get install ssh
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
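-- Quick check that passwordless SSH works (it should log in without a password prompt):
ssh localhost
exit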
-- vi /etc/hosts (map the machine's hostname, here kshitiz, to the loopback address)
127.0.0.1 localhost
127.0.0.1 kshitiz
# The following lines are desirable for IPv6 capable hosts
::1 ip6-localhost ip6-loopback
fe00::0 ip6-localnet
ff00::0 ip6-mcastprefix
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
--Create a directory for Hadoop to store its data locally
mkdir -p /home/kshitiz/hadoop/data
chmod 777 /home/kshitiz/hadoop/data
-- Add these variables to ~/.bashrc
#HADOOP VARIABLES START
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
export HADOOP_HOME=/home/kshitiz/hadoop
export PIG_HOME=/home/kshitiz/pig
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
export PATH=$PATH:$PIG_HOME/bin
export HADOOP_CLASSPATH=/home/kshitiz/workspace/hadoop-executables
#HADOOP VARIABLES END
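-- Reload ~/.bashrc so the variables take effect in the current shell:
source ~/.bashrc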
--Download Hadoop and extract it to /home/kshitiz/hadoop (the HADOOP_HOME set above).
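-- One way to fetch and unpack it; 2.6.0 is an assumption here, chosen to match the Maven dependency versions used later:
cd /home/kshitiz
wget https://archive.apache.org/dist/hadoop/common/hadoop-2.6.0/hadoop-2.6.0.tar.gz
tar -xzf hadoop-2.6.0.tar.gz
mv hadoop-2.6.0/* hadoop/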
--Define the following parameter in etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64
--Define the following parameters in etc/hadoop/core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:8020</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/kshitiz/hadoop/data</value>
  </property>
</configuration>
--Define the following parameter in etc/hadoop/hdfs-site.xml (a replication factor of 1 suits a single-node setup)
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
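-- With the config in place, format the NameNode once and start HDFS (format only on first setup; it erases HDFS data). jps should then list NameNode, DataNode and SecondaryNameNode:
hdfs namenode -format
start-dfs.sh
jps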
--PIG: https://archanaschangale.wordpress.com/2013/10/14/pig-installation-on-ubuntu/
--Word Count Example Using Pig Script
lines = LOAD '/user/hadoop/HDFS_File.txt' AS (line:chararray);
words = FOREACH lines GENERATE FLATTEN(TOKENIZE(line)) as word;
grouped = GROUP words BY word;
wordcount = FOREACH grouped GENERATE group, COUNT(words);
DUMP wordcount;
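-- To run it, save the statements to a file (wordcount.pig is a hypothetical name) and launch Pig in MapReduce mode:
pig -x mapreduce wordcount.pig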
Eclipse
========
-- Install Eclipse and Subclipse
Installing JavaHL for Subclipse/Eclipse on Ubuntu
http://islandlinux.org/howto/installing-javahl-subclipseeclipse-ubuntu
-- Create a Spring Boot starter project and add the following dependencies to pom.xml. Also set an environment variable HADOOP_HOME=/home/kshitiz/hadoop
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
  <version>2.6.0</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-hdfs</artifactId>
  <version>2.6.0</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-auth</artifactId>
  <version>2.6.0</version>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
  <version>2.6.0</version>
</dependency>
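-- Assuming the project builds to target/hadoop-1.0.jar (hypothetical coordinates, matching the jar path used in the run command below), package it with Maven:
cd /home/kshitiz/workspace/hadoop
mvn clean package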
HDFS commands and running jar
=============================
-- If HDFS misbehaves, reformat the NameNode and restart HDFS (warning: formatting erases all HDFS data)
hdfs namenode -format; stop-dfs.sh; start-dfs.sh
-- Check the processes and the ports used
sudo netstat -plten | grep java
--Run wordCount.jar
hdfs dfs -mkdir hdfs://kshitiz/kshitiz
hdfs dfs -mkdir hdfs://kshitiz/kshitiz/input
hdfs dfs -put /home/kshitiz/workspace/input/java-wc.txt hdfs://kshitiz/kshitiz/input/java-wc.txt
hadoop jar /home/kshitiz/workspace/hadoop/target/hadoop-1.0.jar hdfs://kshitiz/kshitiz/input/ hdfs://kshitiz/kshitiz/output1
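-- Once the job completes, inspect the output; MapReduce writes part files and a _SUCCESS marker into the output directory:
hdfs dfs -ls hdfs://kshitiz/kshitiz/output1
hdfs dfs -cat hdfs://kshitiz/kshitiz/output1/part-*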