-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHW_ClusterAnalysis_#16.4_#16.9_SanjayKumarPattanayak_Dec9_2017.sas
180 lines (130 loc) · 4.01 KB
/
HW_ClusterAnalysis_#16.4_#16.9_SanjayKumarPattanayak_Dec9_2017.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
/* Cluster Analysis homework: Problems 16.4 and 16.9 */

/* Open the PDF destination; it is closed by the last statement of the program. */
ods pdf file='C:\Users\sanja\Desktop\SAS_KashaDehnad\SAS_data\HW_Cluster Analysis_SanjayKPattanayak_#16.4, #16.9.pdf';

/* Libref pointing at the course data folder. */
libname assign "C:\Users\sanja\Desktop\SAS_KashaDehnad\SAS_data";

title "Cluster Analysis_Hierarchical and K-means clustering with K = 2 Problem 16.4";

/* No DATA or PROC step is open at this point, so this RUN has nothing to submit. */
run;
/* Problem 16.4 input: eight cases, each with a case number and two
   measurements (X1, X2), read from the in-stream records below. */
data clusteranalysis;
    input Cases X1 X2;
    cards;
1 11 10
2 8 10
3 9 11
4 5 4
5 3 4
6 8 5
7 11 11
8 10 12
;
run;
/* Plot X2 against X1 for the raw cases so the number of clusters can be judged by eye. */
title "Scatter plot for visually estimating the no. of clusters required Problem 16.4";
proc sgplot data=clusteranalysis;
    scatter y=X2 x=X1;
run;
/* Hierarchical clustering of the cases on X1 and X2 with the centroid
   method. The tree is written to clusteranalysistree, which the
   following PROC TREE step reads. */
title "The CLUSTER Procedure-Centroid Hierarchical Cluster Analysis Problem 16.4";
proc cluster data=clusteranalysis
             method=centroid
             nonorm
             noeigen
             rsquare
             outtree=clusteranalysistree;
    var X1 X2;
    id cases;
run;
quit;
/* Cut the tree from PROC CLUSTER at two clusters. The assignments,
   together with the copied X1 and X2 values, go to clusteranalysisout. */
title "The TREE Procedure-Centroid Hierarchical Cluster Analysis Problem 16.4";
proc tree data=clusteranalysistree
          nclusters=2
          out=clusteranalysisout;
    copy X1 X2;
    id cases;
run;
quit;
/* List the members of each hierarchical cluster. The data set is
   sorted by cluster first because the PRINT step uses BY cluster. */
title "Cluster distribution-Hierarchical Cluster Analysis Problem 16.4";

proc sort data=clusteranalysisout;
    by cluster;
run;

proc print data=clusteranalysisout;
    var cases X1 X2;
    by cluster;
run;
quit;
/* Scatter of X2 against X1 for the PROC TREE output data set, with
   markers grouped by the assigned cluster.
   The title spelling is corrected ("heirarchical" -> "hierarchical")
   because the title text is printed in the report. */
title "Scatter plot showing clusters formed by hierarchical method Problem 16.4";
proc sgplot data=clusteranalysisout;
    scatter x=X1 y=X2 / group=cluster;
run;
quit;
/* K-means clustering of the same cases on X1 and X2 with at most two
   clusters and at most 100 iterations. The cluster assignment of each
   case is saved in clusterKmeans. */
title "Clustering using K-means - proc fastclus Problem 16.4";
proc fastclus data=clusteranalysis
              out=clusterKmeans
              maxclusters=2
              maxiter=100
              list;
    var X1 X2;
    id cases;
run;
/* Scatter of X2 against X1 for the FASTCLUS output, with markers
   grouped by the assigned cluster. */
title "Plotting the clusters formed by K-means method Problem 16.4";
proc sgplot data=clusterKmeans;
    scatter x=X1 y=X2 / group=cluster;
run;
quit;
/* List the members of each K-means cluster. The data set is sorted by
   cluster first because the PRINT step uses BY cluster. */
proc sort data=clusterKmeans;
    by cluster;
run;

title "Cluster Distribution - K-means method Problem 16.4";
proc print data=clusterKmeans;
    var cases X1 X2;
    by cluster;
run;
quit;
/**** Question 16.9 ****/
title "Cluster Analysis_data set from the family lung function data_Problem 16.9";

/* Assign the course data folder as a read-only library.
   READONLY is the documented value of the ACCESS= option; the original
   spelling "read" is replaced to avoid relying on an undocumented form.
   LIBNAME is a global statement, so no RUN is needed after it. */
libname sas_data "C:\Users\sanja\Desktop\SAS_KashaDehnad\SAS_data"
    access=readonly;

/* Copy the lung data set into WORK so later steps can refer to it as "lung". */
proc copy in=sas_data out=work;
    select lung;
run;
/* Build lungcluster for Problem 16.9.
   For each family member (mother, father, oldest child) the five
   member-specific columns of lung are kept, tagged with a numeric
   Class code, and renamed to the common names age, height, weight,
   FVC and FEV1. The three data sets are then stacked.
   The RENAME statement is applied in the same DATA step that creates
   each data set, so no second pass is needed just to rename. */

/* Mothers: Class = 1 */
data lung_mother;
    set lung (keep=age_mother height_mother weight_mother FVC_mother FEV1_mother);
    Class=1;
    rename age_mother=age
           height_mother=height
           weight_mother=weight
           FVC_mother=FVC
           FEV1_mother=FEV1;
run;

/* Fathers: Class = 2 */
data lung_father;
    set lung (keep=age_father height_father weight_father FVC_father FEV1_father);
    Class=2;
    rename age_father=age
           height_father=height
           weight_father=weight
           FVC_father=FVC
           FEV1_father=FEV1;
run;

/* Oldest children: Class = 3 */
data lung_oldch;
    set lung (keep=age_oldest_child height_oldest_child weight_oldest_child FVC_oldest_child FEV1_oldest_child);
    Class=3;
    rename age_oldest_child=age
           height_oldest_child=height
           weight_oldest_child=weight
           FVC_oldest_child=FVC
           FEV1_oldest_child=FEV1;
run;

/* Stack the three groups: start from the mothers, then append the
   fathers and the oldest children. */
data lungcluster;
    set lung_mother;
run;

proc append base=lungcluster data=lung_father;
run;

proc append base=lungcluster data=lung_oldch;
run;
/* Rescale the five measurements to mean 0 and standard deviation 1
   and write the result to lungcluster_standard. Class is not in the
   VAR list. */
proc standard data=lungcluster
              out=lungcluster_standard
              mean=0
              std=1;
    var age height weight FVC FEV1;
run;
/* K-means on the standardized lung data with at most three clusters;
   assignments are saved in lungcluster_standard1. */
title "K-means clustering_First with 3 Clusters_Problem 16.9";
proc fastclus data=lungcluster_standard
              out=lungcluster_standard1
              maxclusters=3
              maxiter=100
              list;
    var age height weight FVC FEV1;
    id class;
run;

/* Bar chart of cluster counts, grouped by Class (1 = mother,
   2 = father, 3 = oldest child). */
title 'Cluster Analysis for k=3 of Lung Function';
proc sgplot data=lungcluster_standard1;
    vbar cluster / group=class;
run;
/* K-means on the standardized lung data with at most two clusters;
   assignments are saved in lungcluster_standard2. */
title "K-means clustering_Second with 2 Clusters_Problem 16.9";
proc fastclus data=lungcluster_standard
              out=lungcluster_standard2
              maxclusters=2
              maxiter=100
              list;
    var age height weight FVC FEV1;
    id class;
run;

/* Bar chart of cluster counts, grouped by Class (1 = mother,
   2 = father, 3 = oldest child). */
title 'Cluster Analysis for k=2 of Lung Function';
proc sgplot data=lungcluster_standard2;
    vbar cluster / group=class;
run;

/* Close the PDF destination opened at the top of the program. */
ods pdf close;