-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRandom Forest.py
84 lines (69 loc) · 2.82 KB
/
Random Forest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import export_graphviz
import graphviz
# Read the CSV file into a pandas DataFrame
df = pd.read_csv('Data.csv')
# Merge the first three columns using string concatenation
df['Material'] = df[['Std', 'Material', 'Heat treatment']].fillna('').agg(' '.join, axis=1)
# Remove any string values from Sy column
df['Sy'] = df['Sy'].str.replace(' max', '').astype(int)
# Drop the unnecessary columns
df.drop(['Std','ID', 'Heat treatment', 'Desc','A5','Bhn','pH','Desc','HV'], axis=1, inplace=True)
# Define the rating function
def get_rating(row):
if (438.3 <= row['Su'] <= 535.7 and
318.6 <= row['Sy'] <= 389.4 and
204930 <= row['E'] <= 209070 and
71100 <= row['G'] <= 86900 and
0.285 <= row['mu'] <= 0.315 and
7467 <= row['Ro'] <= 8253):
return 5
elif (389.6 <= row['Su'] <= 584.4 and
283.2 <= row['Sy'] <= 424.8 and
202860 <= row['E'] <= 211140 and
63200 <= row['G'] <= 94800 and
0.27 <= row['mu'] <= 0.33 and
7074 <= row['Ro'] <= 8646):
return 4
elif (340.9 <= row['Su'] <= 633.1 and
247.8 <= row['Sy'] <= 460.2 and
200790 <= row['E'] <= 213210 and
55300 <= row['G'] <= 102700 and
0.255 <= row['mu'] <= 0.345 and
6681 <= row['Ro'] <= 9039):
return 3
elif (292.2 <= row['Su'] <= 681.8 and
212.4 <= row['Sy'] <= 495.6 and
198720 <= row['E'] <= 215280 and
47400 <= row['G'] <= 110600 and
0.24 <= row['mu'] <= 0.36 and
6288 <= row['Ro'] <= 9432):
return 2
else:
return 1
# Calculate the rating for each row
df['rating'] = df.apply(get_rating, axis=1)
# Save the results to a new file data.csv
df.to_csv('Data Rating.csv', index=False)
# Separate the features and label columns
X = df.iloc[:, 1:-1].values
y = df.iloc[:, -1].values
# Train a random forest classifier with 100 trees
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X, y)
# Predict the rating of a new material
new_material = np.array([440, 325, 207000, 79000, 0.3, 7860]).reshape(1, -1)
new_rating = rf.predict(new_material)
print("Predicted rating of the new material:", new_rating)
# Visualize the decision trees in the random forest
dot_data = export_graphviz(rf.estimators_[0], out_file=None,
feature_names=['Su', 'Sy', 'E', 'G', 'mu', 'Ro'],
class_names=['1', '2', '3', '4', '5'], filled=True, rounded=True)
# graph = graphviz.Source(dot_data)
# graph.render('decision_tree') # Save the visualization as a pdf file
# Render the graph as a PNG image
graph = graphviz.Source(dot_data, format='png')
graph.render('decision_tree', view=True)