forked from Open-EO/openeo-processes
-
Notifications
You must be signed in to change notification settings - Fork 4
/
fit_regr_random_forest.json
116 lines (114 loc) · 3.89 KB
/
fit_regr_random_forest.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
{
"categories":[
"machine learning"
],
"deprecated":false,
"description":"Executes the fit of a random forest regression based on training data. The process does not include a separate split of the data in test, validation and training data. The Random Forest regression model is based on the approach by Breiman (2001).",
"exceptions":{
},
"experimental":true,
"id":"fit_regr_random_forest",
"links":[
{
"href":"https://doi.org/10.1023/A:1010933404324",
"rel":"about",
"title":"Breiman (2001): Random Forests",
"type":"text/html"
}
],
"parameters":[
{
"deprecated":false,
"description":"The data for the regression model as a vector data cube. The data needs to include the features (vectors) of the target input variable as well as the target variable.",
"experimental":false,
"name":"data",
"optional":false,
"schema":{
"subtype":"vector-cube",
"type":"object"
}
},
{
"deprecated":false,
"description":"Specifies how many split variables will be used at a node.\n\nThe following options are available:\n\n- *integer*: The given number of variables are considered for each split.\n- `all`: All variables are considered for each split.\n- `log2`: The logarithm with base 2 of the number of variables are considered for each split.\n- `onethird`: A third of the number of variables are considered for each split. This is often the default for regression.\n- `sqrt`: The square root of the number of variables are considered for each split.",
"experimental":false,
"name":"max_variables",
"optional":false,
"schema":[
{
"minimum":1,
"type":"integer"
},
{
"enum":[
"all",
"log2",
"onethird",
"sqrt"
],
"type":"string"
}
]
},
{
"default":100,
"deprecated":false,
"description":"The number of trees build within the Random Forest regression.",
"experimental":false,
"name":"num_trees",
"optional":true,
"schema":{
"minimum":1,
"type":"integer"
}
},
{
"deprecated":false,
"description":"A randomization seed to use for the random sampling in training. If not given or `null`, no seed is used and results may differ on subsequent use.",
"experimental":false,
"name":"seed",
"optional":true,
"schema":{
"type":[
"integer",
"null"
]
}
},
{
"deprecated":false,
"description":"List of columns in the predictors vector-cube that is used for the model training.",
"experimental":false,
"name":"predictors_vars",
"optional":false,
"schema":{
"examples":[
"B04",
"B08"
],
"type":"array"
}
},
{
"deprecated":false,
"description":"Column in the target vector-cube that is used for the model training.",
"experimental":false,
"name":"target_var",
"optional":false,
"schema":{
"examples":"target_canopy_cover",
"type":"string"
}
}
],
"process_graph":{
},
"returns":{
"description":"A model object that can be saved with ``save_ml_model()`` and restored with ``load_ml_model()``.",
"schema":{
"subtype":"ml-model",
"type":"object"
}
},
"summary":"Train a random forest regression model"
}