@@ -62,7 +62,7 @@ def input_data() -> pd.DataFrame:
6262def data (input_data : pd .DataFrame ) -> DataContainer :
6363 data = DataContainer ({"is_train" : True })
6464 data .columns_to_ignore_for_training = []
65- data .train = input_data
65+ data .X_train = input_data
6666 return data
6767
6868
@@ -72,7 +72,7 @@ def test_skipping_with_no_parameters(data: DataContainer):
7272 result = calculate_features_step .execute (data )
7373
7474 assert isinstance (result , DataContainer )
75- assert result .train .equals (data .train )
75+ assert result .X_train .equals (data .X_train )
7676
7777
7878def test_feature_names (data : DataContainer ):
@@ -87,22 +87,22 @@ def test_feature_names(data: DataContainer):
8787 result = calculate_features_step .execute (data )
8888
8989 assert isinstance (result , DataContainer )
90- assert "creation_date_year" in result .train .columns
91- assert "creation_date_month" in result .train .columns
92- assert "creation_date_day" in result .train .columns
93- assert "creation_date_hour" in result .train .columns
94- assert "creation_date_minute" in result .train .columns
95- assert "creation_date_second" in result .train .columns
96- assert "creation_date_weekday" in result .train .columns
97- assert "creation_date_dayofyear" in result .train .columns
98- assert "deletion_date_year" in result .train .columns
99- assert "deletion_date_month" in result .train .columns
100- assert "deletion_date_day" in result .train .columns
101- assert "deletion_date_hour" in result .train .columns
102- assert "deletion_date_minute" in result .train .columns
103- assert "deletion_date_second" in result .train .columns
104- assert "deletion_date_weekday" in result .train .columns
105- assert "deletion_date_dayofyear" in result .train .columns
90+ assert "creation_date_year" in result .X_train .columns
91+ assert "creation_date_month" in result .X_train .columns
92+ assert "creation_date_day" in result .X_train .columns
93+ assert "creation_date_hour" in result .X_train .columns
94+ assert "creation_date_minute" in result .X_train .columns
95+ assert "creation_date_second" in result .X_train .columns
96+ assert "creation_date_weekday" in result .X_train .columns
97+ assert "creation_date_dayofyear" in result .X_train .columns
98+ assert "deletion_date_year" in result .X_train .columns
99+ assert "deletion_date_month" in result .X_train .columns
100+ assert "deletion_date_day" in result .X_train .columns
101+ assert "deletion_date_hour" in result .X_train .columns
102+ assert "deletion_date_minute" in result .X_train .columns
103+ assert "deletion_date_second" in result .X_train .columns
104+ assert "deletion_date_weekday" in result .X_train .columns
105+ assert "deletion_date_dayofyear" in result .X_train .columns
106106
107107
108108def test_date_columns_are_ignored_for_training (data : DataContainer ):
@@ -117,8 +117,8 @@ def test_date_columns_are_ignored_for_training(data: DataContainer):
117117 result = calculate_features_step .execute (data )
118118
119119 assert isinstance (result , DataContainer )
120- assert "creation_date" in result .columns_to_ignore_for_training
121- assert "deletion_date" in result .columns_to_ignore_for_training
120+ assert "creation_date" not in result .X_train . columns
121+ assert "deletion_date" not in result .X_train . columns
122122
123123
124124def test_output_dtypes (data : DataContainer ):
@@ -133,14 +133,14 @@ def test_output_dtypes(data: DataContainer):
133133 result = calculate_features_step .execute (data )
134134
135135 assert isinstance (result , DataContainer )
136- assert result .train ["creation_date_year" ].dtype == np .dtype ("uint16" )
137- assert result .train ["creation_date_month" ].dtype == np .dtype ("uint8" )
138- assert result .train ["creation_date_day" ].dtype == np .dtype ("uint8" )
139- assert result .train ["creation_date_hour" ].dtype == np .dtype ("uint8" )
140- assert result .train ["creation_date_minute" ].dtype == np .dtype ("uint8" )
141- assert result .train ["creation_date_second" ].dtype == np .dtype ("uint8" )
142- assert result .train ["creation_date_weekday" ].dtype == np .dtype ("uint8" )
143- assert result .train ["creation_date_dayofyear" ].dtype == np .dtype ("uint16" )
136+ assert result .X_train ["creation_date_year" ].dtype == np .dtype ("uint16" )
137+ assert result .X_train ["creation_date_month" ].dtype == np .dtype ("uint8" )
138+ assert result .X_train ["creation_date_day" ].dtype == np .dtype ("uint8" )
139+ assert result .X_train ["creation_date_hour" ].dtype == np .dtype ("uint8" )
140+ assert result .X_train ["creation_date_minute" ].dtype == np .dtype ("uint8" )
141+ assert result .X_train ["creation_date_second" ].dtype == np .dtype ("uint8" )
142+ assert result .X_train ["creation_date_weekday" ].dtype == np .dtype ("uint8" )
143+ assert result .X_train ["creation_date_dayofyear" ].dtype == np .dtype ("uint16" )
144144
145145
146146def test_output_values (data : DataContainer ):
@@ -155,28 +155,28 @@ def test_output_values(data: DataContainer):
155155 result = calculate_features_step .execute (data )
156156
157157 assert isinstance (result , DataContainer )
158- assert result .train ["creation_date_year" ].equals (
158+ assert result .X_train ["creation_date_year" ].equals (
159159 pd .Series ([2023 , 2023 , 2023 , 2023 , 2023 , 2023 , 2024 , 2024 ], dtype = "uint16" )
160160 )
161- assert result .train ["creation_date_month" ].equals (
161+ assert result .X_train ["creation_date_month" ].equals (
162162 pd .Series ([1 , 1 , 1 , 1 , 1 , 11 , 2 , 3 ], dtype = "uint8" )
163163 )
164- assert result .train ["creation_date_day" ].equals (
164+ assert result .X_train ["creation_date_day" ].equals (
165165 pd .Series ([1 , 2 , 3 , 4 , 5 , 1 , 28 , 28 ], dtype = "uint8" )
166166 )
167- assert result .train ["creation_date_hour" ].equals (
167+ assert result .X_train ["creation_date_hour" ].equals (
168168 pd .Series ([0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ], dtype = "uint8" )
169169 )
170- assert result .train ["creation_date_minute" ].equals (
170+ assert result .X_train ["creation_date_minute" ].equals (
171171 pd .Series ([0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ], dtype = "uint8" )
172172 )
173- assert result .train ["creation_date_second" ].equals (
173+ assert result .X_train ["creation_date_second" ].equals (
174174 pd .Series ([0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ], dtype = "uint8" )
175175 )
176- assert result .train ["creation_date_weekday" ].equals (
176+ assert result .X_train ["creation_date_weekday" ].equals (
177177 pd .Series ([6 , 0 , 1 , 2 , 3 , 2 , 2 , 3 ], dtype = "uint8" )
178178 )
179- assert result .train ["creation_date_dayofyear" ].equals (
179+ assert result .X_train ["creation_date_dayofyear" ].equals (
180180 pd .Series ([1 , 2 , 3 , 4 , 5 , 305 , 59 , 88 ], dtype = "uint16" )
181181 )
182182
@@ -214,7 +214,7 @@ def test_init_with_unsupported_features():
214214
215215def test_execute_with_prediction (data : DataContainer ):
216216 data .is_train = False
217- data .flow = data .train .copy ()
217+ data .X_prediction = data .X_train .copy ()
218218
219219 datetime_columns = ["creation_date" ]
220220 features = ["year" , "month" , "day" ]
@@ -226,6 +226,6 @@ def test_execute_with_prediction(data: DataContainer):
226226 result = calculate_features_step .execute (data )
227227
228228 assert isinstance (result , DataContainer )
229- assert "creation_date_year" in result .flow .columns
230- assert "creation_date_month" in result .flow .columns
231- assert "creation_date_day" in result .flow .columns
229+ assert "creation_date_year" in result .X_prediction .columns
230+ assert "creation_date_month" in result .X_prediction .columns
231+ assert "creation_date_day" in result .X_prediction .columns
0 commit comments