@@ -209,21 +209,22 @@ def split(dataset: pd.DataFrame):
        all_benign._is_copy = None
        v_benign_samples = all_benign.sample(frac=0.1, random_state=__random_state__)
        # A bit cryptic to remove the random samples from the benign dataset, but it works
-        all_benign = all_benign.append(v_benign_samples)
+        all_benign = pd.concat([all_benign, v_benign_samples], axis=0, ignore_index=True)
        all_benign.drop_duplicates(keep=False, inplace=True)
-        return_dataset = return_dataset.append(all_benign, ignore_index=True)
-        validation_dataset = validation_dataset.append(v_benign_samples, ignore_index=True)
+        return_dataset = pd.concat([return_dataset, all_benign], axis=0, ignore_index=True)
+        validation_dataset = pd.concat([validation_dataset, v_benign_samples], axis=0,
+                                       ignore_index=True)

        # Pathogenic
        all_pathogenic = dataset[dataset['binarized_label'] == 1]
        all_pathogenic._is_copy = None
        v_patho_samples = all_pathogenic.sample(frac=0.1, random_state=__random_state__)
        # Again a cryptic way to remove the randomly sampled pathogenic samples
-        all_pathogenic = all_pathogenic.append(v_patho_samples)
+        all_pathogenic = pd.concat([all_pathogenic, v_patho_samples], axis=0, ignore_index=True)
        all_pathogenic.drop_duplicates(keep=False, inplace=True)
-        return_dataset = return_dataset.append(all_pathogenic, ignore_index=True)
-        validation_dataset = validation_dataset.append(v_patho_samples, ignore_index=True)
-
+        return_dataset = pd.concat([return_dataset, all_pathogenic], axis=0, ignore_index=True)
+        validation_dataset = pd.concat([validation_dataset, v_patho_samples], axis=0,
+                                       ignore_index=True)
        return validation_dataset, return_dataset
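The "cryptic" removal step above works because every row drawn by `.sample()` appears twice once the sample is concatenated back onto the frame it came from, so `drop_duplicates(keep=False)` drops both copies and only the never-sampled rows survive (note that rows already duplicated in the input would also be dropped). A minimal sketch of the trick with toy data, not taken from this repository:

    import pandas as pd

    # Toy stand-in for the benign subset; column names are illustrative only.
    all_benign = pd.DataFrame({'chr': [1, 1, 2, 2, 3], 'pos': [10, 20, 30, 40, 50]})

    # Hold out a fraction of the rows for validation.
    v_benign_samples = all_benign.sample(frac=0.4, random_state=5)

    # Concatenating the sample back in duplicates exactly the sampled rows,
    # so keep=False removes both copies and leaves only the non-sampled rows.
    remaining = pd.concat([all_benign, v_benign_samples], ignore_index=True)
    remaining = remaining.drop_duplicates(keep=False)

    print(len(all_benign), len(v_benign_samples), len(remaining))  # 5 2 3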
@@ -247,7 +248,12 @@ def balance(self, dataset: pd.DataFrame):
            processed_consequence = self._process_consequence(
                pathogenic_dataset=selected_pathogenic, benign_dataset=selected_benign
            )
-            return_dataset = return_dataset.append(processed_consequence)
+            return_dataset = pd.concat(
+                [
+                    return_dataset,
+                    processed_consequence
+                ], axis=0, ignore_index=True
+            )
        return return_dataset

    def _process_consequence(self, pathogenic_dataset, benign_dataset):
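In `balance`, the result frame is grown with `pd.concat` on every loop iteration, which re-copies the accumulated frame each time. A common alternative is to collect the per-consequence pieces in a list and concatenate once after the loop; the sketch below assumes a hypothetical `frames` iterable rather than the actual consequence-selection logic of this class:

    import pandas as pd

    def concat_once(frames):
        # frames: an iterable of per-consequence DataFrames (hypothetical input).
        # Collecting first and concatenating once avoids re-copying the
        # accumulated frame on every iteration.
        return pd.concat(list(frames), axis=0, ignore_index=True)

    # Usage with two toy per-consequence frames:
    part_a = pd.DataFrame({'binarized_label': [0, 1]})
    part_b = pd.DataFrame({'binarized_label': [1, 1]})
    balanced = concat_once([part_a, part_b])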
@@ -267,10 +273,13 @@ def _process_consequence(self, pathogenic_dataset, benign_dataset):
            lower_bound = bins[ind]
            upper_bound = bins[ind + 1]
            sample_number = pathogenic_histogram[ind]
-            processed_bins = processed_bins.append(
-                self._process_bins(
-                    pathogenic_dataset, benign_dataset, upper_bound, lower_bound, sample_number
-                )
+            processed_bins = pd.concat(
+                [
+                    processed_bins,
+                    self._process_bins(
+                        pathogenic_dataset, benign_dataset, upper_bound, lower_bound, sample_number
+                    )
+                ], axis=0, ignore_index=True
            )
        return processed_bins
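The loop above indexes `bins[ind]`, `bins[ind + 1]` and `pathogenic_histogram[ind]`, which matches the `(counts, bin_edges)` pair returned by `numpy.histogram`. The histogram construction itself is outside this hunk, so the sketch below only assumes that shape and uses made-up values for the binning feature:

    import numpy as np
    import pandas as pd

    # Hypothetical pathogenic feature values; the real binning column is not shown here.
    pathogenic_values = pd.Series([0.0001, 0.0003, 0.002, 0.004, 0.01, 0.03])

    # numpy.histogram returns (counts, bin_edges): counts[i] rows fall within
    # [bin_edges[i], bin_edges[i + 1]) (the last bin also includes its right edge),
    # mirroring the loop over `ind` above.
    pathogenic_histogram, bins = np.histogram(pathogenic_values, bins=4)

    for ind in range(len(bins) - 1):
        lower_bound = bins[ind]
        upper_bound = bins[ind + 1]
        sample_number = pathogenic_histogram[ind]
        print(lower_bound, upper_bound, sample_number)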
@@ -295,7 +304,9 @@ def _process_bins(
            selected_benign.shape[0],
            random_state=__random_state__
        )
-        return return_benign.append(return_pathogenic, ignore_index=True)
+        return pd.concat(
+            [return_benign, return_pathogenic], axis=0, ignore_index=True
+        )

    @staticmethod
    def _get_variants_within_range(dataset, upper_bound, lower_bound):
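All four hunks make the same mechanical substitution: `DataFrame.append`, which was deprecated in pandas 1.4 and removed in pandas 2.0, becomes the equivalent `pd.concat` call. The one-to-one mapping on toy frames:

    import pandas as pd

    df_a = pd.DataFrame({'x': [1, 2]})
    df_b = pd.DataFrame({'x': [3]})

    # Pre-pandas-2.0 form (no longer available):
    #     combined = df_a.append(df_b, ignore_index=True)
    # pd.concat equivalent used throughout this diff:
    combined = pd.concat([df_a, df_b], axis=0, ignore_index=True)
    print(combined)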