66from eds_scikit .utils .checks import MissingConceptError , algo_checker , concept_checker
77from eds_scikit .utils .datetime_helpers import substract_datetime
88from eds_scikit .utils .framework import get_framework
9+ from eds_scikit .utils .sort_values_first import sort_values_first
910from eds_scikit .utils .typing import DataFrame
1011
1112
@@ -73,10 +74,10 @@ def cleaning(
7374@concept_checker (concepts = ["STAY_ID" , "CONTIGUOUS_STAY_ID" ])
7475def merge_visits (
7576 vo : DataFrame ,
77+ open_stay_end_datetime : Optional [datetime ],
7678 remove_deleted_visits : bool = True ,
7779 long_stay_threshold : timedelta = timedelta (days = 365 ),
7880 long_stay_filtering : Optional [str ] = "all" ,
79- open_stay_end_datetime : Optional [datetime ] = None ,
8081 max_timedelta : timedelta = timedelta (days = 2 ),
8182 merge_different_hospitals : bool = False ,
8283 merge_different_source_values : Union [bool , List [str ]] = ["hospitalisés" , "urgence" ],
@@ -108,6 +109,11 @@ def merge_visits(
108109 - care_site_id (if ``merge_different_hospitals == True``)
109110 - visit_source_value (if ``merge_different_source_values != False``)
110111 - row_status_source_value (if ``remove_deleted_visits= True``)
112+ open_stay_end_datetime: Optional[datetime]
113+ Datetime to use in order to fill the `visit_end_datetime` of open visits. This is necessary in
114+ order to compute stay duration and to filter long stays.
115+ You might provide the extraction date of your data or datetime.now()
116+ (be aware it will produce non-deterministic outputs).
111117 remove_deleted_visits: bool
112118 Whether to remove deleted visits from the merging procedure.
113119 Deleted visits are extracted via the `row_status_source_value` column
@@ -126,10 +132,6 @@ def merge_visits(
126132 Long stays are determined by the ``long_stay_threshold`` value.
127133 long_stay_threshold : timedelta
128134 Minimum visit duration value to consider a visit as candidate for "long visits filtering"
129- open_stay_end_datetime: Optional[datetime]
130- Datetime to use in order to fill the `visit_end_datetime` of open visits. This is necessary in
131- order to compute stay duration and to filter long stays. If not provided `datetime.now()` will be used.
132- You might provide the extraction date of your data here.
133135 max_timedelta : timedelta
134136 Maximum time difference between the end of a visit and the start of another to consider
135137 them as belonging to the same stay. This duration is internally converted to seconds before
@@ -291,21 +293,18 @@ def get_first(
291293 how = "inner" ,
292294 )
293295
294- # Getting the corresponding first visit
295- first_visit = (
296- merged .sort_values (
297- by = [flag_name , "visit_start_datetime_1" ], ascending = [False , False ]
298- )
299- .groupby ("visit_occurrence_id_2" )
300- .first ()["visit_occurrence_id_1" ]
301- .reset_index ()
302- .rename (
303- columns = {
304- "visit_occurrence_id_1" : f"{ concept_prefix } STAY_ID" ,
305- "visit_occurrence_id_2" : "visit_occurrence_id" ,
306- }
307- )
296+ first_visit = sort_values_first (
297+ merged ,
298+ by_cols = ["visit_occurrence_id_2" ],
299+ cols = [flag_name , "visit_start_datetime_1" , "visit_occurrence_id_1" ],
300+ )
301+ first_visit = first_visit .rename (
302+ columns = {
303+ "visit_occurrence_id_1" : f"{ concept_prefix } STAY_ID" ,
304+ "visit_occurrence_id_2" : "visit_occurrence_id" ,
305+ }
308306 )
307+ first_visit = first_visit [["visit_occurrence_id" , f"{ concept_prefix } STAY_ID" ]]
309308
310309 return merged , first_visit
311310
0 commit comments