From ca23805fbae46101067533bf2dd35182b5347952 Mon Sep 17 00:00:00 2001
From: Lohit Bhambri <45014992+lohitb009@users.noreply.github.com>
Date: Wed, 26 Feb 2025 18:06:28 -0800
Subject: [PATCH 1/2] Current CW Submission

CW Discussion 25 Feb 2025
---
 ...te_special_bonus_dataframe_modification.py | 25 ++++++++++++++++++
 calculate_special_bonus_result_list.py        | 26 +++++++++++++++++++
 2 files changed, 51 insertions(+)
 create mode 100644 calculate_special_bonus_dataframe_modification.py
 create mode 100644 calculate_special_bonus_result_list.py

diff --git a/calculate_special_bonus_dataframe_modification.py b/calculate_special_bonus_dataframe_modification.py
new file mode 100644
index 0000000..fa7c7d8
--- /dev/null
+++ b/calculate_special_bonus_dataframe_modification.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+
+def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
+    """Return every employee's bonus, ordered by employee_id.
+
+    The bonus equals the salary when employee_id is odd and the name does
+    not start with 'M'; otherwise it is 0.
+
+    This variant modifies ``employees`` in place: the salary of each
+    non-eligible row is overwritten with 0 and the frame is re-sorted.
+    """
+    is_odd = employees['employee_id'] % 2 != 0
+    starts_with_m = employees['name'].str.startswith('M', na=False)
+    eligible = is_odd & ~starts_with_m
+
+    # Write through .loc with a boolean mask. The chained form
+    # employees['salary'][i] = 0 can assign into a temporary copy and looks
+    # rows up by label, so it breaks on a non-default index.
+    employees.loc[~eligible, 'salary'] = 0
+
+    # sort employees by employee_id
+    employees.sort_values(by=['employee_id'], inplace=True)
+
+    return employees[['employee_id', 'salary']].rename(columns={'salary': 'bonus'})
diff --git a/calculate_special_bonus_result_list.py b/calculate_special_bonus_result_list.py
new file mode 100644
index 0000000..961ead8
--- /dev/null
+++ b/calculate_special_bonus_result_list.py
@@ -0,0 +1,26 @@
+import pandas as pd
+
+
+def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
+    """Return every employee's bonus, ordered by employee_id.
+
+    The bonus equals the salary when employee_id is odd and the name does
+    not start with 'M'; otherwise it is 0. ``employees`` is left untouched;
+    the answer is collected row by row into a list.
+    """
+    # make a resultant list
+    result = []
+
+    # zip walks the columns by position, so any index works
+    rows = zip(employees['employee_id'], employees['name'], employees['salary'])
+    for e_id, e_name, e_salary in rows:
+        if e_id % 2 != 0 and not e_name.startswith('M'):
+            result.append([e_id, e_salary])
+        else:
+            result.append([e_id, 0])
+
+    # convert result to dataframe
+    result = pd.DataFrame(result, columns=['employee_id', 'bonus'])
+    result.sort_values(by=['employee_id'], inplace=True)
+
+    return result

From d4ad24a6db7a6d4ab53682530bfe1330d7499632 Mon Sep 17 00:00:00 2001
From: Lohit Bhambri <45014992+lohitb009@users.noreply.github.com>
Date: Thu, 27 Feb 2025 18:30:48 -0800
Subject: [PATCH 2/2] CW Submission

CW Submission 27 Feb 2025
---
 ...e_special_bonus_dataframe_vectorization.py | 25 +++++++++++++++++++
 fix_names_in_table.py                         | 15 +++++++++++
 ...ith_conditions_dataframe_approach_regex.py | 19 ++++++++++++++
 ...ents_with_conditions_dataframe_approach.py | 19 ++++++++++++++
 patients_with_conditions_loop.py              | 22 ++++++++++++++++
 5 files changed, 100 insertions(+)
 create mode 100644 calculate_special_bonus_dataframe_vectorization.py
 create mode 100644 fix_names_in_table.py
 create mode 100644 patiens_with_conditions_dataframe_approach_regex.py
 create mode 100644 patients_with_conditions_dataframe_approach.py
 create mode 100644 patients_with_conditions_loop.py

diff --git a/calculate_special_bonus_dataframe_vectorization.py b/calculate_special_bonus_dataframe_vectorization.py
new file mode 100644
index 0000000..7a66548
--- /dev/null
+++ b/calculate_special_bonus_dataframe_vectorization.py
@@ -0,0 +1,25 @@
+import pandas as pd
+
+# approach -- vectorization i.e. adding extra column to the dataframe
+
+
+def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
+    """Return every employee's bonus, ordered by employee_id.
+
+    The bonus equals the salary when employee_id is odd and the name does
+    not start with 'M'; otherwise it is 0.
+
+    This variant adds a 'bonus' column to ``employees`` and sorts the frame
+    in place.
+    """
+    is_odd = employees['employee_id'] % 2 != 0
+    starts_with_m = employees['name'].str.startswith('M', na=False)
+
+    # Whole-column operation: keep the salary where the row is eligible and
+    # put 0 elsewhere. apply(axis=1) would call Python once per row instead.
+    employees['bonus'] = employees['salary'].where(is_odd & ~starts_with_m, 0)
+
+    # sort employees by employee_id
+    employees.sort_values(by=['employee_id'], inplace=True)
+
+    return employees[['employee_id', 'bonus']]
diff --git a/fix_names_in_table.py b/fix_names_in_table.py
new file mode 100644
index 0000000..9f7e916
--- /dev/null
+++ b/fix_names_in_table.py
@@ -0,0 +1,15 @@
+import pandas as pd
+
+
+def fix_names(users: pd.DataFrame) -> pd.DataFrame:
+    """Return ``users`` ordered by user_id with every name capitalized.
+
+    The 'name' column of ``users`` is rewritten in place so that only the
+    first character is upper case and the rest is lower case.
+    """
+    # using vectorization -- vectorize str method
+    # str.capitalize() replaces the two-step form
+    # users['name'].str[0].str.upper() + users['name'].str[1:].str.lower()
+    users['name'] = users['name'].str.capitalize()
+
+    return users.sort_values(by=['user_id'])
diff --git a/patiens_with_conditions_dataframe_approach_regex.py b/patiens_with_conditions_dataframe_approach_regex.py
new file mode 100644
index 0000000..48d3049
--- /dev/null
+++ b/patiens_with_conditions_dataframe_approach_regex.py
@@ -0,0 +1,19 @@
+import pandas as pd
+
+
+def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
+    """Return the rows whose conditions include a code starting with DIAB1.
+
+    ``conditions`` holds space-separated codes; a code matches when DIAB1
+    sits at the start of the string or right after whitespace.
+    """
+    # boolean series -- regex expression
+    # (?:...) is non-capturing: a capturing group makes str.contains emit a
+    # "pattern has match groups" UserWarning. na=False keeps the mask
+    # strictly boolean when a conditions value is missing.
+    has_diab1 = patients['conditions'].str.contains(
+        r"(?:^|\s)DIAB1", regex=True, na=False
+    )
+
+    # filter the dataframe with the mask
+    return patients[has_diab1]
diff --git a/patients_with_conditions_dataframe_approach.py b/patients_with_conditions_dataframe_approach.py
new file mode 100644
index 0000000..21e53dd
--- /dev/null
+++ b/patients_with_conditions_dataframe_approach.py
@@ -0,0 +1,19 @@
+import pandas as pd
+
+
+def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
+    """Return the rows whose conditions include a code starting with DIAB1.
+
+    ``conditions`` holds space-separated codes, so a match is either at the
+    very start of the string or directly after a space.
+    """
+    conditions = patients['conditions']
+
+    # boolean series; regex=False because ' DIAB1' is a literal substring
+    has_diab1 = (
+        conditions.str.startswith('DIAB1', na=False)
+        | conditions.str.contains(' DIAB1', regex=False, na=False)
+    )
+
+    # filter the dataframe with the mask
+    return patients[has_diab1]
diff --git a/patients_with_conditions_loop.py b/patients_with_conditions_loop.py
new file mode 100644
index 0000000..12d8201
--- /dev/null
+++ b/patients_with_conditions_loop.py
@@ -0,0 +1,22 @@
+import pandas as pd
+
+
+def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
+    """Return the rows whose conditions include a code starting with DIAB1.
+
+    The frame is scanned row by row and the matches are collected into a
+    new DataFrame with columns patient_id, patient_name and conditions.
+    """
+    # set the result
+    result = []
+
+    # zip walks the columns by position, so any index works
+    rows = zip(patients['patient_id'], patients['patient_name'], patients['conditions'])
+    for p_id, p_name, p_conditions in rows:
+        # any() stops at the first matching code, so a row such as
+        # "DIAB100 MYOP DIAB100" is added only once
+        if any(c.startswith('DIAB1') for c in p_conditions.split()):
+            result.append([p_id, p_name, p_conditions])
+
+    # convert result to dataframe
+    return pd.DataFrame(result, columns=['patient_id', 'patient_name', 'conditions'])