super30admin · lohitb009 · Feb 27, 2025 · Feb 28, 2025
diff --git a/calculate_special_bonus_dataframe_modification.py b/calculate_special_bonus_dataframe_modification.py
@@ -0,0 +1,18 @@
+import pandas as pd
+
def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Compute each employee's bonus by zeroing out ineligible salaries.

    An employee earns a bonus equal to their salary only when
    ``employee_id`` is odd and ``name`` does not start with ``'M'``;
    every other employee gets a bonus of 0.

    Args:
        employees: Frame with ``employee_id``, ``name`` and ``salary`` columns.
            Any index is accepted; it does not have to be 0..n-1.

    Returns:
        A new frame with columns ``employee_id`` and ``bonus``, sorted by
        ``employee_id``. The input frame is not modified.
    """
    # Work on a copy of the needed columns so the caller's salaries are not
    # overwritten and the caller's row order is not changed.
    result = employees[['employee_id', 'salary']].copy()

    odd_id = employees['employee_id'] % 2 != 0
    # na=False: a missing name is treated as "does not start with 'M'".
    starts_with_m = employees['name'].str.startswith('M', na=False)
    eligible = odd_id & ~starts_with_m

    # One .loc write instead of chained indexing (frame['salary'][i] = 0),
    # which pandas may apply to a temporary object and silently discard.
    result.loc[~eligible, 'salary'] = 0

    return (
        result.rename(columns={'salary': 'bonus'})
        .sort_values(by=['employee_id'])
    )
+
diff --git a/calculate_special_bonus_dataframe_vectorization.py b/calculate_special_bonus_dataframe_vectorization.py
@@ -0,0 +1,21 @@
+import pandas as pd
+
+# approach -- vectorization i.e. adding extra column to the dataframe
+
def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Compute each employee's bonus with column-wise (vectorized) operations.

    The bonus equals the salary when ``employee_id`` is odd and ``name``
    does not start with ``'M'``; otherwise it is 0.

    Args:
        employees: Frame with ``employee_id``, ``name`` and ``salary`` columns.

    Returns:
        A new frame with columns ``employee_id`` and ``bonus``, sorted by
        ``employee_id``. The input frame is not modified.
    """
    # Boolean masks are computed on whole columns; no per-row Python lambda.
    odd_id = employees['employee_id'] % 2 != 0
    # na=False: a missing name is treated as "does not start with 'M'".
    starts_with_m = employees['name'].str.startswith('M', na=False)

    # Keep the salary where the employee qualifies, substitute 0 elsewhere.
    bonus = employees['salary'].where(odd_id & ~starts_with_m, 0)

    # assign() builds a new frame, so no 'bonus' column leaks into the input.
    return (
        employees[['employee_id']]
        .assign(bonus=bonus)
        .sort_values(by=['employee_id'])
    )
diff --git a/calculate_special_bonus_result_list.py b/calculate_special_bonus_result_list.py
@@ -0,0 +1,22 @@
+import pandas as pd
+
def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Compute each employee's bonus by building a list of result rows.

    The bonus equals the salary when ``employee_id`` is odd and ``name``
    does not start with ``'M'``; otherwise it is 0.

    Args:
        employees: Frame with ``employee_id``, ``name`` and ``salary`` columns.
            Any index is accepted; rows are read by position, not by label.

    Returns:
        A new frame with columns ``employee_id`` and ``bonus``, sorted by
        ``employee_id``.
    """
    # Iterate the three columns in lockstep. This avoids label lookups such as
    # employees['name'][i], which raise KeyError when the index is not 0..n-1.
    columns = zip(
        employees['employee_id'], employees['name'], employees['salary']
    )

    # startswith() is used instead of name[0] so an empty name cannot raise.
    rows = [
        [e_id, salary if e_id % 2 != 0 and not name.startswith('M') else 0]
        for e_id, name, salary in columns
    ]

    # Convert the collected rows to a DataFrame and order it by id.
    result = pd.DataFrame(rows, columns=['employee_id', 'bonus'])
    return result.sort_values(by=['employee_id'])
+
diff --git a/fix_names_in_table.py b/fix_names_in_table.py
@@ -0,0 +1,11 @@
+import pandas as pd
+
def fix_names(users: pd.DataFrame) -> pd.DataFrame:
    """Normalize names to "first letter upper-case, the rest lower-case".

    Args:
        users: Frame with ``user_id`` and ``name`` columns.

    Returns:
        A new frame with the corrected ``name`` column, sorted by
        ``user_id``. The input frame is not modified.
    """
    # str.capitalize() is applied to the whole column at once. assign()
    # returns a new frame rather than overwriting the caller's 'name' column.
    fixed = users.assign(name=users['name'].str.capitalize())

    return fixed.sort_values(by=['user_id'])
diff --git a/patiens_with_conditions_dataframe_approach_regex.py b/patiens_with_conditions_dataframe_approach_regex.py
@@ -0,0 +1,13 @@
+import pandas as pd
+
def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    """Return the patients that have a Type I Diabetes condition code.

    A row matches when ``conditions`` contains a code starting with
    ``DIAB1``, either at the very beginning of the string or right after a
    whitespace character.

    Args:
        patients: Frame with a string ``conditions`` column (codes separated
            by whitespace).

    Returns:
        The matching rows of ``patients`` with all original columns.
    """
    # (?:...) is a non-capturing group: a capturing group makes
    # str.contains() emit a "pattern has match groups" UserWarning.
    # na=False turns missing conditions into "no match" so the mask stays
    # strictly boolean and can be used for row selection.
    has_diab1 = patients['conditions'].str.contains(
        r"(?:^|\s)DIAB1", regex=True, na=False
    )

    return patients[has_diab1]
diff --git a/patients_with_conditions_dataframe_approach.py b/patients_with_conditions_dataframe_approach.py
@@ -0,0 +1,13 @@
+import pandas as pd
+
def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    """Return the patients that have a Type I Diabetes condition code.

    A row matches when ``conditions`` starts with ``DIAB1`` or contains
    ``' DIAB1'`` (the code preceded by a space).

    Args:
        patients: Frame with a string ``conditions`` column (codes separated
            by spaces).

    Returns:
        The matching rows of ``patients`` with all original columns.
    """
    conditions = patients['conditions']

    # na=False keeps both masks strictly boolean when a condition is missing;
    # regex=False because ' DIAB1' is a literal substring, not a pattern.
    first_code = conditions.str.startswith('DIAB1', na=False)
    later_code = conditions.str.contains(' DIAB1', regex=False, na=False)

    return patients[first_code | later_code]
diff --git a/patients_with_conditions_loop.py b/patients_with_conditions_loop.py
@@ -0,0 +1,26 @@
+import pandas as pd
+
def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    """Return the patients that have a Type I Diabetes condition code.

    A patient matches when any whitespace-separated code in ``conditions``
    starts with ``DIAB1``.

    Args:
        patients: Frame with ``patient_id``, ``patient_name`` and
            ``conditions`` columns. Any index is accepted; rows are read by
            position, not by label.

    Returns:
        A new frame with columns ``patient_id``, ``patient_name`` and
        ``conditions`` holding one row per matching patient, in input order.
    """
    result = []

    # Iterate the columns in lockstep rather than looking rows up by label,
    # which fails when the frame's index is not 0..n-1.
    records = zip(
        patients['patient_id'],
        patients['patient_name'],
        patients['conditions'],
    )

    for p_id, p_name, p_conditions in records:
        # A missing value (None/NaN) has no codes to inspect.
        if not isinstance(p_conditions, str):
            continue

        # any() stops at the first matching code, so a patient listed as
        # "DIAB100 MYOP DIAB100" is still added only once.
        if any(code.startswith('DIAB1') for code in p_conditions.split()):
            result.append([p_id, p_name, p_conditions])

    # Convert the collected rows to a DataFrame.
    return pd.DataFrame(result, columns=['patient_id', 'patient_name', 'conditions'])