From ca23805fbae46101067533bf2dd35182b5347952 Mon Sep 17 00:00:00 2001
From: Lohit Bhambri <45014992+lohitb009@users.noreply.github.com>
Date: Wed, 26 Feb 2025 18:06:28 -0800
Subject: [PATCH 1/2] Current CW Submission

CW Discussion 25 Feb 2025
---
 ...te_special_bonus_dataframe_modification.py | 18 +++++++++++++++
 calculate_special_bonus_result_list.py        | 22 +++++++++++++++++++
 2 files changed, 40 insertions(+)
 create mode 100644 calculate_special_bonus_dataframe_modification.py
 create mode 100644 calculate_special_bonus_result_list.py

diff --git a/calculate_special_bonus_dataframe_modification.py b/calculate_special_bonus_dataframe_modification.py
new file mode 100644
index 0000000..fa7c7d8
--- /dev/null
+++ b/calculate_special_bonus_dataframe_modification.py
@@ -0,0 +1,18 @@
+import pandas as pd
+
+def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
+    """Return ``employee_id`` and ``bonus`` for every employee, ordered by id.
+
+    The bonus is the full salary when the id is odd and the name does not
+    start with 'M'; otherwise it is 0. ``employees`` is modified in place:
+    non-qualifying salaries are zeroed and the rows are sorted by id.
+    """
+    even_id = employees['employee_id'] % 2 == 0
+    m_name = employees['name'].str.startswith('M', na=False)
+
+    # One .loc write on the frame itself; the chained employees['salary'][i] = 0
+    # is not guaranteed to reach the frame (it is lost under copy-on-write).
+    employees.loc[even_id | m_name, 'salary'] = 0
+    employees.sort_values(by=['employee_id'], inplace=True)
+    return employees[['employee_id', 'salary']].rename(columns={'salary': 'bonus'})
+    
\ No newline at end of file
diff --git a/calculate_special_bonus_result_list.py b/calculate_special_bonus_result_list.py
new file mode 100644
index 0000000..961ead8
--- /dev/null
+++ b/calculate_special_bonus_result_list.py
@@ -0,0 +1,22 @@
+import pandas as pd
+
+def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
+    """Build the bonus table row by row from a plain Python list.
+
+    An employee earns their full salary as bonus when the id is odd and the
+    name does not start with 'M'; everyone else gets 0. The input frame is
+    left untouched and the result is ordered by ``employee_id``.
+    """
+    rows = []
+
+    # zip over the columns walks the rows positionally, so the frame's index
+    # labels are irrelevant (employees[col][i] looks up *label* i, which
+    # breaks on a non-default index).
+    columns = (employees['employee_id'], employees['name'], employees['salary'])
+    for e_id, e_name, salary in zip(*columns):
+        # startswith is safe on an empty name, unlike e_name[0].
+        earns_bonus = e_id % 2 != 0 and not e_name.startswith('M')
+        rows.append([e_id, salary if earns_bonus else 0])
+
+    return pd.DataFrame(rows, columns=['employee_id', 'bonus']).sort_values(by=['employee_id'])
+    
\ No newline at end of file

From d4ad24a6db7a6d4ab53682530bfe1330d7499632 Mon Sep 17 00:00:00 2001
From: Lohit Bhambri <45014992+lohitb009@users.noreply.github.com>
Date: Thu, 27 Feb 2025 18:30:48 -0800
Subject: [PATCH 2/2] CW Submission

CW Submission 27 Feb 2025
---
 ...e_special_bonus_dataframe_vectorization.py | 21 +++++++++++++++
 fix_names_in_table.py                         | 11 ++++++++
 ...ith_conditions_dataframe_approach_regex.py | 13 ++++++++++
 ...ents_with_conditions_dataframe_approach.py | 13 ++++++++++
 patients_with_conditions_loop.py              | 26 +++++++++++++++++++
 5 files changed, 84 insertions(+)
 create mode 100644 calculate_special_bonus_dataframe_vectorization.py
 create mode 100644 fix_names_in_table.py
 create mode 100644 patiens_with_conditions_dataframe_approach_regex.py
 create mode 100644 patients_with_conditions_dataframe_approach.py
 create mode 100644 patients_with_conditions_loop.py

diff --git a/calculate_special_bonus_dataframe_vectorization.py b/calculate_special_bonus_dataframe_vectorization.py
new file mode 100644
index 0000000..7a66548
--- /dev/null
+++ b/calculate_special_bonus_dataframe_vectorization.py
@@ -0,0 +1,21 @@
+import pandas as pd
+
+# approach -- vectorization i.e. adding extra column to the dataframe
+
+def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
+    """Compute each employee's bonus with column-wise (vectorized) operations.
+
+    The bonus equals the salary when ``employee_id`` is odd and ``name`` does
+    not start with 'M', and 0 otherwise. A ``bonus`` column is added to
+    ``employees`` and the frame is sorted by ``employee_id`` in place.
+
+    Returns the ``employee_id`` and ``bonus`` columns.
+    """
+    # Boolean masks over whole columns replace the row-wise apply(axis=1),
+    # which is a Python-level loop and returns a DataFrame (not a Series) on
+    # an empty frame.
+    odd_id = employees['employee_id'] % 2 != 0
+    not_m = ~employees['name'].str.startswith('M', na=False)
+    employees['bonus'] = employees['salary'].where(odd_id & not_m, 0)
+    employees.sort_values(by=['employee_id'], inplace=True)
+    return employees[['employee_id', 'bonus']]
\ No newline at end of file
diff --git a/fix_names_in_table.py b/fix_names_in_table.py
new file mode 100644
index 0000000..9f7e916
--- /dev/null
+++ b/fix_names_in_table.py
@@ -0,0 +1,11 @@
+import pandas as pd
+
+def fix_names(users: pd.DataFrame) -> pd.DataFrame:
+    """Normalize each name to a leading capital followed by lowercase letters.
+
+    The ``name`` column of ``users`` is overwritten in place; the returned
+    frame is a new one, ordered by ``user_id``.
+    """
+    capitalized = users['name'].str.capitalize()
+    users['name'] = capitalized
+    return users.sort_values('user_id')
\ No newline at end of file
diff --git a/patiens_with_conditions_dataframe_approach_regex.py b/patiens_with_conditions_dataframe_approach_regex.py
new file mode 100644
index 0000000..48d3049
--- /dev/null
+++ b/patiens_with_conditions_dataframe_approach_regex.py
@@ -0,0 +1,13 @@
+import pandas as pd
+
+def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
+    """Return the patients having a condition code that starts with DIAB1.
+
+    ``conditions`` is treated as whitespace-separated codes, so a match must
+    sit at the start of the string or directly after whitespace.
+    """
+    # Non-capturing group: a capturing one makes str.contains emit a
+    # "has match groups" UserWarning. na=False keeps the mask strictly
+    # boolean even when a conditions value is missing.
+    has_diab1 = patients['conditions'].str.contains(r"(?:^|\s)DIAB1", regex=True, na=False)
+    return patients[has_diab1]
\ No newline at end of file
diff --git a/patients_with_conditions_dataframe_approach.py b/patients_with_conditions_dataframe_approach.py
new file mode 100644
index 0000000..21e53dd
--- /dev/null
+++ b/patients_with_conditions_dataframe_approach.py
@@ -0,0 +1,13 @@
+import pandas as pd
+
+def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
+    """Select the patients having a condition code that begins with DIAB1.
+
+    A row is kept when its ``conditions`` string either opens with DIAB1 or
+    contains DIAB1 directly after a space.
+    """
+    conditions = patients['conditions']
+    opens_with_code = conditions.str.startswith('DIAB1')
+    code_after_space = conditions.str.contains(' DIAB1')
+    keep = opens_with_code | code_after_space
+    return patients[keep]
\ No newline at end of file
diff --git a/patients_with_conditions_loop.py b/patients_with_conditions_loop.py
new file mode 100644
index 0000000..12d8201
--- /dev/null
+++ b/patients_with_conditions_loop.py
@@ -0,0 +1,26 @@
+import pandas as pd
+
+def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
+    """Return the patients having a condition code that starts with DIAB1.
+
+    Each ``conditions`` string is split on whitespace and every code is
+    checked individually, so DIAB1 only matches at the start of a code.
+
+    Returns a new frame with the columns ``patient_id``, ``patient_name``
+    and ``conditions``; each matching input row appears once.
+    """
+    matches = []
+    # zip over the columns walks the rows positionally, so this also works
+    # when the index is not 0..n-1 (patients[col][i] looks up *label* i).
+    rows = zip(
+        patients['patient_id'],
+        patients['patient_name'],
+        patients['conditions'],
+    )
+    for p_id, p_name, p_conditions in rows:
+        # any() stops at the first hit, so a patient listing several DIAB1
+        # codes (e.g. "DIAB100 MYOP DIAB100") is still added only once.
+        if any(code.startswith('DIAB1') for code in p_conditions.split()):
+            matches.append([p_id, p_name, p_conditions])
+
+    return pd.DataFrame(matches, columns=['patient_id', 'patient_name', 'conditions'])
\ No newline at end of file