Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions calculate_special_bonus_dataframe_modification.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import pandas as pd

def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Compute the special bonus for every employee.

    An employee's bonus equals their salary when ``employee_id`` is odd and
    ``name`` does not start with ``'M'``; otherwise the bonus is 0.

    Args:
        employees: Frame with ``employee_id``, ``name`` and ``salary`` columns.

    Returns:
        A new frame with ``employee_id`` and ``bonus`` columns, sorted by
        ``employee_id``. The input frame is not modified.
    """
    odd_id = employees['employee_id'] % 2 != 0
    # na=False: a missing name cannot start with 'M'. Empty names are
    # handled too (indexing name[0] would raise on them).
    starts_with_m = employees['name'].str.startswith('M', na=False)
    eligible = odd_id & ~starts_with_m

    # Zero out the salary of non-eligible employees on a fresh Series instead
    # of chained assignment (employees['salary'][i] = 0), which writes to a
    # temporary under copy-on-write and relies on a default RangeIndex.
    result = pd.DataFrame({
        'employee_id': employees['employee_id'],
        'bonus': employees['salary'].where(eligible, 0),
    })

    # sort employees by employee_id
    return result.sort_values(by='employee_id')

21 changes: 21 additions & 0 deletions calculate_special_bonus_dataframe_vectorization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pandas as pd

# Approach: derive a 'bonus' column from the existing employee columns.

def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Compute the special bonus for every employee with column operations.

    An employee's bonus equals their salary when ``employee_id`` is odd and
    ``name`` does not start with ``'M'``; otherwise the bonus is 0.

    Args:
        employees: Frame with ``employee_id``, ``name`` and ``salary`` columns.

    Returns:
        A new frame with ``employee_id`` and ``bonus`` columns, sorted by
        ``employee_id``. The input frame is not modified.
    """
    # Whole-column boolean masks instead of a row-wise apply(): apply(axis=1)
    # is a Python-level loop and returns a DataFrame (not a Series) when the
    # frame is empty, which makes the column assignment fail.
    is_odd = employees['employee_id'] % 2 != 0
    starts_with_m = employees['name'].str.startswith('M', na=False)
    bonus = employees['salary'].where(is_odd & ~starts_with_m, 0)

    # assign() adds the column to a copy, leaving the caller's frame as is.
    ranked = employees.assign(bonus=bonus).sort_values(by='employee_id')

    return ranked[['employee_id', 'bonus']]
22 changes: 22 additions & 0 deletions calculate_special_bonus_result_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import pandas as pd

def calculate_special_bonus(employees: pd.DataFrame) -> pd.DataFrame:
    """Compute the special bonus for every employee by building a row list.

    An employee's bonus equals their salary when ``employee_id`` is odd and
    ``name`` does not start with ``'M'``; otherwise the bonus is 0.

    Args:
        employees: Frame with ``employee_id``, ``name`` and ``salary`` columns.

    Returns:
        A new frame with ``employee_id`` and ``bonus`` columns, sorted by
        ``employee_id``.
    """
    # make a resultant list
    rows = []

    # Iterate the column values directly: employees[col][i] is a label
    # lookup and fails whenever the index is not the default 0..n-1.
    columns = zip(employees['employee_id'], employees['name'], employees['salary'])
    for e_id, e_name, salary in columns:
        # startswith() copes with empty names, where e_name[0] would raise;
        # a non-string (missing) name is treated as not starting with 'M'.
        starts_with_m = isinstance(e_name, str) and e_name.startswith('M')
        eligible = e_id % 2 != 0 and not starts_with_m
        rows.append([e_id, salary if eligible else 0])

    # convert result to dataframe
    result = pd.DataFrame(rows, columns=['employee_id', 'bonus'])

    return result.sort_values(by='employee_id')

11 changes: 11 additions & 0 deletions fix_names_in_table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import pandas as pd

def fix_names(users: pd.DataFrame) -> pd.DataFrame:
    """Normalize user names to a capital first letter and lowercase rest.

    Args:
        users: Frame with ``user_id`` and ``name`` columns.

    Returns:
        A new frame with the same columns, names capitalized, sorted by
        ``user_id``. The input frame is not modified.
    """
    # Vectorized string method: upper-cases the first character and
    # lower-cases the remainder of every name.
    capitalized = users['name'].str.capitalize()

    # assign() works on a copy so the caller's 'name' column is not overwritten.
    return users.assign(name=capitalized).sort_values(by='user_id')
13 changes: 13 additions & 0 deletions patiens_with_conditions_dataframe_approach_regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pandas as pd

def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    """Select patients having a condition code that starts with ``DIAB1``.

    ``conditions`` holds whitespace-separated codes; a row matches when any
    code begins with ``DIAB1``.

    Args:
        patients: Frame with a ``conditions`` string column.

    Returns:
        The matching rows of ``patients`` with all columns preserved.
    """
    # The code must sit at the start of the string or right after whitespace.
    # A non-capturing group avoids pandas' "pattern has match groups" warning,
    # and na=False keeps the mask strictly boolean when a value is missing
    # (an NA in the mask makes boolean indexing raise).
    has_diab1 = patients['conditions'].str.contains(
        r"(?:^|\s)DIAB1", regex=True, na=False
    )

    return patients[has_diab1]
13 changes: 13 additions & 0 deletions patients_with_conditions_dataframe_approach.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import pandas as pd

def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    """Select patients having a condition code that starts with ``DIAB1``.

    ``conditions`` holds space-separated codes; a row matches when the first
    code starts with ``DIAB1`` or a later code (preceded by a space) does.

    Args:
        patients: Frame with a ``conditions`` string column.

    Returns:
        The matching rows of ``patients`` with all columns preserved.
    """
    conditions = patients['conditions']

    # na=False keeps both masks strictly boolean when a value is missing;
    # an NA in the mask makes boolean indexing raise.
    first_code = conditions.str.startswith('DIAB1', na=False)
    # regex=False: ' DIAB1' is a literal substring, not a pattern.
    later_code = conditions.str.contains(' DIAB1', regex=False, na=False)

    return patients[first_code | later_code]
26 changes: 26 additions & 0 deletions patients_with_conditions_loop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import pandas as pd

def find_patients(patients: pd.DataFrame) -> pd.DataFrame:
    """Select patients having a condition code that starts with ``DIAB1``.

    ``conditions`` holds whitespace-separated codes; a row matches when any
    code begins with ``DIAB1``.

    Args:
        patients: Frame with ``patient_id``, ``patient_name`` and
            ``conditions`` columns.

    Returns:
        A new frame (fresh default index) with the ``patient_id``,
        ``patient_name`` and ``conditions`` of every matching patient, in
        input order.
    """
    # set the result
    matches = []

    # Iterate the column values directly: patients[col][i] is a label lookup
    # and fails whenever the index is not the default 0..n-1.
    records = zip(
        patients['patient_id'], patients['patient_name'], patients['conditions']
    )
    for p_id, p_name, p_conditions in records:
        # A missing conditions value (None / NaN) cannot match and has no split().
        if not isinstance(p_conditions, str):
            continue

        # any() stops at the first hit, so a patient listing the code twice
        # (e.g. "DIAB100 MYOP DIAB100") is still added only once.
        if any(code.startswith('DIAB1') for code in p_conditions.split()):
            matches.append([p_id, p_name, p_conditions])

    # convert result to dataframe
    return pd.DataFrame(matches, columns=['patient_id', 'patient_name', 'conditions'])