diff --git a/class.py b/class.py
new file mode 100644
index 0000000..4a53b63
--- /dev/null
+++ b/class.py
@@ -0,0 +1,43 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+data = pd.read_csv('works.csv')
+
+# 1. Number of rows, computed two ways, followed by the frame summary.
+print(len(data.index))
+print(data.shape[0])
+# DataFrame.info() prints its report itself and returns None, so it is
+# called directly instead of being wrapped in print().
+data.info()
+
+# 2. Number of rows per gender value.
+print(data.gender.value_counts())
+
+# 3. Non-missing "skills" count: shown in the summary and computed two ways.
+data.info()
+print(data.skills.notna().sum())
+print(data.skills.count())
+
+# 4. The non-missing "skills" values, selected two ways.
+print(data[data.skills.notna()]['skills'])
+print(data.skills.dropna())
+
+# 5. Salaries of rows whose skills mention Python; rows with missing
+# skills are dropped first so the match never produces NaN.
+new_data = data[data.skills.notna()]
+print(new_data[new_data.skills.str.lower().str.contains('python|питон')].salary)
+
+# 6. Same selection without an intermediate frame: na=False makes rows
+# with missing skills count as "no match", so the mask is purely boolean.
+has_python = data.skills.str.lower().str.contains('python|питон', na=False)
+print(data[has_python].salary)
+
+# 7. Histograms for men and for women with higher education.
+# NOTE(review): DataFrame.hist() draws one histogram per numeric column;
+# if only the salary distribution is wanted, pass column='salary' -- confirm.
+data.query("gender == 'Мужской' and educationType == 'Высшее'").hist(bins=100, alpha=0.5)
+plt.show()
+
+data.query("gender == 'Женский' and educationType == 'Высшее'").hist(bins=100, alpha=0.5)
+plt.show()
diff --git a/homework.py b/homework.py
new file mode 100644
index 0000000..00e882e
--- /dev/null
+++ b/homework.py
@@ -0,0 +1,45 @@
+import pandas as pd
+
+data = pd.read_csv('works.csv')
+print(data)
+
+# Keep only rows where both the job title and the qualification are present.
+data.dropna(subset=['jobTitle', 'qualification'], inplace=True)
+
+print(data.shape[0])
+
+# Treat hyphenated words as separate words in both columns.
+data['qualification'] = data['qualification'].str.replace('-', ' ', regex=False)
+data['jobTitle'] = data['jobTitle'].str.replace('-', ' ', regex=False)
+
+# How many rows have a job title identical to the qualification (True)
+# and how many do not (False).
+print((data['jobTitle'] == data['qualification']).value_counts())
+
+
+def comparison(column1, column2):
+    """Return True if any word of column1 occurs inside column2.
+
+    Words are the whitespace-separated parts of column1. Each one is tested
+    as a case-sensitive substring of the whole string column2, so a word
+    also matches when it is only a part of a longer word in column2.
+    """
+    return any(word in column2 for word in column1.split())
+
+
+# Rows where the two fields share at least one word, checked in both
+# directions because the substring test is not symmetric.
+count = sum(
+    comparison(job, qualification) or comparison(qualification, job)
+    for job, qualification in zip(data['jobTitle'], data['qualification'])
+)
+
+# Number of rows where the job title and the qualification share no word.
+print(data.shape[0] - count)
+
+# Top 5 qualifications among managers
+managers = data[data["jobTitle"].str.lower().str.contains("менедж")]
+print(managers['qualification'].str.lower().value_counts().head(5))
+
+# Top 5 job titles among people whose qualification mentions "инженер" (engineer)
+engineers = data[data["qualification"].str.lower().str.contains("инженер")]
+print(engineers['jobTitle'].str.lower().value_counts().head(5))