diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/inspectionProfiles/Project_Default.xml b/.idea/inspectionProfiles/Project_Default.xml
new file mode 100644
index 0000000..cff1354
--- /dev/null
+++ b/.idea/inspectionProfiles/Project_Default.xml
@@ -0,0 +1,6 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..09d5b03
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..2cdf9da
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/pandas_task.iml b/.idea/pandas_task.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/pandas_task.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/analiz resh 1-7.py b/analiz resh 1-7.py
new file mode 100644
index 0000000..4322c42
--- /dev/null
+++ b/analiz resh 1-7.py
@@ -0,0 +1,67 @@
+"""Tasks 1-7: record counts, skills and salary plots for works.csv."""
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+
+works = pd.read_csv('works.csv')
+
+# Row masks per gender, reused by tasks 2, 6 and 7.
+is_male = works['gender'] == 'Мужской'
+is_female = works['gender'] == 'Женский'
+
+# Task 1: total number of records.
+print('общее количество записей в датасете:', works.shape[0])
+# 32683
+
+# Task 2: men and women (summing a boolean mask counts its True values).
+print('Количество мужчин:', is_male.sum())
+# 13386
+print('Количество женщин:', is_female.sum())
+# 17910
+
+# Task 3: number of non-missing values in the skills column.
+print('Количество значений в столбце skills не NAN:', works['skills'].notna().sum())
+# 8972
+
+# Task 4: every filled-in skills value.
+print('Все заполненные скиллы:\n', works['skills'].dropna())
+
+# Task 5: salaries of people whose skills mention Python.
+# na=False turns missing skills into "no match", so the mask covers every row
+# of `works` and can index it directly.
+knows_python = works['skills'].str.lower().str.contains('python|питон', na=False)
+print('Зарплата у тех, у кого в скиллах есть Python(Питон):\n', works.loc[knows_python, 'salary'])
+
+# Task 6: salary at the 0.1 ... 1.0 quantiles, men vs. women.
+percentiles = np.linspace(.1, 1, 10)
+m_salary = works.loc[is_male, 'salary'].quantile(percentiles)
+w_salary = works.loc[is_female, 'salary'].quantile(percentiles)
+plt.plot(m_salary, color='blue', label='Мужчины')
+plt.plot(w_salary, color='red', label='Женщины')
+plt.xlabel('Перцентили')
+plt.ylabel('Зарплата')
+plt.legend()
+plt.show()
+
+# Task 7: mean salary per education type, men vs. women.
+# Only `salary` is averaged: a mean over every column fails on the text
+# columns in pandas >= 2.0. Grouping once and unstacking gender gives both
+# series one shared educationType index, so bars and tick labels stay aligned
+# even when an education type is missing for one gender.
+mean_salary = (
+    works[is_male | is_female]
+    .groupby(['educationType', 'gender'])['salary']
+    .mean()
+    .unstack('gender')
+    .reindex(columns=['Мужской', 'Женский'])
+)
+educationType = mean_salary.index.values
+men_salary = mean_salary['Мужской'].values
+women_salary = mean_salary['Женский'].values
+index = np.arange(len(educationType))
+wd = 0.1  # bar width; each pair of bars sits wd / 2 either side of its tick
+plt.bar(index - wd / 2, men_salary, wd, color='blue', label='Средняя зарплата мужчин')
+plt.bar(index + wd / 2, women_salary, wd, color='red', label='Средняя зарплата женщин')
+plt.xticks(index, educationType)
+plt.legend()
+plt.show()
diff --git a/z8.py b/z8.py
new file mode 100644
index 0000000..482d271
--- /dev/null
+++ b/z8.py
@@ -0,0 +1,53 @@
+"""Task 8: job title vs. qualification mismatches and top qualifications."""
+import pandas as pd
+
+
+# Keep every row that has both compared columns. A bare dropna() would also
+# discard rows with any other empty field, e.g. rows with no skills listed.
+works = pd.read_csv('works.csv').dropna(subset=['jobTitle', 'qualification'])
+
+
+def count(f1, f2, works):
+    """Count rows whose values in columns f1 and f2 do not match each other.
+
+    A row is counted when match() fails in both directions.
+    """
+    return sum(
+        1
+        for n1, n2 in zip(works[f1], works[f2])
+        if not (match(n1, n2) or match(n2, n1))
+    )
+
+
+def match(c1, c2):
+    """Return True if any word of c1 occurs inside c2, ignoring case.
+
+    c1 is lower-cased and split on whitespace after hyphens become spaces.
+    Each word is searched for as a substring of the lower-cased c2, so it
+    also matches inside a longer word.
+    """
+    haystack = c2.lower()  # lowered once, not once per word
+    words = c1.lower().replace('-', ' ').split()
+    return any(word in haystack for word in words)
+
+
+def _top_qualifications(frame, title_stem, n=5):
+    """Return the n most common lower-cased qualifications for a job stem.
+
+    Rows are selected when the lower-cased jobTitle contains title_stem.
+    """
+    titles = frame['jobTitle'].str.lower()
+    has_stem = titles.str.contains(title_stem, regex=False)
+    return frame.loc[has_stem, 'qualification'].str.lower().value_counts().head(n)
+
+
+result = count('jobTitle', 'qualification', works)
+print('Из {} людей не совпадают профессия и должность у {}'.format(works.shape[0], result))
+
+# Titles are searched by truncated stem (the word minus its last two
+# letters), presumably so that inflected forms match as well.
+print('\nТоп образований для менеджеров:')
+print(_top_qualifications(works, 'менедж'))
+
+print('\nТоп образований для инженеров:')
+print(_top_qualifications(works, 'инжен'))