diff --git a/task_1-7.ipynb b/task_1-7.ipynb new file mode 100644 index 0000000..56e024f --- /dev/null +++ b/task_1-7.ipynb @@ -0,0 +1,1256 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "431b73df", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d89e36d1", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | salary | \n", + "educationType | \n", + "jobTitle | \n", + "qualification | \n", + "gender | \n", + "dateModify | \n", + "skills | \n", + "otherInfo | \n", + "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "60000 | \n", + "Высшее | \n", + "Специалист пресс-службы | \n", + "Магистр | \n", + "Мужской | \n", + "2021-04-01 | \n", + "<p>Аналитическое мышление, <span cla... | \n", + "NaN | \n", + "
| 1 | \n", + "85000 | \n", + "Высшее | \n", + "менеджер проектов | \n", + "NaN | \n", + "Мужской | \n", + "2021-04-01 | \n", + "NaN | \n", + "NaN | \n", + "
| 2 | \n", + "15000 | \n", + "Среднее профессиональное | \n", + ".... | \n", + "NaN | \n", + "Женский | \n", + "2021-06-01 | \n", + "NaN | \n", + "NaN | \n", + "
| 3 | \n", + "30000 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Женский | \n", + "2021-04-03 | \n", + "NaN | \n", + "NaN | \n", + "
| 4 | \n", + "45000 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Мужской | \n", + "2021-06-28 | \n", + "NaN | \n", + "NaN | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 32678 | \n", + "15000 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Женский | \n", + "2021-06-23 | \n", + "NaN | \n", + "NaN | \n", + "
| 32679 | \n", + "13000 | \n", + "Среднее | \n", + "уборщица | \n", + "NaN | \n", + "Женский | \n", + "2021-07-29 | \n", + "NaN | \n", + "NaN | \n", + "
| 32680 | \n", + "16000 | \n", + "Среднее профессиональное | \n", + "кочегар машинист котельной | \n", + "NaN | \n", + "Мужской | \n", + "2021-11-11 | \n", + "NaN | \n", + "NaN | \n", + "
| 32681 | \n", + "35000 | \n", + "Высшее | \n", + "NaN | \n", + "NaN | \n", + "Мужской | \n", + "2020-04-21 | \n", + "NaN | \n", + "NaN | \n", + "
| 32682 | \n", + "30000 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Мужской | \n", + "2021-06-20 | \n", + "NaN | \n", + "NaN | \n", + "
32683 rows × 8 columns
\n", + "Аналитическое мышление, Аналитическое мышление, Коммуникабельность
\n", + "13Ответственность в работе
\n", + "21Усидчивость, умение удерживать в памяти нуж...\n", + " ... \n", + "32665
Отвественность
Исполнительность
исполнительный
\n", + "32674Нацелен на результат. Считаю себя командным...\n", + "32675
трудоспособен
\n", + "Name: skills, Length: 8972, dtype: object" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "works['skills'].dropna()" + ] + }, + { + "cell_type": "markdown", + "id": "58a3bc5d", + "metadata": {}, + "source": [ + "### 5.Вывести зарплату только у тех, у которых в скиллах есть Python (Питон)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b93d618d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "230 35000\n", + "334 20000\n", + "2394 35000\n", + "8096 15000\n", + "9014 25000\n", + "9667 90000\n", + "20930 30000\n", + "22530 50000\n", + "28286 23000\n", + "30430 23000\n", + "Name: salary, dtype: int64" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = works.skills.dropna().str.lower().str.contains('python|питон')\n", + "works[works.skills.notna()][df]['salary']" + ] + }, + { + "cell_type": "markdown", + "id": "cc98d1b7", + "metadata": {}, + "source": [ + "### 6.Построить перцентили по заработной плате у мужчин и женщин" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "92355509", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1. ])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "percentiles = np.linspace(.1, 1, 10)\n", + "percentiles" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "4772a178", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "| \n", + " | salary | \n", + "
|---|---|
| 0.1 | \n", + "15000.0 | \n", + "
| 0.2 | \n", + "20000.0 | \n", + "
| 0.3 | \n", + "25000.0 | \n", + "
| 0.4 | \n", + "30000.0 | \n", + "
| 0.5 | \n", + "30000.0 | \n", + "
| 0.6 | \n", + "35000.0 | \n", + "
| 0.7 | \n", + "40000.0 | \n", + "
| 0.8 | \n", + "50000.0 | \n", + "
| 0.9 | \n", + "60000.0 | \n", + "
| 1.0 | \n", + "1000000.0 | \n", + "
| \n", + " | salary | \n", + "
|---|---|
| 0.1 | \n", + "15000.0 | \n", + "
| 0.2 | \n", + "18000.0 | \n", + "
| 0.3 | \n", + "20000.0 | \n", + "
| 0.4 | \n", + "22000.0 | \n", + "
| 0.5 | \n", + "25000.0 | \n", + "
| 0.6 | \n", + "30000.0 | \n", + "
| 0.7 | \n", + "30000.0 | \n", + "
| 0.8 | \n", + "35000.0 | \n", + "
| 0.9 | \n", + "47000.0 | \n", + "
| 1.0 | \n", + "900000.0 | \n", + "
| \n", + " | salary | \n", + "educationType | \n", + "jobTitle | \n", + "qualification | \n", + "gender | \n", + "dateModify | \n", + "skills | \n", + "otherInfo | \n", + "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "60000 | \n", + "Высшее | \n", + "Специалист пресс-службы | \n", + "Магистр | \n", + "Мужской | \n", + "2021-04-01 | \n", + "<p>Аналитическое мышление, <span cla... | \n", + "NaN | \n", + "
| 1 | \n", + "85000 | \n", + "Высшее | \n", + "менеджер проектов | \n", + "NaN | \n", + "Мужской | \n", + "2021-04-01 | \n", + "NaN | \n", + "NaN | \n", + "
| 2 | \n", + "15000 | \n", + "Среднее профессиональное | \n", + ".... | \n", + "NaN | \n", + "Женский | \n", + "2021-06-01 | \n", + "NaN | \n", + "NaN | \n", + "
| 3 | \n", + "30000 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Женский | \n", + "2021-04-03 | \n", + "NaN | \n", + "NaN | \n", + "
| 4 | \n", + "45000 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Мужской | \n", + "2021-06-28 | \n", + "NaN | \n", + "NaN | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 32678 | \n", + "15000 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Женский | \n", + "2021-06-23 | \n", + "NaN | \n", + "NaN | \n", + "
| 32679 | \n", + "13000 | \n", + "Среднее | \n", + "уборщица | \n", + "NaN | \n", + "Женский | \n", + "2021-07-29 | \n", + "NaN | \n", + "NaN | \n", + "
| 32680 | \n", + "16000 | \n", + "Среднее профессиональное | \n", + "кочегар машинист котельной | \n", + "NaN | \n", + "Мужской | \n", + "2021-11-11 | \n", + "NaN | \n", + "NaN | \n", + "
| 32681 | \n", + "35000 | \n", + "Высшее | \n", + "NaN | \n", + "NaN | \n", + "Мужской | \n", + "2020-04-21 | \n", + "NaN | \n", + "NaN | \n", + "
| 32682 | \n", + "30000 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "Мужской | \n", + "2021-06-20 | \n", + "NaN | \n", + "NaN | \n", + "
32683 rows × 8 columns
\n", + "Аналитическое мышление, "
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+    "# Bar chart of managers_list (defined outside this cell; presumably counts per education type - confirm).\n",
+    "names = managers_list.index.tolist()\n",
+    "values = managers_list.tolist()\n",
+    "fig = plt.figure()\n",
+    "fig.set_figwidth(7)\n",
+    "plt.bar(names, values)\n",
+    "plt.title('ТОП-5 образований, после которых становятся менеджерами')\n",
+    "# Per the title the bars are education categories, not job positions, so the x axis is labelled as education.\n",
+    "plt.xlabel('Образование')\n",
+    "plt.ylabel('Количество человек')\n",
+    "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "617ea494",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ "