"""Compare job titles with qualifications in ``works.csv`` and print summaries.

Run as a script, this prints the number of rows whose job title and
qualification share no word, the number of rows analysed, and two top-5
frequency tables.
"""

import numpy as np
import pandas as pd


def calculate_mismatches_between_profession_and_qualification(data: pd.DataFrame) -> int:
    """Count rows whose job title and qualification have no word in common.

    A row is a mismatch when no word of ``jobTitle`` occurs inside
    ``qualification`` and no word of ``qualification`` occurs inside
    ``jobTitle``.

    Args:
        data: Table with string columns ``jobTitle`` and ``qualification``.

    Returns:
        The number of mismatching rows.
    """
    return sum(
        1
        for job, qualification in zip(data["jobTitle"], data["qualification"])
        if not check_substring(job, qualification)
        and not check_substring(qualification, job)
    )


def check_substring(substring: str, string: str) -> bool:
    """Return True if any whitespace-separated word of *substring* is in *string*.

    Args:
        substring: Text that is split into words.
        string: Text searched for each of those words.

    Returns:
        True when at least one word occurs in *string*, otherwise False
        (including when *substring* contains no words at all).
    """
    # split() without an argument never yields empty tokens; splitting on a
    # literal ' ' does for repeated/leading/trailing spaces, and the empty
    # string is "in" every string, which produced false matches.
    return any(word in string for word in substring.split())


def get_profession_top_by_parameter(
    data: pd.DataFrame,
    profession: str,
    first_parameter: str,
    second_parameter: str,
) -> pd.Series:
    """Return the five most frequent values of one column for a profession.

    Args:
        data: Table to search.
        profession: Literal text to look for (not a regular expression).
        first_parameter: Column whose values must contain *profession*.
        second_parameter: Column whose values are counted for matching rows.

    Returns:
        Value counts of *second_parameter* for the matching rows, limited to
        the first five entries.
    """
    # regex=False: the profession is plain text, so characters such as '+' or
    # '(' must not be interpreted as a pattern. na=False: rows with a missing
    # value are treated as non-matching instead of breaking the boolean mask.
    mask = data[first_parameter].str.contains(profession, regex=False, na=False)
    return data.loc[mask, second_parameter].value_counts().head(5)


def main(path: str = "works.csv") -> None:
    """Load the CSV at *path*, drop incomplete rows and print the summaries."""
    works = pd.read_csv(path).dropna()
    # Lower-case every column as text so the word comparisons ignore case.
    works_lower = works.apply(lambda column: column.astype(str).str.lower())

    count_mismatches = calculate_mismatches_between_profession_and_qualification(works_lower)
    print(count_mismatches)
    print(works.shape[0])

    managers_top = get_profession_top_by_parameter(works_lower, "менеджер", "jobTitle", "qualification")
    print(managers_top)
    engineers_top = get_profession_top_by_parameter(works_lower, "инженер", "qualification", "jobTitle")
    print(engineers_top)


if __name__ == "__main__":
    main()
+
+|Образование|Количество|
+|---|---|
+|бакалавр|11|
+|менеджер|10|
+|специалист|6|
+|экономист|6|
+|экономист-менеджер|4|
+
+### 3. Топ-5 работ для инженеров.
+
+|Работа|Количество|
+|---|---|
+|заместитель директора|3|
+|главный инженер|3|
+|ведущий инженер-конструктор|2|
+|инженер лесопользования|2|
+|директор|2|
\ No newline at end of file