Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.idea
30 changes: 30 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import pandas as pd


def get_match_count(first_list, second_list):
    """Count the paired entries of two sequences that match each other.

    The sequences are walked in lockstep (iteration stops at the shorter
    one). A pair is a match when ``contains`` succeeds in either direction,
    i.e. a word of one entry occurs inside the other entry.

    Returns:
        The number of matching pairs as an ``int``.
    """
    matches = 0
    for left, right in zip(first_list, second_list):
        if contains(left, right) or contains(right, left):
            matches += 1
    return matches


def contains(sub_text, text):
    """Return True if any word of ``sub_text`` occurs as a substring of ``text``.

    Hyphens in ``sub_text`` are treated as word separators, so
    ``"a-b"`` contributes the words ``"a"`` and ``"b"``.

    Args:
        sub_text: String whose words are searched for.
        text: String that is searched.

    Returns:
        ``True`` when at least one word of ``sub_text`` is found inside
        ``text``; ``False`` otherwise, including when ``sub_text`` has no
        words at all (empty or whitespace/hyphen only).
    """
    # split() with no argument drops the empty strings that split(' ')
    # produces for leading/trailing/repeated separators. An empty string is
    # a substring of every string, so keeping it would match anything.
    words = sub_text.replace('-', ' ').split()
    return any(word in text for word in words)


def get_top(source, search_field, return_field, value, top_n=5):
    """Return the most frequent ``return_field`` values among matching rows.

    Rows are selected when their ``search_field`` text contains ``value``
    (``value`` is passed to ``Series.str.contains`` and is therefore
    interpreted as a regular expression).

    Args:
        source: DataFrame to search.
        search_field: Name of the column that is searched for ``value``.
        return_field: Name of the column whose values are counted.
        value: Pattern to look for in ``search_field``.
        top_n: Maximum number of entries to return (defaults to 5).

    Returns:
        A Series of counts indexed by ``return_field`` value, ordered from
        most to least frequent and limited to ``top_n`` entries.
    """
    # na=False makes missing values in the searched column count as
    # "no match"; without it the mask may contain NaN and boolean indexing
    # raises ValueError.
    mask = source[search_field].str.contains(value, na=False)
    return source[mask][return_field].value_counts().head(top_n)


# Load the dataset, keep only rows without missing values, and convert every
# column to lower-case strings so the text comparisons below ignore case.
data = (
    pd.read_csv('works.csv')
    .dropna()
    .apply(lambda column: column.astype(str).str.lower())
)
count = len(data)
mismatch_count = count - get_match_count(data["jobTitle"], data["qualification"])

# Summary: total rows, rows whose job title and qualification do not match,
# and the share of such rows.
print(
    f"Всего людей: {count}.",
    f"Людей с несовпадающими профессией и должностью: {mismatch_count}.",
    f"Что составляет {mismatch_count / count:.0%} от общего числа.",
    sep="\n",
)

# Two top-5 listings: (heading, column searched, column counted, search text).
for heading, search_field, return_field, value in (
    ("\nТоп 5 квалификаций менеджеров:", "jobTitle", "qualification", "менеджер"),
    ("\nТоп 5 должностей инженеров:", "qualification", "jobTitle", "инженер"),
):
    print(heading)
    print(get_top(data, search_field, return_field, value))
23 changes: 23 additions & 0 deletions report.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
### Всего людей: 1068.
### Людей с несовпадающими профессией и должностью: 769.
### Что составляет 72% от общего числа.

# Топ 5 квалификаций менеджеров:

|Квалификация|Количество|
|---|---|
|бакалавр|11|
|менеджер|10|
|специалист|6|
|экономист|6|
|экономист-менеджер|4|

# Топ 5 должностей инженеров:

|Должность|Количество|
|---|---|
|заместитель директора|3|
|главный инженер |3|
|ведущий инженер-конструктор|2|
|инженер лесопользования|2|
|директор|2|
32 changes: 32 additions & 0 deletions tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


works = pd.read_csv("works.csv")
# First and last five rows; not used by the tasks below.
head = works.head(5)
tail = works.tail(5)


# Task 1: total number of rows.
print(len(works.index))

# Task 2: number of rows per "gender" value.
print(works["gender"].value_counts())

# Task 3: number of non-null "skills" entries.
print(works["skills"].count())

# Task 4: the non-null "skills" entries themselves.
print(works["skills"].dropna())

# Task 5: salaries of rows whose skills mention Python (Latin or Cyrillic
# spelling, case-insensitive). na=False maps missing skills to False so the
# mask is strictly boolean.
skills_bool = works["skills"].str.lower().str.contains("python|питон", na=False)
print(works[skills_bool]["salary"])

# Task 6: deciles (10%, 20%, ..., 100%) of the numeric columns, by gender.
# numeric_only=True is required on pandas >= 2.0, where quantile() no longer
# skips non-numeric columns by default and raises TypeError on them.
person = np.linspace(.1, 1, 10)
men = works.query('gender == "Мужской"').quantile(person, numeric_only=True)
women = works.query('gender == "Женский"').quantile(person, numeric_only=True)
print(men)
print(women)