-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
74 lines (47 loc) · 2.39 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# -*- coding: utf-8 -*-
"""baseline.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1zPGSjSpOj-XdrGGPzZUULHdtnABcLu19

# Project
"""

# ## Importing libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
"""## Загрузка данных"""
# Read the training and test tables from their parquet files (in that order).
train_df, test_df = map(
    pd.read_parquet,
    ('datasets/train.parquet', 'datasets/test.parquet'),
)
"""## Удаление отдельных таргетов и использование только total_target"""
# Only total_target is used as the label: discard the row identifier and the
# two individual target columns from the training frame.
train_df = train_df.drop(columns=["id", "target_1", "target_2"])
"""## Преобразование категориальных признаков"""
# Columns that are treated as categorical features.
cat_cols = [
    'channel_code',
    'city',
    'city_type',
    'index_city_code',
    'ogrn_month',
    'ogrn_year',
    'branch_code',
    'okved',
    'segment',
]

# Cast those columns to the pandas 'category' dtype in both frames.
for frame in (train_df, test_df):
    frame[cat_cols] = frame[cat_cols].astype('category')
"""## Создание новых признаков"""
# Engineered feature: element-wise sum of the 'max_end_fact_fin_deals' and
# 'min_end_fact_fin_deals' columns, added identically to train and test.
# NOTE(review): the feature name reads like "max/min" combined; the code adds
# the two columns -- confirm that a sum (not a difference) is intended.
for dataset in (train_df, test_df):
    dataset['max_min_end_fact_fin_deals'] = (
        dataset['max_end_fact_fin_deals'] + dataset['min_end_fact_fin_deals']
    )
"""## Разбиение на train и validation с использованием StratifiedKFold"""
# Separate the feature matrix from the label column.
X = train_df.drop(columns="total_target")
y = train_df["total_target"]

# Hold out 20% of the rows for validation with a fixed seed.
# stratify=y keeps the class proportions of total_target the same in the
# train and validation parts, which is what the section heading promises and
# what makes the validation ROC AUC comparable between runs on skewed labels.
x_train, x_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)
"""## Тренировка модели LGBMClassifier с параметрами"""
# Hyper-parameters for the LightGBM classifier (fixed seed for repeatability).
lgbm_params = {
    "verbosity": -1,
    "random_state": 42,
    "n_estimators": 1000,
    "max_depth": 8,
    "learning_rate": 0.05,
}

# Build the classifier and fit it on the training split.
model = LGBMClassifier(**lgbm_params)
model.fit(x_train, y_train)
"""## Прогнозирование и ROC_AUC Score"""
# Score the validation split: take column 1 of predict_proba, i.e. the
# predicted probability of the second class.
y_pred = model.predict_proba(x_val)[:, 1]

# A bare expression is only echoed inside a notebook; in a plain script its
# value is discarded, so bind the metric to a name and print it explicitly.
val_roc_auc = roc_auc_score(y_val, y_pred)
print(f"Validation ROC AUC: {val_roc_auc:.5f}")
"""## Выгрузка результатов"""
# Select the test features by the training column names, so the model is
# given exactly the columns it was fitted on and in the same order (this also
# leaves out "id", which is not a training feature).
test_features = test_df[X.columns]
test_score = model.predict_proba(test_features)[:, 1]

# Fill the submission template with the predicted scores.
# NOTE(review): this assumes the rows of sample_submission.csv are in the same
# order as the rows of test_df -- confirm, or join on the id column instead.
sample_submission_df = pd.read_csv("sample_submission.csv")
sample_submission_df["score"] = test_score

# Show a preview explicitly; a bare .head() call prints nothing in a script.
print(sample_submission_df.head())

sample_submission_df.to_csv("my_submission.csv", index=False)