generated from c17hawke/dvc-project-template
-
Notifications
You must be signed in to change notification settings - Fork 2
/
params.py
104 lines (96 loc) · 2.64 KB
/
params.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import numpy as np
# This contains params to be used by the stages to train or predict
INPUT_FILE_NAME_REGEX = "['fraudDetection']+['\_'']+[\d_]+[\d]+\.csv"
TARGET_COLUMN_NAME = "fraud_reported_1" # after one hot encoding the value column name chnage to fraud_reported_1
CLUSTER_COLUMN_NAME = "cluster"
# list of columns where scaling is needed
LIST_OF_COLUMNS_FOR_SCALING = [
"months_as_customer",
"policy_deductable",
"umbrella_limit",
"capital-gains",
"capital-loss",
"incident_hour_of_the_day",
"number_of_vehicles_involved",
"bodily_injuries",
"witnesses",
"injury_claim",
"property_claim",
"vehicle_claim",
]
# list of columns with string datatype variables [ DATA TRANSFORMATION ]
COLUMNS_WITH_STRING_DATATYPE = [
"policy_bind_date",
"policy_state",
"policy_csl",
"insured_sex",
"insured_education_level",
"insured_occupation",
"insured_hobbies",
"insured_relationship",
"incident_state",
"incident_date",
"incident_type",
"collision_type",
"incident_severity",
"authorities_contacted",
"incident_city",
"incident_location",
"property_damage",
"police_report_available",
"auto_make",
"auto_model",
"fraud_reported",
]
COLUMNS_TO_IGNORE_FOR_MODEL_TRAINING = [
"policy_number",
"policy_bind_date",
"policy_state",
"insured_zip",
"incident_location",
"incident_date",
"incident_state",
"incident_city",
"insured_hobbies",
"auto_make",
"auto_model",
"auto_year",
"age",
"total_claim_amount",
]
MAPPING_CATEGORICAL_COLUMNS = {
"policy_csl": {"100/300": 1, "250/500": 2.5, "500/1000": 5},
"insured_education_level": {
"JD": 1,
"High School": 2,
"College": 3,
"Masters": 4,
"Associate": 5,
"MD": 6,
"PhD": 7,
},
"incident_severity": {
"Trivial Damage": 1,
"Minor Damage": 2,
"Major Damage": 3,
"Total Loss": 4,
},
"insured_sex": {"FEMALE": 0, "MALE": 1},
"property_damage": {"NO": 0, "YES": 1},
"police_report_available": {"NO": 0, "YES": 1},
"fraud_reported": {"N": 0, "Y": 1},
}
# PARAMS FOR HYPER_PARAMETER_TUNING
SVM_HYPER_PARAMS = {
"kernel": ["rbf", "sigmoid"],
"C": [0.1, 0.5, 1.0],
"random_state": [0, 100, 200, 300],
}
XGBOOST_HYPER_PARAMS = {
"max_depth": [3, 5, 6, 10, 15, 20],
"learning_rate": [0.01, 0.1, 0.2, 0.3, 0.4, 0.5],
"subsample": [0.5, 0.6, 0.7, 0.8, 0.9],
"colsample_bytree": [0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
"colsample_bylevel": [0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
"n_estimators": [100, 200, 300, 400, 450, 500, 540, 580, 600],
}