65
65
"cell_type" : " markdown" ,
66
66
"metadata" : {},
67
67
"source" : [
68
- " #### Let's train CatBoost on clear data and take a look at the quality: "
68
+ " #### Let's train CatBoost on clean data and take a look at the quality. We set a small learning rate to avoid overfitting when we start removing noisy objects. "
69
69
]
70
70
},
71
71
{
77
77
"name" : " stdout" ,
78
78
"output_type" : " stream" ,
79
79
"text" : [
80
- " 0.2157984851490331 \n "
80
+ " 0.22947301323494568 \n "
81
81
]
82
82
}
83
83
],
84
84
"source" : [
85
- " cb = CatBoost({'iterations': 100, 'verbose': False, 'random_seed': 42})\n " ,
85
+ " cb = CatBoost({'iterations': 100, 'verbose': False, 'random_seed': 42, 'learning_rate': 0.001 })\n " ,
86
86
" cb.fit(train_pool);\n " ,
87
87
" print(cb.eval_metrics(validation_pool, ['RMSE'])['RMSE'][-1])"
88
88
]
124
124
"name" : " stdout" ,
125
125
"output_type" : " stream" ,
126
126
"text" : [
127
- " 0.25915746122622113 \n "
127
+ " 0.24770929523786442 \n "
128
128
]
129
129
}
130
130
],
168
168
},
169
169
{
170
170
"cell_type" : " code" ,
171
- "execution_count" : 8 ,
171
+ "execution_count" : 7 ,
172
172
"metadata" : {},
173
173
"outputs" : [
174
174
{
175
175
"name" : " stdout" ,
176
176
"output_type" : " stream" ,
177
177
"text" : [
178
- " RMSE on validation datset when 0 harmful objects from train are dropped: 0.25915746122622113 \n " ,
179
- " RMSE on validation datset when 250 harmful objects from train are dropped: 0.25601149050939825 \n " ,
180
- " RMSE on validation datset when 500 harmful objects from train are dropped: 0.25158044983631966 \n " ,
181
- " RMSE on validation datset when 750 harmful objects from train are dropped: 0.24570533776587475 \n " ,
182
- " RMSE on validation datset when 1000 harmful objects from train are dropped: 0.24171376432589384 \n " ,
183
- " RMSE on validation datset when 1250 harmful objects from train are dropped: 0.23716221792112202 \n " ,
184
- " RMSE on validation datset when 1500 harmful objects from train are dropped: 0.23352830055657348 \n " ,
185
- " RMSE on validation datset when 1750 harmful objects from train are dropped: 0.23035731488436903 \n " ,
186
- " RMSE on validation datset when 2000 harmful objects from train are dropped: 0.2275943109556251 \n "
178
+ " RMSE on validation datset when 0 harmful objects from train are dropped: 0.24770929523786442 \n " ,
179
+ " RMSE on validation datset when 250 harmful objects from train are dropped: 0.2447175042288005 \n " ,
180
+ " RMSE on validation datset when 500 harmful objects from train are dropped: 0.24225895802476696 \n " ,
181
+ " RMSE on validation datset when 750 harmful objects from train are dropped: 0.23953255257505965 \n " ,
182
+ " RMSE on validation datset when 1000 harmful objects from train are dropped: 0.23730021406692955 \n " ,
183
+ " RMSE on validation datset when 1250 harmful objects from train are dropped: 0.23571326583727906 \n " ,
184
+ " RMSE on validation datset when 1500 harmful objects from train are dropped: 0.23414087500696676 \n " ,
185
+ " RMSE on validation datset when 1750 harmful objects from train are dropped: 0.23269021852578387 \n " ,
186
+ " RMSE on validation datset when 2000 harmful objects from train are dropped: 0.231598588484771 \n "
187
187
]
188
188
}
189
189
],
210
210
" \n " ,
211
211
" ||RMSE on the validation dataset|\n " ,
212
212
" |-|-|\n " ,
213
- " |Clear train dataset: | 0.215798485149 |\n " ,
214
- " |Noisy train dataset: | 0.259157461226 |\n " ,
215
- " |Purified train dataset: | 0.227594310956 |"
213
+ " |Clean train dataset: | 0.22947301323494568 |\n " ,
214
+ " |Noisy train dataset: | 0.24770929523786442 |\n " ,
215
+ " |Purified train dataset: | 0.231598588484771 |"
216
216
]
217
217
},
218
218
{
244
244
},
245
245
"nbformat" : 4 ,
246
246
"nbformat_minor" : 1
247
- }
247
+ }
0 commit comments